| // SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense |
| |
| // Allocation and initialization of tiles for paths. |
| |
| #version 450 |
| #extension GL_GOOGLE_include_directive : enable |
| |
| #include "mem.h" |
| #include "setup.h" |
| |
| #define LG_TILE_ALLOC_WG (7 + LG_WG_FACTOR) /* log2 of the workgroup size */ |
| #define TILE_ALLOC_WG (1 << LG_TILE_ALLOC_WG) /* invocations per workgroup; also the length of sh_tile_count */ |
| |
| layout(local_size_x = TILE_ALLOC_WG, local_size_y = 1) in; |
| |
| layout(set = 0, binding = 1) readonly buffer ConfigBuf { /* read-only configuration block */ |
| Config conf; |
| }; |
| |
| layout(binding = 2) readonly buffer SceneBuf { /* read-only scene data, addressed as 32-bit words */ |
| uint[] scene; /* main() reads one draw tag per element from here */ |
| }; |
| |
| #include "drawtag.h" |
| #include "tile.h" |
| |
| // Scale factors (1 / tile size in pixels) used to convert bbox coordinates to tile coordinates. |
| #define SX (1.0 / float(TILE_WIDTH_PX)) |
| #define SY (1.0 / float(TILE_HEIGHT_PX)) |
| |
| shared uint sh_tile_count[TILE_ALLOC_WG]; /* per-invocation tile counts; main() turns these into an inclusive prefix sum in place */ |
| shared uint sh_tile_offset; /* result of the workgroup's single malloc_stage call, written by the last invocation */ |
| |
| // Fetch the bounding box stored for draw object `draw_ix`. |
| // |
| // The box occupies four consecutive words of `memory`, starting at |
| // (conf.draw_bbox_alloc.offset >> 2) + 4 * draw_ix, and each word is |
| // reinterpreted bit-for-bit as a float. The result is (x0, y0, x1, y1). |
| // NOTE(review): the >> 2 presumably turns a byte offset into a word index; confirm against mem.h. |
| vec4 load_draw_bbox(uint draw_ix) { |
|     uint first_word = (conf.draw_bbox_alloc.offset >> 2) + 4 * draw_ix; |
|     uvec4 raw_bits = uvec4( |
|         memory[first_word], |
|         memory[first_word + 1], |
|         memory[first_word + 2], |
|         memory[first_word + 3] |
|     ); |
|     // Component-wise bit reinterpretation; no numeric conversion takes place. |
|     return uintBitsToFloat(raw_bits); |
| } |
| |
| // Entry point; one invocation per element (draw object). |
| // |
| // Each invocation converts its element's bounding box into a rectangle of |
| // tiles clamped to the tile grid. The workgroup then computes an inclusive |
| // prefix sum of the per-element tile counts, its last invocation allocates |
| // storage for all of the workgroup's tiles in one malloc_stage call, and |
| // finally every invocation writes its Path record and helps zero the newly |
| // allocated tile memory. |
| void main() { |
|     // Do nothing unless the dependency check for the binning stage passes. |
|     if (!check_deps(STAGE_BINNING)) { |
|         return; |
|     } |
|     uint th_ix = gl_LocalInvocationID.x; |
|     uint element_ix = gl_GlobalInvocationID.x; |
|     // At the moment, element_ix == path_ix. The clip-intersected bounding boxes |
|     // for elements (draw objects) are computed in the binning stage, but at some |
|     // point we'll probably want to break that correspondence. Tiles should be |
|     // allocated for paths, not draw objs. EndClip doesn't need an allocation. |
|     PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size); |
|     uint drawtag_base = conf.drawtag_offset >> 2; |
| |
|     // Invocations past the last element keep the Nop tag, so they contribute |
|     // zero tiles but still take part in the barriers below. |
|     uint drawtag = Drawtag_Nop; |
|     if (element_ix < conf.n_elements) { |
|         drawtag = scene[drawtag_base + element_ix]; |
|     } |
|     int x0 = 0, y0 = 0, x1 = 0, y1 = 0; |
|     // Allocate an empty path for EndClip; at some point we'll change |
|     // this to be per path rather than per draw object. |
|     if (drawtag != Drawtag_Nop && drawtag != Drawtag_EndClip) { |
|         vec4 bbox = load_draw_bbox(element_ix); |
|         // Round outward so that every tile touched by the bbox is covered. |
|         x0 = int(floor(bbox.x * SX)); |
|         y0 = int(floor(bbox.y * SY)); |
|         x1 = int(ceil(bbox.z * SX)); |
|         y1 = int(ceil(bbox.w * SY)); |
|     } |
|     x0 = clamp(x0, 0, int(conf.width_in_tiles)); |
|     y0 = clamp(y0, 0, int(conf.height_in_tiles)); |
|     x1 = clamp(x1, 0, int(conf.width_in_tiles)); |
|     y1 = clamp(y1, 0, int(conf.height_in_tiles)); |
|     // Guard against an inverted bbox (x1 < x0 or y1 < y0). Without this, the |
|     // signed product below is either negative, wrapping to a huge unsigned |
|     // tile count that poisons the prefix sum and the allocation size, or |
|     // positive for a rectangle with negative extents. Well-formed boxes are |
|     // unaffected; degenerate ones get zero tiles. |
|     x1 = max(x1, x0); |
|     y1 = max(y1, y0); |
| |
|     Path path; |
|     path.bbox = uvec4(x0, y0, x1, y1); |
|     // Both extents are non-negative here, so the unsigned conversion is exact. |
|     uint tile_count = uint(x1 - x0) * uint(y1 - y0); |
| |
|     sh_tile_count[th_ix] = tile_count; |
|     uint total_tile_count = tile_count; |
|     // Inclusive prefix sum of sh_tile_count in LG_TILE_ALLOC_WG doubling steps. |
|     // The first barrier makes the previous step's writes visible before they |
|     // are read; the second keeps any invocation from overwriting a slot that |
|     // another invocation has not read yet. |
|     for (uint i = 0; i < LG_TILE_ALLOC_WG; i++) { |
|         barrier(); |
|         if (th_ix >= (1u << i)) { |
|             total_tile_count += sh_tile_count[th_ix - (1u << i)]; |
|         } |
|         barrier(); |
|         sh_tile_count[th_ix] = total_tile_count; |
|     } |
|     // After the scan the last invocation holds the workgroup total, so it |
|     // performs the single allocation on behalf of everyone. |
|     if (th_ix == TILE_ALLOC_WG - 1) { |
|         sh_tile_offset = malloc_stage(total_tile_count * Tile_size, conf.mem_size, STAGE_TILE_ALLOC); |
|     } |
|     barrier(); |
|     // sh_tile_offset is shared, so the whole workgroup takes the same branch. |
|     uint offset_start = sh_tile_offset; |
|     if (offset_start == MALLOC_FAILED) { |
|         return; |
|     } |
| |
|     if (element_ix < conf.n_elements) { |
|         // Exclusive prefix: the number of tiles claimed by lower-indexed invocations. |
|         uint tile_subix = th_ix > 0 ? sh_tile_count[th_ix - 1] : 0; |
|         path.tiles = TileRef(offset_start + Tile_size * tile_subix); |
|         Path_write(conf.tile_alloc, path_ref, path); |
|     } |
| |
|     // Zero out allocated tiles efficiently: the invocations cooperatively clear |
|     // the allocation one word at a time, each striding by the workgroup size. |
|     // NOTE(review): Tile_size / 4 presumably converts a byte size to 32-bit words; confirm in tile.h. |
|     uint total_count = sh_tile_count[TILE_ALLOC_WG - 1] * (Tile_size / 4); |
|     uint start_ix = offset_start >> 2; |
|     for (uint i = th_ix; i < total_count; i += TILE_ALLOC_WG) { |
|         memory[start_ix + i] = 0; |
|     } |
| } |