| // SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense |
| |
| // The leaf scan pass for draw tag scan implemented as a tree reduction. |
| // This stage can be fused with its consumer but is separate now. |
| |
| #version 450 |
| #extension GL_GOOGLE_include_directive : enable |
| |
| #include "mem.h" |
| #include "setup.h" |
| |
// Number of consecutive draw tags each invocation scans sequentially.
#define N_ROWS 8
// log2 of the workgroup size. LG_WG_FACTOR is not defined in this file
// (presumably it comes from one of the included headers - confirm).
#define LG_WG_SIZE (7 + LG_WG_FACTOR)
// Invocations per workgroup.
#define WG_SIZE (1 << LG_WG_SIZE)
// Draw tags covered by a single workgroup.
#define PARTITION_SIZE (WG_SIZE * N_ROWS)

// One-dimensional dispatch: WG_SIZE invocations along x.
layout(local_size_x = WG_SIZE, local_size_y = 1) in;
| |
// Read-only configuration; main() takes the buffer offsets it needs from
// here and shifts them right by two before indexing 32-bit words.
layout(binding = 1) readonly buffer ConfigBuf {
    Config conf;
};

// Read-only scene data, addressed as an array of 32-bit words.
layout(binding = 2) readonly buffer SceneBuf {
    uint scene[];
};
| |
| #include "scene.h" |
| #include "tile.h" |
| #include "drawtag.h" |
| #include "blend.h" |
| |
// The scan in this file is written against the generic name `Monoid`.
#define Monoid DrawMonoid

// Per-workgroup prefixes: main() reads entry (gl_WorkGroupID.x - 1) as the
// aggregate of everything before the current workgroup.
// NOTE(review): presumably filled in by an earlier reduction pass - confirm.
layout(set = 0, binding = 3) readonly buffer ParentBuf {
    Monoid parent[];
};

// Workgroup-shared scratch holding one aggregate per invocation during the scan.
shared Monoid sh_scratch[WG_SIZE];
| |
// Leaf pass of the draw-tag scan.
//
// Each invocation handles N_ROWS consecutive draw tags:
//   1. map every tag to a Monoid and scan the N_ROWS values sequentially;
//   2. scan the per-invocation aggregates across the workgroup through
//      shared memory;
//   3. combine the parent-buffer prefix of the previous workgroup with the
//      prefix of the preceding invocations to obtain the exclusive prefix
//      of each element, then write
//        - the draw monoid (4 words per element),
//        - the per-object draw info (line width, gradient parameters),
//        - the clip stream entries for BeginClip / EndClip.
//
// NOTE(review): nothing here bounds-checks `ix` against the number of draw
// objects; presumably the buffers are sized to whole partitions - confirm.
void main() {
    // Inclusive scan of the N_ROWS elements owned by this invocation.
    Monoid local[N_ROWS];

    // Index of the first draw tag handled by this invocation.
    uint ix = gl_GlobalInvocationID.x * N_ROWS;
    uint drawtag_base = conf.drawtag_offset >> 2;

    // Phase 1: sequential inclusive scan over this invocation's elements.
    uint tag_word = scene[drawtag_base + ix];
    Monoid agg = map_tag(tag_word);
    local[0] = agg;
    for (uint i = 1; i < N_ROWS; i++) {
        tag_word = scene[drawtag_base + ix + i];
        agg = combine_draw_monoid(agg, map_tag(tag_word));
        local[i] = agg;
    }

    // Phase 2: inclusive scan of the per-invocation aggregates across the
    // workgroup. Each round combines with the value (1 << i) slots to the
    // left; the barriers separate the reads of a round from its writes.
    sh_scratch[gl_LocalInvocationID.x] = agg;
    for (uint i = 0; i < LG_WG_SIZE; i++) {
        barrier();
        if (gl_LocalInvocationID.x >= (1u << i)) {
            Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i)];
            agg = combine_draw_monoid(other, agg);
        }
        barrier();
        sh_scratch[gl_LocalInvocationID.x] = agg;
    }
    // Make the final round's writes visible before neighbours read them.
    barrier();

    // Phase 3: exclusive prefix for this invocation's first element =
    // (aggregate of all previous workgroups) + (aggregate of all previous
    // invocations in this workgroup).
    Monoid row = draw_monoid_identity();
    if (gl_WorkGroupID.x > 0) {
        row = parent[gl_WorkGroupID.x - 1];
    }
    if (gl_LocalInvocationID.x > 0) {
        row = combine_draw_monoid(row, sh_scratch[gl_LocalInvocationID.x - 1]);
    }

    uint drawdata_base = conf.drawdata_offset >> 2;
    uint drawinfo_base = conf.drawinfo_alloc.offset >> 2;
    // Four output words per element, starting at this invocation's first element.
    uint out_base = (conf.drawmonoid_alloc.offset >> 2) + ix * 4;
    uint clip_out_base = conf.clip_alloc.offset >> 2;

    for (uint i = 0; i < N_ROWS; i++) {
        Monoid m = row;
        if (i > 0) {
            m = combine_draw_monoid(m, local[i - 1]);
        }
        // m now holds the exclusive scan of the draw monoid for element i.
        memory[out_base + i * 4] = m.path_ix;
        memory[out_base + i * 4 + 1] = m.clip_ix;
        memory[out_base + i * 4 + 2] = m.scene_offset;
        memory[out_base + i * 4 + 3] = m.info_offset;

        // Word offset of this object's draw data in the scene buffer.
        uint dd = drawdata_base + (m.scene_offset >> 2);
        // Word offset of this object's draw info in the memory buffer.
        uint di = drawinfo_base + (m.info_offset >> 2);

        // For compatibility, we generate the same per-object info as the
        // old pipeline. Going forward the intent is to drop that and have
        // later stages read scene + bbox etc. directly.
        tag_word = scene[drawtag_base + ix + i];
        if (tag_word == Drawtag_FillColor || tag_word == Drawtag_FillLinGradient || tag_word == Drawtag_FillRadGradient ||
            tag_word == Drawtag_FillImage || tag_word == Drawtag_BeginClip) {
            // Six words per path. Only word 4 (line width as float bits) and
            // word 5 (transform index) are consumed here; words 0..3 are not
            // needed by this stage, so they are not loaded.
            uint bbox_offset = (conf.path_bbox_alloc.offset >> 2) + 6 * m.path_ix;
            float linewidth = uintBitsToFloat(memory[bbox_offset + 4]);

            // The transform is only loaded when something below reads it:
            // `mat` for a non-negative line width or a gradient, `translate`
            // for gradients only.
            vec4 mat;
            vec2 translate;
            if (linewidth >= 0.0 || tag_word == Drawtag_FillLinGradient || tag_word == Drawtag_FillRadGradient) {
                uint trans_ix = memory[bbox_offset + 5];
                // Six words per transform: 2x2 matrix followed by translation.
                uint t = (conf.trans_offset >> 2) + trans_ix * 6;
                mat = uintBitsToFloat(uvec4(scene[t], scene[t + 1], scene[t + 2], scene[t + 3]));
                if (tag_word == Drawtag_FillLinGradient || tag_word == Drawtag_FillRadGradient) {
                    translate = uintBitsToFloat(uvec2(scene[t + 4], scene[t + 5]));
                }
            }
            if (linewidth >= 0.0) {
                // Scale by sqrt(|det|) of the transform.
                // TODO: need to deal with anisotropic case
                linewidth *= sqrt(abs(mat.x * mat.w - mat.y * mat.z));
            }

            switch (tag_word) {
            case Drawtag_FillColor:
            case Drawtag_FillImage:
                memory[di] = floatBitsToUint(linewidth);
                break;
            case Drawtag_FillLinGradient: {
                memory[di] = floatBitsToUint(linewidth);
                vec2 p0 = uintBitsToFloat(uvec2(scene[dd + 1], scene[dd + 2]));
                vec2 p1 = uintBitsToFloat(uvec2(scene[dd + 3], scene[dd + 4]));
                // Apply the affine transform to both end points.
                p0 = mat.xy * p0.x + mat.zw * p0.y + translate;
                p1 = mat.xy * p1.x + mat.zw * p1.y + translate;
                // Line equation coefficients such that
                // line_x * x + line_y * y + line_c is 0 at p0 and 1 at p1.
                // NOTE(review): p0 == p1 divides by zero here; presumably
                // degenerate gradients are filtered out earlier - confirm.
                vec2 dxy = p1 - p0;
                float scale = 1.0 / (dxy.x * dxy.x + dxy.y * dxy.y);
                float line_x = dxy.x * scale;
                float line_y = dxy.y * scale;
                float line_c = -(p0.x * line_x + p0.y * line_y);
                memory[di + 1] = floatBitsToUint(line_x);
                memory[di + 2] = floatBitsToUint(line_y);
                memory[di + 3] = floatBitsToUint(line_c);
                break;
            }
            case Drawtag_FillRadGradient: {
                vec2 p0 = uintBitsToFloat(uvec2(scene[dd + 1], scene[dd + 2]));
                vec2 p1 = uintBitsToFloat(uvec2(scene[dd + 3], scene[dd + 4]));
                float r0 = uintBitsToFloat(scene[dd + 5]);
                float r1 = uintBitsToFloat(scene[dd + 6]);
                // Inverse of the 2x2 matrix, and the translation pushed
                // through it and offset by p0.
                float inv_det = 1.0 / (mat.x * mat.w - mat.y * mat.z);
                vec4 inv_mat = inv_det * vec4(mat.w, -mat.y, -mat.z, mat.x);
                vec2 inv_tr = inv_mat.xz * translate.x + inv_mat.yw * translate.y;
                inv_tr += p0;
                // Precomputed two-point gradient coefficients.
                // NOTE(review): r1 == r0 divides by zero; presumably handled
                // by whoever encodes the scene - confirm.
                vec2 center1 = p1 - p0;
                float rr = r1 / (r1 - r0);
                float rainv = rr / (r1 * r1 - dot(center1, center1));
                vec2 c1 = center1 * rainv;
                float ra = rr * rainv;
                float roff = rr - 1.0;
                memory[di] = floatBitsToUint(linewidth);
                memory[di + 1] = floatBitsToUint(inv_mat.x);
                memory[di + 2] = floatBitsToUint(inv_mat.y);
                memory[di + 3] = floatBitsToUint(inv_mat.z);
                memory[di + 4] = floatBitsToUint(inv_mat.w);
                memory[di + 5] = floatBitsToUint(inv_tr.x);
                memory[di + 6] = floatBitsToUint(inv_tr.y);
                memory[di + 7] = floatBitsToUint(c1.x);
                memory[di + 8] = floatBitsToUint(c1.y);
                memory[di + 9] = floatBitsToUint(ra);
                memory[di + 10] = floatBitsToUint(roff);
                break;
            }
            case Drawtag_BeginClip:
                // No draw info is written for a clip.
                break;
            }
        }

        // Generate clip stream: BeginClip records its path index, EndClip
        // records the bitwise complement of the element's global index.
        if (tag_word == Drawtag_BeginClip || tag_word == Drawtag_EndClip) {
            uint path_ix = ~(ix + i);
            if (tag_word == Drawtag_BeginClip) {
                path_ix = m.path_ix;
            }
            memory[clip_out_base + m.clip_ix] = path_ix;
        }
    }
}