| struct Alloc /* Single-field wrapper around a uint offset; taken as a parameter by touch_mem, read_mem and write_mem. */ |
| { |
| uint offset; /* Not read by the memory helpers visible in this file; comp_main fills it from _639 loads. */ |
| }; |
| |
| struct TagMonoid /* Five counters derived from tag bytes; reduce_tag builds one per tag word and combine_tag_monoid adds them field by field. */ |
| { |
| uint trans_ix; /* reduce_tag: number of tag bytes with bit 0x20 set */ |
| uint linewidth_ix; /* reduce_tag: number of tag bytes with bit 0x40 set */ |
| uint pathseg_ix; /* reduce_tag: number of tag bytes whose low two bits are non-zero */ |
| uint path_ix; /* reduce_tag: number of tag bytes with bit 0x10 set */ |
| uint pathseg_offset; /* reduce_tag: summed per-byte word counts; comp_main adds it to a word index into _574 */ |
| }; |
| |
| struct TransformSegRef /* Reference to a TransformSeg record, consumed by TransformSeg_read. */ |
| { |
| uint offset; /* Byte offset: TransformSeg_read shifts it right by 2 to obtain a 32-bit word index. */ |
| }; |
| |
| struct TransformSeg /* Transform record that TransformSeg_read assembles from six consecutive 32-bit words. */ |
| { |
| float4 mat; /* comp_main maps a point p as mat.xy * p.x + mat.zw * p.y + translate */ |
| float2 translate; /* added after the matrix part */ |
| }; |
| |
| struct PathCubicRef /* Reference to a PathCubic record, consumed by PathCubic_write. */ |
| { |
| uint offset; /* Byte offset: PathCubic_write shifts it right by 2 to obtain a 32-bit word index. */ |
| }; |
| |
| struct PathCubic /* Cubic segment record; PathCubic_write stores it as 12 consecutive 32-bit words in field order. */ |
| { |
| float2 p0; /* p0..p3 are filled by comp_main after it has transformed the input points */ |
| float2 p1; |
| float2 p2; |
| float2 p3; |
| uint path_ix; /* comp_main stores its running tm.path_ix here */ |
| uint trans_ix; /* comp_main stores gl_GlobalInvocationID.x * 4 plus the loop index here */ |
| float2 stroke; /* stays zero unless the line width read by comp_main is >= 0 */ |
| }; |
| |
| struct PathSegRef /* Reference to a path segment slot, consumed by PathSeg_Cubic_write. */ |
| { |
| uint offset; /* Byte offset: a tag word is written at offset >> 2 and the PathCubic payload starts at offset + 4. */ |
| }; |
| |
| struct Monoid /* Bounding-box accumulator merged by combine_monoid. */ |
| { |
| float4 bbox; /* comp_main builds it as (min.x, min.y, max.x, max.y) */ |
| uint flags; /* combine_monoid tests bits 1u and 2u; comp_main seeds bit 1u from bit 4 of a tag byte */ |
| }; |
| |
| struct Config /* Declared but never referenced by name in the visible code. NOTE(review): if this is laid out as packed 4-byte fields, the raw _639.Load byte offsets used in comp_main (28, 36, 40, 60, 64, 68) would line up with pathseg_alloc, trans_alloc, bbox_alloc, linewidth_offset, pathtag_offset and pathseg_offset - confirm. */ |
| { |
| uint n_elements; |
| uint n_pathseg; |
| uint width_in_tiles; |
| uint height_in_tiles; |
| Alloc tile_alloc; |
| Alloc bin_alloc; |
| Alloc ptcl_alloc; |
| Alloc pathseg_alloc; |
| Alloc anno_alloc; |
| Alloc trans_alloc; |
| Alloc bbox_alloc; |
| Alloc drawmonoid_alloc; |
| uint n_trans; |
| uint n_path; |
| uint trans_offset; |
| uint linewidth_offset; |
| uint pathtag_offset; |
| uint pathseg_offset; |
| }; |
| |
| static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); /* same dimensions as the numthreads attribute on main */ |
| |
| static const TagMonoid _135 = { 0u, 0u, 0u, 0u, 0u }; /* all-zero TagMonoid, returned by tag_monoid_identity */ |
| static const Monoid _567 = { 0.0f.xxxx, 0u }; /* zero bbox and zero flags, returned by monoid_identity */ |
| |
| RWByteAddressBuffer _111 : register(u0, space0); /* read/write buffer behind read_mem, write_mem and the bbox stores/atomics in comp_main; every access adds 8 bytes to word_index * 4 */ |
| ByteAddressBuffer _574 : register(t2, space0); /* read-only input: comp_main loads tag words, point data and line widths from it */ |
| ByteAddressBuffer _639 : register(t1, space0); /* read-only; loaded only at the fixed byte offsets 28, 36, 40, 60, 64 and 68 */ |
| ByteAddressBuffer _709 : register(t3, space0); /* read-only 20-byte records of five uints; comp_main reads record (gl_WorkGroupID.x - 1) */ |
| |
| static uint3 gl_WorkGroupID; /* set by main from the SV_GroupID input */ |
| static uint3 gl_LocalInvocationID; /* set by main from the SV_GroupThreadID input */ |
| static uint3 gl_GlobalInvocationID; /* set by main from the SV_DispatchThreadID input */ |
| struct SPIRV_Cross_Input /* Parameter block of main; each member is copied into the file-level static of the same name. */ |
| { |
| uint3 gl_WorkGroupID : SV_GroupID; |
| uint3 gl_LocalInvocationID : SV_GroupThreadID; |
| uint3 gl_GlobalInvocationID : SV_DispatchThreadID; |
| }; |
| |
| groupshared TagMonoid sh_tag[256]; /* one slot per invocation; scratch for the TagMonoid scan at the start of comp_main */ |
| groupshared Monoid sh_scratch[256]; /* one slot per invocation; scratch for the bbox Monoid scan later in comp_main */ |
| |
| // Summarises one 32-bit tag word (four tag bytes) as a TagMonoid. |
| TagMonoid reduce_tag(uint tag_word) |
| { |
|     // Low two bits of each of the four bytes. |
|     uint seg_bits = tag_word & 0x03030303u; |
|     // Bits 2 and 3 of each byte, moved down to bit 0 of that byte. |
|     uint bit2 = (tag_word >> 2u) & 0x01010101u; |
|     uint bit3 = (tag_word >> 3u) & 0x01010101u; |
| |
|     TagMonoid result; |
|     // For v in 0..3, (v * 7) has bit 2 set exactly when v != 0, so this counts the non-zero bytes. |
|     result.pathseg_ix = countbits((seg_bits * 7u) & 0x04040404u); |
|     result.linewidth_ix = countbits(tag_word & 0x40404040u); |
|     result.path_ix = countbits(tag_word & 0x10101010u); |
|     result.trans_ix = countbits(tag_word & 0x20202020u); |
| |
|     // Per-byte count, doubled in bytes where bit 3 is set (the mask is 0 or 15 per byte). |
|     uint per_byte = seg_bits + bit2; |
|     uint words = per_byte + (per_byte & (bit3 * 15u)); |
|     // Fold the four byte lanes together so the lowest byte holds their sum. |
|     words += words >> 8u; |
|     words += words >> 16u; |
|     result.pathseg_offset = words & 0xFFu; |
|     return result; |
| } |
| |
| // Adds two TagMonoid values field by field. |
| TagMonoid combine_tag_monoid(TagMonoid a, TagMonoid b) |
| { |
|     // Initialiser order follows the struct: trans_ix, linewidth_ix, pathseg_ix, path_ix, pathseg_offset. |
|     TagMonoid sum = { |
|         a.trans_ix + b.trans_ix, |
|         a.linewidth_ix + b.linewidth_ix, |
|         a.pathseg_ix + b.pathseg_ix, |
|         a.path_ix + b.path_ix, |
|         a.pathseg_offset + b.pathseg_offset |
|     }; |
|     return sum; |
| } |
| |
| TagMonoid tag_monoid_identity() /* Returns the all-zero TagMonoid constant _135, which leaves the other operand of combine_tag_monoid unchanged. */ |
| { |
| return _135; |
| } |
| |
| // Reads a point stored as two consecutive 32-bit floats (x at word ix, y at word ix + 1) from _574. |
| float2 read_f32_point(uint ix) |
| { |
|     uint x_bits = _574.Load(ix * 4u); |
|     uint y_bits = _574.Load((ix + 1u) * 4u); |
|     return float2(asfloat(x_bits), asfloat(y_bits)); |
| } |
| |
| // Reads a point packed into one 32-bit word of _574: x is the signed low half, y the signed high half. |
| float2 read_i16_point(uint ix) |
| { |
|     uint word = _574.Load(ix * 4u); |
|     // Move the low half to the top, then shift back down as a signed value to sign-extend it. |
|     int x_val = int(word << 16u) >> 16; |
|     // A signed right shift keeps the sign of the high half. |
|     int y_val = int(word) >> 16; |
|     return float2(float(x_val), float(y_val)); |
| } |
| |
| bool touch_mem(Alloc alloc, uint offset) /* Access-check stub: ignores both arguments and always returns true, so read_mem/write_mem never take their early-out path. */ |
| { |
| return true; |
| } |
| |
| // Loads the 32-bit word at word index `offset` from _111, or returns 0 when touch_mem rejects the access. |
| uint read_mem(Alloc alloc, uint offset) |
| { |
|     if (touch_mem(alloc, offset)) |
|     { |
|         // Word index to byte address, plus the 8 bytes added on every _111 access. |
|         return _111.Load(offset * 4u + 8u); |
|     } |
|     return 0u; |
| } |
| |
| // Reads a TransformSeg from six consecutive words starting at byte offset ref.offset. |
| TransformSeg TransformSeg_read(Alloc a, TransformSegRef ref) |
| { |
|     uint base = ref.offset >> 2u; |
| |
|     // Words 0..3 hold the matrix, words 4..5 the translation. |
|     uint4 mat_bits = uint4( |
|         read_mem(a, base + 0u), |
|         read_mem(a, base + 1u), |
|         read_mem(a, base + 2u), |
|         read_mem(a, base + 3u)); |
|     uint2 translate_bits = uint2( |
|         read_mem(a, base + 4u), |
|         read_mem(a, base + 5u)); |
| |
|     TransformSeg seg; |
|     seg.mat = asfloat(mat_bits); |
|     seg.translate = asfloat(translate_bits); |
|     return seg; |
| } |
| |
| // Stores `val` at word index `offset` in _111; does nothing when touch_mem rejects the access. |
| void write_mem(Alloc alloc, uint offset, uint val) |
| { |
|     if (touch_mem(alloc, offset)) |
|     { |
|         // Word index to byte address, plus the 8 bytes added on every _111 access. |
|         _111.Store(offset * 4u + 8u, val); |
|     } |
| } |
| |
| // Writes a PathCubic as 12 consecutive words, in field order, starting at byte offset ref.offset. |
| void PathCubic_write(Alloc a, PathCubicRef ref, PathCubic s) |
| { |
|     uint base = ref.offset >> 2u; |
| |
|     // Control points p0..p3, x before y. |
|     write_mem(a, base + 0u, asuint(s.p0.x)); |
|     write_mem(a, base + 1u, asuint(s.p0.y)); |
|     write_mem(a, base + 2u, asuint(s.p1.x)); |
|     write_mem(a, base + 3u, asuint(s.p1.y)); |
|     write_mem(a, base + 4u, asuint(s.p2.x)); |
|     write_mem(a, base + 5u, asuint(s.p2.y)); |
|     write_mem(a, base + 6u, asuint(s.p3.x)); |
|     write_mem(a, base + 7u, asuint(s.p3.y)); |
| |
|     // Integer indices are stored as they are. |
|     write_mem(a, base + 8u, s.path_ix); |
|     write_mem(a, base + 9u, s.trans_ix); |
| |
|     // Stroke vector. |
|     write_mem(a, base + 10u, asuint(s.stroke.x)); |
|     write_mem(a, base + 11u, asuint(s.stroke.y)); |
| } |
| |
| // Writes a tagged cubic segment: one tag word at ref.offset followed by the PathCubic payload. |
| void PathSeg_Cubic_write(Alloc a, PathSegRef ref, uint flags, PathCubic s) |
| { |
|     // Tag word: `flags` in the upper 16 bits, the constant 1 in the low bits. |
|     uint tag = (flags << 16u) | 1u; |
|     write_mem(a, ref.offset >> 2u, tag); |
| |
|     // The payload starts one word (4 bytes) after the tag. |
|     PathCubicRef payload_ref; |
|     payload_ref.offset = ref.offset + 4u; |
|     PathCubic_write(a, payload_ref, s); |
| } |
| |
| // Merges two bbox Monoids, with `a` as the left-hand operand and `b` as the right-hand one. |
| // Flag bit 1u on `a` stops a.bbox from flowing into the result; flag bit 2u on `b` stops the union. |
| Monoid combine_monoid(Monoid a, Monoid b) |
| { |
|     bool a_passes = (a.flags & 1u) == 0u; |
|     bool b_is_empty = (b.bbox.z <= b.bbox.x) && (b.bbox.w <= b.bbox.y); |
|     bool a_has_area = (a.bbox.z > a.bbox.x) || (a.bbox.w > a.bbox.y); |
|     bool b_allows_union = (b.flags & 2u) == 0u; |
| |
|     Monoid merged; |
|     merged.bbox = b.bbox; |
|     if (a_passes && b_is_empty) |
|     { |
|         // b contributes no box, so a's box carries through. |
|         merged.bbox = a.bbox; |
|     } |
|     else if (a_passes && b_allows_union && a_has_area) |
|     { |
|         // Union of both boxes: component-wise min of the xy corner, max of the zw corner. |
|         merged.bbox.xy = min(a.bbox.xy, merged.bbox.xy); |
|         merged.bbox.zw = max(a.bbox.zw, merged.bbox.zw); |
|     } |
| |
|     // Keep a's bit 2u, take all of b's bits, and promote a's bit 1u into bit 2u. |
|     merged.flags = ((a.flags & 2u) | b.flags) | ((a.flags & 1u) << 1u); |
|     return merged; |
| } |
| |
| Monoid monoid_identity() /* Returns the constant _567 (zero bbox, zero flags); comp_main uses it as the prefix for the first invocation of a group. */ |
| { |
| return _567; |
| } |
| |
| // Floors x, adds a bias of 32768, clamps the result at 0 from below and converts it to uint. |
| uint round_down(float x) |
| { |
|     float biased = floor(x) + 32768.0f; |
|     return uint(max(0.0f, biased)); |
| } |
| |
| // Takes the ceiling of x, adds a bias of 32768, clamps the result at 65535 from above and converts it to uint. |
| uint round_up(float x) |
| { |
|     float biased = ceil(x) + 32768.0f; |
|     return uint(min(65535.0f, biased)); |
| } |
| |
| // Body of the compute kernel. Each invocation handles one 32-bit tag word (four tag bytes): |
| //   1. scans TagMonoids across the group to find where this invocation's data starts, |
| //   2. decodes, transforms and writes out a cubic for every segment byte, |
| //   3. scans the bbox Monoids across the group and stores or atomically merges the bounding boxes. |
| void comp_main() |
| { |
|     uint lane = gl_LocalInvocationID.x; |
|     uint elem_base = gl_GlobalInvocationID.x * 4u; |
| |
|     // One tag word per invocation (elem_base >> 2 is the invocation index again). |
|     uint tag_word = _574.Load(((_639.Load(64) >> 2u) + (elem_base >> 2u)) * 4u); |
| |
|     // ---- Group-wide scan of the tag monoid: eight doubling rounds cover 256 lanes. ---- |
|     TagMonoid scan_tm = reduce_tag(tag_word); |
|     sh_tag[lane] = scan_tm; |
|     for (uint lvl = 0u; lvl < 8u; lvl++) |
|     { |
|         uint span = 1u << lvl; |
|         GroupMemoryBarrierWithGroupSync(); |
|         if (lane >= span) |
|         { |
|             scan_tm = combine_tag_monoid(sh_tag[lane - span], scan_tm); |
|         } |
|         GroupMemoryBarrierWithGroupSync(); |
|         sh_tag[lane] = scan_tm; |
|     } |
|     GroupMemoryBarrierWithGroupSync(); |
| |
|     // Start from the record of the previous workgroup in _709 (zero for the first group), |
|     // then add everything from the lower lanes of this group. |
|     TagMonoid tm = tag_monoid_identity(); |
|     if (gl_WorkGroupID.x > 0u) |
|     { |
|         uint parent_base = (gl_WorkGroupID.x - 1u) * 20u; |
|         tm.trans_ix = _709.Load(parent_base + 0u); |
|         tm.linewidth_ix = _709.Load(parent_base + 4u); |
|         tm.pathseg_ix = _709.Load(parent_base + 8u); |
|         tm.path_ix = _709.Load(parent_base + 12u); |
|         tm.pathseg_offset = _709.Load(parent_base + 16u); |
|     } |
|     if (lane > 0u) |
|     { |
|         tm = combine_tag_monoid(tm, sh_tag[lane - 1u]); |
|     } |
| |
|     // Cursors into the input (word indices into _574) and output (byte offsets into _111). |
|     uint coord_word = (_639.Load(68) >> 2u) + tm.pathseg_offset; |
|     uint width_word = (_639.Load(60) >> 2u) + tm.linewidth_ix; |
|     uint first_path_ix = tm.path_ix; |
|     uint cur_trans_ix = tm.trans_ix; |
|     TransformSegRef trans_ref; |
|     trans_ref.offset = _639.Load(36) + (cur_trans_ix * 24u); |
|     PathSegRef seg_ref; |
|     seg_ref.offset = _639.Load(28) + (tm.pathseg_ix * 52u); |
| |
|     float widths[4]; |
|     uint trans_at[4]; |
|     Monoid local_box[4]; |
|     float2 p0; |
|     float2 p1; |
|     float2 p2; |
|     float2 p3; |
|     float2 third = 0.3333333432674407958984375f.xx; |
| |
|     // ---- Decode the four tag bytes of this invocation. ---- |
|     for (uint slot = 0u; slot < 4u; slot++) |
|     { |
|         widths[slot] = asfloat(_574.Load(width_word * 4u)); |
|         trans_at[slot] = cur_trans_ix; |
|         // Only the low byte of the shifted word is looked at below. |
|         uint tag_byte = tag_word >> (slot * 8u); |
|         uint seg_type = tag_byte & 3u; |
|         if (seg_type == 0u) |
|         { |
|             // Not a segment: this byte only advances the path / transform / line-width cursors. |
|             local_box[slot].bbox = 0.0f.xxxx; |
|             uint is_path = (tag_byte >> 4u) & 1u; |
|             local_box[slot].flags = is_path; |
|             tm.path_ix += is_path; |
|             uint has_trans = (tag_byte >> 5u) & 1u; |
|             cur_trans_ix += has_trans; |
|             trans_ref.offset += has_trans * 24u; |
|             width_word += (tag_byte >> 6u) & 1u; |
|         } |
|         else |
|         { |
|             // Segment with seg_type + 1 input points; bit 3 selects 32-bit float or packed 16-bit coordinates. |
|             if ((tag_byte & 8u) != 0u) |
|             { |
|                 p0 = read_f32_point(coord_word); |
|                 p1 = read_f32_point(coord_word + 2u); |
|                 if (seg_type >= 2u) |
|                 { |
|                     p2 = read_f32_point(coord_word + 4u); |
|                     if (seg_type == 3u) |
|                     { |
|                         p3 = read_f32_point(coord_word + 6u); |
|                     } |
|                 } |
|             } |
|             else |
|             { |
|                 p0 = read_i16_point(coord_word); |
|                 p1 = read_i16_point(coord_word + 1u); |
|                 if (seg_type >= 2u) |
|                 { |
|                     p2 = read_i16_point(coord_word + 2u); |
|                     if (seg_type == 3u) |
|                     { |
|                         p3 = read_i16_point(coord_word + 3u); |
|                     } |
|                 } |
|             } |
| |
|             // Apply the current transform and grow the bbox over every input point. |
|             Alloc trans_alloc; |
|             trans_alloc.offset = _639.Load(36); |
|             TransformSeg xf = TransformSeg_read(trans_alloc, trans_ref); |
|             p0 = ((xf.mat.xy * p0.x) + (xf.mat.zw * p0.y)) + xf.translate; |
|             p1 = ((xf.mat.xy * p1.x) + (xf.mat.zw * p1.y)) + xf.translate; |
|             float4 bbox = float4(min(p0, p1), max(p0, p1)); |
|             if (seg_type >= 2u) |
|             { |
|                 p2 = ((xf.mat.xy * p2.x) + (xf.mat.zw * p2.y)) + xf.translate; |
|                 bbox.xy = min(bbox.xy, p2); |
|                 bbox.zw = max(bbox.zw, p2); |
|                 if (seg_type == 3u) |
|                 { |
|                     p3 = ((xf.mat.xy * p3.x) + (xf.mat.zw * p3.y)) + xf.translate; |
|                     bbox.xy = min(bbox.xy, p3); |
|                     bbox.zw = max(bbox.zw, p3); |
|                 } |
|                 else |
|                 { |
|                     // Three points: re-express as a cubic whose inner points sit a third of the way from p1. |
|                     p3 = p2; |
|                     p2 = lerp(p1, p2, third); |
|                     p1 = lerp(p1, p0, third); |
|                 } |
|             } |
|             else |
|             { |
|                 // Two points: re-express as a cubic with inner points a third of the way in from each end. |
|                 p3 = p1; |
|                 p2 = lerp(p3, p0, third); |
|                 p1 = lerp(p0, p3, third); |
|             } |
| |
|             // A non-negative line width pads the bbox by the transformed half width. |
|             bool has_width = widths[slot] >= 0.0f; |
|             float2 stroke = 0.0f.xx; |
|             if (has_width) |
|             { |
|                 stroke = float2(length(xf.mat.xz), length(xf.mat.yw)) * (0.5f * widths[slot]); |
|                 bbox += float4(-stroke, stroke); |
|             } |
|             local_box[slot].bbox = bbox; |
|             local_box[slot].flags = 0u; |
| |
|             // Emit the cubic into the next 52-byte output slot. |
|             PathCubic cubic; |
|             cubic.p0 = p0; |
|             cubic.p1 = p1; |
|             cubic.p2 = p2; |
|             cubic.p3 = p3; |
|             cubic.path_ix = tm.path_ix; |
|             cubic.trans_ix = elem_base + slot; |
|             cubic.stroke = stroke; |
|             Alloc seg_alloc; |
|             seg_alloc.offset = _639.Load(28); |
|             PathSeg_Cubic_write(seg_alloc, seg_ref, uint(has_width), cubic); |
|             seg_ref.offset += 52u; |
| |
|             // Advance the input cursor; the word count doubles when bit 3 is set. |
|             uint n_points = seg_type + ((tag_byte >> 2u) & 1u); |
|             uint n_words = n_points + (n_points & (((tag_byte >> 3u) & 1u) * 15u)); |
|             coord_word += n_words; |
|         } |
|     } |
| |
|     // ---- Scan the bbox monoid: first across this invocation's four slots, then across the group. ---- |
|     Monoid running = local_box[0]; |
|     for (uint j = 1u; j < 4u; j++) |
|     { |
|         running = combine_monoid(running, local_box[j]); |
|         local_box[j] = running; |
|     } |
|     sh_scratch[lane] = running; |
|     for (uint depth = 0u; depth < 8u; depth++) |
|     { |
|         uint reach = 1u << depth; |
|         GroupMemoryBarrierWithGroupSync(); |
|         if (lane >= reach) |
|         { |
|             running = combine_monoid(sh_scratch[lane - reach], running); |
|         } |
|         GroupMemoryBarrierWithGroupSync(); |
|         sh_scratch[lane] = running; |
|     } |
|     GroupMemoryBarrierWithGroupSync(); |
| |
|     // ---- Write the bounding boxes: six words per path, starting at this invocation's first path. ---- |
|     uint bbox_word = (_639.Load(40) >> 2u) + (first_path_ix * 6u); |
|     Monoid prefix = monoid_identity(); |
|     if (lane > 0u) |
|     { |
|         prefix = sh_scratch[lane - 1u]; |
|     } |
|     for (uint e = 0u; e < 4u; e++) |
|     { |
|         Monoid m = combine_monoid(prefix, local_box[e]); |
|         // The very last slot of the group is merged atomically unless a plain store below overrides it. |
|         bool use_atomic = (e == 3u) && (lane == 255u); |
|         if ((m.flags & 1u) != 0u) |
|         { |
|             _111.Store((bbox_word + 4u) * 4u + 8u, asuint(widths[e])); |
|             _111.Store((bbox_word + 5u) * 4u + 8u, trans_at[e]); |
|             if ((m.flags & 2u) != 0u) |
|             { |
|                 // Bit 2u set: store the box with plain writes and move on to the next path. |
|                 _111.Store(bbox_word * 4u + 8u, round_down(m.bbox.x)); |
|                 _111.Store((bbox_word + 1u) * 4u + 8u, round_down(m.bbox.y)); |
|                 _111.Store((bbox_word + 2u) * 4u + 8u, round_up(m.bbox.z)); |
|                 _111.Store((bbox_word + 3u) * 4u + 8u, round_up(m.bbox.w)); |
|                 bbox_word += 6u; |
|                 use_atomic = false; |
|             } |
|             else |
|             { |
|                 // Bit 2u clear: merge atomically below. |
|                 use_atomic = true; |
|             } |
|         } |
|         if (use_atomic) |
|         { |
|             // Boxes with no extent on either axis are skipped. |
|             if ((m.bbox.z > m.bbox.x) || (m.bbox.w > m.bbox.y)) |
|             { |
|                 uint previous; |
|                 _111.InterlockedMin(bbox_word * 4u + 8u, round_down(m.bbox.x), previous); |
|                 _111.InterlockedMin((bbox_word + 1u) * 4u + 8u, round_down(m.bbox.y), previous); |
|                 _111.InterlockedMax((bbox_word + 2u) * 4u + 8u, round_up(m.bbox.z), previous); |
|                 _111.InterlockedMax((bbox_word + 3u) * 4u + 8u, round_up(m.bbox.w), previous); |
|             } |
|             bbox_word += 6u; |
|         } |
|     } |
| } |
| |
| [numthreads(256, 1, 1)] /* 256 x 1 x 1 invocations per group, the same dimensions as gl_WorkGroupSize */ |
| void main(SPIRV_Cross_Input stage_input) /* Shader entry point: copies the system-value inputs into the file-level statics, then runs comp_main. */ |
| { |
| gl_WorkGroupID = stage_input.gl_WorkGroupID; |
| gl_LocalInvocationID = stage_input.gl_LocalInvocationID; |
| gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; |
| comp_main(); |
| } |