| #pragma clang diagnostic ignored "-Wmissing-prototypes" |
| |
| #include <metal_stdlib> |
| #include <simd/simd.h> |
| |
| using namespace metal; |
| |
| struct TagMonoid |
| { |
| uint trans_ix; |
| uint linewidth_ix; |
| uint pathseg_ix; |
| uint path_ix; |
| uint pathseg_offset; |
| }; |
| |
| struct Alloc |
| { |
| uint offset; |
| }; |
| |
| struct Config |
| { |
| uint n_elements; |
| uint n_pathseg; |
| uint width_in_tiles; |
| uint height_in_tiles; |
| Alloc tile_alloc; |
| Alloc bin_alloc; |
| Alloc ptcl_alloc; |
| Alloc pathseg_alloc; |
| Alloc anno_alloc; |
| Alloc trans_alloc; |
| Alloc bbox_alloc; |
| Alloc drawmonoid_alloc; |
| uint n_trans; |
| uint n_path; |
| uint trans_offset; |
| uint linewidth_offset; |
| uint pathtag_offset; |
| uint pathseg_offset; |
| }; |
| |
| struct ConfigBuf |
| { |
| Config conf; |
| }; |
| |
| struct SceneBuf |
| { |
| uint scene[1]; |
| }; |
| |
| struct TagMonoid_1 |
| { |
| uint trans_ix; |
| uint linewidth_ix; |
| uint pathseg_ix; |
| uint path_ix; |
| uint pathseg_offset; |
| }; |
| |
| struct OutBuf |
| { |
| TagMonoid_1 outbuf[1]; |
| }; |
| |
| struct Memory |
| { |
| uint mem_offset; |
| uint mem_error; |
| uint memory[1]; |
| }; |
| |
| constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(128u, 1u, 1u); |
| |
| static inline __attribute__((always_inline)) |
| TagMonoid reduce_tag(thread const uint& tag_word) |
| { |
| uint point_count = tag_word & 50529027u; |
| TagMonoid c; |
| c.pathseg_ix = uint(int(popcount((point_count * 7u) & 67372036u))); |
| c.linewidth_ix = uint(int(popcount(tag_word & 1077952576u))); |
| c.path_ix = uint(int(popcount(tag_word & 269488144u))); |
| c.trans_ix = uint(int(popcount(tag_word & 538976288u))); |
| uint n_points = point_count + ((tag_word >> uint(2)) & 16843009u); |
| uint a = n_points + (n_points & (((tag_word >> uint(3)) & 16843009u) * 15u)); |
| a += (a >> uint(8)); |
| a += (a >> uint(16)); |
| c.pathseg_offset = a & 255u; |
| return c; |
| } |
| |
| static inline __attribute__((always_inline)) |
| TagMonoid combine_tag_monoid(thread const TagMonoid& a, thread const TagMonoid& b) |
| { |
| TagMonoid c; |
| c.trans_ix = a.trans_ix + b.trans_ix; |
| c.linewidth_ix = a.linewidth_ix + b.linewidth_ix; |
| c.pathseg_ix = a.pathseg_ix + b.pathseg_ix; |
| c.path_ix = a.path_ix + b.path_ix; |
| c.pathseg_offset = a.pathseg_offset + b.pathseg_offset; |
| return c; |
| } |
| |
| kernel void main0(const device ConfigBuf& _139 [[buffer(1)]], const device SceneBuf& _150 [[buffer(2)]], device OutBuf& _237 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) |
| { |
| threadgroup TagMonoid sh_scratch[128]; |
| uint ix = gl_GlobalInvocationID.x * 2u; |
| uint scene_ix = (_139.conf.pathtag_offset >> uint(2)) + ix; |
| uint tag_word = _150.scene[scene_ix]; |
| uint param = tag_word; |
| TagMonoid agg = reduce_tag(param); |
| for (uint i = 1u; i < 2u; i++) |
| { |
| tag_word = _150.scene[scene_ix + i]; |
| uint param_1 = tag_word; |
| TagMonoid param_2 = agg; |
| TagMonoid param_3 = reduce_tag(param_1); |
| agg = combine_tag_monoid(param_2, param_3); |
| } |
| sh_scratch[gl_LocalInvocationID.x] = agg; |
| for (uint i_1 = 0u; i_1 < 7u; i_1++) |
| { |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| if ((gl_LocalInvocationID.x + (1u << i_1)) < 128u) |
| { |
| TagMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)]; |
| TagMonoid param_4 = agg; |
| TagMonoid param_5 = other; |
| agg = combine_tag_monoid(param_4, param_5); |
| } |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| sh_scratch[gl_LocalInvocationID.x] = agg; |
| } |
| if (gl_LocalInvocationID.x == 0u) |
| { |
| _237.outbuf[gl_WorkGroupID.x].trans_ix = agg.trans_ix; |
| _237.outbuf[gl_WorkGroupID.x].linewidth_ix = agg.linewidth_ix; |
| _237.outbuf[gl_WorkGroupID.x].pathseg_ix = agg.pathseg_ix; |
| _237.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix; |
| _237.outbuf[gl_WorkGroupID.x].pathseg_offset = agg.pathseg_offset; |
| } |
| } |
| |