| #pragma clang diagnostic ignored "-Wmissing-prototypes" |
| |
| #include <metal_stdlib> |
| #include <simd/simd.h> |
| |
| using namespace metal; |
| |
| struct ElementRef /* Reference to one element of the scene buffer, held as a single offset. */ |
| { |
| uint offset; /* Element_tag shifts this right by 2 to index SceneBuf::scene (so it acts as a byte offset into 32-bit words); Element_index advances it by 36 per element. */ |
| }; |
| |
| struct ElementTag /* Decoded form of the packed 32-bit word that Element_tag reads from the scene buffer. */ |
| { |
| uint tag; /* Low 16 bits of the packed word (word & 65535u). */ |
| uint flags; /* High 16 bits of the packed word (word >> 16). */ |
| }; |
| |
| struct DrawMonoid /* Pair of counters produced per element by map_tag and summed field-wise by combine_tag_monoid. */ |
| { |
| uint path_ix; /* map_tag contributes 1 for tags 4, 5, 6, 9 and 10, and 0 for every other tag. */ |
| uint clip_ix; /* map_tag contributes 1 only for tags 9 and 10. */ |
| }; |
| |
| struct SceneBuf /* Layout of the read-only device buffer that main0 receives at buffer index 2. */ |
| { |
| uint scene[1]; /* Indexed by Element_tag at (offset >> 2), well beyond element 0; the declared length of 1 is presumably a stand-in for a runtime-sized array - confirm against the host-side buffer size. */ |
| }; |
| |
| struct DrawMonoid_1 /* Element type of OutBuf::outbuf; it has the same two uint fields as DrawMonoid. */ |
| { |
| uint path_ix; /* Written by main0 from the reduced agg.path_ix. */ |
| uint clip_ix; /* Written by main0 from the reduced agg.clip_ix. */ |
| }; |
| |
| struct OutBuf /* Layout of the writable device buffer that main0 receives at buffer index 3. */ |
| { |
| DrawMonoid_1 outbuf[1]; /* main0 writes entry [gl_WorkGroupID.x], i.e. one entry per threadgroup; the declared length of 1 is presumably a stand-in for a runtime-sized array - confirm. */ |
| }; |
| |
| struct Memory /* Buffer layout of two header words followed by a uint array; not referenced by main0 or the helper functions visible here. */ |
| { |
| uint mem_offset; /* NOTE(review): name suggests an allocation cursor - not used in the visible code, confirm against the other shaders/host. */ |
| uint mem_error; /* NOTE(review): name suggests an error flag - not used in the visible code, confirm. */ |
| uint memory[1]; /* Declared with length 1; presumably runtime-sized like the other buffer arrays - confirm. */ |
| }; |
| |
| constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); /* Threadgroup dimensions 256 x 1 x 1; the x value matches the size of sh_scratch and the 256u bound used in main0. Nothing in the visible code reads this constant, hence maybe_unused. */ |
| |
| struct Alloc /* Wrapper around a single uint offset; used as the type of the *_alloc fields of Config. */ |
| { |
| uint offset; /* NOTE(review): presumably an offset into Memory::memory - nothing in the visible code reads it, confirm. */ |
| }; |
| |
| struct Config /* Configuration record wrapped by ConfigBuf. No field is read by main0 or its helpers as shown; the notes below are inferred from field names only and should be confirmed against the host code. Field order and types define the buffer layout, so do not reorder. */ |
| { |
| uint n_elements; /* NOTE(review): presumably the number of scene elements - confirm. */ |
| uint n_pathseg; |
| uint width_in_tiles; |
| uint height_in_tiles; |
| Alloc tile_alloc; /* First of eight consecutive Alloc handles (tile, bin, ptcl, pathseg, anno, trans, bbox, drawmonoid). */ |
| Alloc bin_alloc; |
| Alloc ptcl_alloc; |
| Alloc pathseg_alloc; |
| Alloc anno_alloc; |
| Alloc trans_alloc; |
| Alloc bbox_alloc; |
| Alloc drawmonoid_alloc; |
| uint n_trans; |
| uint n_path; |
| uint trans_offset; /* NOTE(review): the four *_offset fields are presumably offsets into the scene buffer - confirm. */ |
| uint linewidth_offset; |
| uint pathtag_offset; |
| uint pathseg_offset; |
| }; |
| |
| struct ConfigBuf /* Buffer wrapper holding exactly one Config; main0 as shown does not take it as an argument. */ |
| { |
| Config conf; |
| }; |
| |
| /* Loads the 32-bit word that ref points at in the scene buffer and unpacks it. |
|    ref.offset is divided by 4 to obtain the word index into v_46.scene. |
|    Returns an ElementTag whose tag is the low 16 bits and whose flags are the high 16 bits. */ |
| static inline __attribute__((always_inline)) |
| ElementTag Element_tag(thread const ElementRef& ref, const device SceneBuf& v_46) |
| { |
| const uint word_index = ref.offset / 4u; |
| const uint packed = v_46.scene[word_index]; |
| ElementTag decoded; |
| decoded.tag = packed & 0xFFFFu; |
| decoded.flags = packed >> 16u; |
| return decoded; |
| } |
| |
| /* Maps an element tag to its DrawMonoid contribution: |
|      tags 4, 5, 6  -> { path_ix = 1, clip_ix = 0 } |
|      tags 9, 10    -> { path_ix = 1, clip_ix = 1 } |
|      anything else -> { path_ix = 0, clip_ix = 0 } */ |
| static inline __attribute__((always_inline)) |
| DrawMonoid map_tag(thread const uint& tag_word) |
| { |
| /* Tags 9 and 10 count towards both fields. */ |
| const bool counts_clip = (tag_word == 9u) || (tag_word == 10u); |
| /* Tags 4..6 count towards path_ix only. */ |
| const bool counts_path = counts_clip || (tag_word >= 4u && tag_word <= 6u); |
| DrawMonoid contribution; |
| contribution.path_ix = counts_path ? 1u : 0u; |
| contribution.clip_ix = counts_clip ? 1u : 0u; |
| return contribution; |
| } |
| |
| /* Returns a reference to the element `index` positions after `ref`. |
|    Elements are laid out with a fixed stride of 36, in the same units as ElementRef::offset. */ |
| static inline __attribute__((always_inline)) |
| ElementRef Element_index(thread const ElementRef& ref, thread const uint& index) |
| { |
| ElementRef advanced; |
| advanced.offset = ref.offset + index * 36u; |
| return advanced; |
| } |
| |
| /* Combines two DrawMonoid values by adding path_ix and clip_ix field-wise (unsigned, wrapping). */ |
| static inline __attribute__((always_inline)) |
| DrawMonoid combine_tag_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b) |
| { |
| return DrawMonoid{ a.path_ix + b.path_ix, a.clip_ix + b.clip_ix }; |
| } |
| |
| /* Compute kernel: reduces the DrawMonoid contributions of scene element tags to one total per threadgroup. |
|    v_46  - read-only scene buffer (buffer index 2). |
|    _199  - output buffer (buffer index 3); entry [gl_WorkGroupID.x] receives this threadgroup's total. |
|    Each thread first folds 8 consecutive elements, then the 256 threads of the group are combined |
|    through threadgroup memory, so one threadgroup covers 256 * 8 = 2048 elements. |
|    NOTE(review): scene reads are not bounds-checked here; presumably the host sizes the buffer and |
|    dispatch so every thread's 8 elements exist - confirm. */ |
| kernel void main0(const device SceneBuf& v_46 [[buffer(2)]], device OutBuf& _199 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) |
| { |
| threadgroup DrawMonoid sh_scratch[256]; |
| const uint local_id = gl_LocalInvocationID.x; |
| /* Phase 1: sequentially fold this thread's 8 elements, starting at element index 8 * global id. */ |
| const uint first_element = gl_GlobalInvocationID.x * 8u; |
| const ElementRef base = ElementRef{ first_element * 36u }; |
| const uint first_tag = Element_tag(base, v_46).tag; |
| DrawMonoid agg = map_tag(first_tag); |
| for (uint k = 1u; k < 8u; ++k) |
| { |
| const ElementRef current = Element_index(base, k); |
| const uint current_tag = Element_tag(current, v_46).tag; |
| const DrawMonoid contribution = map_tag(current_tag); |
| agg = combine_tag_monoid(agg, contribution); |
| } |
| sh_scratch[local_id] = agg; |
| /* Phase 2: 8 rounds (2^8 = 256). In round r each thread adds the running value of the thread 2^r slots above it, |
|    so after the last round thread 0 holds the sum over the whole threadgroup. */ |
| for (uint round = 0u; round < 8u; ++round) |
| { |
| /* Make the values stored in the previous round (or in phase 1) visible before anyone reads them. */ |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| const uint partner = local_id + (1u << round); |
| if (partner < 256u) |
| { |
| const DrawMonoid other = sh_scratch[partner]; |
| agg = combine_tag_monoid(agg, other); |
| } |
| /* Every thread must finish reading before any slot is overwritten with this round's result. */ |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| sh_scratch[local_id] = agg; |
| } |
| /* Only thread 0 holds the full threadgroup total; publish it field by field. */ |
| if (local_id == 0u) |
| { |
| device DrawMonoid_1& out_slot = _199.outbuf[gl_WorkGroupID.x]; |
| out_slot.path_ix = agg.path_ix; |
| out_slot.clip_ix = agg.clip_ix; |
| } |
| } |
| |