blob: 6c0a64f9833fb10b2e756392528772434e099061 [file] [log] [blame]
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct TagMonoid
{
uint trans_ix;
uint linewidth_ix;
uint pathseg_ix;
uint path_ix;
uint pathseg_offset;
};
struct Alloc
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
uint n_trans;
uint n_path;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
};
struct ConfigBuf
{
Config conf;
};
struct SceneBuf
{
uint scene[1];
};
struct TagMonoid_1
{
uint trans_ix;
uint linewidth_ix;
uint pathseg_ix;
uint path_ix;
uint pathseg_offset;
};
struct OutBuf
{
TagMonoid_1 outbuf[1];
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint memory[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(128u, 1u, 1u);
static inline __attribute__((always_inline))
TagMonoid reduce_tag(thread const uint& tag_word)
{
uint point_count = tag_word & 50529027u;
TagMonoid c;
c.pathseg_ix = uint(int(popcount((point_count * 7u) & 67372036u)));
c.linewidth_ix = uint(int(popcount(tag_word & 1077952576u)));
c.path_ix = uint(int(popcount(tag_word & 269488144u)));
c.trans_ix = uint(int(popcount(tag_word & 538976288u)));
uint n_points = point_count + ((tag_word >> uint(2)) & 16843009u);
uint a = n_points + (n_points & (((tag_word >> uint(3)) & 16843009u) * 15u));
a += (a >> uint(8));
a += (a >> uint(16));
c.pathseg_offset = a & 255u;
return c;
}
static inline __attribute__((always_inline))
TagMonoid combine_tag_monoid(thread const TagMonoid& a, thread const TagMonoid& b)
{
TagMonoid c;
c.trans_ix = a.trans_ix + b.trans_ix;
c.linewidth_ix = a.linewidth_ix + b.linewidth_ix;
c.pathseg_ix = a.pathseg_ix + b.pathseg_ix;
c.path_ix = a.path_ix + b.path_ix;
c.pathseg_offset = a.pathseg_offset + b.pathseg_offset;
return c;
}
kernel void main0(const device ConfigBuf& _139 [[buffer(1)]], const device SceneBuf& _150 [[buffer(2)]], device OutBuf& _237 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
threadgroup TagMonoid sh_scratch[128];
uint ix = gl_GlobalInvocationID.x * 2u;
uint scene_ix = (_139.conf.pathtag_offset >> uint(2)) + ix;
uint tag_word = _150.scene[scene_ix];
uint param = tag_word;
TagMonoid agg = reduce_tag(param);
for (uint i = 1u; i < 2u; i++)
{
tag_word = _150.scene[scene_ix + i];
uint param_1 = tag_word;
TagMonoid param_2 = agg;
TagMonoid param_3 = reduce_tag(param_1);
agg = combine_tag_monoid(param_2, param_3);
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 7u; i_1++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if ((gl_LocalInvocationID.x + (1u << i_1)) < 128u)
{
TagMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)];
TagMonoid param_4 = agg;
TagMonoid param_5 = other;
agg = combine_tag_monoid(param_4, param_5);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_scratch[gl_LocalInvocationID.x] = agg;
}
if (gl_LocalInvocationID.x == 0u)
{
_237.outbuf[gl_WorkGroupID.x].trans_ix = agg.trans_ix;
_237.outbuf[gl_WorkGroupID.x].linewidth_ix = agg.linewidth_ix;
_237.outbuf[gl_WorkGroupID.x].pathseg_ix = agg.pathseg_ix;
_237.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix;
_237.outbuf[gl_WorkGroupID.x].pathseg_offset = agg.pathseg_offset;
}
}