| #pragma clang diagnostic ignored "-Wmissing-prototypes" |
| #pragma clang diagnostic ignored "-Wunused-variable" |
| |
| #include <metal_stdlib> |
| #include <simd/simd.h> |
| #include <metal_atomic> |
| |
| using namespace metal; |
| |
| // GLSL findLSB() equivalent: yields the trailing-zero count of x, or T(-1) when x is zero. |
| template<typename T> |
| inline T spvFindLSB(T x) |
| { |
|     const T no_bit_set = T(-1); |
|     const T lowest_bit = ctz(x); |
|     // select() picks the second argument wherever the condition holds. |
|     return select(lowest_bit, no_bit_set, x == T(0)); |
| } |
| |
| // Handle to a region of the Memory buffer. Only the starting offset is stored; |
| // new_alloc() and slice_mem() accept a size but do not record it. |
| struct Alloc |
| { |
| uint offset; |
| }; |
| |
| // Value returned by malloc(): the allocation handle plus a flag that is true |
| // when the request extended past the end of the Memory buffer. |
| struct MallocResult |
| { |
| Alloc alloc; |
| bool failed; |
| }; |
| |
| // Reference to a BinInstance record. BinInstance_read() shifts `offset` right by 2 |
| // to obtain the index into Memory.memory; BinInstance_index() advances it by 4 per entry. |
| struct BinInstanceRef |
| { |
| uint offset; |
| }; |
| |
| // Single-word record produced by BinInstance_read(); main0 uses element_ix to index |
| // the scene's draw tags. |
| struct BinInstance |
| { |
| uint element_ix; |
| }; |
| |
| // Reference to a Path record. Path_read() shifts `offset` right by 2 to obtain the |
| // index of the record's first word in Memory.memory. |
| struct PathRef |
| { |
| uint offset; |
| }; |
| |
| // Reference to a Tile record. Tile_read() shifts `offset` right by 2 to obtain the |
| // index of the record's first word in Memory.memory. |
| struct TileRef |
| { |
| uint offset; |
| }; |
| |
| // Decoded path record (three words in Memory.memory, see Path_read()). |
| // bbox holds four 16-bit values: (word0 low, word0 high, word1 low, word1 high); |
| // main0 uses bbox.z - bbox.x as the tile row stride. tiles comes from the third word. |
| struct Path |
| { |
| uint4 bbox; |
| TileRef tiles; |
| }; |
| |
| // Offset stored in the first word of a Tile record. write_fill() treats a value of 0 |
| // specially: a fill with offset 0 is emitted as a solid command. |
| struct TileSegRef |
| { |
| uint offset; |
| }; |
| |
| // Decoded tile record (two words, see Tile_read()): `tile` is the first word, |
| // `backdrop` is the second word converted to a signed int. |
| struct Tile |
| { |
| TileSegRef tile; |
| int backdrop; |
| }; |
| |
| // Location at which CmdStroke_write() stores a CmdStroke payload |
| // (offset >> 2 is the index into Memory.memory). |
| struct CmdStrokeRef |
| { |
| uint offset; |
| }; |
| |
| // Stroke command payload, two words: tile_ref, then half_width stored as raw float bits. |
| // write_fill() fills it with the tile's segment offset and 0.5 * linewidth. |
| struct CmdStroke |
| { |
| uint tile_ref; |
| float half_width; |
| }; |
| |
| // Location at which CmdFill_write() stores a CmdFill payload |
| // (offset >> 2 is the index into Memory.memory). |
| struct CmdFillRef |
| { |
| uint offset; |
| }; |
| |
| // Fill command payload, two words: tile_ref, then backdrop converted to uint. |
| // write_fill() fills it from the tile's segment offset and backdrop. |
| struct CmdFill |
| { |
| uint tile_ref; |
| int backdrop; |
| }; |
| |
| // Location at which CmdColor_write() stores a CmdColor payload |
| // (offset >> 2 is the index into Memory.memory). |
| struct CmdColorRef |
| { |
| uint offset; |
| }; |
| |
| // Color command payload: a single word, copied by main0 from the scene draw data. |
| struct CmdColor |
| { |
| uint rgba_color; |
| }; |
| |
| // Location at which CmdLinGrad_write() stores a CmdLinGrad payload |
| // (offset >> 2 is the index into Memory.memory). |
| struct CmdLinGradRef |
| { |
| uint offset; |
| }; |
| |
| // Linear-gradient command payload, four words: index, then line_x, line_y and line_c |
| // stored as raw float bits (see CmdLinGrad_write()). |
| struct CmdLinGrad |
| { |
| uint index; |
| float line_x; |
| float line_y; |
| float line_c; |
| }; |
| |
| // Location at which CmdRadGrad_write() stores a CmdRadGrad payload |
| // (offset >> 2 is the index into Memory.memory). |
| struct CmdRadGradRef |
| { |
| uint offset; |
| }; |
| |
| // Radial-gradient command payload. CmdRadGrad_write() serializes it as eleven words: |
| // index, mat.xyzw, xlat.xy, c1.xy, ra, roff (floats stored as raw bits). |
| struct CmdRadGrad |
| { |
| uint index; |
| float4 mat; |
| float2 xlat; |
| float2 c1; |
| float ra; |
| float roff; |
| }; |
| |
| // Location at which CmdImage_write() stores a CmdImage payload |
| // (offset >> 2 is the index into Memory.memory). |
| struct CmdImageRef |
| { |
| uint offset; |
| }; |
| |
| // Image command payload, two words: index, then offset packed as |
| // (offset.x & 0xFFFF) | (offset.y << 16) by CmdImage_write(). |
| struct CmdImage |
| { |
| uint index; |
| int2 offset; |
| }; |
| |
| // Location at which CmdEndClip_write() stores a CmdEndClip payload |
| // (offset >> 2 is the index into Memory.memory). |
| struct CmdEndClipRef |
| { |
| uint offset; |
| }; |
| |
| // End-clip command payload: a single `blend` word. |
| struct CmdEndClip |
| { |
| uint blend; |
| }; |
| |
| // Location at which CmdJump_write() stores a CmdJump payload |
| // (offset >> 2 is the index into Memory.memory). |
| struct CmdJumpRef |
| { |
| uint offset; |
| }; |
| |
| // Jump command payload: alloc_cmd() sets new_ref to the offset of the freshly |
| // allocated command chunk that the command stream continues in. |
| struct CmdJump |
| { |
| uint new_ref; |
| }; |
| |
| // Write cursor into the command stream. The Cmd_*_write() helpers store the tag word |
| // at offset >> 2 and, where a payload exists, place it 4 bytes after `offset`. |
| struct CmdRef |
| { |
| uint offset; |
| }; |
| |
| // Layout of the read/write buffer bound at buffer(0) in main0. |
| // mem_offset: bump-allocation cursor, advanced atomically by malloc(). |
| // mem_error:  raised to 1 (atomic max) by malloc() when an allocation does not fit. |
| // memory:     declared with one element but indexed freely; its usable length is derived |
| //             from the buffer size as (v_260BufferSize - 8) / 4 words. |
| struct Memory |
| { |
| uint mem_offset; |
| uint mem_error; |
| uint memory[1]; |
| }; |
| |
| // Same single-field layout as Alloc; this variant is the type of the *_alloc fields |
| // inside Config. |
| struct Alloc_1 |
| { |
| uint offset; |
| }; |
| |
| // Configuration block read through ConfigBuf. The visible part of main0 reads |
| // n_elements, width_in_tiles, tile_alloc, bin_alloc, ptcl_alloc, drawmonoid_alloc, |
| // drawinfo_alloc, drawtag_offset and drawdata_offset; the *_alloc and *_offset values |
| // it uses as indices are shifted right by 2 first. |
| // NOTE(review): the remaining fields are not referenced in the visible code; their |
| // meaning should be confirmed against the code that fills this buffer. |
| struct Config |
| { |
| uint n_elements; |
| uint n_pathseg; |
| uint width_in_tiles; |
| uint height_in_tiles; |
| Alloc_1 tile_alloc; |
| Alloc_1 bin_alloc; |
| Alloc_1 ptcl_alloc; |
| Alloc_1 pathseg_alloc; |
| Alloc_1 anno_alloc; |
| Alloc_1 trans_alloc; |
| Alloc_1 path_bbox_alloc; |
| Alloc_1 drawmonoid_alloc; |
| Alloc_1 clip_alloc; |
| Alloc_1 clip_bic_alloc; |
| Alloc_1 clip_stack_alloc; |
| Alloc_1 clip_bbox_alloc; |
| Alloc_1 draw_bbox_alloc; |
| Alloc_1 drawinfo_alloc; |
| uint n_trans; |
| uint n_path; |
| uint n_clip; |
| uint trans_offset; |
| uint linewidth_offset; |
| uint pathtag_offset; |
| uint pathseg_offset; |
| uint drawtag_offset; |
| uint drawdata_offset; |
| }; |
| |
| // Layout of the read-only buffer bound at buffer(1) in main0; wraps a single Config. |
| struct ConfigBuf |
| { |
| Config conf; |
| }; |
| |
| // Layout of the read-only buffer bound at buffer(2) in main0. |
| // `scene` is declared with one element but main0 indexes it with computed offsets |
| // (e.g. drawtag_start + element_ix), so it is presumably runtime-sized - confirm with the host code. |
| struct SceneBuf |
| { |
| uint scene[1]; |
| }; |
| |
| // Threadgroup size constant (256 x 1 x 1); marked maybe_unused because nothing visible reads it. |
| constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); |
| |
| // Returns an Alloc starting `offset` past the start of `a`. `size` is accepted but not used. |
| static inline __attribute__((always_inline)) |
| Alloc slice_mem(thread const Alloc& a, thread const uint& offset, thread const uint& size) |
| { |
|     Alloc sub; |
|     sub.offset = a.offset + offset; |
|     return sub; |
| } |
| |
| // Access-check hook used by read_mem()/write_mem(). It always reports the access as |
| // permitted; neither argument is inspected. |
| static inline __attribute__((always_inline)) |
| bool touch_mem(thread const Alloc& alloc, thread const uint& offset) |
| { |
|     const bool access_allowed = true; |
|     return access_allowed; |
| } |
| |
| // Loads the word at index `offset` of v_260.memory, or yields 0 when touch_mem() |
| // rejects the access. v_260BufferSize is not used here. |
| static inline __attribute__((always_inline)) |
| uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_260, constant uint& v_260BufferSize) |
| { |
|     const bool readable = touch_mem(alloc, offset); |
|     // The load is only evaluated when the access check passes. |
|     return readable ? v_260.memory[offset] : 0u; |
| } |
| |
| // Builds an Alloc that starts at `offset`. `size` and `mem_ok` are accepted but not used. |
| static inline __attribute__((always_inline)) |
| Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const bool& mem_ok) |
| { |
|     return Alloc{ offset }; |
| } |
| |
| // Returns the reference to entry `index` of a BinInstance array starting at `ref` |
| // (entries are 4 bytes apart). |
| static inline __attribute__((always_inline)) |
| BinInstanceRef BinInstance_index(thread const BinInstanceRef& ref, thread const uint& index) |
| { |
|     BinInstanceRef entry = ref; |
|     entry.offset += index * 4u; |
|     return entry; |
| } |
| |
| // Reads the single-word BinInstance stored at `ref` (ref.offset >> 2 is the word index). |
| static inline __attribute__((always_inline)) |
| BinInstance BinInstance_read(thread const Alloc& a, thread const BinInstanceRef& ref, device Memory& v_260, constant uint& v_260BufferSize) |
| { |
|     const uint word = ref.offset >> 2u; |
|     BinInstance inst; |
|     inst.element_ix = read_mem(a, word, v_260, v_260BufferSize); |
|     return inst; |
| } |
| |
| // Reads the three-word Path record at `ref`: two words of packed 16-bit bbox values |
| // followed by the tiles offset. |
| static inline __attribute__((always_inline)) |
| Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_260, constant uint& v_260BufferSize) |
| { |
|     const uint word0 = ref.offset >> 2u; |
|     const uint word1 = word0 + 1u; |
|     const uint word2 = word0 + 2u; |
|     const uint packed_lo = read_mem(a, word0, v_260, v_260BufferSize); |
|     const uint packed_hi = read_mem(a, word1, v_260, v_260BufferSize); |
|     const uint tiles_offset = read_mem(a, word2, v_260, v_260BufferSize); |
|     Path result; |
|     // Each packed word carries two 16-bit fields: low half first, high half second. |
|     result.bbox = uint4(packed_lo & 65535u, packed_lo >> 16u, packed_hi & 65535u, packed_hi >> 16u); |
|     result.tiles.offset = tiles_offset; |
|     return result; |
| } |
| |
| // Intentionally a no-op: both arguments are ignored. |
| static inline __attribute__((always_inline)) |
| void write_tile_alloc(thread const uint& el_ix, thread const Alloc& a) |
| { |
|     return; |
| } |
| |
| // Returns an Alloc starting at offset 0, built with a size equal to the whole `memory` |
| // array. `el_ix` is not consulted. |
| static inline __attribute__((always_inline)) |
| Alloc read_tile_alloc(thread const uint& el_ix, thread const bool& mem_ok, device Memory& v_260, constant uint& v_260BufferSize) |
| { |
|     const uint start = 0u; |
|     // Buffer size minus the two leading header words, rounded down to whole words. |
|     const uint memory_bytes = uint(int((v_260BufferSize - 8) / 4) * 4); |
|     return new_alloc(start, memory_bytes, mem_ok); |
| } |
| |
| // Reads the two-word Tile record at `ref`: the segment offset, then the backdrop |
| // (converted to a signed int). |
| static inline __attribute__((always_inline)) |
| Tile Tile_read(thread const Alloc& a, thread const TileRef& ref, device Memory& v_260, constant uint& v_260BufferSize) |
| { |
|     const uint seg_word = ref.offset >> 2u; |
|     const uint backdrop_word = seg_word + 1u; |
|     const uint seg_offset = read_mem(a, seg_word, v_260, v_260BufferSize); |
|     const uint backdrop_bits = read_mem(a, backdrop_word, v_260, v_260BufferSize); |
|     Tile result; |
|     result.tile.offset = seg_offset; |
|     result.backdrop = int(backdrop_bits); |
|     return result; |
| } |
| |
| // Bump-allocates `size` bytes from the Memory buffer. |
| // The cursor v_260.mem_offset is advanced atomically whether or not the request fits. |
| // Returns the allocation (starting at the previous cursor value) and `failed`, which is |
| // true when the request extends past the end of the `memory` array; in that case |
| // v_260.mem_error is raised to at least 1. |
| static inline __attribute__((always_inline)) |
| MallocResult malloc(thread const uint& size, device Memory& v_260, constant uint& v_260BufferSize) |
| { |
|     const uint offset = atomic_fetch_add_explicit((device atomic_uint*)&v_260.mem_offset, size, memory_order_relaxed); |
|     // Byte capacity of `memory`: buffer size minus the two header words, in whole words. |
|     const uint capacity = uint(int((v_260BufferSize - 8) / 4) * 4); |
|     MallocResult r; |
|     // Avoid forming `offset + size`: that sum can wrap around in 32-bit arithmetic and |
|     // make an out-of-range allocation look valid. |
|     r.failed = (offset > capacity) || (size > (capacity - offset)); |
|     const bool alloc_ok = !r.failed; |
|     r.alloc = new_alloc(offset, size, alloc_ok); |
|     if (r.failed) |
|     { |
|         atomic_fetch_max_explicit((device atomic_uint*)&v_260.mem_error, 1u, memory_order_relaxed); |
|     } |
|     return r; |
| } |
| |
| // Stores `val` at index `offset` of v_260.memory, unless touch_mem() rejects the access. |
| // v_260BufferSize is not used here. |
| static inline __attribute__((always_inline)) |
| void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_260, constant uint& v_260BufferSize) |
| { |
|     if (touch_mem(alloc, offset)) |
|     { |
|         v_260.memory[offset] = val; |
|     } |
| } |
| |
| // Stores the CmdJump payload (one word: new_ref) at `ref`. |
| static inline __attribute__((always_inline)) |
| void CmdJump_write(thread const Alloc& a, thread const CmdJumpRef& ref, thread const CmdJump& s, device Memory& v_260, constant uint& v_260BufferSize) |
| { |
|     const uint word = ref.offset >> 2u; |
|     write_mem(a, word, s.new_ref, v_260, v_260BufferSize); |
| } |
| |
| // Emits a jump command at `ref`: tag word 11, then the CmdJump payload 4 bytes later. |
| static inline __attribute__((always_inline)) |
| void Cmd_Jump_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdJump& s, device Memory& v_260, constant uint& v_260BufferSize) |
| { |
|     const uint tag_word = ref.offset >> 2u; |
|     const uint tag = 11u; |
|     write_mem(a, tag_word, tag, v_260, v_260BufferSize); |
|     const CmdJumpRef payload_ref = CmdJumpRef{ ref.offset + 4u }; |
|     CmdJump_write(a, payload_ref, s, v_260, v_260BufferSize); |
| } |
| |
| // Ensures there is room to write at `cmd_ref`. |
| // While cmd_ref.offset is below cmd_limit nothing changes. Otherwise a new 1024-byte |
| // chunk is allocated, a jump command to it is written at the current position, and |
| // cmd_alloc / cmd_ref / cmd_limit are redirected to the new chunk (the limit leaves |
| // 144 bytes of headroom at its end). |
| // Returns false only when the allocation fails; the in/out arguments are then untouched. |
| static inline __attribute__((always_inline)) |
| bool alloc_cmd(thread Alloc& cmd_alloc, thread CmdRef& cmd_ref, thread uint& cmd_limit, device Memory& v_260, constant uint& v_260BufferSize) |
| { |
|     if (cmd_ref.offset >= cmd_limit) |
|     { |
|         const uint chunk_bytes = 1024u; |
|         const MallocResult chunk = malloc(chunk_bytes, v_260, v_260BufferSize); |
|         if (chunk.failed) |
|         { |
|             return false; |
|         } |
|         // Link the old chunk to the new one before moving the cursor. |
|         const CmdJump link = CmdJump{ chunk.alloc.offset }; |
|         Cmd_Jump_write(cmd_alloc, cmd_ref, link, v_260, v_260BufferSize); |
|         cmd_alloc = chunk.alloc; |
|         cmd_ref.offset = cmd_alloc.offset; |
|         cmd_limit = (cmd_alloc.offset + 1024u) - 144u; |
|     } |
|     return true; |
| } |
| |
| // Stores the CmdFill payload at `ref`: tile_ref, then backdrop converted to uint. |
| static inline __attribute__((always_inline)) |
| void CmdFill_write(thread const Alloc& a, thread const CmdFillRef& ref, thread const CmdFill& s, device Memory& v_260, constant uint& v_260BufferSize) |
| { |
|     const uint tile_ref_word = ref.offset >> 2u; |
|     const uint backdrop_word = tile_ref_word + 1u; |
|     const uint backdrop_bits = uint(s.backdrop); |
|     write_mem(a, tile_ref_word, s.tile_ref, v_260, v_260BufferSize); |
|     write_mem(a, backdrop_word, backdrop_bits, v_260, v_260BufferSize); |
| } |
| |
| // Emits a fill command at `ref`: tag word 1, then the CmdFill payload 4 bytes later. |
| static inline __attribute__((always_inline)) |
| void Cmd_Fill_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdFill& s, device Memory& v_260, constant uint& v_260BufferSize) |
| { |
|     const uint tag_word = ref.offset >> 2u; |
|     const uint tag = 1u; |
|     write_mem(a, tag_word, tag, v_260, v_260BufferSize); |
|     const CmdFillRef payload_ref = CmdFillRef{ ref.offset + 4u }; |
|     CmdFill_write(a, payload_ref, s, v_260, v_260BufferSize); |
| } |
| |
| // Emits a solid command at `ref`: a lone tag word 3 with no payload. |
| static inline __attribute__((always_inline)) |
| void Cmd_Solid_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_260, constant uint& v_260BufferSize) |
| { |
|     const uint tag_word = ref.offset >> 2u; |
|     const uint tag = 3u; |
|     write_mem(a, tag_word, tag, v_260, v_260BufferSize); |
| } |
| |
| // Stores the CmdStroke payload at `ref`: tile_ref, then half_width as raw float bits. |
| static inline __attribute__((always_inline)) |
| void CmdStroke_write(thread const Alloc& a, thread const CmdStrokeRef& ref, thread const CmdStroke& s, device Memory& v_260, constant uint& v_260BufferSize) |
| { |
|     const uint tile_ref_word = ref.offset >> 2u; |
|     const uint width_word = tile_ref_word + 1u; |
|     const uint width_bits = as_type<uint>(s.half_width); |
|     write_mem(a, tile_ref_word, s.tile_ref, v_260, v_260BufferSize); |
|     write_mem(a, width_word, width_bits, v_260, v_260BufferSize); |
| } |
| |
| // Emits a stroke command at `ref`: tag word 2, then the CmdStroke payload 4 bytes later. |
| static inline __attribute__((always_inline)) |
| void Cmd_Stroke_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdStroke& s, device Memory& v_260, constant uint& v_260BufferSize) |
| { |
|     const uint tag_word = ref.offset >> 2u; |
|     const uint tag = 2u; |
|     write_mem(a, tag_word, tag, v_260, v_260BufferSize); |
|     const CmdStrokeRef payload_ref = CmdStrokeRef{ ref.offset + 4u }; |
|     CmdStroke_write(a, payload_ref, s, v_260, v_260BufferSize); |
| } |
| |
| // Writes the coverage command for `tile` at `cmd_ref` and advances the cursor: |
| //   linewidth not below zero      -> stroke command (half width = 0.5 * linewidth), 12 bytes |
| //   linewidth < 0, no tile offset -> solid command, 4 bytes |
| //   linewidth < 0, tile offset    -> fill command, 12 bytes |
| static inline __attribute__((always_inline)) |
| void write_fill(thread const Alloc& alloc, thread CmdRef& cmd_ref, thread const Tile& tile, thread const float& linewidth, device Memory& v_260, constant uint& v_260BufferSize) |
| { |
|     // Keep the test as "not (linewidth < 0)" so that a NaN width still takes the stroke path. |
|     const bool is_fill = linewidth < 0.0; |
|     if (!is_fill) |
|     { |
|         const CmdStroke stroke = CmdStroke{ tile.tile.offset, 0.5 * linewidth }; |
|         Cmd_Stroke_write(alloc, cmd_ref, stroke, v_260, v_260BufferSize); |
|         cmd_ref.offset += 12u; |
|         return; |
|     } |
|     if (tile.tile.offset == 0u) |
|     { |
|         Cmd_Solid_write(alloc, cmd_ref, v_260, v_260BufferSize); |
|         cmd_ref.offset += 4u; |
|         return; |
|     } |
|     const CmdFill fill = CmdFill{ tile.tile.offset, tile.backdrop }; |
|     Cmd_Fill_write(alloc, cmd_ref, fill, v_260, v_260BufferSize); |
|     cmd_ref.offset += 12u; |
| } |
| |
| // Stores the CmdColor payload (one word: rgba_color) at `ref`. |
| static inline __attribute__((always_inline)) |
| void CmdColor_write(thread const Alloc& a, thread const CmdColorRef& ref, thread const CmdColor& s, device Memory& v_260, constant uint& v_260BufferSize) |
| { |
|     const uint word = ref.offset >> 2u; |
|     write_mem(a, word, s.rgba_color, v_260, v_260BufferSize); |
| } |
| |
| // Emits a color command at `ref`: tag word 5, then the CmdColor payload 4 bytes later. |
| static inline __attribute__((always_inline)) |
| void Cmd_Color_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdColor& s, device Memory& v_260, constant uint& v_260BufferSize) |
| { |
|     const uint tag_word = ref.offset >> 2u; |
|     const uint tag = 5u; |
|     write_mem(a, tag_word, tag, v_260, v_260BufferSize); |
|     const CmdColorRef payload_ref = CmdColorRef{ ref.offset + 4u }; |
|     CmdColor_write(a, payload_ref, s, v_260, v_260BufferSize); |
| } |
| |
| // Stores the CmdLinGrad payload at `ref` as four consecutive words: |
| // index, line_x, line_y, line_c (floats as raw bits). |
| static inline __attribute__((always_inline)) |
| void CmdLinGrad_write(thread const Alloc& a, thread const CmdLinGradRef& ref, thread const CmdLinGrad& s, device Memory& v_260, constant uint& v_260BufferSize) |
| { |
|     const uint base = ref.offset >> 2u; |
|     const uint words[4] = { |
|         s.index, |
|         as_type<uint>(s.line_x), |
|         as_type<uint>(s.line_y), |
|         as_type<uint>(s.line_c) |
|     }; |
|     // Words are written in ascending order, matching the field order above. |
|     for (uint i = 0u; i < 4u; i++) |
|     { |
|         const uint dst = base + i; |
|         write_mem(a, dst, words[i], v_260, v_260BufferSize); |
|     } |
| } |
| |
| // Emits a linear-gradient command at `ref`: tag word 6, then the CmdLinGrad payload 4 bytes later. |
| static inline __attribute__((always_inline)) |
| void Cmd_LinGrad_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdLinGrad& s, device Memory& v_260, constant uint& v_260BufferSize) |
| { |
|     const uint tag_word = ref.offset >> 2u; |
|     const uint tag = 6u; |
|     write_mem(a, tag_word, tag, v_260, v_260BufferSize); |
|     const CmdLinGradRef payload_ref = CmdLinGradRef{ ref.offset + 4u }; |
|     CmdLinGrad_write(a, payload_ref, s, v_260, v_260BufferSize); |
| } |
| |
| // Stores the CmdRadGrad payload at `ref` as eleven consecutive words: |
| // index, mat.x, mat.y, mat.z, mat.w, xlat.x, xlat.y, c1.x, c1.y, ra, roff |
| // (floats as raw bits). |
| static inline __attribute__((always_inline)) |
| void CmdRadGrad_write(thread const Alloc& a, thread const CmdRadGradRef& ref, thread const CmdRadGrad& s, device Memory& v_260, constant uint& v_260BufferSize) |
| { |
|     const uint base = ref.offset >> 2u; |
|     const uint words[11] = { |
|         s.index, |
|         as_type<uint>(s.mat.x), |
|         as_type<uint>(s.mat.y), |
|         as_type<uint>(s.mat.z), |
|         as_type<uint>(s.mat.w), |
|         as_type<uint>(s.xlat.x), |
|         as_type<uint>(s.xlat.y), |
|         as_type<uint>(s.c1.x), |
|         as_type<uint>(s.c1.y), |
|         as_type<uint>(s.ra), |
|         as_type<uint>(s.roff) |
|     }; |
|     // Words are written in ascending order, matching the field order above. |
|     for (uint i = 0u; i < 11u; i++) |
|     { |
|         const uint dst = base + i; |
|         write_mem(a, dst, words[i], v_260, v_260BufferSize); |
|     } |
| } |
| |
| // Emits a radial-gradient command at `ref`: tag word 7, then the CmdRadGrad payload 4 bytes later. |
| static inline __attribute__((always_inline)) |
| void Cmd_RadGrad_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdRadGrad& s, device Memory& v_260, constant uint& v_260BufferSize) |
| { |
|     const uint tag_word = ref.offset >> 2u; |
|     const uint tag = 7u; |
|     write_mem(a, tag_word, tag, v_260, v_260BufferSize); |
|     const CmdRadGradRef payload_ref = CmdRadGradRef{ ref.offset + 4u }; |
|     CmdRadGrad_write(a, payload_ref, s, v_260, v_260BufferSize); |
| } |
| |
| // Stores the CmdImage payload at `ref`: index, then the offset packed into one word |
| // (x in the low 16 bits, y shifted into the high 16 bits). |
| static inline __attribute__((always_inline)) |
| void CmdImage_write(thread const Alloc& a, thread const CmdImageRef& ref, thread const CmdImage& s, device Memory& v_260, constant uint& v_260BufferSize) |
| { |
|     const uint index_word = ref.offset >> 2u; |
|     const uint offset_word = index_word + 1u; |
|     const uint x_bits = uint(s.offset.x) & 65535u; |
|     const uint y_bits = uint(s.offset.y) << 16u; |
|     const uint packed_offset = x_bits | y_bits; |
|     write_mem(a, index_word, s.index, v_260, v_260BufferSize); |
|     write_mem(a, offset_word, packed_offset, v_260, v_260BufferSize); |
| } |
| |
| // Emits an image command at `ref`: tag word 8, then the CmdImage payload 4 bytes later. |
| static inline __attribute__((always_inline)) |
| void Cmd_Image_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdImage& s, device Memory& v_260, constant uint& v_260BufferSize) |
| { |
|     const uint tag_word = ref.offset >> 2u; |
|     const uint tag = 8u; |
|     write_mem(a, tag_word, tag, v_260, v_260BufferSize); |
|     const CmdImageRef payload_ref = CmdImageRef{ ref.offset + 4u }; |
|     CmdImage_write(a, payload_ref, s, v_260, v_260BufferSize); |
| } |
| |
| // Emits a begin-clip command at `ref`: a lone tag word 9 with no payload. |
| static inline __attribute__((always_inline)) |
| void Cmd_BeginClip_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_260, constant uint& v_260BufferSize) |
| { |
|     const uint tag_word = ref.offset >> 2u; |
|     const uint tag = 9u; |
|     write_mem(a, tag_word, tag, v_260, v_260BufferSize); |
| } |
| |
| // Stores the CmdEndClip payload (one word: blend) at `ref`. |
| static inline __attribute__((always_inline)) |
| void CmdEndClip_write(thread const Alloc& a, thread const CmdEndClipRef& ref, thread const CmdEndClip& s, device Memory& v_260, constant uint& v_260BufferSize) |
| { |
|     const uint word = ref.offset >> 2u; |
|     write_mem(a, word, s.blend, v_260, v_260BufferSize); |
| } |
| |
| // Emits an end-clip command at `ref`: tag word 10, then the CmdEndClip payload 4 bytes later. |
| static inline __attribute__((always_inline)) |
| void Cmd_EndClip_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdEndClip& s, device Memory& v_260, constant uint& v_260BufferSize) |
| { |
|     const uint tag_word = ref.offset >> 2u; |
|     const uint tag = 10u; |
|     write_mem(a, tag_word, tag, v_260, v_260BufferSize); |
|     const CmdEndClipRef payload_ref = CmdEndClipRef{ ref.offset + 4u }; |
|     CmdEndClip_write(a, payload_ref, s, v_260, v_260BufferSize); |
| } |
| |
| // Emits an end command at `ref`: a lone tag word 0 with no payload. |
| static inline __attribute__((always_inline)) |
| void Cmd_End_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_260, constant uint& v_260BufferSize) |
| { |
|     const uint tag_word = ref.offset >> 2u; |
|     const uint tag = 0u; |
|     write_mem(a, tag_word, tag, v_260, v_260BufferSize); |
| } |
| |
| kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_260 [[buffer(0)]], const device ConfigBuf& _1005 [[buffer(1)]], const device SceneBuf& _1372 [[buffer(2)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) |
| { |
| threadgroup uint sh_bitmaps[8][256]; |
| threadgroup Alloc sh_part_elements[256]; |
| threadgroup uint sh_part_count[256]; |
| threadgroup uint sh_elements[256]; |
| threadgroup uint sh_tile_stride[256]; |
| threadgroup uint sh_tile_width[256]; |
| threadgroup uint sh_tile_x0[256]; |
| threadgroup uint sh_tile_y0[256]; |
| threadgroup uint sh_tile_base[256]; |
| threadgroup uint sh_tile_count[256]; |
| constant uint& v_260BufferSize = spvBufferSizeConstants[0]; |
| uint width_in_bins = ((_1005.conf.width_in_tiles + 16u) - 1u) / 16u; |
| uint bin_ix = (width_in_bins * gl_WorkGroupID.y) + gl_WorkGroupID.x; |
| uint partition_ix = 0u; |
| uint n_partitions = ((_1005.conf.n_elements + 256u) - 1u) / 256u; |
| uint th_ix = gl_LocalInvocationID.x; |
| uint bin_tile_x = 16u * gl_WorkGroupID.x; |
| uint bin_tile_y = 16u * gl_WorkGroupID.y; |
| uint tile_x = gl_LocalInvocationID.x % 16u; |
| uint tile_y = gl_LocalInvocationID.x / 16u; |
| uint this_tile_ix = (((bin_tile_y + tile_y) * _1005.conf.width_in_tiles) + bin_tile_x) + tile_x; |
| Alloc param; |
| param.offset = _1005.conf.ptcl_alloc.offset; |
| uint param_1 = this_tile_ix * 1024u; |
| uint param_2 = 1024u; |
| Alloc cmd_alloc = slice_mem(param, param_1, param_2); |
| CmdRef cmd_ref = CmdRef{ cmd_alloc.offset }; |
| uint cmd_limit = (cmd_ref.offset + 1024u) - 144u; |
| uint clip_depth = 0u; |
| uint clip_zero_depth = 0u; |
| uint rd_ix = 0u; |
| uint wr_ix = 0u; |
| uint part_start_ix = 0u; |
| uint ready_ix = 0u; |
| uint drawmonoid_start = _1005.conf.drawmonoid_alloc.offset >> uint(2); |
| uint drawtag_start = _1005.conf.drawtag_offset >> uint(2); |
| uint drawdata_start = _1005.conf.drawdata_offset >> uint(2); |
| uint drawinfo_start = _1005.conf.drawinfo_alloc.offset >> uint(2); |
| bool mem_ok = v_260.mem_error == 0u; |
| Alloc param_3; |
| Alloc param_5; |
| uint _1304; |
| uint element_ix; |
| Alloc param_14; |
| uint tile_count; |
| uint _1605; |
| float linewidth; |
| CmdLinGrad cmd_lin; |
| CmdRadGrad cmd_rad; |
| while (true) |
| { |
| for (uint i = 0u; i < 8u; i++) |
| { |
| sh_bitmaps[i][th_ix] = 0u; |
| } |
| bool _1356; |
| for (;;) |
| { |
| if ((ready_ix == wr_ix) && (partition_ix < n_partitions)) |
| { |
| part_start_ix = ready_ix; |
| uint count = 0u; |
| bool _1154 = th_ix < 256u; |
| bool _1162; |
| if (_1154) |
| { |
| _1162 = (partition_ix + th_ix) < n_partitions; |
| } |
| else |
| { |
| _1162 = _1154; |
| } |
| if (_1162) |
| { |
| uint in_ix = (_1005.conf.bin_alloc.offset >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u); |
| param_3.offset = _1005.conf.bin_alloc.offset; |
| uint param_4 = in_ix; |
| count = read_mem(param_3, param_4, v_260, v_260BufferSize); |
| param_5.offset = _1005.conf.bin_alloc.offset; |
| uint param_6 = in_ix + 1u; |
| uint offset = read_mem(param_5, param_6, v_260, v_260BufferSize); |
| uint param_7 = offset; |
| uint param_8 = count * 4u; |
| bool param_9 = mem_ok; |
| sh_part_elements[th_ix] = new_alloc(param_7, param_8, param_9); |
| } |
| for (uint i_1 = 0u; i_1 < 8u; i_1++) |
| { |
| if (th_ix < 256u) |
| { |
| sh_part_count[th_ix] = count; |
| } |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| if (th_ix < 256u) |
| { |
| if (th_ix >= (1u << i_1)) |
| { |
| count += sh_part_count[th_ix - (1u << i_1)]; |
| } |
| } |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| } |
| if (th_ix < 256u) |
| { |
| sh_part_count[th_ix] = part_start_ix + count; |
| } |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| ready_ix = sh_part_count[255]; |
| partition_ix += 256u; |
| } |
| uint ix = rd_ix + th_ix; |
| if (((ix >= wr_ix) && (ix < ready_ix)) && mem_ok) |
| { |
| uint part_ix = 0u; |
| for (uint i_2 = 0u; i_2 < 8u; i_2++) |
| { |
| uint probe = part_ix + (128u >> i_2); |
| if (ix >= sh_part_count[probe - 1u]) |
| { |
| part_ix = probe; |
| } |
| } |
| if (part_ix > 0u) |
| { |
| _1304 = sh_part_count[part_ix - 1u]; |
| } |
| else |
| { |
| _1304 = part_start_ix; |
| } |
| ix -= _1304; |
| Alloc bin_alloc = sh_part_elements[part_ix]; |
| BinInstanceRef inst_ref = BinInstanceRef{ bin_alloc.offset }; |
| BinInstanceRef param_10 = inst_ref; |
| uint param_11 = ix; |
| Alloc param_12 = bin_alloc; |
| BinInstanceRef param_13 = BinInstance_index(param_10, param_11); |
| BinInstance inst = BinInstance_read(param_12, param_13, v_260, v_260BufferSize); |
| sh_elements[th_ix] = inst.element_ix; |
| } |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| wr_ix = min((rd_ix + 256u), ready_ix); |
| bool _1346 = (wr_ix - rd_ix) < 256u; |
| if (_1346) |
| { |
| _1356 = (wr_ix < ready_ix) || (partition_ix < n_partitions); |
| } |
| else |
| { |
| _1356 = _1346; |
| } |
| if (_1356) |
| { |
| continue; |
| } |
| else |
| { |
| break; |
| } |
| } |
| uint tag = 0u; |
| if ((th_ix + rd_ix) < wr_ix) |
| { |
| element_ix = sh_elements[th_ix]; |
| tag = _1372.scene[drawtag_start + element_ix]; |
| } |
| switch (tag) |
| { |
| case 68u: |
| case 72u: |
| case 276u: |
| case 732u: |
| case 5u: |
| case 37u: |
| { |
| uint drawmonoid_base = drawmonoid_start + (4u * element_ix); |
| uint path_ix = v_260.memory[drawmonoid_base]; |
| param_14.offset = _1005.conf.tile_alloc.offset; |
| PathRef param_15 = PathRef{ _1005.conf.tile_alloc.offset + (path_ix * 12u) }; |
| Path path = Path_read(param_14, param_15, v_260, v_260BufferSize); |
| uint stride = path.bbox.z - path.bbox.x; |
| sh_tile_stride[th_ix] = stride; |
| int dx = int(path.bbox.x) - int(bin_tile_x); |
| int dy = int(path.bbox.y) - int(bin_tile_y); |
| int x0 = clamp(dx, 0, 16); |
| int y0 = clamp(dy, 0, 16); |
| int x1 = clamp(int(path.bbox.z) - int(bin_tile_x), 0, 16); |
| int y1 = clamp(int(path.bbox.w) - int(bin_tile_y), 0, 16); |
| sh_tile_width[th_ix] = uint(x1 - x0); |
| sh_tile_x0[th_ix] = uint(x0); |
| sh_tile_y0[th_ix] = uint(y0); |
| tile_count = uint(x1 - x0) * uint(y1 - y0); |
| uint base = path.tiles.offset - (((uint(dy) * stride) + uint(dx)) * 8u); |
| sh_tile_base[th_ix] = base; |
| uint param_16 = path.tiles.offset; |
| uint param_17 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; |
| bool param_18 = mem_ok; |
| Alloc path_alloc = new_alloc(param_16, param_17, param_18); |
| uint param_19 = th_ix; |
| Alloc param_20 = path_alloc; |
| write_tile_alloc(param_19, param_20); |
| break; |
| } |
| default: |
| { |
| tile_count = 0u; |
| break; |
| } |
| } |
| sh_tile_count[th_ix] = tile_count; |
| for (uint i_3 = 0u; i_3 < 8u; i_3++) |
| { |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| if (th_ix >= (1u << i_3)) |
| { |
| tile_count += sh_tile_count[th_ix - (1u << i_3)]; |
| } |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| sh_tile_count[th_ix] = tile_count; |
| } |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| uint total_tile_count = sh_tile_count[255]; |
| for (uint ix_1 = th_ix; ix_1 < total_tile_count; ix_1 += 256u) |
| { |
| uint el_ix = 0u; |
| for (uint i_4 = 0u; i_4 < 8u; i_4++) |
| { |
| uint probe_1 = el_ix + (128u >> i_4); |
| if (ix_1 >= sh_tile_count[probe_1 - 1u]) |
| { |
| el_ix = probe_1; |
| } |
| } |
| uint element_ix_1 = sh_elements[el_ix]; |
| uint tag_1 = _1372.scene[drawtag_start + element_ix_1]; |
| if (el_ix > 0u) |
| { |
| _1605 = sh_tile_count[el_ix - 1u]; |
| } |
| else |
| { |
| _1605 = 0u; |
| } |
| uint seq_ix = ix_1 - _1605; |
| uint width = sh_tile_width[el_ix]; |
| uint x = sh_tile_x0[el_ix] + (seq_ix % width); |
| uint y = sh_tile_y0[el_ix] + (seq_ix / width); |
| bool include_tile = false; |
| if (mem_ok) |
| { |
| uint param_21 = el_ix; |
| bool param_22 = mem_ok; |
| Alloc param_23 = read_tile_alloc(param_21, param_22, v_260, v_260BufferSize); |
| TileRef param_24 = TileRef{ sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) }; |
| Tile tile = Tile_read(param_23, param_24, v_260, v_260BufferSize); |
| bool is_clip = (tag_1 & 1u) != 0u; |
| bool is_blend = false; |
| if (is_clip) |
| { |
| uint drawmonoid_base_1 = drawmonoid_start + (4u * element_ix_1); |
| uint scene_offset = v_260.memory[drawmonoid_base_1 + 2u]; |
| uint dd = drawdata_start + (scene_offset >> uint(2)); |
| uint blend = _1372.scene[dd]; |
| is_blend = blend != 3u; |
| } |
| bool _1692 = tile.tile.offset != 0u; |
| bool _1701; |
| if (!_1692) |
| { |
| _1701 = (tile.backdrop == 0) == is_clip; |
| } |
| else |
| { |
| _1701 = _1692; |
| } |
| include_tile = _1701 || is_blend; |
| } |
| if (include_tile) |
| { |
| uint el_slice = el_ix / 32u; |
| uint el_mask = 1u << (el_ix & 31u); |
| uint _1723 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&sh_bitmaps[el_slice][(y * 16u) + x], el_mask, memory_order_relaxed); |
| } |
| } |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| uint slice_ix = 0u; |
| uint bitmap = sh_bitmaps[0][th_ix]; |
| while (mem_ok) |
| { |
| if (bitmap == 0u) |
| { |
| slice_ix++; |
| if (slice_ix == 8u) |
| { |
| break; |
| } |
| bitmap = sh_bitmaps[slice_ix][th_ix]; |
| if (bitmap == 0u) |
| { |
| continue; |
| } |
| } |
| uint element_ref_ix = (slice_ix * 32u) + uint(int(spvFindLSB(bitmap))); |
| uint element_ix_2 = sh_elements[element_ref_ix]; |
| bitmap &= (bitmap - 1u); |
| uint drawtag = _1372.scene[drawtag_start + element_ix_2]; |
| if (clip_zero_depth == 0u) |
| { |
| uint param_25 = element_ref_ix; |
| bool param_26 = mem_ok; |
| Alloc param_27 = read_tile_alloc(param_25, param_26, v_260, v_260BufferSize); |
| TileRef param_28 = TileRef{ sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; |
| Tile tile_1 = Tile_read(param_27, param_28, v_260, v_260BufferSize); |
| uint drawmonoid_base_2 = drawmonoid_start + (4u * element_ix_2); |
| uint scene_offset_1 = v_260.memory[drawmonoid_base_2 + 2u]; |
| uint info_offset = v_260.memory[drawmonoid_base_2 + 3u]; |
| uint dd_1 = drawdata_start + (scene_offset_1 >> uint(2)); |
| uint di = drawinfo_start + (info_offset >> uint(2)); |
| switch (drawtag) |
| { |
| case 68u: |
| { |
| linewidth = as_type<float>(v_260.memory[di]); |
| Alloc param_29 = cmd_alloc; |
| CmdRef param_30 = cmd_ref; |
| uint param_31 = cmd_limit; |
| bool _1848 = alloc_cmd(param_29, param_30, param_31, v_260, v_260BufferSize); |
| cmd_alloc = param_29; |
| cmd_ref = param_30; |
| cmd_limit = param_31; |
| if (!_1848) |
| { |
| break; |
| } |
| Alloc param_32 = cmd_alloc; |
| CmdRef param_33 = cmd_ref; |
| Tile param_34 = tile_1; |
| float param_35 = linewidth; |
| write_fill(param_32, param_33, param_34, param_35, v_260, v_260BufferSize); |
| cmd_ref = param_33; |
| uint rgba = _1372.scene[dd_1]; |
| Alloc param_36 = cmd_alloc; |
| CmdRef param_37 = cmd_ref; |
| CmdColor param_38 = CmdColor{ rgba }; |
| Cmd_Color_write(param_36, param_37, param_38, v_260, v_260BufferSize); |
| cmd_ref.offset += 8u; |
| break; |
| } |
| case 276u: |
| { |
| Alloc param_39 = cmd_alloc; |
| CmdRef param_40 = cmd_ref; |
| uint param_41 = cmd_limit; |
| bool _1889 = alloc_cmd(param_39, param_40, param_41, v_260, v_260BufferSize); |
| cmd_alloc = param_39; |
| cmd_ref = param_40; |
| cmd_limit = param_41; |
| if (!_1889) |
| { |
| break; |
| } |
| linewidth = as_type<float>(v_260.memory[di]); |
| Alloc param_42 = cmd_alloc; |
| CmdRef param_43 = cmd_ref; |
| Tile param_44 = tile_1; |
| float param_45 = linewidth; |
| write_fill(param_42, param_43, param_44, param_45, v_260, v_260BufferSize); |
| cmd_ref = param_43; |
| cmd_lin.index = _1372.scene[dd_1]; |
| cmd_lin.line_x = as_type<float>(v_260.memory[di + 1u]); |
| cmd_lin.line_y = as_type<float>(v_260.memory[di + 2u]); |
| cmd_lin.line_c = as_type<float>(v_260.memory[di + 3u]); |
| Alloc param_46 = cmd_alloc; |
| CmdRef param_47 = cmd_ref; |
| CmdLinGrad param_48 = cmd_lin; |
| Cmd_LinGrad_write(param_46, param_47, param_48, v_260, v_260BufferSize); |
| cmd_ref.offset += 20u; |
| break; |
| } |
| case 732u: |
| { |
| Alloc param_49 = cmd_alloc; |
| CmdRef param_50 = cmd_ref; |
| uint param_51 = cmd_limit; |
| bool _1953 = alloc_cmd(param_49, param_50, param_51, v_260, v_260BufferSize); |
| cmd_alloc = param_49; |
| cmd_ref = param_50; |
| cmd_limit = param_51; |
| if (!_1953) |
| { |
| break; |
| } |
| linewidth = as_type<float>(v_260.memory[di]); |
| Alloc param_52 = cmd_alloc; |
| CmdRef param_53 = cmd_ref; |
| Tile param_54 = tile_1; |
| float param_55 = linewidth; |
| write_fill(param_52, param_53, param_54, param_55, v_260, v_260BufferSize); |
| cmd_ref = param_53; |
| cmd_rad.index = _1372.scene[dd_1]; |
| cmd_rad.mat = as_type<float4>(uint4(v_260.memory[di + 1u], v_260.memory[di + 2u], v_260.memory[di + 3u], v_260.memory[di + 4u])); |
| cmd_rad.xlat = as_type<float2>(uint2(v_260.memory[di + 5u], v_260.memory[di + 6u])); |
| cmd_rad.c1 = as_type<float2>(uint2(v_260.memory[di + 7u], v_260.memory[di + 8u])); |
| cmd_rad.ra = as_type<float>(v_260.memory[di + 9u]); |
| cmd_rad.roff = as_type<float>(v_260.memory[di + 10u]); |
| Alloc param_56 = cmd_alloc; |
| CmdRef param_57 = cmd_ref; |
| CmdRadGrad param_58 = cmd_rad; |
| Cmd_RadGrad_write(param_56, param_57, param_58, v_260, v_260BufferSize); |
| cmd_ref.offset += 48u; |
| break; |
| } |
| case 72u: |
| { |
| linewidth = as_type<float>(v_260.memory[di]); |
| Alloc param_59 = cmd_alloc; |
| CmdRef param_60 = cmd_ref; |
| uint param_61 = cmd_limit; |
| bool _2059 = alloc_cmd(param_59, param_60, param_61, v_260, v_260BufferSize); |
| cmd_alloc = param_59; |
| cmd_ref = param_60; |
| cmd_limit = param_61; |
| if (!_2059) |
| { |
| break; |
| } |
| Alloc param_62 = cmd_alloc; |
| CmdRef param_63 = cmd_ref; |
| Tile param_64 = tile_1; |
| float param_65 = linewidth; |
| write_fill(param_62, param_63, param_64, param_65, v_260, v_260BufferSize); |
| cmd_ref = param_63; |
| uint index = _1372.scene[dd_1]; |
| uint raw1 = _1372.scene[dd_1 + 1u]; |
| int2 offset_1 = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); |
| Alloc param_66 = cmd_alloc; |
| CmdRef param_67 = cmd_ref; |
| CmdImage param_68 = CmdImage{ index, offset_1 }; |
| Cmd_Image_write(param_66, param_67, param_68, v_260, v_260BufferSize); |
| cmd_ref.offset += 12u; |
| break; |
| } |
| case 5u: |
| { |
| bool _2112 = tile_1.tile.offset == 0u; |
| bool _2118; |
| if (_2112) |
| { |
| _2118 = tile_1.backdrop == 0; |
| } |
| else |
| { |
| _2118 = _2112; |
| } |
| if (_2118) |
| { |
| clip_zero_depth = clip_depth + 1u; |
| } |
| else |
| { |
| Alloc param_69 = cmd_alloc; |
| CmdRef param_70 = cmd_ref; |
| uint param_71 = cmd_limit; |
| bool _2130 = alloc_cmd(param_69, param_70, param_71, v_260, v_260BufferSize); |
| cmd_alloc = param_69; |
| cmd_ref = param_70; |
| cmd_limit = param_71; |
| if (!_2130) |
| { |
| break; |
| } |
| Alloc param_72 = cmd_alloc; |
| CmdRef param_73 = cmd_ref; |
| Cmd_BeginClip_write(param_72, param_73, v_260, v_260BufferSize); |
| cmd_ref.offset += 4u; |
| } |
| clip_depth++; |
| break; |
| } |
| case 37u: |
| { |
| clip_depth--; |
| Alloc param_74 = cmd_alloc; |
| CmdRef param_75 = cmd_ref; |
| uint param_76 = cmd_limit; |
| bool _2158 = alloc_cmd(param_74, param_75, param_76, v_260, v_260BufferSize); |
| cmd_alloc = param_74; |
| cmd_ref = param_75; |
| cmd_limit = param_76; |
| if (!_2158) |
| { |
| break; |
| } |
| Alloc param_77 = cmd_alloc; |
| CmdRef param_78 = cmd_ref; |
| Tile param_79 = tile_1; |
| float param_80 = -1.0; |
| write_fill(param_77, param_78, param_79, param_80, v_260, v_260BufferSize); |
| cmd_ref = param_78; |
| uint blend_1 = _1372.scene[dd_1]; |
| Alloc param_81 = cmd_alloc; |
| CmdRef param_82 = cmd_ref; |
| CmdEndClip param_83 = CmdEndClip{ blend_1 }; |
| Cmd_EndClip_write(param_81, param_82, param_83, v_260, v_260BufferSize); |
| cmd_ref.offset += 8u; |
| break; |
| } |
| } |
| } |
| else |
| { |
| switch (drawtag) |
| { |
| case 5u: |
| { |
| clip_depth++; |
| break; |
| } |
| case 37u: |
| { |
| if (clip_depth == clip_zero_depth) |
| { |
| clip_zero_depth = 0u; |
| } |
| clip_depth--; |
| break; |
| } |
| } |
| } |
| } |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| rd_ix += 256u; |
| if ((rd_ix >= ready_ix) && (partition_ix >= n_partitions)) |
| { |
| break; |
| } |
| } |
| bool _2228 = (bin_tile_x + tile_x) < _1005.conf.width_in_tiles; |
| bool _2237; |
| if (_2228) |
| { |
| _2237 = (bin_tile_y + tile_y) < _1005.conf.height_in_tiles; |
| } |
| else |
| { |
| _2237 = _2228; |
| } |
| if (_2237) |
| { |
| Alloc param_84 = cmd_alloc; |
| CmdRef param_85 = cmd_ref; |
| Cmd_End_write(param_84, param_85, v_260, v_260BufferSize); |
| } |
| } |
| |