blob: 1bda1817c3509cc3bfbc0852e4862851703d4878 [file] [log] [blame]
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
struct Element
{
uint data;
uint flag;
};
struct DataBuf
{
Element data[1];
};
struct ControlBuf
{
uint failures;
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
uint permute_flag_ix(thread const uint& data_ix)
{
return (data_ix * 419u) & 65535u;
}
kernel void main0(volatile device DataBuf& data_buf [[buffer(0)]], device ControlBuf& control_buf [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
data_buf.data[gl_GlobalInvocationID.x].data = 1u;
threadgroup_barrier(mem_flags::mem_device);
uint param = gl_GlobalInvocationID.x;
uint write_flag_ix = permute_flag_ix(param);
atomic_store_explicit((volatile device atomic_uint*)&data_buf.data[write_flag_ix].flag, 1u, memory_order_relaxed);
uint read_ix = (gl_GlobalInvocationID.x * 4099u) & 65535u;
uint param_1 = read_ix;
uint read_flag_ix = permute_flag_ix(param_1);
uint _58 = atomic_load_explicit((volatile device atomic_uint*)&data_buf.data[read_flag_ix].flag, memory_order_relaxed);
uint flag = _58;
threadgroup_barrier(mem_flags::mem_device);
uint data = data_buf.data[read_ix].data;
if (flag > data)
{
uint _73 = atomic_fetch_add_explicit((device atomic_uint*)&control_buf.failures, 1u, memory_order_relaxed);
}
}