tests/sksl/compute/Threadgroup.metal - skia - Git at Google

 #include <metal_stdlib>
 #include <simd/simd.h>
 using namespace metal;
 struct Inputs {
     uint3 sk_ThreadPosition;
 };
 struct inputs {
     float in_data[1];
 };
 struct outputs {
     float out_data[1];
 };
 struct Globals {
     const device inputs* _anonInterface0;
     device outputs* _anonInterface1;
 };
 struct Threadgroups {
     array<float, 1024> shared_data;
 };
 void store_vIf(threadgroup Threadgroups& _threadgroups, uint i, float value) {
     _threadgroups.shared_data[i] = value;
 }
 kernel void computeMain(const device inputs& _anonInterface0 [[buffer(0)]], device outputs& _anonInterface1 [[buffer(1)]], uint3 sk_ThreadPosition [[thread_position_in_grid]]) {
     Globals _globals{&_anonInterface0, &_anonInterface1};
     (void)_globals;
     threadgroup Threadgroups _threadgroups{{}};
     (void)_threadgroups;
     Inputs _in = { sk_ThreadPosition };
     uint id = _in.sk_ThreadPosition.x;
     uint rd_id;
     uint wr_id;
     uint mask;
     _threadgroups.shared_data[id * 2u] = _globals._anonInterface0->in_data[id * 2u];
     _threadgroups.shared_data[id * 2u + 1u] = _globals._anonInterface0->in_data[id * 2u + 1u];
     threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
     const uint steps = 10u;
     for (uint step = 0u;step < steps; step++) {
         mask = (1u << step) - 1u;
         rd_id = ((id >> step) << step + 1u) + mask;
         wr_id = (rd_id + 1u) + (id & mask);
         store_vIf(_threadgroups, wr_id, _threadgroups.shared_data[wr_id] + _threadgroups.shared_data[rd_id]);
         threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
     }
     _globals._anonInterface1->out_data[id * 2u] = _threadgroups.shared_data[id * 2u];
     _globals._anonInterface1->out_data[id * 2u + 1u] = _threadgroups.shared_data[id * 2u + 1u];
     return;
 }
	#include <metal_stdlib>
	#include <simd/simd.h>
	using namespace metal;
	struct Inputs {
	uint3 sk_ThreadPosition;
	};
	struct inputs {
	float in_data[1];
	};
	struct outputs {
	float out_data[1];
	};
	struct Globals {
	const device inputs* _anonInterface0;
	device outputs* _anonInterface1;
	};
	struct Threadgroups {
	array<float, 1024> shared_data;
	};
	void store_vIf(threadgroup Threadgroups& _threadgroups, uint i, float value) {
	_threadgroups.shared_data[i] = value;
	}
	kernel void computeMain(const device inputs& _anonInterface0 [[buffer(0)]], device outputs& _anonInterface1 [[buffer(1)]], uint3 sk_ThreadPosition [[thread_position_in_grid]]) {
	Globals _globals{&_anonInterface0, &_anonInterface1};
	(void)_globals;
	threadgroup Threadgroups _threadgroups{{}};
	(void)_threadgroups;
	Inputs _in = { sk_ThreadPosition };
	uint id = _in.sk_ThreadPosition.x;
	uint rd_id;
	uint wr_id;
	uint mask;
	_threadgroups.shared_data[id * 2u] = _globals._anonInterface0->in_data[id * 2u];
	_threadgroups.shared_data[id * 2u + 1u] = _globals._anonInterface0->in_data[id * 2u + 1u];
	threadgroup_barrier(mem_flags::mem_device \| mem_flags::mem_threadgroup \| mem_flags::mem_texture);
	const uint steps = 10u;
	for (uint step = 0u;step < steps; step++) {
	mask = (1u << step) - 1u;
	rd_id = ((id >> step) << step + 1u) + mask;
	wr_id = (rd_id + 1u) + (id & mask);
	store_vIf(_threadgroups, wr_id, _threadgroups.shared_data[wr_id] + _threadgroups.shared_data[rd_id]);
	threadgroup_barrier(mem_flags::mem_device \| mem_flags::mem_threadgroup \| mem_flags::mem_texture);
	}
	_globals._anonInterface1->out_data[id * 2u] = _threadgroups.shared_data[id * 2u];
	_globals._anonInterface1->out_data[id * 2u + 1u] = _threadgroups.shared_data[id * 2u + 1u];
	return;
	}