| struct Monoid /* Value being scanned: a single uint that combine_monoid merges by addition. */ |
| { |
| uint element; |
| }; |
| |
| struct State /* Not referenced by name in the visible code; its field order (flag, aggregate, prefix) presumably mirrors the 12-byte per-partition records that comp_main addresses in _43 - TODO confirm. */ |
| { |
| uint flag; /* comp_main stores 1u after writing a partition aggregate and 2u after writing its prefix */ |
| Monoid aggregate; /* comp_main stores the sum of one partition's elements in this slot */ |
| Monoid prefix; /* comp_main stores the partition's inclusive prefix (sum up to and including the partition) in this slot */ |
| }; |
| |
| static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u); /* same 512x1x1 shape as the numthreads attribute on main; not read elsewhere in the visible code */ |
| |
| static const Monoid _185 = { 0u }; /* zero-valued Monoid; comp_main uses it as the starting value of the exclusive prefix */ |
| |
| globallycoherent RWByteAddressBuffer _43 : register(u2); /* shared scan state: byte 0 is the partition counter bumped with InterlockedAdd; partition p keeps flag / aggregate / prefix at p * 12 + 4 / + 8 / + 12 */ |
| ByteAddressBuffer _67 : register(t0); /* read-only input, loaded as 4-byte uints */ |
| RWByteAddressBuffer _372 : register(u1); /* output, stored as 4-byte uints at the same element indices as the input */ |
| |
| static uint3 gl_LocalInvocationID; /* per-thread copy of the group-thread id; main fills it in before calling comp_main */ |
| struct SPIRV_Cross_Input /* Entry-point input: carries only the SV_GroupThreadID system value. */ |
| { |
| uint3 gl_LocalInvocationID : SV_GroupThreadID; |
| }; |
| |
| groupshared uint sh_part_ix; /* partition index claimed by thread 0 and read by every thread in the group */ |
| groupshared Monoid sh_scratch[512]; /* one slot per thread, used for the group-wide scan of per-thread totals */ |
| groupshared uint sh_flag; /* flag value loaded by thread 511 during look-back and read by every thread */ |
| groupshared Monoid sh_prefix; /* exclusive prefix for this partition, written by thread 511 and read by every thread */ |
| |
| /* Returns the combination of two Monoid values: a Monoid whose element is a.element + b.element. */ |
| Monoid combine_monoid(Monoid a, Monoid b) |
| { |
| Monoid combined; |
| combined.element = a.element + b.element; |
| return combined; |
| } |
| |
| /* Computes an inclusive running sum of the uints in _67 and writes it to _372. */ |
| /* Each workgroup claims one partition of 8192 consecutive elements (512 threads x 16 */ |
| /* elements each), scans it locally, then walks back over earlier partitions' records */ |
| /* in _43 to obtain the sum of everything that precedes the partition. */ |
| /* _43 layout as addressed here: byte 0 is the partition counter; partition p keeps its */ |
| /* flag at p * 12 + 4, its aggregate at p * 12 + 8 and its inclusive prefix at p * 12 + 12. */ |
| /* Flag 1u means the aggregate has been stored, 2u means the inclusive prefix has been stored. */ |
| /* NOTE(review): presumably _43 is zero-filled before dispatch - confirm against the host code. */ |
| /* NOTE(review): this file looks machine-generated; mirror the barrier fix below in the shader it was generated from. */ |
| /* Synchronisation fix: a group barrier follows each read of sh_flag inside the look-back loop, */ |
| /* so thread 511 cannot overwrite sh_flag while other threads still have to read the old value. */ |
| void comp_main() |
| { |
| /* Thread 0 claims the next partition index by atomically bumping the counter at byte 0. */ |
| if (gl_LocalInvocationID.x == 0u) |
| { |
| uint _47; |
| _43.InterlockedAdd(0, 1u, _47); |
| sh_part_ix = _47; |
| } |
| GroupMemoryBarrierWithGroupSync(); |
| uint part_ix = sh_part_ix; |
| /* First element handled by this thread: 8192 elements per partition, 16 per thread. */ |
| uint ix = (part_ix * 8192u) + (gl_LocalInvocationID.x * 16u); |
| Monoid _71; |
| _71.element = _67.Load(ix * 4 + 0); |
| /* local[i] holds the running sum of this thread's elements 0..i. */ |
| Monoid local[16]; |
| local[0].element = _71.element; |
| Monoid param_1; |
| for (uint i = 1u; i < 16u; i++) |
| { |
| Monoid param = local[i - 1u]; |
| Monoid _94; |
| _94.element = _67.Load((ix + i) * 4 + 0); |
| param_1.element = _94.element; |
| local[i] = combine_monoid(param, param_1); |
| } |
| /* Scan the per-thread totals across the group: 9 doubling steps cover 512 threads. */ |
| Monoid agg = local[15]; |
| sh_scratch[gl_LocalInvocationID.x] = agg; |
| for (uint i_1 = 0u; i_1 < 9u; i_1++) |
| { |
| GroupMemoryBarrierWithGroupSync(); |
| if (gl_LocalInvocationID.x >= (1u << i_1)) |
| { |
| Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; |
| Monoid param_2 = other; |
| Monoid param_3 = agg; |
| agg = combine_monoid(param_2, param_3); |
| } |
| /* Every thread must finish reading this step's inputs before any slot is overwritten. */ |
| GroupMemoryBarrierWithGroupSync(); |
| sh_scratch[gl_LocalInvocationID.x] = agg; |
| } |
| /* Thread 511 now holds the whole partition's sum; publish it as the aggregate. */ |
| if (gl_LocalInvocationID.x == 511u) |
| { |
| _43.Store(part_ix * 12 + 8, agg.element); |
| if (part_ix == 0u) |
| { |
| /* Partition 0 has no predecessors, so its aggregate is also its inclusive prefix. */ |
| _43.Store(12, agg.element); |
| } |
| } |
| /* Order the value stores above before the flag store below. */ |
| DeviceMemoryBarrier(); |
| if (gl_LocalInvocationID.x == 511u) |
| { |
| uint flag = 1u; |
| if (part_ix == 0u) |
| { |
| flag = 2u; |
| } |
| _43.Store(part_ix * 12 + 4, flag); |
| } |
| Monoid exclusive = _185; |
| if (part_ix != 0u) |
| { |
| /* Walk back over earlier partitions; only thread 511 accumulates into exclusive. */ |
| uint look_back_ix = part_ix - 1u; |
| uint their_ix = 0u; |
| Monoid their_prefix; |
| Monoid their_agg; |
| Monoid m; |
| while (true) |
| { |
| if (gl_LocalInvocationID.x == 511u) |
| { |
| sh_flag = _43.Load(look_back_ix * 12 + 4); |
| } |
| GroupMemoryBarrierWithGroupSync(); |
| DeviceMemoryBarrier(); |
| uint flag_1 = sh_flag; |
| /* Added barrier: all threads must have read sh_flag before thread 511 may write it again */ |
| /* (either the reload at the top of the next pass or the sh_flag = 2u store in the fallback path). */ |
| GroupMemoryBarrierWithGroupSync(); |
| if (flag_1 == 2u) |
| { |
| /* Predecessor has published its inclusive prefix: fold it in and stop. */ |
| if (gl_LocalInvocationID.x == 511u) |
| { |
| Monoid _223; |
| _223.element = _43.Load(look_back_ix * 12 + 12); |
| their_prefix.element = _223.element; |
| Monoid param_4 = their_prefix; |
| Monoid param_5 = exclusive; |
| exclusive = combine_monoid(param_4, param_5); |
| } |
| break; |
| } |
| else |
| { |
| if (flag_1 == 1u) |
| { |
| /* Only the aggregate is available: fold it in and move one partition further back. */ |
| if (gl_LocalInvocationID.x == 511u) |
| { |
| Monoid _245; |
| _245.element = _43.Load(look_back_ix * 12 + 8); |
| their_agg.element = _245.element; |
| Monoid param_6 = their_agg; |
| Monoid param_7 = exclusive; |
| exclusive = combine_monoid(param_6, param_7); |
| } |
| look_back_ix--; |
| their_ix = 0u; |
| continue; |
| } |
| } |
| /* Neither flag value seen: thread 511 sums the predecessor's input itself, */ |
| /* one element per pass, re-checking the flag at the top of every pass. */ |
| if (gl_LocalInvocationID.x == 511u) |
| { |
| Monoid _267; |
| _267.element = _67.Load(((look_back_ix * 8192u) + their_ix) * 4 + 0); |
| m.element = _267.element; |
| if (their_ix == 0u) |
| { |
| their_agg = m; |
| } |
| else |
| { |
| Monoid param_8 = their_agg; |
| Monoid param_9 = m; |
| their_agg = combine_monoid(param_8, param_9); |
| } |
| their_ix++; |
| if (their_ix == 8192u) |
| { |
| /* Whole predecessor partition summed by hand. */ |
| Monoid param_10 = their_agg; |
| Monoid param_11 = exclusive; |
| exclusive = combine_monoid(param_10, param_11); |
| if (look_back_ix == 0u) |
| { |
| /* Nothing earlier remains: signal the whole group to leave the loop. */ |
| sh_flag = 2u; |
| } |
| else |
| { |
| look_back_ix--; |
| their_ix = 0u; |
| } |
| } |
| } |
| GroupMemoryBarrierWithGroupSync(); |
| flag_1 = sh_flag; |
| /* Added barrier: keep thread 511's next write to sh_flag behind every thread's read above. */ |
| GroupMemoryBarrierWithGroupSync(); |
| if (flag_1 == 2u) |
| { |
| break; |
| } |
| } |
| /* Publish this partition's inclusive prefix and share the exclusive prefix with the group. */ |
| if (gl_LocalInvocationID.x == 511u) |
| { |
| Monoid param_12 = exclusive; |
| Monoid param_13 = agg; |
| Monoid inclusive_prefix = combine_monoid(param_12, param_13); |
| sh_prefix = exclusive; |
| _43.Store(part_ix * 12 + 12, inclusive_prefix.element); |
| } |
| /* Order the prefix store before the flag store that announces it. */ |
| DeviceMemoryBarrier(); |
| if (gl_LocalInvocationID.x == 511u) |
| { |
| _43.Store(part_ix * 12 + 4, 2u); |
| } |
| } |
| GroupMemoryBarrierWithGroupSync(); |
| if (part_ix != 0u) |
| { |
| exclusive = sh_prefix; |
| } |
| /* row = sum of everything before this thread's first element. */ |
| Monoid row = exclusive; |
| if (gl_LocalInvocationID.x > 0u) |
| { |
| Monoid other_1 = sh_scratch[gl_LocalInvocationID.x - 1u]; |
| Monoid param_14 = row; |
| Monoid param_15 = other_1; |
| row = combine_monoid(param_14, param_15); |
| } |
| /* Write the final inclusive sums for this thread's 16 elements. */ |
| for (uint i_2 = 0u; i_2 < 16u; i_2++) |
| { |
| Monoid param_16 = row; |
| Monoid param_17 = local[i_2]; |
| Monoid m_1 = combine_monoid(param_16, param_17); |
| _372.Store((ix + i_2) * 4 + 0, m_1.element); |
| } |
| } |
| |
| [numthreads(512, 1, 1)] /* 512 threads per group, matching the 512-entry sh_scratch array */ |
| void main(SPIRV_Cross_Input stage_input) /* Shader entry point: stashes the group-thread id in a global, then runs comp_main. */ |
| { |
| gl_LocalInvocationID = stage_input.gl_LocalInvocationID; /* must be set first: comp_main reads the global, not stage_input */ |
| comp_main(); |
| } |