Revert unintended shader changes The previous commit picked up some experiments with shaders by mistake.
diff --git a/tests/shader/gen/message_passing.dxil b/tests/shader/gen/message_passing.dxil index dc2eced..2be73da 100644 --- a/tests/shader/gen/message_passing.dxil +++ b/tests/shader/gen/message_passing.dxil Binary files differ
diff --git a/tests/shader/gen/message_passing.hlsl b/tests/shader/gen/message_passing.hlsl index 3a48808..ba8ce5f 100644 --- a/tests/shader/gen/message_passing.hlsl +++ b/tests/shader/gen/message_passing.hlsl
@@ -6,7 +6,7 @@ static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); -globallycoherent RWByteAddressBuffer data_buf : register(u0); +RWByteAddressBuffer data_buf : register(u0); RWByteAddressBuffer control_buf : register(u1); static uint3 gl_GlobalInvocationID; @@ -22,12 +22,13 @@ void comp_main() { - data_buf.Store(gl_GlobalInvocationID.x * 8 + 0, 1u); + uint _76; + data_buf.InterlockedExchange(gl_GlobalInvocationID.x * 8 + 0, 1u, _76); DeviceMemoryBarrier(); uint param = gl_GlobalInvocationID.x; uint write_flag_ix = permute_flag_ix(param); - uint _76; - data_buf.InterlockedExchange(write_flag_ix * 8 + 4, 1u, _76); + uint _77; + data_buf.InterlockedExchange(write_flag_ix * 8 + 4, 1u, _77); uint read_ix = (gl_GlobalInvocationID.x * 4099u) & 65535u; uint param_1 = read_ix; uint read_flag_ix = permute_flag_ix(param_1); @@ -35,7 +36,9 @@ data_buf.InterlockedAdd(read_flag_ix * 8 + 4, 0, _58); uint flag = _58; DeviceMemoryBarrier(); - uint data = data_buf.Load(read_ix * 8 + 0); + uint _62; + data_buf.InterlockedAdd(read_ix * 8 + 0, 0, _62); + uint data = _62; if (flag > data) { uint _73;
diff --git a/tests/shader/gen/message_passing.msl b/tests/shader/gen/message_passing.msl index 1bda181..e48f48a 100644 --- a/tests/shader/gen/message_passing.msl +++ b/tests/shader/gen/message_passing.msl
@@ -31,20 +31,21 @@ return (data_ix * 419u) & 65535u; } -kernel void main0(volatile device DataBuf& data_buf [[buffer(0)]], device ControlBuf& control_buf [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +kernel void main0(device DataBuf& data_buf [[buffer(0)]], device ControlBuf& control_buf [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { - data_buf.data[gl_GlobalInvocationID.x].data = 1u; + atomic_store_explicit((device atomic_uint*)&data_buf.data[gl_GlobalInvocationID.x].data, 1u, memory_order_relaxed); threadgroup_barrier(mem_flags::mem_device); uint param = gl_GlobalInvocationID.x; uint write_flag_ix = permute_flag_ix(param); - atomic_store_explicit((volatile device atomic_uint*)&data_buf.data[write_flag_ix].flag, 1u, memory_order_relaxed); + atomic_store_explicit((device atomic_uint*)&data_buf.data[write_flag_ix].flag, 1u, memory_order_relaxed); uint read_ix = (gl_GlobalInvocationID.x * 4099u) & 65535u; uint param_1 = read_ix; uint read_flag_ix = permute_flag_ix(param_1); - uint _58 = atomic_load_explicit((volatile device atomic_uint*)&data_buf.data[read_flag_ix].flag, memory_order_relaxed); + uint _58 = atomic_load_explicit((device atomic_uint*)&data_buf.data[read_flag_ix].flag, memory_order_relaxed); uint flag = _58; threadgroup_barrier(mem_flags::mem_device); - uint data = data_buf.data[read_ix].data; + uint _62 = atomic_load_explicit((device atomic_uint*)&data_buf.data[read_ix].data, memory_order_relaxed); + uint data = _62; if (flag > data) { uint _73 = atomic_fetch_add_explicit((device atomic_uint*)&control_buf.failures, 1u, memory_order_relaxed);
diff --git a/tests/shader/gen/message_passing.spv b/tests/shader/gen/message_passing.spv index fa7fb3e..e5f56d6 100644 --- a/tests/shader/gen/message_passing.spv +++ b/tests/shader/gen/message_passing.spv Binary files differ
diff --git a/tests/shader/gen/message_passing_vkmm.spv b/tests/shader/gen/message_passing_vkmm.spv index 58d3521..8527c2b 100644 --- a/tests/shader/gen/message_passing_vkmm.spv +++ b/tests/shader/gen/message_passing_vkmm.spv Binary files differ
diff --git a/tests/shader/gen/prefix.dxil b/tests/shader/gen/prefix.dxil index 12d0d50..34f3d6a 100644 --- a/tests/shader/gen/prefix.dxil +++ b/tests/shader/gen/prefix.dxil Binary files differ
diff --git a/tests/shader/gen/prefix.hlsl b/tests/shader/gen/prefix.hlsl index ba76110..3af5a96 100644 --- a/tests/shader/gen/prefix.hlsl +++ b/tests/shader/gen/prefix.hlsl
@@ -12,11 +12,11 @@ static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u); -static const Monoid _183 = { 0u }; +static const Monoid _185 = { 0u }; globallycoherent RWByteAddressBuffer _43 : register(u2); ByteAddressBuffer _67 : register(t0); -RWByteAddressBuffer _367 : register(u1); +RWByteAddressBuffer _372 : register(u1); static uint3 gl_LocalInvocationID; struct SPIRV_Cross_Input @@ -76,12 +76,10 @@ } if (gl_LocalInvocationID.x == 511u) { - uint _378; - _43.InterlockedExchange(part_ix * 12 + 8, agg.element, _378); + _43.Store(part_ix * 12 + 8, agg.element); if (part_ix == 0u) { - uint _379; - _43.InterlockedExchange(12, agg.element, _379); + _43.Store(12, agg.element); } } DeviceMemoryBarrier(); @@ -94,11 +92,12 @@ } _43.Store(part_ix * 12 + 4, flag); } - Monoid exclusive = _183; + Monoid exclusive = _185; if (part_ix != 0u) { uint look_back_ix = part_ix - 1u; uint their_ix = 0u; + Monoid their_prefix; Monoid their_agg; Monoid m; while (true) @@ -114,10 +113,9 @@ { if (gl_LocalInvocationID.x == 511u) { - uint _221; - _43.InterlockedAdd(look_back_ix * 12 + 12, 0, _221); - Monoid _222 = { _221 }; - Monoid their_prefix = _222; + Monoid _223; + _223.element = _43.Load(look_back_ix * 12 + 12); + their_prefix.element = _223.element; Monoid param_4 = their_prefix; Monoid param_5 = exclusive; exclusive = combine_monoid(param_4, param_5); @@ -130,9 +128,9 @@ { if (gl_LocalInvocationID.x == 511u) { - uint _242; - _43.InterlockedAdd(look_back_ix * 12 + 8, 0, _242); - their_agg.element = _242; + Monoid _245; + _245.element = _43.Load(look_back_ix * 12 + 8); + their_agg.element = _245.element; Monoid param_6 = their_agg; Monoid param_7 = exclusive; exclusive = combine_monoid(param_6, param_7); @@ -144,9 +142,9 @@ } if (gl_LocalInvocationID.x == 511u) { - Monoid _263; - _263.element = _67.Load(((look_back_ix * 8192u) + their_ix) * 4 + 0); - m.element = _263.element; + Monoid _267; + _267.element = _67.Load(((look_back_ix * 8192u) + their_ix) * 4 + 0); + m.element = _267.element; if (their_ix == 0u) { their_agg = m; @@ -187,8 +185,7 @@ Monoid param_13 = agg; Monoid inclusive_prefix = combine_monoid(param_12, param_13); sh_prefix = exclusive; - uint _380; - _43.InterlockedExchange(part_ix * 12 + 12, inclusive_prefix.element, _380); + _43.Store(part_ix * 12 + 12, inclusive_prefix.element); } DeviceMemoryBarrier(); if (gl_LocalInvocationID.x == 511u) @@ -214,7 +211,7 @@ Monoid param_16 = row; Monoid param_17 = local[i_2]; Monoid m_1 = combine_monoid(param_16, param_17); - _367.Store((ix + i_2) * 4 + 0, m_1.element); + _372.Store((ix + i_2) * 4 + 0, m_1.element); } }
diff --git a/tests/shader/gen/prefix.msl b/tests/shader/gen/prefix.msl index aaa7158..8e402a9 100644 --- a/tests/shader/gen/prefix.msl +++ b/tests/shader/gen/prefix.msl
@@ -87,7 +87,7 @@ return Monoid{ a.element + b.element }; } -kernel void main0(const device InBuf& _67 [[buffer(0)]], device OutBuf& _367 [[buffer(1)]], volatile device StateBuf& _43 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +kernel void main0(const device InBuf& _67 [[buffer(0)]], device OutBuf& _372 [[buffer(1)]], volatile device StateBuf& _43 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { threadgroup uint sh_part_ix; threadgroup Monoid sh_scratch[512]; @@ -127,10 +127,10 @@ } if (gl_LocalInvocationID.x == 511u) { - atomic_store_explicit((volatile device atomic_uint*)&_43.state[part_ix].aggregate.element, agg.element, memory_order_relaxed); + _43.state[part_ix].aggregate.element = agg.element; if (part_ix == 0u) { - atomic_store_explicit((volatile device atomic_uint*)&_43.state[0].prefix.element, agg.element, memory_order_relaxed); + _43.state[0].prefix.element = agg.element; } } threadgroup_barrier(mem_flags::mem_device); @@ -148,6 +148,7 @@ { uint look_back_ix = part_ix - 1u; uint their_ix = 0u; + Monoid their_prefix; Monoid their_agg; Monoid m; while (true) @@ -163,8 +164,7 @@ { if (gl_LocalInvocationID.x == 511u) { - uint _221 = atomic_load_explicit((volatile device atomic_uint*)&_43.state[look_back_ix].prefix.element, memory_order_relaxed); - Monoid their_prefix = Monoid{ _221 }; + their_prefix.element = _43.state[look_back_ix].prefix.element; Monoid param_4 = their_prefix; Monoid param_5 = exclusive; exclusive = combine_monoid(param_4, param_5); @@ -177,8 +177,7 @@ { if (gl_LocalInvocationID.x == 511u) { - uint _242 = atomic_load_explicit((volatile device atomic_uint*)&_43.state[look_back_ix].aggregate.element, memory_order_relaxed); - their_agg.element = _242; + their_agg.element = _43.state[look_back_ix].aggregate.element; Monoid param_6 = their_agg; Monoid param_7 = exclusive; exclusive = combine_monoid(param_6, param_7); @@ -231,7 +230,7 @@ Monoid param_13 = agg; Monoid inclusive_prefix = combine_monoid(param_12, param_13); sh_prefix = exclusive; - atomic_store_explicit((volatile device atomic_uint*)&_43.state[part_ix].prefix.element, inclusive_prefix.element, memory_order_relaxed); + _43.state[part_ix].prefix.element = inclusive_prefix.element; } threadgroup_barrier(mem_flags::mem_device); if (gl_LocalInvocationID.x == 511u) @@ -257,7 +256,7 @@ Monoid param_16 = row; Monoid param_17 = local[i_2]; Monoid m_1 = combine_monoid(param_16, param_17); - _367.outbuf[ix + i_2].element = m_1.element; + _372.outbuf[ix + i_2].element = m_1.element; } }
diff --git a/tests/shader/gen/prefix.spv b/tests/shader/gen/prefix.spv index 0da9ed8..d2c1aad 100644 --- a/tests/shader/gen/prefix.spv +++ b/tests/shader/gen/prefix.spv Binary files differ
diff --git a/tests/shader/gen/prefix_atomic.dxil b/tests/shader/gen/prefix_atomic.dxil index 80c05a3..68f47e5 100644 --- a/tests/shader/gen/prefix_atomic.dxil +++ b/tests/shader/gen/prefix_atomic.dxil Binary files differ
diff --git a/tests/shader/gen/prefix_atomic.hlsl b/tests/shader/gen/prefix_atomic.hlsl index 6105c01..10f7081 100644 --- a/tests/shader/gen/prefix_atomic.hlsl +++ b/tests/shader/gen/prefix_atomic.hlsl
@@ -12,11 +12,11 @@ static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u); -static const Monoid _183 = { 0u }; +static const Monoid _185 = { 0u }; globallycoherent RWByteAddressBuffer _43 : register(u2); ByteAddressBuffer _67 : register(t0); -RWByteAddressBuffer _367 : register(u1); +RWByteAddressBuffer _372 : register(u1); static uint3 gl_LocalInvocationID; struct SPIRV_Cross_Input @@ -76,12 +76,10 @@ } if (gl_LocalInvocationID.x == 511u) { - uint _378; - _43.InterlockedExchange(part_ix * 12 + 8, agg.element, _378); + _43.Store(part_ix * 12 + 8, agg.element); if (part_ix == 0u) { - uint _379; - _43.InterlockedExchange(12, agg.element, _379); + _43.Store(12, agg.element); } } DeviceMemoryBarrier(); @@ -92,23 +90,24 @@ { flag = 2u; } - uint _380; - _43.InterlockedExchange(part_ix * 12 + 4, flag, _380); + uint _383; + _43.InterlockedExchange(part_ix * 12 + 4, flag, _383); } - Monoid exclusive = _183; + Monoid exclusive = _185; if (part_ix != 0u) { uint look_back_ix = part_ix - 1u; uint their_ix = 0u; + Monoid their_prefix; Monoid their_agg; Monoid m; while (true) { if (gl_LocalInvocationID.x == 511u) { - uint _206; - _43.InterlockedAdd(look_back_ix * 12 + 4, 0, _206); - sh_flag = _206; + uint _208; + _43.InterlockedAdd(look_back_ix * 12 + 4, 0, _208); + sh_flag = _208; } GroupMemoryBarrierWithGroupSync(); DeviceMemoryBarrier(); @@ -117,10 +116,9 @@ { if (gl_LocalInvocationID.x == 511u) { - uint _221; - _43.InterlockedAdd(look_back_ix * 12 + 12, 0, _221); - Monoid _222 = { _221 }; - Monoid their_prefix = _222; + Monoid _223; + _223.element = _43.Load(look_back_ix * 12 + 12); + their_prefix.element = _223.element; Monoid param_4 = their_prefix; Monoid param_5 = exclusive; exclusive = combine_monoid(param_4, param_5); @@ -133,9 +131,9 @@ { if (gl_LocalInvocationID.x == 511u) { - uint _242; - _43.InterlockedAdd(look_back_ix * 12 + 8, 0, _242); - their_agg.element = _242; + Monoid _245; + _245.element = _43.Load(look_back_ix * 12 + 8); + their_agg.element = _245.element; Monoid param_6 = their_agg; Monoid param_7 = exclusive; exclusive = combine_monoid(param_6, param_7); @@ -147,9 +145,9 @@ } if (gl_LocalInvocationID.x == 511u) { - Monoid _263; - _263.element = _67.Load(((look_back_ix * 8192u) + their_ix) * 4 + 0); - m.element = _263.element; + Monoid _267; + _267.element = _67.Load(((look_back_ix * 8192u) + their_ix) * 4 + 0); + m.element = _267.element; if (their_ix == 0u) { their_agg = m; @@ -190,14 +188,13 @@ Monoid param_13 = agg; Monoid inclusive_prefix = combine_monoid(param_12, param_13); sh_prefix = exclusive; - uint _381; - _43.InterlockedExchange(part_ix * 12 + 12, inclusive_prefix.element, _381); + _43.Store(part_ix * 12 + 12, inclusive_prefix.element); } DeviceMemoryBarrier(); if (gl_LocalInvocationID.x == 511u) { - uint _382; - _43.InterlockedExchange(part_ix * 12 + 4, 2u, _382); + uint _384; + _43.InterlockedExchange(part_ix * 12 + 4, 2u, _384); } } GroupMemoryBarrierWithGroupSync(); @@ -218,7 +215,7 @@ Monoid param_16 = row; Monoid param_17 = local[i_2]; Monoid m_1 = combine_monoid(param_16, param_17); - _367.Store((ix + i_2) * 4 + 0, m_1.element); + _372.Store((ix + i_2) * 4 + 0, m_1.element); } }
diff --git a/tests/shader/gen/prefix_atomic.msl b/tests/shader/gen/prefix_atomic.msl index d5de44a..6d7d155 100644 --- a/tests/shader/gen/prefix_atomic.msl +++ b/tests/shader/gen/prefix_atomic.msl
@@ -87,7 +87,7 @@ return Monoid{ a.element + b.element }; } -kernel void main0(const device InBuf& _67 [[buffer(0)]], device OutBuf& _367 [[buffer(1)]], volatile device StateBuf& _43 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +kernel void main0(const device InBuf& _67 [[buffer(0)]], device OutBuf& _372 [[buffer(1)]], volatile device StateBuf& _43 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { threadgroup uint sh_part_ix; threadgroup Monoid sh_scratch[512]; @@ -127,10 +127,10 @@ } if (gl_LocalInvocationID.x == 511u) { - atomic_store_explicit((volatile device atomic_uint*)&_43.state[part_ix].aggregate.element, agg.element, memory_order_relaxed); + _43.state[part_ix].aggregate.element = agg.element; if (part_ix == 0u) { - atomic_store_explicit((volatile device atomic_uint*)&_43.state[0].prefix.element, agg.element, memory_order_relaxed); + _43.state[0].prefix.element = agg.element; } } threadgroup_barrier(mem_flags::mem_device); @@ -148,14 +148,15 @@ { uint look_back_ix = part_ix - 1u; uint their_ix = 0u; + Monoid their_prefix; Monoid their_agg; Monoid m; while (true) { if (gl_LocalInvocationID.x == 511u) { - uint _206 = atomic_load_explicit((volatile device atomic_uint*)&_43.state[look_back_ix].flag, memory_order_relaxed); - sh_flag = _206; + uint _208 = atomic_load_explicit((volatile device atomic_uint*)&_43.state[look_back_ix].flag, memory_order_relaxed); + sh_flag = _208; } threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_device); @@ -164,8 +165,7 @@ { if (gl_LocalInvocationID.x == 511u) { - uint _221 = atomic_load_explicit((volatile device atomic_uint*)&_43.state[look_back_ix].prefix.element, memory_order_relaxed); - Monoid their_prefix = Monoid{ _221 }; + their_prefix.element = _43.state[look_back_ix].prefix.element; Monoid param_4 = their_prefix; Monoid param_5 = exclusive; exclusive = combine_monoid(param_4, param_5); @@ -178,8 +178,7 @@ { if (gl_LocalInvocationID.x == 511u) { - uint _242 = atomic_load_explicit((volatile device atomic_uint*)&_43.state[look_back_ix].aggregate.element, memory_order_relaxed); - their_agg.element = _242; + their_agg.element = _43.state[look_back_ix].aggregate.element; Monoid param_6 = their_agg; Monoid param_7 = exclusive; exclusive = combine_monoid(param_6, param_7); @@ -232,7 +231,7 @@ Monoid param_13 = agg; Monoid inclusive_prefix = combine_monoid(param_12, param_13); sh_prefix = exclusive; - atomic_store_explicit((volatile device atomic_uint*)&_43.state[part_ix].prefix.element, inclusive_prefix.element, memory_order_relaxed); + _43.state[part_ix].prefix.element = inclusive_prefix.element; } threadgroup_barrier(mem_flags::mem_device); if (gl_LocalInvocationID.x == 511u) @@ -258,7 +257,7 @@ Monoid param_16 = row; Monoid param_17 = local[i_2]; Monoid m_1 = combine_monoid(param_16, param_17); - _367.outbuf[ix + i_2].element = m_1.element; + _372.outbuf[ix + i_2].element = m_1.element; } }
diff --git a/tests/shader/gen/prefix_atomic.spv b/tests/shader/gen/prefix_atomic.spv index 1b31f67..acca545 100644 --- a/tests/shader/gen/prefix_atomic.spv +++ b/tests/shader/gen/prefix_atomic.spv Binary files differ
diff --git a/tests/shader/gen/prefix_vkmm.spv b/tests/shader/gen/prefix_vkmm.spv index c8b9ea2..0b8d475 100644 --- a/tests/shader/gen/prefix_vkmm.spv +++ b/tests/shader/gen/prefix_vkmm.spv Binary files differ
diff --git a/tests/shader/message_passing.comp b/tests/shader/message_passing.comp index 521dedd..e5e53b6 100644 --- a/tests/shader/message_passing.comp +++ b/tests/shader/message_passing.comp
@@ -23,7 +23,7 @@ uint flag; }; -layout(binding = 0) coherent buffer DataBuf +layout(binding = 0) buffer DataBuf { Element data[]; } data_buf; @@ -40,7 +40,7 @@ void main() { - data_buf.data[gl_GlobalInvocationID.x].data = 1u; + atomicStore(data_buf.data[gl_GlobalInvocationID.x].data, 1u, gl_ScopeDevice, 0, 0); #ifndef VKMM memoryBarrierBuffer(); #endif @@ -52,7 +52,7 @@ #ifndef VKMM memoryBarrierBuffer(); #endif - uint data = data_buf.data[read_ix].data; + uint data = atomicLoad(data_buf.data[read_ix].data, gl_ScopeDevice, 0, 0); if (flag > data) { atomicAdd(control_buf.failures, 1u);
diff --git a/tests/shader/prefix.comp b/tests/shader/prefix.comp index 0fc9909..a6a0d57 100644 --- a/tests/shader/prefix.comp +++ b/tests/shader/prefix.comp
@@ -99,9 +99,9 @@ // Publish aggregate for this partition if (gl_LocalInvocationID.x == WG_SIZE - 1) { - atomicStore(state[part_ix].aggregate.element, agg.element, gl_ScopeDevice, 0, 0); + state[part_ix].aggregate = agg; if (part_ix == 0) { - atomicStore(state[0].prefix.element, agg.element, gl_ScopeDevice, 0, 0); + state[0].prefix = agg; } } // Write flag with release semantics; this is done portably with a barrier. @@ -147,13 +147,13 @@ if (flag == FLAG_PREFIX_READY) { if (gl_LocalInvocationID.x == WG_SIZE - 1) { - Monoid their_prefix = Monoid(atomicLoad(state[look_back_ix].prefix.element, gl_ScopeDevice, 0, 0)); + Monoid their_prefix = state[look_back_ix].prefix; exclusive = combine_monoid(their_prefix, exclusive); } break; } else if (flag == FLAG_AGGREGATE_READY) { if (gl_LocalInvocationID.x == WG_SIZE - 1) { - their_agg.element = atomicLoad(state[look_back_ix].aggregate.element, gl_ScopeDevice, 0, 0); + their_agg = state[look_back_ix].aggregate; exclusive = combine_monoid(their_agg, exclusive); } look_back_ix--; @@ -193,7 +193,7 @@ if (gl_LocalInvocationID.x == WG_SIZE - 1) { Monoid inclusive_prefix = combine_monoid(exclusive, agg); sh_prefix = exclusive; - atomicStore(state[part_ix].prefix.element, inclusive_prefix.element, gl_ScopeDevice, 0, 0); + state[part_ix].prefix = inclusive_prefix; } #ifndef VKMM memoryBarrierBuffer();