Revert unintended shader changes
The previous commit picked up some experiments with shaders by mistake.
diff --git a/tests/shader/gen/message_passing.dxil b/tests/shader/gen/message_passing.dxil
index dc2eced..2be73da 100644
--- a/tests/shader/gen/message_passing.dxil
+++ b/tests/shader/gen/message_passing.dxil
Binary files differ
diff --git a/tests/shader/gen/message_passing.hlsl b/tests/shader/gen/message_passing.hlsl
index 3a48808..ba8ce5f 100644
--- a/tests/shader/gen/message_passing.hlsl
+++ b/tests/shader/gen/message_passing.hlsl
@@ -6,7 +6,7 @@
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
-globallycoherent RWByteAddressBuffer data_buf : register(u0);
+RWByteAddressBuffer data_buf : register(u0);
RWByteAddressBuffer control_buf : register(u1);
static uint3 gl_GlobalInvocationID;
@@ -22,12 +22,13 @@
void comp_main()
{
- data_buf.Store(gl_GlobalInvocationID.x * 8 + 0, 1u);
+ uint _76;
+ data_buf.InterlockedExchange(gl_GlobalInvocationID.x * 8 + 0, 1u, _76);
DeviceMemoryBarrier();
uint param = gl_GlobalInvocationID.x;
uint write_flag_ix = permute_flag_ix(param);
- uint _76;
- data_buf.InterlockedExchange(write_flag_ix * 8 + 4, 1u, _76);
+ uint _77;
+ data_buf.InterlockedExchange(write_flag_ix * 8 + 4, 1u, _77);
uint read_ix = (gl_GlobalInvocationID.x * 4099u) & 65535u;
uint param_1 = read_ix;
uint read_flag_ix = permute_flag_ix(param_1);
@@ -35,7 +36,9 @@
data_buf.InterlockedAdd(read_flag_ix * 8 + 4, 0, _58);
uint flag = _58;
DeviceMemoryBarrier();
- uint data = data_buf.Load(read_ix * 8 + 0);
+ uint _62;
+ data_buf.InterlockedAdd(read_ix * 8 + 0, 0, _62);
+ uint data = _62;
if (flag > data)
{
uint _73;
diff --git a/tests/shader/gen/message_passing.msl b/tests/shader/gen/message_passing.msl
index 1bda181..e48f48a 100644
--- a/tests/shader/gen/message_passing.msl
+++ b/tests/shader/gen/message_passing.msl
@@ -31,20 +31,21 @@
return (data_ix * 419u) & 65535u;
}
-kernel void main0(volatile device DataBuf& data_buf [[buffer(0)]], device ControlBuf& control_buf [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+kernel void main0(device DataBuf& data_buf [[buffer(0)]], device ControlBuf& control_buf [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
- data_buf.data[gl_GlobalInvocationID.x].data = 1u;
+ atomic_store_explicit((device atomic_uint*)&data_buf.data[gl_GlobalInvocationID.x].data, 1u, memory_order_relaxed);
threadgroup_barrier(mem_flags::mem_device);
uint param = gl_GlobalInvocationID.x;
uint write_flag_ix = permute_flag_ix(param);
- atomic_store_explicit((volatile device atomic_uint*)&data_buf.data[write_flag_ix].flag, 1u, memory_order_relaxed);
+ atomic_store_explicit((device atomic_uint*)&data_buf.data[write_flag_ix].flag, 1u, memory_order_relaxed);
uint read_ix = (gl_GlobalInvocationID.x * 4099u) & 65535u;
uint param_1 = read_ix;
uint read_flag_ix = permute_flag_ix(param_1);
- uint _58 = atomic_load_explicit((volatile device atomic_uint*)&data_buf.data[read_flag_ix].flag, memory_order_relaxed);
+ uint _58 = atomic_load_explicit((device atomic_uint*)&data_buf.data[read_flag_ix].flag, memory_order_relaxed);
uint flag = _58;
threadgroup_barrier(mem_flags::mem_device);
- uint data = data_buf.data[read_ix].data;
+ uint _62 = atomic_load_explicit((device atomic_uint*)&data_buf.data[read_ix].data, memory_order_relaxed);
+ uint data = _62;
if (flag > data)
{
uint _73 = atomic_fetch_add_explicit((device atomic_uint*)&control_buf.failures, 1u, memory_order_relaxed);
diff --git a/tests/shader/gen/message_passing.spv b/tests/shader/gen/message_passing.spv
index fa7fb3e..e5f56d6 100644
--- a/tests/shader/gen/message_passing.spv
+++ b/tests/shader/gen/message_passing.spv
Binary files differ
diff --git a/tests/shader/gen/message_passing_vkmm.spv b/tests/shader/gen/message_passing_vkmm.spv
index 58d3521..8527c2b 100644
--- a/tests/shader/gen/message_passing_vkmm.spv
+++ b/tests/shader/gen/message_passing_vkmm.spv
Binary files differ
diff --git a/tests/shader/gen/prefix.dxil b/tests/shader/gen/prefix.dxil
index 12d0d50..34f3d6a 100644
--- a/tests/shader/gen/prefix.dxil
+++ b/tests/shader/gen/prefix.dxil
Binary files differ
diff --git a/tests/shader/gen/prefix.hlsl b/tests/shader/gen/prefix.hlsl
index ba76110..3af5a96 100644
--- a/tests/shader/gen/prefix.hlsl
+++ b/tests/shader/gen/prefix.hlsl
@@ -12,11 +12,11 @@
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);
-static const Monoid _183 = { 0u };
+static const Monoid _185 = { 0u };
globallycoherent RWByteAddressBuffer _43 : register(u2);
ByteAddressBuffer _67 : register(t0);
-RWByteAddressBuffer _367 : register(u1);
+RWByteAddressBuffer _372 : register(u1);
static uint3 gl_LocalInvocationID;
struct SPIRV_Cross_Input
@@ -76,12 +76,10 @@
}
if (gl_LocalInvocationID.x == 511u)
{
- uint _378;
- _43.InterlockedExchange(part_ix * 12 + 8, agg.element, _378);
+ _43.Store(part_ix * 12 + 8, agg.element);
if (part_ix == 0u)
{
- uint _379;
- _43.InterlockedExchange(12, agg.element, _379);
+ _43.Store(12, agg.element);
}
}
DeviceMemoryBarrier();
@@ -94,11 +92,12 @@
}
_43.Store(part_ix * 12 + 4, flag);
}
- Monoid exclusive = _183;
+ Monoid exclusive = _185;
if (part_ix != 0u)
{
uint look_back_ix = part_ix - 1u;
uint their_ix = 0u;
+ Monoid their_prefix;
Monoid their_agg;
Monoid m;
while (true)
@@ -114,10 +113,9 @@
{
if (gl_LocalInvocationID.x == 511u)
{
- uint _221;
- _43.InterlockedAdd(look_back_ix * 12 + 12, 0, _221);
- Monoid _222 = { _221 };
- Monoid their_prefix = _222;
+ Monoid _223;
+ _223.element = _43.Load(look_back_ix * 12 + 12);
+ their_prefix.element = _223.element;
Monoid param_4 = their_prefix;
Monoid param_5 = exclusive;
exclusive = combine_monoid(param_4, param_5);
@@ -130,9 +128,9 @@
{
if (gl_LocalInvocationID.x == 511u)
{
- uint _242;
- _43.InterlockedAdd(look_back_ix * 12 + 8, 0, _242);
- their_agg.element = _242;
+ Monoid _245;
+ _245.element = _43.Load(look_back_ix * 12 + 8);
+ their_agg.element = _245.element;
Monoid param_6 = their_agg;
Monoid param_7 = exclusive;
exclusive = combine_monoid(param_6, param_7);
@@ -144,9 +142,9 @@
}
if (gl_LocalInvocationID.x == 511u)
{
- Monoid _263;
- _263.element = _67.Load(((look_back_ix * 8192u) + their_ix) * 4 + 0);
- m.element = _263.element;
+ Monoid _267;
+ _267.element = _67.Load(((look_back_ix * 8192u) + their_ix) * 4 + 0);
+ m.element = _267.element;
if (their_ix == 0u)
{
their_agg = m;
@@ -187,8 +185,7 @@
Monoid param_13 = agg;
Monoid inclusive_prefix = combine_monoid(param_12, param_13);
sh_prefix = exclusive;
- uint _380;
- _43.InterlockedExchange(part_ix * 12 + 12, inclusive_prefix.element, _380);
+ _43.Store(part_ix * 12 + 12, inclusive_prefix.element);
}
DeviceMemoryBarrier();
if (gl_LocalInvocationID.x == 511u)
@@ -214,7 +211,7 @@
Monoid param_16 = row;
Monoid param_17 = local[i_2];
Monoid m_1 = combine_monoid(param_16, param_17);
- _367.Store((ix + i_2) * 4 + 0, m_1.element);
+ _372.Store((ix + i_2) * 4 + 0, m_1.element);
}
}
diff --git a/tests/shader/gen/prefix.msl b/tests/shader/gen/prefix.msl
index aaa7158..8e402a9 100644
--- a/tests/shader/gen/prefix.msl
+++ b/tests/shader/gen/prefix.msl
@@ -87,7 +87,7 @@
return Monoid{ a.element + b.element };
}
-kernel void main0(const device InBuf& _67 [[buffer(0)]], device OutBuf& _367 [[buffer(1)]], volatile device StateBuf& _43 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
+kernel void main0(const device InBuf& _67 [[buffer(0)]], device OutBuf& _372 [[buffer(1)]], volatile device StateBuf& _43 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{
threadgroup uint sh_part_ix;
threadgroup Monoid sh_scratch[512];
@@ -127,10 +127,10 @@
}
if (gl_LocalInvocationID.x == 511u)
{
- atomic_store_explicit((volatile device atomic_uint*)&_43.state[part_ix].aggregate.element, agg.element, memory_order_relaxed);
+ _43.state[part_ix].aggregate.element = agg.element;
if (part_ix == 0u)
{
- atomic_store_explicit((volatile device atomic_uint*)&_43.state[0].prefix.element, agg.element, memory_order_relaxed);
+ _43.state[0].prefix.element = agg.element;
}
}
threadgroup_barrier(mem_flags::mem_device);
@@ -148,6 +148,7 @@
{
uint look_back_ix = part_ix - 1u;
uint their_ix = 0u;
+ Monoid their_prefix;
Monoid their_agg;
Monoid m;
while (true)
@@ -163,8 +164,7 @@
{
if (gl_LocalInvocationID.x == 511u)
{
- uint _221 = atomic_load_explicit((volatile device atomic_uint*)&_43.state[look_back_ix].prefix.element, memory_order_relaxed);
- Monoid their_prefix = Monoid{ _221 };
+ their_prefix.element = _43.state[look_back_ix].prefix.element;
Monoid param_4 = their_prefix;
Monoid param_5 = exclusive;
exclusive = combine_monoid(param_4, param_5);
@@ -177,8 +177,7 @@
{
if (gl_LocalInvocationID.x == 511u)
{
- uint _242 = atomic_load_explicit((volatile device atomic_uint*)&_43.state[look_back_ix].aggregate.element, memory_order_relaxed);
- their_agg.element = _242;
+ their_agg.element = _43.state[look_back_ix].aggregate.element;
Monoid param_6 = their_agg;
Monoid param_7 = exclusive;
exclusive = combine_monoid(param_6, param_7);
@@ -231,7 +230,7 @@
Monoid param_13 = agg;
Monoid inclusive_prefix = combine_monoid(param_12, param_13);
sh_prefix = exclusive;
- atomic_store_explicit((volatile device atomic_uint*)&_43.state[part_ix].prefix.element, inclusive_prefix.element, memory_order_relaxed);
+ _43.state[part_ix].prefix.element = inclusive_prefix.element;
}
threadgroup_barrier(mem_flags::mem_device);
if (gl_LocalInvocationID.x == 511u)
@@ -257,7 +256,7 @@
Monoid param_16 = row;
Monoid param_17 = local[i_2];
Monoid m_1 = combine_monoid(param_16, param_17);
- _367.outbuf[ix + i_2].element = m_1.element;
+ _372.outbuf[ix + i_2].element = m_1.element;
}
}
diff --git a/tests/shader/gen/prefix.spv b/tests/shader/gen/prefix.spv
index 0da9ed8..d2c1aad 100644
--- a/tests/shader/gen/prefix.spv
+++ b/tests/shader/gen/prefix.spv
Binary files differ
diff --git a/tests/shader/gen/prefix_atomic.dxil b/tests/shader/gen/prefix_atomic.dxil
index 80c05a3..68f47e5 100644
--- a/tests/shader/gen/prefix_atomic.dxil
+++ b/tests/shader/gen/prefix_atomic.dxil
Binary files differ
diff --git a/tests/shader/gen/prefix_atomic.hlsl b/tests/shader/gen/prefix_atomic.hlsl
index 6105c01..10f7081 100644
--- a/tests/shader/gen/prefix_atomic.hlsl
+++ b/tests/shader/gen/prefix_atomic.hlsl
@@ -12,11 +12,11 @@
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);
-static const Monoid _183 = { 0u };
+static const Monoid _185 = { 0u };
globallycoherent RWByteAddressBuffer _43 : register(u2);
ByteAddressBuffer _67 : register(t0);
-RWByteAddressBuffer _367 : register(u1);
+RWByteAddressBuffer _372 : register(u1);
static uint3 gl_LocalInvocationID;
struct SPIRV_Cross_Input
@@ -76,12 +76,10 @@
}
if (gl_LocalInvocationID.x == 511u)
{
- uint _378;
- _43.InterlockedExchange(part_ix * 12 + 8, agg.element, _378);
+ _43.Store(part_ix * 12 + 8, agg.element);
if (part_ix == 0u)
{
- uint _379;
- _43.InterlockedExchange(12, agg.element, _379);
+ _43.Store(12, agg.element);
}
}
DeviceMemoryBarrier();
@@ -92,23 +90,24 @@
{
flag = 2u;
}
- uint _380;
- _43.InterlockedExchange(part_ix * 12 + 4, flag, _380);
+ uint _383;
+ _43.InterlockedExchange(part_ix * 12 + 4, flag, _383);
}
- Monoid exclusive = _183;
+ Monoid exclusive = _185;
if (part_ix != 0u)
{
uint look_back_ix = part_ix - 1u;
uint their_ix = 0u;
+ Monoid their_prefix;
Monoid their_agg;
Monoid m;
while (true)
{
if (gl_LocalInvocationID.x == 511u)
{
- uint _206;
- _43.InterlockedAdd(look_back_ix * 12 + 4, 0, _206);
- sh_flag = _206;
+ uint _208;
+ _43.InterlockedAdd(look_back_ix * 12 + 4, 0, _208);
+ sh_flag = _208;
}
GroupMemoryBarrierWithGroupSync();
DeviceMemoryBarrier();
@@ -117,10 +116,9 @@
{
if (gl_LocalInvocationID.x == 511u)
{
- uint _221;
- _43.InterlockedAdd(look_back_ix * 12 + 12, 0, _221);
- Monoid _222 = { _221 };
- Monoid their_prefix = _222;
+ Monoid _223;
+ _223.element = _43.Load(look_back_ix * 12 + 12);
+ their_prefix.element = _223.element;
Monoid param_4 = their_prefix;
Monoid param_5 = exclusive;
exclusive = combine_monoid(param_4, param_5);
@@ -133,9 +131,9 @@
{
if (gl_LocalInvocationID.x == 511u)
{
- uint _242;
- _43.InterlockedAdd(look_back_ix * 12 + 8, 0, _242);
- their_agg.element = _242;
+ Monoid _245;
+ _245.element = _43.Load(look_back_ix * 12 + 8);
+ their_agg.element = _245.element;
Monoid param_6 = their_agg;
Monoid param_7 = exclusive;
exclusive = combine_monoid(param_6, param_7);
@@ -147,9 +145,9 @@
}
if (gl_LocalInvocationID.x == 511u)
{
- Monoid _263;
- _263.element = _67.Load(((look_back_ix * 8192u) + their_ix) * 4 + 0);
- m.element = _263.element;
+ Monoid _267;
+ _267.element = _67.Load(((look_back_ix * 8192u) + their_ix) * 4 + 0);
+ m.element = _267.element;
if (their_ix == 0u)
{
their_agg = m;
@@ -190,14 +188,13 @@
Monoid param_13 = agg;
Monoid inclusive_prefix = combine_monoid(param_12, param_13);
sh_prefix = exclusive;
- uint _381;
- _43.InterlockedExchange(part_ix * 12 + 12, inclusive_prefix.element, _381);
+ _43.Store(part_ix * 12 + 12, inclusive_prefix.element);
}
DeviceMemoryBarrier();
if (gl_LocalInvocationID.x == 511u)
{
- uint _382;
- _43.InterlockedExchange(part_ix * 12 + 4, 2u, _382);
+ uint _384;
+ _43.InterlockedExchange(part_ix * 12 + 4, 2u, _384);
}
}
GroupMemoryBarrierWithGroupSync();
@@ -218,7 +215,7 @@
Monoid param_16 = row;
Monoid param_17 = local[i_2];
Monoid m_1 = combine_monoid(param_16, param_17);
- _367.Store((ix + i_2) * 4 + 0, m_1.element);
+ _372.Store((ix + i_2) * 4 + 0, m_1.element);
}
}
diff --git a/tests/shader/gen/prefix_atomic.msl b/tests/shader/gen/prefix_atomic.msl
index d5de44a..6d7d155 100644
--- a/tests/shader/gen/prefix_atomic.msl
+++ b/tests/shader/gen/prefix_atomic.msl
@@ -87,7 +87,7 @@
return Monoid{ a.element + b.element };
}
-kernel void main0(const device InBuf& _67 [[buffer(0)]], device OutBuf& _367 [[buffer(1)]], volatile device StateBuf& _43 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
+kernel void main0(const device InBuf& _67 [[buffer(0)]], device OutBuf& _372 [[buffer(1)]], volatile device StateBuf& _43 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{
threadgroup uint sh_part_ix;
threadgroup Monoid sh_scratch[512];
@@ -127,10 +127,10 @@
}
if (gl_LocalInvocationID.x == 511u)
{
- atomic_store_explicit((volatile device atomic_uint*)&_43.state[part_ix].aggregate.element, agg.element, memory_order_relaxed);
+ _43.state[part_ix].aggregate.element = agg.element;
if (part_ix == 0u)
{
- atomic_store_explicit((volatile device atomic_uint*)&_43.state[0].prefix.element, agg.element, memory_order_relaxed);
+ _43.state[0].prefix.element = agg.element;
}
}
threadgroup_barrier(mem_flags::mem_device);
@@ -148,14 +148,15 @@
{
uint look_back_ix = part_ix - 1u;
uint their_ix = 0u;
+ Monoid their_prefix;
Monoid their_agg;
Monoid m;
while (true)
{
if (gl_LocalInvocationID.x == 511u)
{
- uint _206 = atomic_load_explicit((volatile device atomic_uint*)&_43.state[look_back_ix].flag, memory_order_relaxed);
- sh_flag = _206;
+ uint _208 = atomic_load_explicit((volatile device atomic_uint*)&_43.state[look_back_ix].flag, memory_order_relaxed);
+ sh_flag = _208;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
threadgroup_barrier(mem_flags::mem_device);
@@ -164,8 +165,7 @@
{
if (gl_LocalInvocationID.x == 511u)
{
- uint _221 = atomic_load_explicit((volatile device atomic_uint*)&_43.state[look_back_ix].prefix.element, memory_order_relaxed);
- Monoid their_prefix = Monoid{ _221 };
+ their_prefix.element = _43.state[look_back_ix].prefix.element;
Monoid param_4 = their_prefix;
Monoid param_5 = exclusive;
exclusive = combine_monoid(param_4, param_5);
@@ -178,8 +178,7 @@
{
if (gl_LocalInvocationID.x == 511u)
{
- uint _242 = atomic_load_explicit((volatile device atomic_uint*)&_43.state[look_back_ix].aggregate.element, memory_order_relaxed);
- their_agg.element = _242;
+ their_agg.element = _43.state[look_back_ix].aggregate.element;
Monoid param_6 = their_agg;
Monoid param_7 = exclusive;
exclusive = combine_monoid(param_6, param_7);
@@ -232,7 +231,7 @@
Monoid param_13 = agg;
Monoid inclusive_prefix = combine_monoid(param_12, param_13);
sh_prefix = exclusive;
- atomic_store_explicit((volatile device atomic_uint*)&_43.state[part_ix].prefix.element, inclusive_prefix.element, memory_order_relaxed);
+ _43.state[part_ix].prefix.element = inclusive_prefix.element;
}
threadgroup_barrier(mem_flags::mem_device);
if (gl_LocalInvocationID.x == 511u)
@@ -258,7 +257,7 @@
Monoid param_16 = row;
Monoid param_17 = local[i_2];
Monoid m_1 = combine_monoid(param_16, param_17);
- _367.outbuf[ix + i_2].element = m_1.element;
+ _372.outbuf[ix + i_2].element = m_1.element;
}
}
diff --git a/tests/shader/gen/prefix_atomic.spv b/tests/shader/gen/prefix_atomic.spv
index 1b31f67..acca545 100644
--- a/tests/shader/gen/prefix_atomic.spv
+++ b/tests/shader/gen/prefix_atomic.spv
Binary files differ
diff --git a/tests/shader/gen/prefix_vkmm.spv b/tests/shader/gen/prefix_vkmm.spv
index c8b9ea2..0b8d475 100644
--- a/tests/shader/gen/prefix_vkmm.spv
+++ b/tests/shader/gen/prefix_vkmm.spv
Binary files differ
diff --git a/tests/shader/message_passing.comp b/tests/shader/message_passing.comp
index 521dedd..e5e53b6 100644
--- a/tests/shader/message_passing.comp
+++ b/tests/shader/message_passing.comp
@@ -23,7 +23,7 @@
uint flag;
};
-layout(binding = 0) coherent buffer DataBuf
+layout(binding = 0) buffer DataBuf
{
Element data[];
} data_buf;
@@ -40,7 +40,7 @@
void main()
{
- data_buf.data[gl_GlobalInvocationID.x].data = 1u;
+ atomicStore(data_buf.data[gl_GlobalInvocationID.x].data, 1u, gl_ScopeDevice, 0, 0);
#ifndef VKMM
memoryBarrierBuffer();
#endif
@@ -52,7 +52,7 @@
#ifndef VKMM
memoryBarrierBuffer();
#endif
- uint data = data_buf.data[read_ix].data;
+ uint data = atomicLoad(data_buf.data[read_ix].data, gl_ScopeDevice, 0, 0);
if (flag > data)
{
atomicAdd(control_buf.failures, 1u);
diff --git a/tests/shader/prefix.comp b/tests/shader/prefix.comp
index 0fc9909..a6a0d57 100644
--- a/tests/shader/prefix.comp
+++ b/tests/shader/prefix.comp
@@ -99,9 +99,9 @@
// Publish aggregate for this partition
if (gl_LocalInvocationID.x == WG_SIZE - 1) {
- atomicStore(state[part_ix].aggregate.element, agg.element, gl_ScopeDevice, 0, 0);
+ state[part_ix].aggregate = agg;
if (part_ix == 0) {
- atomicStore(state[0].prefix.element, agg.element, gl_ScopeDevice, 0, 0);
+ state[0].prefix = agg;
}
}
// Write flag with release semantics; this is done portably with a barrier.
@@ -147,13 +147,13 @@
if (flag == FLAG_PREFIX_READY) {
if (gl_LocalInvocationID.x == WG_SIZE - 1) {
- Monoid their_prefix = Monoid(atomicLoad(state[look_back_ix].prefix.element, gl_ScopeDevice, 0, 0));
+ Monoid their_prefix = state[look_back_ix].prefix;
exclusive = combine_monoid(their_prefix, exclusive);
}
break;
} else if (flag == FLAG_AGGREGATE_READY) {
if (gl_LocalInvocationID.x == WG_SIZE - 1) {
- their_agg.element = atomicLoad(state[look_back_ix].aggregate.element, gl_ScopeDevice, 0, 0);
+ their_agg = state[look_back_ix].aggregate;
exclusive = combine_monoid(their_agg, exclusive);
}
look_back_ix--;
@@ -193,7 +193,7 @@
if (gl_LocalInvocationID.x == WG_SIZE - 1) {
Monoid inclusive_prefix = combine_monoid(exclusive, agg);
sh_prefix = exclusive;
- atomicStore(state[part_ix].prefix.element, inclusive_prefix.element, gl_ScopeDevice, 0, 0);
+ state[part_ix].prefix = inclusive_prefix;
}
#ifndef VKMM
memoryBarrierBuffer();