Work around lack of timer query on Metal
diff --git a/tests/shader/gen/message_passing.dxil b/tests/shader/gen/message_passing.dxil
index 2be73da..dc2eced 100644
--- a/tests/shader/gen/message_passing.dxil
+++ b/tests/shader/gen/message_passing.dxil
Binary files differ
diff --git a/tests/shader/gen/message_passing.hlsl b/tests/shader/gen/message_passing.hlsl
index ba8ce5f..3a48808 100644
--- a/tests/shader/gen/message_passing.hlsl
+++ b/tests/shader/gen/message_passing.hlsl
@@ -6,7 +6,7 @@
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
-RWByteAddressBuffer data_buf : register(u0);
+globallycoherent RWByteAddressBuffer data_buf : register(u0);
RWByteAddressBuffer control_buf : register(u1);
static uint3 gl_GlobalInvocationID;
@@ -22,13 +22,12 @@
void comp_main()
{
- uint _76;
- data_buf.InterlockedExchange(gl_GlobalInvocationID.x * 8 + 0, 1u, _76);
+ data_buf.Store(gl_GlobalInvocationID.x * 8 + 0, 1u);
DeviceMemoryBarrier();
uint param = gl_GlobalInvocationID.x;
uint write_flag_ix = permute_flag_ix(param);
- uint _77;
- data_buf.InterlockedExchange(write_flag_ix * 8 + 4, 1u, _77);
+ uint _76;
+ data_buf.InterlockedExchange(write_flag_ix * 8 + 4, 1u, _76);
uint read_ix = (gl_GlobalInvocationID.x * 4099u) & 65535u;
uint param_1 = read_ix;
uint read_flag_ix = permute_flag_ix(param_1);
@@ -36,9 +35,7 @@
data_buf.InterlockedAdd(read_flag_ix * 8 + 4, 0, _58);
uint flag = _58;
DeviceMemoryBarrier();
- uint _62;
- data_buf.InterlockedAdd(read_ix * 8 + 0, 0, _62);
- uint data = _62;
+ uint data = data_buf.Load(read_ix * 8 + 0);
if (flag > data)
{
uint _73;
diff --git a/tests/shader/gen/message_passing.msl b/tests/shader/gen/message_passing.msl
index e48f48a..1bda181 100644
--- a/tests/shader/gen/message_passing.msl
+++ b/tests/shader/gen/message_passing.msl
@@ -31,21 +31,20 @@
return (data_ix * 419u) & 65535u;
}
-kernel void main0(device DataBuf& data_buf [[buffer(0)]], device ControlBuf& control_buf [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+kernel void main0(volatile device DataBuf& data_buf [[buffer(0)]], device ControlBuf& control_buf [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
- atomic_store_explicit((device atomic_uint*)&data_buf.data[gl_GlobalInvocationID.x].data, 1u, memory_order_relaxed);
+ data_buf.data[gl_GlobalInvocationID.x].data = 1u;
threadgroup_barrier(mem_flags::mem_device);
uint param = gl_GlobalInvocationID.x;
uint write_flag_ix = permute_flag_ix(param);
- atomic_store_explicit((device atomic_uint*)&data_buf.data[write_flag_ix].flag, 1u, memory_order_relaxed);
+ atomic_store_explicit((volatile device atomic_uint*)&data_buf.data[write_flag_ix].flag, 1u, memory_order_relaxed);
uint read_ix = (gl_GlobalInvocationID.x * 4099u) & 65535u;
uint param_1 = read_ix;
uint read_flag_ix = permute_flag_ix(param_1);
- uint _58 = atomic_load_explicit((device atomic_uint*)&data_buf.data[read_flag_ix].flag, memory_order_relaxed);
+ uint _58 = atomic_load_explicit((volatile device atomic_uint*)&data_buf.data[read_flag_ix].flag, memory_order_relaxed);
uint flag = _58;
threadgroup_barrier(mem_flags::mem_device);
- uint _62 = atomic_load_explicit((device atomic_uint*)&data_buf.data[read_ix].data, memory_order_relaxed);
- uint data = _62;
+ uint data = data_buf.data[read_ix].data;
if (flag > data)
{
uint _73 = atomic_fetch_add_explicit((device atomic_uint*)&control_buf.failures, 1u, memory_order_relaxed);
diff --git a/tests/shader/gen/message_passing.spv b/tests/shader/gen/message_passing.spv
index e5f56d6..fa7fb3e 100644
--- a/tests/shader/gen/message_passing.spv
+++ b/tests/shader/gen/message_passing.spv
Binary files differ
diff --git a/tests/shader/gen/message_passing_vkmm.spv b/tests/shader/gen/message_passing_vkmm.spv
index 8527c2b..58d3521 100644
--- a/tests/shader/gen/message_passing_vkmm.spv
+++ b/tests/shader/gen/message_passing_vkmm.spv
Binary files differ
diff --git a/tests/shader/gen/prefix.dxil b/tests/shader/gen/prefix.dxil
index 34f3d6a..12d0d50 100644
--- a/tests/shader/gen/prefix.dxil
+++ b/tests/shader/gen/prefix.dxil
Binary files differ
diff --git a/tests/shader/gen/prefix.hlsl b/tests/shader/gen/prefix.hlsl
index 3af5a96..ba76110 100644
--- a/tests/shader/gen/prefix.hlsl
+++ b/tests/shader/gen/prefix.hlsl
@@ -12,11 +12,11 @@
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);
-static const Monoid _185 = { 0u };
+static const Monoid _183 = { 0u };
globallycoherent RWByteAddressBuffer _43 : register(u2);
ByteAddressBuffer _67 : register(t0);
-RWByteAddressBuffer _372 : register(u1);
+RWByteAddressBuffer _367 : register(u1);
static uint3 gl_LocalInvocationID;
struct SPIRV_Cross_Input
@@ -76,10 +76,12 @@
}
if (gl_LocalInvocationID.x == 511u)
{
- _43.Store(part_ix * 12 + 8, agg.element);
+ uint _378;
+ _43.InterlockedExchange(part_ix * 12 + 8, agg.element, _378);
if (part_ix == 0u)
{
- _43.Store(12, agg.element);
+ uint _379;
+ _43.InterlockedExchange(12, agg.element, _379);
}
}
DeviceMemoryBarrier();
@@ -92,12 +94,11 @@
}
_43.Store(part_ix * 12 + 4, flag);
}
- Monoid exclusive = _185;
+ Monoid exclusive = _183;
if (part_ix != 0u)
{
uint look_back_ix = part_ix - 1u;
uint their_ix = 0u;
- Monoid their_prefix;
Monoid their_agg;
Monoid m;
while (true)
@@ -113,9 +114,10 @@
{
if (gl_LocalInvocationID.x == 511u)
{
- Monoid _223;
- _223.element = _43.Load(look_back_ix * 12 + 12);
- their_prefix.element = _223.element;
+ uint _221;
+ _43.InterlockedAdd(look_back_ix * 12 + 12, 0, _221);
+ Monoid _222 = { _221 };
+ Monoid their_prefix = _222;
Monoid param_4 = their_prefix;
Monoid param_5 = exclusive;
exclusive = combine_monoid(param_4, param_5);
@@ -128,9 +130,9 @@
{
if (gl_LocalInvocationID.x == 511u)
{
- Monoid _245;
- _245.element = _43.Load(look_back_ix * 12 + 8);
- their_agg.element = _245.element;
+ uint _242;
+ _43.InterlockedAdd(look_back_ix * 12 + 8, 0, _242);
+ their_agg.element = _242;
Monoid param_6 = their_agg;
Monoid param_7 = exclusive;
exclusive = combine_monoid(param_6, param_7);
@@ -142,9 +144,9 @@
}
if (gl_LocalInvocationID.x == 511u)
{
- Monoid _267;
- _267.element = _67.Load(((look_back_ix * 8192u) + their_ix) * 4 + 0);
- m.element = _267.element;
+ Monoid _263;
+ _263.element = _67.Load(((look_back_ix * 8192u) + their_ix) * 4 + 0);
+ m.element = _263.element;
if (their_ix == 0u)
{
their_agg = m;
@@ -185,7 +187,8 @@
Monoid param_13 = agg;
Monoid inclusive_prefix = combine_monoid(param_12, param_13);
sh_prefix = exclusive;
- _43.Store(part_ix * 12 + 12, inclusive_prefix.element);
+ uint _380;
+ _43.InterlockedExchange(part_ix * 12 + 12, inclusive_prefix.element, _380);
}
DeviceMemoryBarrier();
if (gl_LocalInvocationID.x == 511u)
@@ -211,7 +214,7 @@
Monoid param_16 = row;
Monoid param_17 = local[i_2];
Monoid m_1 = combine_monoid(param_16, param_17);
- _372.Store((ix + i_2) * 4 + 0, m_1.element);
+ _367.Store((ix + i_2) * 4 + 0, m_1.element);
}
}
diff --git a/tests/shader/gen/prefix.msl b/tests/shader/gen/prefix.msl
index 8e402a9..aaa7158 100644
--- a/tests/shader/gen/prefix.msl
+++ b/tests/shader/gen/prefix.msl
@@ -87,7 +87,7 @@
return Monoid{ a.element + b.element };
}
-kernel void main0(const device InBuf& _67 [[buffer(0)]], device OutBuf& _372 [[buffer(1)]], volatile device StateBuf& _43 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
+kernel void main0(const device InBuf& _67 [[buffer(0)]], device OutBuf& _367 [[buffer(1)]], volatile device StateBuf& _43 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{
threadgroup uint sh_part_ix;
threadgroup Monoid sh_scratch[512];
@@ -127,10 +127,10 @@
}
if (gl_LocalInvocationID.x == 511u)
{
- _43.state[part_ix].aggregate.element = agg.element;
+ atomic_store_explicit((volatile device atomic_uint*)&_43.state[part_ix].aggregate.element, agg.element, memory_order_relaxed);
if (part_ix == 0u)
{
- _43.state[0].prefix.element = agg.element;
+ atomic_store_explicit((volatile device atomic_uint*)&_43.state[0].prefix.element, agg.element, memory_order_relaxed);
}
}
threadgroup_barrier(mem_flags::mem_device);
@@ -148,7 +148,6 @@
{
uint look_back_ix = part_ix - 1u;
uint their_ix = 0u;
- Monoid their_prefix;
Monoid their_agg;
Monoid m;
while (true)
@@ -164,7 +163,8 @@
{
if (gl_LocalInvocationID.x == 511u)
{
- their_prefix.element = _43.state[look_back_ix].prefix.element;
+ uint _221 = atomic_load_explicit((volatile device atomic_uint*)&_43.state[look_back_ix].prefix.element, memory_order_relaxed);
+ Monoid their_prefix = Monoid{ _221 };
Monoid param_4 = their_prefix;
Monoid param_5 = exclusive;
exclusive = combine_monoid(param_4, param_5);
@@ -177,7 +177,8 @@
{
if (gl_LocalInvocationID.x == 511u)
{
- their_agg.element = _43.state[look_back_ix].aggregate.element;
+ uint _242 = atomic_load_explicit((volatile device atomic_uint*)&_43.state[look_back_ix].aggregate.element, memory_order_relaxed);
+ their_agg.element = _242;
Monoid param_6 = their_agg;
Monoid param_7 = exclusive;
exclusive = combine_monoid(param_6, param_7);
@@ -230,7 +231,7 @@
Monoid param_13 = agg;
Monoid inclusive_prefix = combine_monoid(param_12, param_13);
sh_prefix = exclusive;
- _43.state[part_ix].prefix.element = inclusive_prefix.element;
+ atomic_store_explicit((volatile device atomic_uint*)&_43.state[part_ix].prefix.element, inclusive_prefix.element, memory_order_relaxed);
}
threadgroup_barrier(mem_flags::mem_device);
if (gl_LocalInvocationID.x == 511u)
@@ -256,7 +257,7 @@
Monoid param_16 = row;
Monoid param_17 = local[i_2];
Monoid m_1 = combine_monoid(param_16, param_17);
- _372.outbuf[ix + i_2].element = m_1.element;
+ _367.outbuf[ix + i_2].element = m_1.element;
}
}
diff --git a/tests/shader/gen/prefix.spv b/tests/shader/gen/prefix.spv
index d2c1aad..0da9ed8 100644
--- a/tests/shader/gen/prefix.spv
+++ b/tests/shader/gen/prefix.spv
Binary files differ
diff --git a/tests/shader/gen/prefix_atomic.dxil b/tests/shader/gen/prefix_atomic.dxil
index 68f47e5..80c05a3 100644
--- a/tests/shader/gen/prefix_atomic.dxil
+++ b/tests/shader/gen/prefix_atomic.dxil
Binary files differ
diff --git a/tests/shader/gen/prefix_atomic.hlsl b/tests/shader/gen/prefix_atomic.hlsl
index 10f7081..6105c01 100644
--- a/tests/shader/gen/prefix_atomic.hlsl
+++ b/tests/shader/gen/prefix_atomic.hlsl
@@ -12,11 +12,11 @@
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);
-static const Monoid _185 = { 0u };
+static const Monoid _183 = { 0u };
globallycoherent RWByteAddressBuffer _43 : register(u2);
ByteAddressBuffer _67 : register(t0);
-RWByteAddressBuffer _372 : register(u1);
+RWByteAddressBuffer _367 : register(u1);
static uint3 gl_LocalInvocationID;
struct SPIRV_Cross_Input
@@ -76,10 +76,12 @@
}
if (gl_LocalInvocationID.x == 511u)
{
- _43.Store(part_ix * 12 + 8, agg.element);
+ uint _378;
+ _43.InterlockedExchange(part_ix * 12 + 8, agg.element, _378);
if (part_ix == 0u)
{
- _43.Store(12, agg.element);
+ uint _379;
+ _43.InterlockedExchange(12, agg.element, _379);
}
}
DeviceMemoryBarrier();
@@ -90,24 +92,23 @@
{
flag = 2u;
}
- uint _383;
- _43.InterlockedExchange(part_ix * 12 + 4, flag, _383);
+ uint _380;
+ _43.InterlockedExchange(part_ix * 12 + 4, flag, _380);
}
- Monoid exclusive = _185;
+ Monoid exclusive = _183;
if (part_ix != 0u)
{
uint look_back_ix = part_ix - 1u;
uint their_ix = 0u;
- Monoid their_prefix;
Monoid their_agg;
Monoid m;
while (true)
{
if (gl_LocalInvocationID.x == 511u)
{
- uint _208;
- _43.InterlockedAdd(look_back_ix * 12 + 4, 0, _208);
- sh_flag = _208;
+ uint _206;
+ _43.InterlockedAdd(look_back_ix * 12 + 4, 0, _206);
+ sh_flag = _206;
}
GroupMemoryBarrierWithGroupSync();
DeviceMemoryBarrier();
@@ -116,9 +117,10 @@
{
if (gl_LocalInvocationID.x == 511u)
{
- Monoid _223;
- _223.element = _43.Load(look_back_ix * 12 + 12);
- their_prefix.element = _223.element;
+ uint _221;
+ _43.InterlockedAdd(look_back_ix * 12 + 12, 0, _221);
+ Monoid _222 = { _221 };
+ Monoid their_prefix = _222;
Monoid param_4 = their_prefix;
Monoid param_5 = exclusive;
exclusive = combine_monoid(param_4, param_5);
@@ -131,9 +133,9 @@
{
if (gl_LocalInvocationID.x == 511u)
{
- Monoid _245;
- _245.element = _43.Load(look_back_ix * 12 + 8);
- their_agg.element = _245.element;
+ uint _242;
+ _43.InterlockedAdd(look_back_ix * 12 + 8, 0, _242);
+ their_agg.element = _242;
Monoid param_6 = their_agg;
Monoid param_7 = exclusive;
exclusive = combine_monoid(param_6, param_7);
@@ -145,9 +147,9 @@
}
if (gl_LocalInvocationID.x == 511u)
{
- Monoid _267;
- _267.element = _67.Load(((look_back_ix * 8192u) + their_ix) * 4 + 0);
- m.element = _267.element;
+ Monoid _263;
+ _263.element = _67.Load(((look_back_ix * 8192u) + their_ix) * 4 + 0);
+ m.element = _263.element;
if (their_ix == 0u)
{
their_agg = m;
@@ -188,13 +190,14 @@
Monoid param_13 = agg;
Monoid inclusive_prefix = combine_monoid(param_12, param_13);
sh_prefix = exclusive;
- _43.Store(part_ix * 12 + 12, inclusive_prefix.element);
+ uint _381;
+ _43.InterlockedExchange(part_ix * 12 + 12, inclusive_prefix.element, _381);
}
DeviceMemoryBarrier();
if (gl_LocalInvocationID.x == 511u)
{
- uint _384;
- _43.InterlockedExchange(part_ix * 12 + 4, 2u, _384);
+ uint _382;
+ _43.InterlockedExchange(part_ix * 12 + 4, 2u, _382);
}
}
GroupMemoryBarrierWithGroupSync();
@@ -215,7 +218,7 @@
Monoid param_16 = row;
Monoid param_17 = local[i_2];
Monoid m_1 = combine_monoid(param_16, param_17);
- _372.Store((ix + i_2) * 4 + 0, m_1.element);
+ _367.Store((ix + i_2) * 4 + 0, m_1.element);
}
}
diff --git a/tests/shader/gen/prefix_atomic.msl b/tests/shader/gen/prefix_atomic.msl
index 6d7d155..d5de44a 100644
--- a/tests/shader/gen/prefix_atomic.msl
+++ b/tests/shader/gen/prefix_atomic.msl
@@ -87,7 +87,7 @@
return Monoid{ a.element + b.element };
}
-kernel void main0(const device InBuf& _67 [[buffer(0)]], device OutBuf& _372 [[buffer(1)]], volatile device StateBuf& _43 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
+kernel void main0(const device InBuf& _67 [[buffer(0)]], device OutBuf& _367 [[buffer(1)]], volatile device StateBuf& _43 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{
threadgroup uint sh_part_ix;
threadgroup Monoid sh_scratch[512];
@@ -127,10 +127,10 @@
}
if (gl_LocalInvocationID.x == 511u)
{
- _43.state[part_ix].aggregate.element = agg.element;
+ atomic_store_explicit((volatile device atomic_uint*)&_43.state[part_ix].aggregate.element, agg.element, memory_order_relaxed);
if (part_ix == 0u)
{
- _43.state[0].prefix.element = agg.element;
+ atomic_store_explicit((volatile device atomic_uint*)&_43.state[0].prefix.element, agg.element, memory_order_relaxed);
}
}
threadgroup_barrier(mem_flags::mem_device);
@@ -148,15 +148,14 @@
{
uint look_back_ix = part_ix - 1u;
uint their_ix = 0u;
- Monoid their_prefix;
Monoid their_agg;
Monoid m;
while (true)
{
if (gl_LocalInvocationID.x == 511u)
{
- uint _208 = atomic_load_explicit((volatile device atomic_uint*)&_43.state[look_back_ix].flag, memory_order_relaxed);
- sh_flag = _208;
+ uint _206 = atomic_load_explicit((volatile device atomic_uint*)&_43.state[look_back_ix].flag, memory_order_relaxed);
+ sh_flag = _206;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
threadgroup_barrier(mem_flags::mem_device);
@@ -165,7 +164,8 @@
{
if (gl_LocalInvocationID.x == 511u)
{
- their_prefix.element = _43.state[look_back_ix].prefix.element;
+ uint _221 = atomic_load_explicit((volatile device atomic_uint*)&_43.state[look_back_ix].prefix.element, memory_order_relaxed);
+ Monoid their_prefix = Monoid{ _221 };
Monoid param_4 = their_prefix;
Monoid param_5 = exclusive;
exclusive = combine_monoid(param_4, param_5);
@@ -178,7 +178,8 @@
{
if (gl_LocalInvocationID.x == 511u)
{
- their_agg.element = _43.state[look_back_ix].aggregate.element;
+ uint _242 = atomic_load_explicit((volatile device atomic_uint*)&_43.state[look_back_ix].aggregate.element, memory_order_relaxed);
+ their_agg.element = _242;
Monoid param_6 = their_agg;
Monoid param_7 = exclusive;
exclusive = combine_monoid(param_6, param_7);
@@ -231,7 +232,7 @@
Monoid param_13 = agg;
Monoid inclusive_prefix = combine_monoid(param_12, param_13);
sh_prefix = exclusive;
- _43.state[part_ix].prefix.element = inclusive_prefix.element;
+ atomic_store_explicit((volatile device atomic_uint*)&_43.state[part_ix].prefix.element, inclusive_prefix.element, memory_order_relaxed);
}
threadgroup_barrier(mem_flags::mem_device);
if (gl_LocalInvocationID.x == 511u)
@@ -257,7 +258,7 @@
Monoid param_16 = row;
Monoid param_17 = local[i_2];
Monoid m_1 = combine_monoid(param_16, param_17);
- _372.outbuf[ix + i_2].element = m_1.element;
+ _367.outbuf[ix + i_2].element = m_1.element;
}
}
diff --git a/tests/shader/gen/prefix_atomic.spv b/tests/shader/gen/prefix_atomic.spv
index acca545..1b31f67 100644
--- a/tests/shader/gen/prefix_atomic.spv
+++ b/tests/shader/gen/prefix_atomic.spv
Binary files differ
diff --git a/tests/shader/gen/prefix_vkmm.spv b/tests/shader/gen/prefix_vkmm.spv
index 0b8d475..c8b9ea2 100644
--- a/tests/shader/gen/prefix_vkmm.spv
+++ b/tests/shader/gen/prefix_vkmm.spv
Binary files differ
diff --git a/tests/shader/message_passing.comp b/tests/shader/message_passing.comp
index e5e53b6..521dedd 100644
--- a/tests/shader/message_passing.comp
+++ b/tests/shader/message_passing.comp
@@ -23,7 +23,7 @@
uint flag;
};
-layout(binding = 0) buffer DataBuf
+layout(binding = 0) coherent buffer DataBuf
{
Element data[];
} data_buf;
@@ -40,7 +40,7 @@
void main()
{
- atomicStore(data_buf.data[gl_GlobalInvocationID.x].data, 1u, gl_ScopeDevice, 0, 0);
+ data_buf.data[gl_GlobalInvocationID.x].data = 1u;
#ifndef VKMM
memoryBarrierBuffer();
#endif
@@ -52,7 +52,7 @@
#ifndef VKMM
memoryBarrierBuffer();
#endif
- uint data = atomicLoad(data_buf.data[read_ix].data, gl_ScopeDevice, 0, 0);
+ uint data = data_buf.data[read_ix].data;
if (flag > data)
{
atomicAdd(control_buf.failures, 1u);
diff --git a/tests/shader/prefix.comp b/tests/shader/prefix.comp
index a6a0d57..0fc9909 100644
--- a/tests/shader/prefix.comp
+++ b/tests/shader/prefix.comp
@@ -99,9 +99,9 @@
// Publish aggregate for this partition
if (gl_LocalInvocationID.x == WG_SIZE - 1) {
- state[part_ix].aggregate = agg;
+ atomicStore(state[part_ix].aggregate.element, agg.element, gl_ScopeDevice, 0, 0);
if (part_ix == 0) {
- state[0].prefix = agg;
+ atomicStore(state[0].prefix.element, agg.element, gl_ScopeDevice, 0, 0);
}
}
// Write flag with release semantics; this is done portably with a barrier.
@@ -147,13 +147,13 @@
if (flag == FLAG_PREFIX_READY) {
if (gl_LocalInvocationID.x == WG_SIZE - 1) {
- Monoid their_prefix = state[look_back_ix].prefix;
+ Monoid their_prefix = Monoid(atomicLoad(state[look_back_ix].prefix.element, gl_ScopeDevice, 0, 0));
exclusive = combine_monoid(their_prefix, exclusive);
}
break;
} else if (flag == FLAG_AGGREGATE_READY) {
if (gl_LocalInvocationID.x == WG_SIZE - 1) {
- their_agg = state[look_back_ix].aggregate;
+ their_agg.element = atomicLoad(state[look_back_ix].aggregate.element, gl_ScopeDevice, 0, 0);
exclusive = combine_monoid(their_agg, exclusive);
}
look_back_ix--;
@@ -193,7 +193,7 @@
if (gl_LocalInvocationID.x == WG_SIZE - 1) {
Monoid inclusive_prefix = combine_monoid(exclusive, agg);
sh_prefix = exclusive;
- state[part_ix].prefix = inclusive_prefix;
+ atomicStore(state[part_ix].prefix.element, inclusive_prefix.element, gl_ScopeDevice, 0, 0);
}
#ifndef VKMM
memoryBarrierBuffer();
diff --git a/tests/src/coherence.rs b/tests/src/coherence.rs
index 8a0bf97..28ee79c 100644
--- a/tests/src/coherence.rs
+++ b/tests/src/coherence.rs
@@ -72,7 +72,13 @@
commands.cmd_buf.memory_barrier();
commands.download(&out_buf);
}
- total_elapsed += runner.submit(commands);
+ let start_clock = std::time::Instant::now();
+ let mut elapsed = runner.submit(commands);
+ // Work around lack of timer queries on Metal
+ if runner.backend_type() == BackendType::Metal {
+ elapsed = start_clock.elapsed().as_secs_f64();
+ }
+ total_elapsed += elapsed;
if i == 0 {
let mut dst: Vec<u32> = Default::default();
out_buf.read(&mut dst);