| // Copyright 2021 The piet-gpu authors. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // https://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| // Also licensed under MIT license, at your choice. |
| |
| use piet_gpu_hal::{include_shader, BackendType, BindType, BufferUsage, DescriptorSet}; |
| use piet_gpu_hal::{Buffer, Pipeline}; |
| |
| use crate::clear::{ClearBinding, ClearCode, ClearStage}; |
| use crate::runner::{Commands, Runner}; |
| use crate::test_result::TestResult; |
| use crate::Config; |
| |
/// Workgroup size used for the coherence dispatch; also the divisor that
/// turns `N_BUCKETS` into a workgroup count.
const WG_SIZE: u64 = 1 << 8;
/// Number of buckets. The data buffer holds `N_BUCKETS + 1` 32-bit words and
/// the output buffer holds one record per bucket.
const N_BUCKETS: u64 = 1 << 16;
/// Number of 32-bit slots in each bucket's record in the output buffer.
const OUT_BUF_SIZE: u64 = 1 << 8;
| |
| struct CoherenceCode { |
| pipeline: Pipeline, |
| clear_code: Option<ClearCode>, |
| } |
| |
| struct CoherenceStage { |
| clear_stage: Option<ClearStage>, |
| } |
| |
| struct CoherenceBinding { |
| descriptor_set: DescriptorSet, |
| clear_binding: Option<ClearBinding>, |
| } |
| |
/// Selects which coherence shader the test runs.
///
/// The `Debug` representation is embedded in the test name
/// (`"coherence, Load"` / `"coherence, Rmw"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Variant {
    /// Runs the `coherence` shader.
    Load,
    /// Runs the `coherence_rmw` shader.
    Rmw,
}
| |
| pub unsafe fn run_coherence_test( |
| runner: &mut Runner, |
| _config: &Config, |
| variant: Variant, |
| ) -> TestResult { |
| let mut result = TestResult::new(format!("coherence, {:?}", variant)); |
| let data_buf = runner |
| .session |
| .create_buffer(4 * (N_BUCKETS + 1), BufferUsage::STORAGE) |
| .unwrap(); |
| let out_buf = runner.buf_down(4 * OUT_BUF_SIZE * N_BUCKETS); |
| let code = CoherenceCode::new(runner, variant); |
| let stage = CoherenceStage::new(runner, &code, N_BUCKETS); |
| let binding = stage.bind(runner, &code, &data_buf, &out_buf.dev_buf); |
| // This runs long, and we're not really benchmarking, so just do 1 iter. |
| let n_iter = 1; |
| let mut total_elapsed = 0.0; |
| for i in 0..n_iter { |
| let mut commands = runner.commands(); |
| commands.write_timestamp(0); |
| stage.record(&mut commands, &code, &binding, &data_buf); |
| commands.write_timestamp(1); |
| if i == 0 { |
| commands.cmd_buf.memory_barrier(); |
| commands.download(&out_buf); |
| } |
| let start_clock = std::time::Instant::now(); |
| let mut elapsed = runner.submit(commands); |
| // Work around lack of timer queries on Metal |
| if runner.backend_type() == BackendType::Metal { |
| elapsed = start_clock.elapsed().as_secs_f64(); |
| } |
| total_elapsed += elapsed; |
| if i == 0 { |
| let mut dst: Vec<u32> = Default::default(); |
| out_buf.read(&mut dst); |
| result.info(analyze(total_elapsed, &dst)); |
| } |
| } |
| result.timing(total_elapsed, 0); |
| result |
| } |
| |
| impl CoherenceCode { |
| unsafe fn new(runner: &mut Runner, variant: Variant) -> CoherenceCode { |
| let code = match variant { |
| Variant::Load => include_shader!(&runner.session, "../shader/gen/coherence"), |
| Variant::Rmw => include_shader!(&runner.session, "../shader/gen/coherence_rmw"), |
| }; |
| let pipeline = runner |
| .session |
| .create_compute_pipeline(code, &[BindType::Buffer, BindType::Buffer]) |
| .unwrap(); |
| let clear_code = if runner.backend_type() != BackendType::Vulkan { |
| Some(ClearCode::new(runner)) |
| } else { |
| None |
| }; |
| CoherenceCode { |
| pipeline, |
| clear_code, |
| } |
| } |
| } |
| |
| impl CoherenceStage { |
| unsafe fn new(runner: &mut Runner, code: &CoherenceCode, n_buckets: u64) -> CoherenceStage { |
| let clear_stage = if code.clear_code.is_some() { |
| Some(ClearStage::new(runner, n_buckets + 1)) |
| } else { |
| None |
| }; |
| CoherenceStage { clear_stage } |
| } |
| |
| unsafe fn bind( |
| &self, |
| runner: &mut Runner, |
| code: &CoherenceCode, |
| data_buf: &Buffer, |
| out_buf: &Buffer, |
| ) -> CoherenceBinding { |
| let descriptor_set = runner |
| .session |
| .create_simple_descriptor_set(&code.pipeline, &[data_buf, out_buf]) |
| .unwrap(); |
| let clear_binding = if let Some(stage) = &self.clear_stage { |
| Some(stage.bind(runner, &code.clear_code.as_ref().unwrap(), data_buf)) |
| } else { |
| None |
| }; |
| CoherenceBinding { |
| descriptor_set, |
| clear_binding, |
| } |
| } |
| |
| unsafe fn record( |
| &self, |
| commands: &mut Commands, |
| code: &CoherenceCode, |
| bindings: &CoherenceBinding, |
| data_buf: &Buffer, |
| ) { |
| if let Some(stage) = &self.clear_stage { |
| stage.record( |
| commands, |
| code.clear_code.as_ref().unwrap(), |
| bindings.clear_binding.as_ref().unwrap(), |
| ); |
| } else { |
| commands.cmd_buf.clear_buffer(data_buf, None); |
| } |
| commands.cmd_buf.memory_barrier(); |
| let n_workgroups = N_BUCKETS / WG_SIZE; |
| commands.cmd_buf.dispatch( |
| &code.pipeline, |
| &bindings.descriptor_set, |
| (n_workgroups as u32, 1, 1), |
| (WG_SIZE as u32, 1, 1), |
| ); |
| } |
| } |
| |
| fn analyze(elapsed: f64, results: &[u32]) -> String { |
| let mut max_ts = 0; |
| let mut sum_ticks = 0.0; |
| let mut n_samples = 0; |
| for i in 0..N_BUCKETS { |
| let start_ix = i * OUT_BUF_SIZE; |
| for j in 1..OUT_BUF_SIZE { |
| if j == OUT_BUF_SIZE - 1 || results[(start_ix + j + 1) as usize] == !0 { |
| let end_ts = results[(start_ix + j) as usize]; |
| max_ts = max_ts.max(end_ts); |
| break; |
| } |
| sum_ticks += results[(start_ix + j) as usize] as f64; |
| n_samples += 1; |
| } |
| } |
| let clock_res = elapsed / max_ts as f64; |
| let mean_latency = clock_res * sum_ticks / n_samples as f64; |
| format!( |
| "clock resolution {}s, mean latency {}s", |
| crate::test_result::format_nice(clock_res, 1), |
| crate::test_result::format_nice(mean_latency, 1) |
| ) |
| } |