Measure throughput over problems sizes
Measure both GPU and CPU throughput over a range of problem sizes, formatting the output for gnuplot.
diff --git a/tests/src/bbox_intersection.rs b/tests/src/bbox_intersection.rs
index 66851c7..fac9704 100644
--- a/tests/src/bbox_intersection.rs
+++ b/tests/src/bbox_intersection.rs
@@ -65,7 +65,8 @@
pub unsafe fn run_intersection_test(runner: &mut Runner, config: &Config) -> TestResult {
let mut result = TestResult::new("Bounding box Intersection");
println!("# bounding box intersection");
- for exp in 10..=16 {
+ let expmax = 2 * WG_SIZE.trailing_zeros();
+ for exp in 10..=expmax {
let n_elements: u64 = 1 << exp;
let data = IntersectionData::new(n_elements);
let data_buf = runner
@@ -101,6 +102,18 @@
println!("{} {}", n_elements, throughput);
}
println!("e");
+ println!("# bounding box intersection, CPU");
+ for exp in 10..=expmax {
+ let n_elements: u64 = 1 << exp;
+ let data = IntersectionData::new(n_elements);
+ let start = std::time::Instant::now();
+ let result = data.run();
+ let elapsed = start.elapsed().as_secs_f64();
+ let throughput = n_elements as f64 / elapsed;
+ println!("{} {}", n_elements, throughput);
+ data.verify(&result);
+ }
+ println!("e");
//result.timing(total_elapsed, n_elements * n_iter);
result
@@ -148,7 +161,10 @@
.session
.create_buffer(bic_size, BufferUsage::STORAGE)
.unwrap();
- IntersectionStage { bic_buf, intersection_buf }
+ IntersectionStage {
+ bic_buf,
+ intersection_buf,
+ }
}
unsafe fn bind(
@@ -217,7 +233,12 @@
}
fn bbox_intersection(a: [f32; 4], b: [f32; 4]) -> [f32; 4] {
- [a[0].max(b[0]), a[1].max(b[1]), a[2].min(b[2]), a[3].min(b[3])]
+ [
+ a[0].max(b[0]),
+ a[1].max(b[1]),
+ a[2].min(b[2]),
+ a[3].min(b[3]),
+ ]
}
const EMPTY_BBOX: [f32; 4] = [-1e9, -1e9, 1e9, 1e9];
@@ -246,12 +267,37 @@
} else {
EMPTY_BBOX
};
- Node { node_type, bbox, .. Default::default() }
+ Node {
+ node_type,
+ bbox,
+ ..Default::default()
+ }
})
.collect();
IntersectionData { nodes }
}
+ fn run(&self) -> Vec<[f32; 4]> {
+ let mut stack = Vec::new();
+ let mut tos = EMPTY_BBOX;
+ self.nodes
+ .iter()
+ .map(|inp| match inp.node_type {
+ OPEN_PAREN => {
+ tos = bbox_intersection(tos, inp.bbox);
+ stack.push(tos);
+ tos
+ }
+ CLOSE_PAREN => {
+ stack.pop().unwrap();
+ tos = *stack.last().unwrap_or(&EMPTY_BBOX);
+ EMPTY_BBOX
+ }
+ _ => unreachable!(),
+ })
+ .collect()
+ }
+
fn verify(&self, data: &[[f32; 4]]) -> Option<String> {
let mut stack = Vec::new();
let mut tos = EMPTY_BBOX;
diff --git a/tests/src/bbox_union.rs b/tests/src/bbox_union.rs
index 9224cb1..ca2224c 100644
--- a/tests/src/bbox_union.rs
+++ b/tests/src/bbox_union.rs
@@ -66,7 +66,8 @@
pub unsafe fn run_union_test(runner: &mut Runner, config: &Config) -> TestResult {
let mut result = TestResult::new("Bounding box union");
println!("# bounding box union");
- for exp in 10..=16 {
+ let expmax = 2 * WG_SIZE.trailing_zeros();
+ for exp in 10..=expmax {
let n_elements: u64 = 1 << exp;
let data = UnionData::new(n_elements);
let data_buf = runner
@@ -102,6 +103,18 @@
println!("{} {}", n_elements, throughput);
}
println!("e");
+ println!("# bounding box union, CPU");
+ for exp in 10..=expmax {
+ let n_elements: u64 = 1 << exp;
+ let data = UnionData::new(n_elements);
+ let start = std::time::Instant::now();
+ let result = data.run();
+ let elapsed = start.elapsed().as_secs_f64();
+ let throughput = n_elements as f64 / elapsed;
+ println!("{} {}", n_elements, throughput);
+ data.verify(&result);
+ }
+ println!("e");
//result.timing(total_elapsed, n_elements * n_iter);
result
@@ -218,7 +231,12 @@
}
fn bbox_union(a: [f32; 4], b: [f32; 4]) -> [f32; 4] {
- [a[0].min(b[0]), a[1].min(b[1]), a[2].max(b[2]), a[3].max(b[3])]
+ [
+ a[0].min(b[0]),
+ a[1].min(b[1]),
+ a[2].max(b[2]),
+ a[3].max(b[3]),
+ ]
}
const EMPTY_BBOX: [f32; 4] = [1e9, 1e9, -1e9, -1e9];
@@ -246,12 +264,43 @@
} else {
EMPTY_BBOX
};
- Node { node_type, bbox, .. Default::default() }
+ Node {
+ node_type,
+ bbox,
+ ..Default::default()
+ }
})
.collect();
UnionData { nodes }
}
+ // Run on CPU side, for performance comparison
+ fn run(&self) -> Vec<[f32; 4]> {
+ let mut stack = Vec::new();
+ self.nodes
+ .iter()
+ .map(|inp| {
+ let mut expected = inp.bbox;
+ match inp.node_type {
+ OPEN_PAREN => stack.push(EMPTY_BBOX),
+ CLOSE_PAREN => {
+ let tos = stack.pop().unwrap();
+ expected = tos;
+ if let Some(nos) = stack.last_mut() {
+ *nos = bbox_union(*nos, tos);
+ }
+ }
+ LEAF => {
+ let tos = stack.last_mut().unwrap();
+ *tos = bbox_union(*tos, inp.bbox);
+ }
+ _ => unreachable!(),
+ }
+ expected
+ })
+ .collect()
+ }
+
fn verify(&self, data: &[[f32; 4]]) -> Option<String> {
let mut stack = Vec::new();
for (i, (inp, outp)) in self.nodes.iter().zip(data).enumerate() {
diff --git a/tests/src/main.rs b/tests/src/main.rs
index 7f231f1..754848d 100644
--- a/tests/src/main.rs
+++ b/tests/src/main.rs
@@ -141,7 +141,10 @@
if config.groups.matches("stack") {
report(&stack::run_stack_test(&mut runner, &config));
report(&bbox_union::run_union_test(&mut runner, &config));
- report(&bbox_intersection::run_intersection_test(&mut runner, &config));
+ report(&bbox_intersection::run_intersection_test(
+ &mut runner,
+ &config,
+ ));
}
#[cfg(feature = "piet-gpu")]
if config.groups.matches("piet") {
diff --git a/tests/src/stack.rs b/tests/src/stack.rs
index c86e3df..aba875b 100644
--- a/tests/src/stack.rs
+++ b/tests/src/stack.rs
@@ -47,7 +47,8 @@
pub unsafe fn run_stack_test(runner: &mut Runner, config: &Config) -> TestResult {
let mut result = TestResult::new("stack monoid");
println!("# stack monoid (just parentheses matching)");
- for exp in 10..=18 {
+ let expmax = 2 * (WG_SIZE * N_ROWS).trailing_zeros();
+ for exp in 10..=expmax {
let n_elements: u64 = 1 << exp;
let data = StackData::new(n_elements);
let data_buf = runner
@@ -83,6 +84,18 @@
println!("{} {}", n_elements, throughput);
}
println!("e");
+ println!("# stack monoid, CPU");
+ for exp in 10..=expmax {
+ let n_elements: u64 = 1 << exp;
+ let data = StackData::new(n_elements);
+ let start = std::time::Instant::now();
+ let result = data.run();
+ let elapsed = start.elapsed().as_secs_f64();
+ let throughput = n_elements as f64 / elapsed;
+ println!("{} {}", n_elements, throughput);
+ data.verify(&result);
+ }
+ println!("e");
//result.timing(total_elapsed, n_elements * n_iter);
result
@@ -208,6 +221,24 @@
StackData { dyck }
}
+ // Run on CPU side, for performance comparison
+ fn run(&self) -> Vec<u32> {
+ let mut stack = Vec::new();
+ self.dyck
+ .iter()
+ .enumerate()
+ .map(|(i, inp)| {
+ let expected = *stack.last().unwrap_or(&!0);
+ if *inp == 0 {
+ stack.pop();
+ } else {
+ stack.push(i as u32);
+ }
+ expected
+ })
+ .collect()
+ }
+
fn verify(&self, data: &[u32]) -> Option<String> {
let mut stack = Vec::new();
for (i, (inp, outp)) in self.dyck.iter().zip(data).enumerate() {