Improve numerical robustness of conservative line rasterization
Apply additional logic to make sure that first and last tiles of conservative line rasterization land on the right tile.
Note: this fixes the assert and should avoid artifacts, but still doesn't render the poland svg correctly (there are black splotches).
Fixes #376 and #378
diff --git a/shader/fine.wgsl b/shader/fine.wgsl
index f41747d..5d500ad 100644
--- a/shader/fine.wgsl
+++ b/shader/fine.wgsl
@@ -77,6 +77,10 @@
let SEG_SIZE = 5u;
+// See cpu_shaders/util.rs for explanation of these.
+let ONE_MINUS_ULP: f32 = 0.99999994;
+let ROBUST_EPSILON: f32 = 2e-7;
+
// New multisampled algorithm.
fn fill_path_ms(fill: CmdFill, wg_id: vec2<u32>, local_id: vec2<u32>) -> array<f32, PIXELS_PER_THREAD> {
let n_segs = fill.size_and_rule >> 1u;
@@ -175,16 +179,21 @@
// One alternative is to compute it in a separate dispatch.
let dx = abs(xy1.x - xy0.x);
let dy = xy1.y - xy0.y;
- // TODO: apply numerical robustness and optimization
- let dy_dxdy = dy / (dx + dy);
- let a = dx / (dx + dy);
+ let idxdy = 1.0 / (dx + dy);
+ var a = dx * idxdy;
let is_positive_slope = xy1.x >= xy0.x;
let sign = select(-1.0, 1.0, is_positive_slope);
let xt0 = floor(xy0.x * sign);
let c = xy0.x * sign - xt0;
let y0i = floor(xy0.y);
let ytop = y0i + 1.0;
- let b = dy_dxdy * c + a * (ytop - xy0.y);
+ let b = min((dy * c + dx * (ytop - xy0.y)) * idxdy, ONE_MINUS_ULP);
+ let count_x = span(xy0.x, xy1.x) - 1u;
+ let count = count_x + span(xy0.y, xy1.y);
+ let robust_err = floor(a * (f32(count) - 1.0) + b) - f32(count_x);
+ if robust_err != 0.0 {
+ a -= ROBUST_EPSILON * sign(robust_err);
+ }
let x0i = i32(xt0 * sign + 0.5 * (sign - 1.0));
// Use line equation to plot pixel coordinates
diff --git a/shader/path_count.wgsl b/shader/path_count.wgsl
index e34ff7a..dabb6d1 100644
--- a/shader/path_count.wgsl
+++ b/shader/path_count.wgsl
@@ -36,6 +36,10 @@
return u32(max(ceil(max(a, b)) - floor(min(a, b)), 1.0));
}
+// See cpu_shaders/util.rs for explanation of these.
+let ONE_MINUS_ULP: f32 = 0.99999994;
+let ROBUST_EPSILON: f32 = 2e-7;
+
// Note regarding clipping to bounding box:
//
// We have to do the backdrop bumps for all tiles to the left of the bbox.
@@ -57,7 +61,8 @@
let xy1 = select(line.p0, line.p1, is_down);
let s0 = xy0 * TILE_SCALE;
let s1 = xy1 * TILE_SCALE;
- count = span(s0.x, s1.x) + span(s0.y, s1.y) - 1u;
+ let count_x = span(s0.x, s1.x) - 1u;
+ count = count_x + span(s0.y, s1.y);
let line_ix = global_id.x;
let dx = abs(s1.x - s0.x);
@@ -72,14 +77,18 @@
return;
}
let idxdy = 1.0 / (dx + dy);
- let a = dx * idxdy;
+ var a = dx * idxdy;
let is_positive_slope = s1.x >= s0.x;
let sign = select(-1.0, 1.0, is_positive_slope);
let xt0 = floor(s0.x * sign);
let c = s0.x * sign - xt0;
let y0 = floor(s0.y);
let ytop = select(y0 + 1.0, ceil(s0.y), s0.y == s1.y);
- let b = (dy * c + dx * (ytop - s0.y)) * idxdy;
+ let b = min((dy * c + dx * (ytop - s0.y)) * idxdy, ONE_MINUS_ULP);
+ let robust_err = floor(a * (f32(count) - 1.0) + b) - f32(count_x);
+ if robust_err != 0.0 {
+ a -= ROBUST_EPSILON * sign(robust_err);
+ }
let x0 = xt0 * sign + select(-1.0, 0.0, is_positive_slope);
let path = paths[line.path_ix];
diff --git a/shader/path_tiling.wgsl b/shader/path_tiling.wgsl
index ace01ab..fad5d72 100644
--- a/shader/path_tiling.wgsl
+++ b/shader/path_tiling.wgsl
@@ -29,6 +29,10 @@
return u32(max(ceil(max(a, b)) - floor(min(a, b)), 1.0));
}
+// See cpu_shaders/util.rs for explanation of these.
+let ONE_MINUS_ULP: f32 = 0.99999994;
+let ROBUST_EPSILON: f32 = 2e-7;
+
// One invocation for each tile that is to be written.
// Total number of invocations = bump.seg_counts
@compute @workgroup_size(256)
@@ -49,20 +53,25 @@
var xy1 = select(line.p0, line.p1, is_down);
let s0 = xy0 * TILE_SCALE;
let s1 = xy1 * TILE_SCALE;
- let count = span(s0.x, s1.x) + span(s0.y, s1.y) - 1u;
+ let count_x = span(s0.x, s1.x) - 1u;
+ let count = count_x + span(s0.y, s1.y);
let dx = abs(s1.x - s0.x);
let dy = s1.y - s0.y;
// Division by zero can't happen because zero-length lines
// have already been discarded in the path_count stage.
let idxdy = 1.0 / (dx + dy);
- let a = dx * idxdy;
+ var a = dx * idxdy;
let is_positive_slope = s1.x >= s0.x;
let sign = select(-1.0, 1.0, is_positive_slope);
let xt0 = floor(s0.x * sign);
let c = s0.x * sign - xt0;
let y0i = floor(s0.y);
let ytop = select(y0i + 1.0, ceil(s0.y), s0.y == s1.y);
- let b = (dy * c + dx * (ytop - s0.y)) * idxdy;
+ let b = min((dy * c + dx * (ytop - s0.y)) * idxdy, ONE_MINUS_ULP);
+ let robust_err = floor(a * (f32(count) - 1.0) + b) - f32(count_x);
+ if robust_err != 0.0 {
+ a -= ROBUST_EPSILON * sign(robust_err);
+ }
let x0i = i32(xt0 * sign + 0.5 * (sign - 1.0));
let z = floor(a * f32(seg_within_line) + b);
let x = x0i + i32(sign * z);
diff --git a/src/cpu_shader/path_count.rs b/src/cpu_shader/path_count.rs
index 2cee5b8..3cb8855 100644
--- a/src/cpu_shader/path_count.rs
+++ b/src/cpu_shader/path_count.rs
@@ -5,7 +5,7 @@
use crate::cpu_dispatch::CpuBinding;
-use super::util::{span, Vec2};
+use super::util::{span, Vec2, ONE_MINUS_ULP, ROBUST_EPSILON};
const TILE_SCALE: f32 = 1.0 / 16.0;
@@ -24,7 +24,8 @@
let (xy0, xy1) = if is_down { (p0, p1) } else { (p1, p0) };
let s0 = xy0 * TILE_SCALE;
let s1 = xy1 * TILE_SCALE;
- let count = span(s0.x, s1.x) + span(s0.y, s1.y) - 1;
+ let count_x = span(s0.x, s1.x) - 1;
+ let count = count_x + span(s0.y, s1.y);
let dx = (s1.x - s0.x).abs();
let dy = s1.y - s0.y;
@@ -35,14 +36,18 @@
continue;
}
let idxdy = 1.0 / (dx + dy);
- let a = dx * idxdy;
+ let mut a = dx * idxdy;
let is_positive_slope = s1.x >= s0.x;
let sign = if is_positive_slope { 1.0 } else { -1.0 };
let xt0 = (s0.x * sign).floor();
let c = s0.x * sign - xt0;
let y0 = s0.y.floor();
let ytop = if s0.y == s1.y { s0.y.ceil() } else { y0 + 1.0 };
- let b = (dy * c + dx * (ytop - s0.y)) * idxdy;
+ let b = ((dy * c + dx * (ytop - s0.y)) * idxdy).min(ONE_MINUS_ULP);
+ let robust_err = (a * (count as f32 - 1.0) + b).floor() - count_x as f32;
+ if robust_err != 0.0 {
+ a -= ROBUST_EPSILON.copysign(robust_err);
+ }
let x0 = xt0 * sign + if is_positive_slope { 0.0 } else { -1.0 };
let path = paths[line.path_ix as usize];
diff --git a/src/cpu_shader/path_tiling.rs b/src/cpu_shader/path_tiling.rs
index 41549bb..56bc2b4 100644
--- a/src/cpu_shader/path_tiling.rs
+++ b/src/cpu_shader/path_tiling.rs
@@ -3,7 +3,10 @@
use vello_encoding::{BumpAllocators, LineSoup, Path, PathSegment, SegmentCount, Tile};
-use crate::cpu_dispatch::CpuBinding;
+use crate::{
+ cpu_dispatch::CpuBinding,
+ cpu_shader::util::{ONE_MINUS_ULP, ROBUST_EPSILON},
+};
use super::util::{span, Vec2};
@@ -33,19 +36,24 @@
let (mut xy0, mut xy1) = if is_down { (p0, p1) } else { (p1, p0) };
let s0 = xy0 * TILE_SCALE;
let s1 = xy1 * TILE_SCALE;
- let count = span(s0.x, s1.x) + span(s0.y, s1.y) - 1;
+ let count_x = span(s0.x, s1.x) - 1;
+ let count = count_x + span(s0.y, s1.y);
let dx = (s1.x - s0.x).abs();
let dy = s1.y - s0.y;
let idxdy = 1.0 / (dx + dy);
- let a = dx * idxdy;
+ let mut a = dx * idxdy;
let is_positive_slope = s1.x >= s0.x;
let sign = if is_positive_slope { 1.0 } else { -1.0 };
let xt0 = (s0.x * sign).floor();
let c = s0.x * sign - xt0;
let y0 = s0.y.floor();
let ytop = if s0.y == s1.y { s0.y.ceil() } else { y0 + 1.0 };
- let b = (dy * c + dx * (ytop - s0.y)) * idxdy;
+ let b = ((dy * c + dx * (ytop - s0.y)) * idxdy).min(ONE_MINUS_ULP);
+ let robust_err = (a * (count as f32 - 1.0) + b).floor() - count_x as f32;
+ if robust_err != 0.0 {
+ a -= ROBUST_EPSILON.copysign(robust_err);
+ }
let x0 = xt0 * sign + if is_positive_slope { 0.0 } else { -1.0 };
let z = (a * seg_within_line as f32 + b).floor();
let x = x0 as i32 + (sign * z) as i32;
diff --git a/src/cpu_shader/util.rs b/src/cpu_shader/util.rs
index 2bb3279..eae6f8b 100644
--- a/src/cpu_shader/util.rs
+++ b/src/cpu_shader/util.rs
@@ -111,3 +111,18 @@
DRAWTAG_NOP
}
}
+
+/// The largest floating point value strictly less than 1.
+///
+/// This value is used to limit the value of b so that its floor is strictly less
+/// than 1. That guarantees that floor(a * i + b) == 0 for i == 0, which lands on
+/// the correct first tile.
+pub const ONE_MINUS_ULP: f32 = 0.99999994;
+
+/// An epsilon to be applied in path numerical robustness.
+///
+/// When floor(a * (n - 1) + b) does not match the expected value (the width in
+/// grid cells minus one), this delta is applied to a to push it in the correct
+/// direction. The theory is that a is not off by more than a few ulp, and it's
+/// always in the range of 0..1.
+pub const ROBUST_EPSILON: f32 = 2e-7;