| // Copyright 2025 the Vello Authors |
| // SPDX-License-Identifier: Apache-2.0 OR MIT |
| |
| //! This is a temporary module that contains a SIMD version of flattening of cubic curves, as |
| //! well as some code that was copied from kurbo, which is needed to reimplement the |
| //! full `flatten` method. |
| |
| use crate::flatten::TOL_2; |
| #[cfg(not(feature = "std"))] |
| use crate::kurbo::common::FloatFuncs as _; |
| use crate::kurbo::{CubicBez, ParamCurve, PathEl, Point, QuadBez}; |
| use alloc::vec::Vec; |
| use bytemuck::{Pod, Zeroable}; |
| use fearless_simd::*; |
| |
| /// The element of a path made of lines. |
| /// |
| /// Each subpath must start with a `MoveTo`. Closing of subpaths is not supported, and subpaths are |
| /// not closed implicitly when a new subpath (with `MoveTo`) is started. It is expected that closed |
| /// subpaths are watertight in the sense that the last `LineTo` matches exactly with the first |
| /// `MoveTo`. |
| /// |
| /// This intentionally allows for non-watertight subpaths, as, e.g., lines that are fully outside |
| /// of the viewport do not need to be drawn. |
| /// |
| /// See [`PathEl`] for a more general-purpose path element type. |
| pub(crate) enum LinePathEl { |
| MoveTo(Point), |
| LineTo(Point), |
| } |
| |
| // Unlike kurbo, which takes a closure with a callback for outputting the lines, we use a trait |
| // instead. The reason is that this way the callback can be inlined, which is not possible with |
| // a closure and turned out to have a noticeable overhead. |
| pub(crate) trait Callback { |
| fn callback(&mut self, el: LinePathEl); |
| } |
| |
| /// See the docs for the kurbo implementation of flattening: |
| /// <https://docs.rs/kurbo/latest/kurbo/fn.flatten.html> |
| /// |
| /// This version works using a similar approach but using f32x4/f32x8 SIMD instead. |
| #[inline(always)] |
| pub(crate) fn flatten<S: Simd>( |
| simd: S, |
| path: impl IntoIterator<Item = PathEl>, |
| tolerance: f64, |
| callback: &mut impl Callback, |
| flatten_ctx: &mut FlattenCtx, |
| ) { |
| flatten_ctx.flattened_cubics.clear(); |
| |
| let sqrt_tol = tolerance.sqrt(); |
| let mut closed = true; |
| let mut start_pt = Point::ZERO; |
| let mut last_pt = Point::ZERO; |
| |
| for el in path { |
| match el { |
| PathEl::MoveTo(p) => { |
| if !closed && last_pt != start_pt { |
| callback.callback(LinePathEl::LineTo(start_pt)); |
| } |
| closed = false; |
| last_pt = p; |
| start_pt = p; |
| callback.callback(LinePathEl::MoveTo(p)); |
| } |
| PathEl::LineTo(p) => { |
| debug_assert!(!closed, "Expected a `MoveTo` before a `LineTo`"); |
| last_pt = p; |
| callback.callback(LinePathEl::LineTo(p)); |
| } |
| PathEl::QuadTo(p1, p2) => { |
| debug_assert!(!closed, "Expected a `MoveTo` before a `QuadTo`"); |
| let p0 = last_pt; |
| // An upper bound on the shortest distance of any point on the quadratic Bezier |
| // curve to the line segment [p0, p2] is 1/2 of the maximum of the |
| // endpoint-to-control-point distances. |
| // |
| // The derivation is similar to that for the cubic Bezier (see below). In |
| // short: |
| // |
| // q}(t) = B0(t) p0 + B1(t) p1 + B2(t) p2 |
| // dist(q(t), [p0, p1]) <= B1(t) dist(p1, [p0, p1]) |
| // = 2 (1-t)t dist(p1, [p0, p1]). |
| // |
| // The maximum occurs at t=1/2, hence |
| // max(dist(q(t), [p0, p1] <= 1/2 dist(p1, [p0, p1])). |
| // |
| // A cheap upper bound for dist(p1, [p0, p1]) is max(dist(p1, p0), dist(p1, p2)). |
| // |
| // The following takes the square to elide the square root of the Euclidean |
| // distance. |
| if f64::max((p1 - p0).hypot2(), (p1 - p2).hypot2()) <= 4. * TOL_2 { |
| callback.callback(LinePathEl::LineTo(p2)); |
| } else { |
| let q = QuadBez::new(p0, p1, p2); |
| let params = q.estimate_subdiv(sqrt_tol); |
| let n = ((0.5 * params.val / sqrt_tol).ceil() as usize).max(1); |
| let step = 1.0 / (n as f64); |
| for i in 1..n { |
| let u = (i as f64) * step; |
| let t = q.determine_subdiv_t(¶ms, u); |
| let p = q.eval(t); |
| callback.callback(LinePathEl::LineTo(p)); |
| } |
| callback.callback(LinePathEl::LineTo(p2)); |
| } |
| last_pt = p2; |
| } |
| PathEl::CurveTo(p1, p2, p3) => { |
| debug_assert!(!closed, "Expected a `MoveTo` before a `CurveTo`"); |
| let p0 = last_pt; |
| // An upper bound on the shortest distance of any point on the cubic Bezier |
| // curve to the line segment [p0, p3] is 3/4 of the maximum of the |
| // endpoint-to-control-point distances. |
| // |
| // With Bernstein weights Bi(t), we have |
| // c(t) = B0(t) p0 + B1(t) p1 + B2(t) p2 + B3(t) p3 |
| // with t from 0 to 1 (inclusive). |
| // |
| // Through convexivity of the Euclidean distance function and the line segment, |
| // we have |
| // dist(c(t), [p0, p3]) <= B1(t) dist(p1, [p0, p3]) + B2(t) dist(p2, [p0, p3]) |
| // <= B1(t) ||p1-p0|| + B2(t) ||p2-p3|| |
| // <= (B1(t) + B2(t)) max(||p1-p0||, ||p2-p3|||) |
| // = 3 ((1-t)t^2 + (1-t)^2t) max(||p1-p0||, ||p2-p3||). |
| // |
| // The inner polynomial has its maximum of 1/4 at t=1/2, hence |
| // max(dist(c(t), [p0, p3])) <= 3/4 max(||p1-p0||, ||p2-p3||). |
| // |
| // The following takes the square to elide the square root of the Euclidean |
| // distance. |
| if f64::max((p0 - p1).hypot2(), (p3 - p2).hypot2()) <= 16. / 9. * TOL_2 { |
| callback.callback(LinePathEl::LineTo(p3)); |
| } else { |
| let c = CubicBez::new(p0, p1, p2, p3); |
| let max = flatten_cubic_simd(simd, c, flatten_ctx, tolerance as f32); |
| |
| for p in &flatten_ctx.flattened_cubics[1..max] { |
| callback.callback(LinePathEl::LineTo(Point::new(p.x as f64, p.y as f64))); |
| } |
| } |
| last_pt = p3; |
| } |
| PathEl::ClosePath => { |
| closed = true; |
| if last_pt != start_pt { |
| callback.callback(LinePathEl::LineTo(start_pt)); |
| } |
| } |
| } |
| } |
| |
| if !closed && last_pt != start_pt { |
| callback.callback(LinePathEl::LineTo(start_pt)); |
| } |
| } |
| |
| // The below methods are copied from kurbo and needed to implement flattening of normal quad curves. |
| |
| /// An approximation to $\int (1 + 4x^2) ^ -0.25 dx$ |
| /// |
| /// This is used for flattening curves. |
| fn approx_parabola_integral(x: f64) -> f64 { |
| const D: f64 = 0.67; |
| x / (1.0 - D + (D.powi(4) + 0.25 * x * x).sqrt().sqrt()) |
| } |
| |
| /// An approximation to the inverse parabola integral. |
| fn approx_parabola_inv_integral(x: f64) -> f64 { |
| const B: f64 = 0.39; |
| x * (1.0 - B + (B * B + 0.25 * x * x).sqrt()) |
| } |
| |
| impl FlattenParamsExt for QuadBez { |
| #[inline(always)] |
| fn estimate_subdiv(&self, sqrt_tol: f64) -> FlattenParams { |
| // Determine transformation to $y = x^2$ parabola. |
| let d01 = self.p1 - self.p0; |
| let d12 = self.p2 - self.p1; |
| let dd = d01 - d12; |
| let cross = (self.p2 - self.p0).cross(dd); |
| let x0 = d01.dot(dd) * cross.recip(); |
| let x2 = d12.dot(dd) * cross.recip(); |
| let scale = (cross / (dd.hypot() * (x2 - x0))).abs(); |
| |
| // Compute number of subdivisions needed. |
| let a0 = approx_parabola_integral(x0); |
| let a2 = approx_parabola_integral(x2); |
| let val = if scale.is_finite() { |
| let da = (a2 - a0).abs(); |
| let sqrt_scale = scale.sqrt(); |
| if x0.signum() == x2.signum() { |
| da * sqrt_scale |
| } else { |
| // Handle cusp case (segment contains curvature maximum) |
| let xmin = sqrt_tol / sqrt_scale; |
| sqrt_tol * da / approx_parabola_integral(xmin) |
| } |
| } else { |
| 0.0 |
| }; |
| let u0 = approx_parabola_inv_integral(a0); |
| let u2 = approx_parabola_inv_integral(a2); |
| let uscale = (u2 - u0).recip(); |
| FlattenParams { |
| a0, |
| a2, |
| u0, |
| uscale, |
| val, |
| } |
| } |
| |
| #[inline(always)] |
| fn determine_subdiv_t(&self, params: &FlattenParams, x: f64) -> f64 { |
| let a = params.a0 + (params.a2 - params.a0) * x; |
| let u = approx_parabola_inv_integral(a); |
| (u - params.u0) * params.uscale |
| } |
| } |
| |
| trait FlattenParamsExt { |
| fn estimate_subdiv(&self, sqrt_tol: f64) -> FlattenParams; |
| fn determine_subdiv_t(&self, params: &FlattenParams, x: f64) -> f64; |
| } |
| |
| // Everything below is a SIMD implementation of flattening of cubic curves. |
| // It's a combination of https://gist.github.com/raphlinus/5f4e9feb85fd79bafc72da744571ec0e |
| // and https://gist.github.com/raphlinus/44e114fef2fd33b889383a60ced0129b. |
| |
| // TODO(laurenz): Perhaps we should get rid of this in the future and work directly with f32, |
| // as it's the only reason we have to pull in proc_macros via the `derive` feature of bytemuck. |
| #[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] |
| #[repr(C)] |
| struct Point32 { |
| x: f32, |
| y: f32, |
| } |
| |
| struct FlattenParams { |
| a0: f64, |
| a2: f64, |
| u0: f64, |
| uscale: f64, |
| /// The number of `subdivisions * 2 * sqrt_tol`. |
| val: f64, |
| } |
| |
| /// This limit was chosen based on the pre-existing GitHub gist. |
| /// This limit should not be hit in normal operation, but _might_ be hit for very large |
| /// transforms. |
| const MAX_QUADS: usize = 16; |
| |
| /// The context needed for flattening curves. |
| #[derive(Default, Debug)] |
| pub struct FlattenCtx { |
| // The +4 is to encourage alignment; might be better to be explicit |
| even_pts: [Point32; MAX_QUADS + 4], |
| odd_pts: [Point32; MAX_QUADS], |
| a0: [f32; MAX_QUADS], |
| da: [f32; MAX_QUADS], |
| u0: [f32; MAX_QUADS], |
| uscale: [f32; MAX_QUADS], |
| val: [f32; MAX_QUADS], |
| n_quads: usize, |
| /// Reusable buffer for flattened cubic points. |
| flattened_cubics: Vec<Point32>, |
| } |
| |
| #[inline(always)] |
| fn is_finite_simd<S: Simd>(x: f32x4<S>) -> mask32x4<S> { |
| let simd = x.simd; |
| |
| let x_abs = x.abs(); |
| let reinterpreted = u32x4::from_bytes(x_abs.to_bytes()); |
| simd.simd_lt_u32x4(reinterpreted, u32x4::splat(simd, 0x7f80_0000)) |
| } |
| |
| #[inline(always)] |
| fn approx_parabola_integral_simd<S: Simd>(x: f32x8<S>) -> f32x8<S> { |
| let simd = x.simd; |
| |
| const D: f32 = 0.67; |
| const D_POWI_4: f32 = 0.201_511_2; |
| |
| let temp1 = f32x8::splat(simd, 0.25).madd(x * x, f32x8::splat(simd, D_POWI_4)); |
| let temp2 = temp1.sqrt(); |
| let temp3 = temp2.sqrt(); |
| let temp4 = f32x8::splat(simd, 1.0) - f32x8::splat(simd, D); |
| let temp5 = temp4 + temp3; |
| x / temp5 |
| } |
| |
| #[inline(always)] |
| fn approx_parabola_integral_simd_x4<S: Simd>(x: f32x4<S>) -> f32x4<S> { |
| let simd = x.simd; |
| |
| const D: f32 = 0.67; |
| const D_POWI_4: f32 = 0.201_511_2; |
| |
| let temp1 = f32x4::splat(simd, 0.25).madd(x * x, f32x4::splat(simd, D_POWI_4)); |
| let temp2 = temp1.sqrt(); |
| let temp3 = temp2.sqrt(); |
| let temp4 = f32x4::splat(simd, 1.0) - f32x4::splat(simd, D); |
| let temp5 = temp4 + temp3; |
| x / temp5 |
| } |
| |
| #[inline(always)] |
| fn approx_parabola_inv_integral_simd<S: Simd>(x: f32x8<S>) -> f32x8<S> { |
| let simd = x.simd; |
| |
| const B: f32 = 0.39; |
| const ONE_MINUS_B: f32 = 1.0 - B; |
| |
| let temp1 = f32x8::splat(simd, B * B); |
| let temp2 = f32x8::splat(simd, 0.25).madd(x * x, temp1); |
| let temp3 = temp2.sqrt(); |
| let temp4 = f32x8::splat(simd, ONE_MINUS_B) + temp3; |
| |
| x * temp4 |
| } |
| |
| #[inline(always)] |
| fn pt_splat_simd<S: Simd>(simd: S, pt: Point32) -> f32x8<S> { |
| let p_f64: f64 = bytemuck::cast(pt); |
| simd.reinterpret_f32_f64x4(f64x4::splat(simd, p_f64)) |
| } |
| |
| #[inline(always)] |
| fn eval_simd<S: Simd>( |
| p0: f32x8<S>, |
| p1: f32x8<S>, |
| p2: f32x8<S>, |
| p3: f32x8<S>, |
| t: f32x8<S>, |
| ) -> f32x8<S> { |
| let mt = 1.0 - t; |
| let im0 = p0 * mt * mt * mt; |
| let im1 = p1 * mt * mt * 3.0; |
| let im2 = p2 * mt * 3.0; |
| let im3 = p3.madd(t, im2) * t; |
| |
| (im1 + im3).madd(t, im0) |
| } |
| |
| #[inline(always)] |
| fn eval_cubics_simd<S: Simd>(simd: S, c: &CubicBez, n: usize, result: &mut FlattenCtx) { |
| result.n_quads = n; |
| let dt = 0.5 / n as f32; |
| |
| // TODO(laurenz): Perhaps we can SIMDify this better |
| let p0p1 = f32x4::from_slice( |
| simd, |
| &[c.p0.x as f32, c.p0.y as f32, c.p1.x as f32, c.p1.y as f32], |
| ); |
| let p2p3 = f32x4::from_slice( |
| simd, |
| &[c.p2.x as f32, c.p2.y as f32, c.p3.x as f32, c.p3.y as f32], |
| ); |
| |
| let split_single = |input: f32x4<S>| { |
| let t1 = simd.reinterpret_f64_f32x4(input); |
| let p0 = simd.zip_low_f64x2(t1, t1); |
| let p1 = simd.zip_high_f64x2(t1, t1); |
| |
| let p0 = simd.reinterpret_f32_f64x2(p0); |
| let p1 = simd.reinterpret_f32_f64x2(p1); |
| |
| (f32x8::block_splat(p0), f32x8::block_splat(p1)) |
| }; |
| |
| let (p0_128, p1_128) = split_single(p0p1); |
| let (p2_128, p3_128) = split_single(p2p3); |
| |
| let iota = f32x8::from_slice(simd, &[0.0, 0.0, 2.0, 2.0, 1.0, 1.0, 3.0, 3.0]); |
| let step = iota * dt; |
| let mut t = step; |
| let t_inc = f32x8::splat(simd, 4.0 * dt); |
| |
| let even_pts: &mut [f32] = bytemuck::cast_slice_mut(&mut result.even_pts); |
| let odd_pts: &mut [f32] = bytemuck::cast_slice_mut(&mut result.odd_pts); |
| |
| for i in 0..n.div_ceil(2) { |
| let evaluated = eval_simd(p0_128, p1_128, p2_128, p3_128, t); |
| let (low, high) = simd.split_f32x8(evaluated); |
| |
| even_pts[i * 4..][..4].copy_from_slice(low.as_slice()); |
| odd_pts[i * 4..][..4].copy_from_slice(high.as_slice()); |
| |
| t += t_inc; |
| } |
| |
| even_pts[n * 2..][..8].copy_from_slice(p3_128.as_slice()); |
| } |
| |
| #[inline(always)] |
| fn estimate_subdiv_simd<S: Simd>(simd: S, sqrt_tol: f32, ctx: &mut FlattenCtx) { |
| let n = ctx.n_quads; |
| |
| let even_pts: &mut [f32] = bytemuck::cast_slice_mut(&mut ctx.even_pts); |
| let odd_pts: &mut [f32] = bytemuck::cast_slice_mut(&mut ctx.odd_pts); |
| |
| for i in 0..n.div_ceil(4) { |
| let p0 = f32x8::from_slice(simd, &even_pts[i * 8..][..8]); |
| let p_onehalf = f32x8::from_slice(simd, &odd_pts[i * 8..][..8]); |
| let p2 = f32x8::from_slice(simd, &even_pts[(i * 8 + 2)..][..8]); |
| let x = p0 * -0.5; |
| let x1 = p_onehalf.madd(2.0, x); |
| let p1 = p2.madd(-0.5, x1); |
| |
| odd_pts[(i * 8)..][..8].copy_from_slice(p1.as_slice()); |
| |
| let d01 = p1 - p0; |
| let d12 = p2 - p1; |
| let d01x = simd.unzip_low_f32x8(d01, d01); |
| let d01y = simd.unzip_high_f32x8(d01, d01); |
| let d12x = simd.unzip_low_f32x8(d12, d12); |
| let d12y = simd.unzip_high_f32x8(d12, d12); |
| let ddx = d01x - d12x; |
| let ddy = d01y - d12y; |
| let d02x = d01x + d12x; |
| let d02y = d01y + d12y; |
| // (d02x * ddy) - (d02y * ddx) |
| let cross = ddx.madd(-d02y, d02x * ddy); |
| |
| let x0_x2_a = { |
| let (d01x_low, _) = simd.split_f32x8(d01x); |
| let (d12x_low, _) = simd.split_f32x8(d12x); |
| |
| simd.combine_f32x4(d12x_low, d01x_low) * ddx |
| }; |
| let temp1 = { |
| let (d12y_low, _) = simd.split_f32x8(d12y); |
| let (d01y_low, _) = simd.split_f32x8(d01y); |
| |
| simd.combine_f32x4(d12y_low, d01y_low) |
| }; |
| let x0_x2_num = temp1.madd(ddy, x0_x2_a); |
| let x0_x2 = x0_x2_num / cross; |
| let (ddx_low, _) = simd.split_f32x8(ddx); |
| let (ddy_low, _) = simd.split_f32x8(ddy); |
| let dd_hypot = ddy_low.madd(ddy_low, ddx_low * ddx_low).sqrt(); |
| let (x0, x2) = simd.split_f32x8(x0_x2); |
| let scale_denom = dd_hypot * (x2 - x0); |
| let (temp2, _) = simd.split_f32x8(cross); |
| let scale = (temp2 / scale_denom).abs(); |
| let a0_a2 = approx_parabola_integral_simd(x0_x2); |
| let (a0, a2) = simd.split_f32x8(a0_a2); |
| let da = a2 - a0; |
| let da_abs = da.abs(); |
| let sqrt_scale = scale.sqrt(); |
| let temp3 = simd.or_i32x4(x0.bitcast(), x2.bitcast()); |
| let mask = simd.simd_ge_i32x4(temp3, i32x4::splat(simd, 0)); |
| let noncusp = da_abs * sqrt_scale; |
| // TODO: should we skip this if neither is a cusp? Maybe not worth branch prediction cost |
| let xmin = sqrt_tol / sqrt_scale; |
| let approxint = approx_parabola_integral_simd_x4(xmin); |
| let cusp = (sqrt_tol * da_abs) / approxint; |
| let val_raw = simd.select_f32x4(mask, noncusp, cusp); |
| let finite_mask = is_finite_simd(val_raw); |
| let val = simd.select_f32x4(finite_mask, val_raw, f32x4::splat(simd, 0.0)); |
| let u0_u2 = approx_parabola_inv_integral_simd(a0_a2); |
| let (u0, u2) = simd.split_f32x8(u0_u2); |
| let uscale_a = u2 - u0; |
| let uscale = 1.0 / uscale_a; |
| |
| ctx.a0[i * 4..][..4].copy_from_slice(a0.as_slice()); |
| ctx.da[i * 4..][..4].copy_from_slice(da.as_slice()); |
| ctx.u0[i * 4..][..4].copy_from_slice(u0.as_slice()); |
| ctx.uscale[i * 4..][..4].copy_from_slice(uscale.as_slice()); |
| ctx.val[i * 4..][..4].copy_from_slice(val.as_slice()); |
| } |
| } |
| |
| #[inline(always)] |
| fn output_lines_simd<S: Simd>( |
| simd: S, |
| ctx: &mut FlattenCtx, |
| i: usize, |
| x0: f32, |
| dx: f32, |
| n: usize, |
| start_idx: usize, |
| ) { |
| let p0 = pt_splat_simd(simd, ctx.even_pts[i]); |
| let p1 = pt_splat_simd(simd, ctx.odd_pts[i]); |
| let p2 = pt_splat_simd(simd, ctx.even_pts[i + 1]); |
| |
| const IOTA2: [f32; 8] = [0., 0., 1., 1., 2., 2., 3., 3.]; |
| let iota2 = f32x8::from_slice(simd, IOTA2.as_ref()); |
| let x = iota2.madd(dx, f32x8::splat(simd, x0)); |
| let da = f32x8::splat(simd, ctx.da[i]); |
| let mut a = da.madd(x, f32x8::splat(simd, ctx.a0[i])); |
| let a_inc = 4.0 * dx * da; |
| let uscale = f32x8::splat(simd, ctx.uscale[i]); |
| |
| let out: &mut [f32] = bytemuck::cast_slice_mut(&mut ctx.flattened_cubics[start_idx..]); |
| |
| for j in 0..n.div_ceil(4) { |
| let u = approx_parabola_inv_integral_simd(a); |
| let t = u.madd(uscale, -ctx.u0[i] * uscale); |
| let mt = 1.0 - t; |
| let z = p0 * mt * mt; |
| let z1 = p1.madd(2.0 * t * mt, z); |
| let p = p2.madd(t * t, z1); |
| |
| out[j * 8..][..8].copy_from_slice(p.as_slice()); |
| |
| a += a_inc; |
| } |
| } |
| |
| #[inline(always)] |
| fn flatten_cubic_simd<S: Simd>(simd: S, c: CubicBez, ctx: &mut FlattenCtx, accuracy: f32) -> usize { |
| let n_quads = estimate_num_quads(c, accuracy); |
| eval_cubics_simd(simd, &c, n_quads, ctx); |
| let tol = accuracy * (1.0 - TO_QUAD_TOL); |
| let sqrt_tol = tol.sqrt(); |
| estimate_subdiv_simd(simd, sqrt_tol, ctx); |
| let sum: f32 = ctx.val[..n_quads].iter().sum(); |
| let n = ((0.5 * sum / sqrt_tol).ceil() as usize).max(1); |
| ctx.flattened_cubics.resize(n + 4, Point32::default()); |
| |
| let step = sum / (n as f32); |
| let step_recip = 1.0 / step; |
| let mut val_sum = 0.0; |
| let mut last_n = 0; |
| let mut x0base = 0.0; |
| |
| for i in 0..n_quads { |
| let val = ctx.val[i]; |
| val_sum += val; |
| let this_n = val_sum * step_recip; |
| let this_n_next = 1.0 + this_n.floor(); |
| let dn = this_n_next as usize - last_n; |
| if dn > 0 { |
| let dx = step / val; |
| let x0 = x0base * dx; |
| output_lines_simd(simd, ctx, i, x0, dx, dn, last_n); |
| } |
| x0base = this_n_next - this_n; |
| last_n = this_n_next as usize; |
| } |
| |
| ctx.flattened_cubics[n] = ctx.even_pts[n_quads]; |
| |
| n + 1 |
| } |
| |
| #[inline(always)] |
| fn estimate_num_quads(c: CubicBez, accuracy: f32) -> usize { |
| let q_accuracy = (accuracy * TO_QUAD_TOL) as f64; |
| let max_hypot2 = 432.0 * q_accuracy * q_accuracy; |
| let p1x2 = c.p1.to_vec2() * 3.0 - c.p0.to_vec2(); |
| let p2x2 = c.p2.to_vec2() * 3.0 - c.p3.to_vec2(); |
| let err = (p2x2 - p1x2).hypot2(); |
| let err_div = err / max_hypot2; |
| |
| estimate(err_div) |
| } |
| |
| const TO_QUAD_TOL: f32 = 0.1; |
| |
| #[inline(always)] |
| fn estimate(err_div: f64) -> usize { |
| // The original version of this method was: |
| // let n_quads = (err_div.powf(1. / 6.0).ceil() as usize).max(1); |
| // n_quads.min(MAX_QUADS) |
| // |
| // Note how we always round up and clamp to the range [1, max_quads]. Since we don't |
| // care about the actual fractional value resulting from the powf call we can simply |
| // compute this using a precomputed lookup table evaluating 1^6, 2^6, 3^6, etc. and simply |
| // comparing if the value is less than or equal to each threshold. |
| |
| const LUT: [f64; MAX_QUADS] = [ |
| 1.0, 64.0, 729.0, 4096.0, 15625.0, 46656.0, 117649.0, 262144.0, 531441.0, 1000000.0, |
| 1771561.0, 2985984.0, 4826809.0, 7529536.0, 11390625.0, 16777216.0, |
| ]; |
| |
| #[expect(clippy::needless_range_loop, reason = "better clarity")] |
| for i in 0..MAX_QUADS { |
| if err_div <= LUT[i] { |
| return i + 1; |
| } |
| } |
| |
| MAX_QUADS |
| } |
| |
| #[cfg(test)] |
| mod tests { |
| use crate::flatten_simd::{MAX_QUADS, estimate}; |
| |
| fn old_estimate(err_div: f64) -> usize { |
| let n_quads = (err_div.powf(1. / 6.0).ceil() as usize).max(1); |
| n_quads.min(MAX_QUADS) |
| } |
| |
| // Test is disabled by default since it takes 10-20 seconds to run, even in release mode. |
| #[test] |
| #[ignore] |
| fn accuracy() { |
| for i in 0..u32::MAX { |
| let num = f32::from_bits(i); |
| |
| if num.is_finite() { |
| assert_eq!(old_estimate(num as f64), estimate(num as f64), "{num}"); |
| } |
| } |
| } |
| } |