sparse_strips/vello_common/src/strip.rs - external/github.com/linebender/vello - Git at Google

 // Copyright 2025 the Vello Authors
 // SPDX-License-Identifier: Apache-2.0 OR MIT

 //! Rendering strips.

 use crate::flatten::Line;
 use crate::peniko::Fill;
 use crate::tile::{Tile, Tiles};
 use crate::util::f32_to_u8;
 use alloc::vec::Vec;
 use fearless_simd::*;

 /// A strip.
 #[derive(Debug, Clone, Copy)]
 pub struct Strip {
     /// The x coordinate of the strip, in user coordinates.
     pub x: u16,
     /// The y coordinate of the strip, in user coordinates.
     pub y: u16,
     /// Packed alpha index and fill gap flag.
     ///
     /// Bit layout (u32):
     /// - bit 31: `fill_gap` (See `Strip::fill_gap()`).
     /// - bits 0..=30: `alpha_idx` (See `Strip::alpha_idx()`).
     packed_alpha_idx_fill_gap: u32,
 }

 impl Strip {
     /// The bit mask for `fill_gap` packed into `packed_alpha_idx_fill_gap`.
     const FILL_GAP_MASK: u32 = 1 << 31;

     /// Creates a new strip.
     pub fn new(x: u16, y: u16, alpha_idx: u32, fill_gap: bool) -> Self {
         // Ensure `alpha_idx` does not collide with the fill flag bit.
         assert!(
             alpha_idx & Self::FILL_GAP_MASK == 0,
             "`alpha_idx` too large"
         );
         let fill_gap = u32::from(fill_gap) << 31;
         Self {
             x,
             y,
             packed_alpha_idx_fill_gap: alpha_idx | fill_gap,
         }
     }

     /// Returns the y coordinate of the strip, in strip units.
     pub fn strip_y(&self) -> u16 {
         self.y / Tile::HEIGHT
     }

     /// Returns the alpha index.
     #[inline(always)]
     pub fn alpha_idx(&self) -> u32 {
         self.packed_alpha_idx_fill_gap & !Self::FILL_GAP_MASK
     }

     /// Sets the alpha index.
     ///
     /// Note that the largest value that can be stored in the alpha index is `u32::MAX << 1`, as the
     /// highest bit is reserved for `fill_gap`.
     #[inline(always)]
     pub fn set_alpha_idx(&mut self, alpha_idx: u32) {
         // Ensure `alpha_idx` does not collide with the fill flag bit.
         assert!(
             alpha_idx & Self::FILL_GAP_MASK == 0,
             "`alpha_idx` too large"
         );
         let fill_gap = self.packed_alpha_idx_fill_gap & Self::FILL_GAP_MASK;
         self.packed_alpha_idx_fill_gap = alpha_idx | fill_gap;
     }

     /// Returns whether the gap that lies between this strip and the previous in the same row should be filled.
     #[inline(always)]
     pub fn fill_gap(&self) -> bool {
         (self.packed_alpha_idx_fill_gap & Self::FILL_GAP_MASK) != 0
     }

     /// Sets whether the gap that lies between this strip and the previous in the same row should be filled.
     #[inline(always)]
     pub fn set_fill_gap(&mut self, fill: bool) {
         let fill = u32::from(fill) << 31;
         self.packed_alpha_idx_fill_gap =
             (self.packed_alpha_idx_fill_gap & !Self::FILL_GAP_MASK) | fill;
     }
 }

 /// Render the tiles stored in `tiles` into the strip and alpha buffer.
 pub fn render(
     level: Level,
     tiles: &Tiles,
     strip_buf: &mut Vec<Strip>,
     alpha_buf: &mut Vec<u8>,
     fill_rule: Fill,
     aliasing_threshold: Option<u8>,
     lines: &[Line],
 ) {
     dispatch!(level, simd => render_impl(simd, tiles, strip_buf, alpha_buf, fill_rule, aliasing_threshold, lines));
 }

 fn render_impl<S: Simd>(
     s: S,
     tiles: &Tiles,
     strip_buf: &mut Vec<Strip>,
     alpha_buf: &mut Vec<u8>,
     fill_rule: Fill,
     aliasing_threshold: Option<u8>,
     lines: &[Line],
 ) {
     if tiles.is_empty() {
         return;
     }

     let should_fill = |winding: i32| match fill_rule {
         Fill::NonZero => winding != 0,
         Fill::EvenOdd => winding % 2 != 0,
     };

     // The accumulated tile winding delta. A line that crosses the top edge of a tile
     // increments the delta if the line is directed upwards, and decrements it if goes
     // downwards. Horizontal lines leave it unchanged.
     let mut winding_delta: i32 = 0;

     // The previous tile visited.
     let mut prev_tile = *tiles.get(0);
     // The accumulated (fractional) winding of the tile-sized location we're currently at.
     // Note multiple tiles can be at the same location.
     // Note that we are also implicitly assuming here that the tile height exactly fits into a
     // SIMD vector (i.e. 128 bits).
     let mut location_winding = [f32x4::splat(s, 0.0); Tile::WIDTH as usize];
     // The accumulated (fractional) windings at this location's right edge. When we move to the
     // next location, this is splatted to that location's starting winding.
     let mut accumulated_winding = f32x4::splat(s, 0.0);

     /// A special tile to keep the logic below simple.
     const SENTINEL: Tile = Tile::new(u16::MAX, u16::MAX, 0, false);

     // The strip we're building.
     let mut strip = Strip::new(
         prev_tile.x * Tile::WIDTH,
         prev_tile.y * Tile::HEIGHT,
         alpha_buf.len() as u32,
         false,
     );

     for (tile_idx, tile) in tiles.iter().copied().chain([SENTINEL]).enumerate() {
         let line = lines[tile.line_idx() as usize];
         let tile_left_x = f32::from(tile.x) * f32::from(Tile::WIDTH);
         let tile_top_y = f32::from(tile.y) * f32::from(Tile::HEIGHT);
         let p0_x = line.p0.x - tile_left_x;
         let p0_y = line.p0.y - tile_top_y;
         let p1_x = line.p1.x - tile_left_x;
         let p1_y = line.p1.y - tile_top_y;

         // Push out the winding as an alpha mask when we move to the next location (i.e., a tile
         // without the same location).
         if !prev_tile.same_loc(&tile) {
             match fill_rule {
                 Fill::NonZero => {
                     let p1 = f32x4::splat(s, 0.5);
                     let p2 = f32x4::splat(s, 255.0);

                     #[expect(clippy::needless_range_loop, reason = "dimension clarity")]
                     for x in 0..Tile::WIDTH as usize {
                         let area = location_winding[x];
                         let coverage = area.abs();
                         let mulled = coverage.madd(p2, p1);
                         // Note that we are not storing the location winding here but the actual
                         // alpha value as f32, so we reuse the variable as a temporary storage.
                         // Also note that we need the `min` here because the winding can be > 1
                         // and thus the calculated alpha value need to be clamped to 255.
                         location_winding[x] = mulled.min(p2);
                     }
                 }
                 Fill::EvenOdd => {
                     let p1 = f32x4::splat(s, 0.5);
                     let p2 = f32x4::splat(s, -2.0);
                     let p3 = f32x4::splat(s, 255.0);

                     #[expect(clippy::needless_range_loop, reason = "dimension clarity")]
                     for x in 0..Tile::WIDTH as usize {
                         let area = location_winding[x];
                         let im1 = area.madd(p1, p1).floor();
                         let coverage = p2.madd(im1, area).abs();
                         let mulled = p3.madd(coverage, p1);
                         // TODO: It is possible that, unlike for `NonZero`, we don't need the `min`
                         // here.
                         location_winding[x] = mulled.min(p3);
                     }
                 }
             };

             let p1 = s.combine_f32x4(location_winding[0], location_winding[1]);
             let p2 = s.combine_f32x4(location_winding[2], location_winding[3]);

             let mut u8_vals = f32_to_u8(s.combine_f32x8(p1, p2));

             if let Some(aliasing_threshold) = aliasing_threshold {
                 u8_vals = s.select_u8x16(
                     u8_vals.simd_ge(u8x16::splat(s, aliasing_threshold)),
                     u8x16::splat(s, 255),
                     u8x16::splat(s, 0),
                 );
             }

             alpha_buf.extend_from_slice(&u8_vals.val);

             #[expect(clippy::needless_range_loop, reason = "dimension clarity")]
             for x in 0..Tile::WIDTH as usize {
                 location_winding[x] = accumulated_winding;
             }
         }

         // Push out the strip if we're moving to a next strip.
         if !prev_tile.same_loc(&tile) && !prev_tile.prev_loc(&tile) {
             debug_assert_eq!(
                 (prev_tile.x + 1) * Tile::WIDTH - strip.x,
                 ((alpha_buf.len() - strip.alpha_idx() as usize) / usize::from(Tile::HEIGHT)) as u16,
                 "The number of columns written to the alpha buffer should equal the number of columns spanned by this strip."
             );
             strip_buf.push(strip);

             let is_sentinel = tile_idx == tiles.len() as usize;
             if !prev_tile.same_row(&tile) {
                 // Emit a final strip in the row if there is non-zero winding for the sparse fill,
                 // or unconditionally if we've reached the sentinel tile to end the path (the
                 // `alpha_idx` field is used for width calculations).
                 if winding_delta != 0 || is_sentinel {
                     strip_buf.push(Strip::new(
                         u16::MAX,
                         prev_tile.y * Tile::HEIGHT,
                         alpha_buf.len() as u32,
                         should_fill(winding_delta),
                     ));
                 }

                 winding_delta = 0;
                 accumulated_winding = f32x4::splat(s, 0.0);

                 #[expect(clippy::needless_range_loop, reason = "dimension clarity")]
                 for x in 0..Tile::WIDTH as usize {
                     location_winding[x] = accumulated_winding;
                 }
             }

             if is_sentinel {
                 break;
             }

             strip = Strip::new(
                 tile.x * Tile::WIDTH,
                 tile.y * Tile::HEIGHT,
                 alpha_buf.len() as u32,
                 should_fill(winding_delta),
             );
             // Note: this fill is mathematically not necessary. It provides a way to reduce
             // accumulation of float rounding errors.
             accumulated_winding = f32x4::splat(s, winding_delta as f32);
         }
         prev_tile = tile;

         // TODO: horizontal geometry has no impact on winding. This branch will be removed when
         // horizontal geometry is culled at the tile-generation stage.
         if p0_y == p1_y {
             continue;
         }

         // Lines moving upwards (in a y-down coordinate system) add to winding; lines moving
         // downwards subtract from winding.
         let sign = (p0_y - p1_y).signum();

         // Calculate winding / pixel area coverage.
         //
         // Conceptually, horizontal rays are shot from left to right. Every time the ray crosses a
         // line that is directed upwards (decreasing `y`), the winding is incremented. Every time
         // the ray crosses a line moving downwards (increasing `y`), the winding is decremented.
         // The fractional area coverage of a pixel is the integral of the winding within it.
         //
         // Practically, to calculate this, each pixel is considered individually, and we determine
         // whether the line moves through this pixel. The line's y-delta within this pixel is
         // accumulated and added to the area coverage of pixels to the right. Within the pixel
         // itself, the area to the right of the line segment forms a trapezoid (or a triangle in
         // the degenerate case). The area of this trapezoid is added to the pixel's area coverage.
         //
         // For example, consider the following pixel square, with a line indicated by asterisks
         // starting inside the pixel and crossing its bottom edge. The area covered is the
         // trapezoid on the bottom-right enclosed by the line and the pixel square. The area is
         // positive if the line moves down, and negative otherwise.
         //
         //  __________________
         //  |                |
         //  |         *------|
         //  |        *       |
         //  |       *        |
         //  |      *         |
         //  |     *          |
         //  |    *           |
         //  |___*____________|
         //     *
         //    *

         let (line_top_y, line_top_x, line_bottom_y, line_bottom_x) = if p0_y < p1_y {
             (p0_y, p0_x, p1_y, p1_x)
         } else {
             (p1_y, p1_x, p0_y, p0_x)
         };

         let (line_left_x, line_left_y, line_right_x) = if p0_x < p1_x {
             (p0_x, p0_y, p1_x)
         } else {
             (p1_x, p1_y, p0_x)
         };

         let y_slope = (line_bottom_y - line_top_y) / (line_bottom_x - line_top_x);
         let x_slope = 1. / y_slope;

         winding_delta += sign as i32 * i32::from(tile.winding());

         // TODO: this should be removed when out-of-viewport tiles are culled at the
         // tile-generation stage. That requires calculating and forwarding winding to strip
         // generation.
         if tile.x == 0 && line_left_x < 0. {
             let (ymin, ymax) = if line.p0.x == line.p1.x {
                 (line_top_y, line_bottom_y)
             } else {
                 let line_viewport_left_y = (line_top_y - line_top_x * y_slope)
                     .max(line_top_y)
                     .min(line_bottom_y);

                 (
                     f32::min(line_left_y, line_viewport_left_y),
                     f32::max(line_left_y, line_viewport_left_y),
                 )
             };

             let ymin: f32x4<_> = ymin.simd_into(s);
             let ymax: f32x4<_> = ymax.simd_into(s);

             let px_top_y: f32x4<_> = [0.0, 1.0, 2.0, 3.0].simd_into(s);
             let px_bottom_y = 1.0 + px_top_y;
             let ymin = px_top_y.max(ymin);
             let ymax = px_bottom_y.min(ymax);
             let h = (ymax - ymin).max(0.0);
             accumulated_winding = h.madd(sign, accumulated_winding);
             for x_idx in 0..Tile::WIDTH {
                 location_winding[x_idx as usize] = h.madd(sign, location_winding[x_idx as usize]);
             }

             if line_right_x < 0. {
                 // Early exit, as no part of the line is inside the tile.
                 continue;
             }
         }

         let line_top_y = f32x4::splat(s, line_top_y);
         let line_bottom_y = f32x4::splat(s, line_bottom_y);

         let y_idx = f32x4::from_slice(s, &[0.0, 1.0, 2.0, 3.0]);
         let px_top_y = y_idx;
         let px_bottom_y = 1. + y_idx;

         let ymin = line_top_y.max(px_top_y);
         let ymax = line_bottom_y.min(px_bottom_y);

         let mut acc = f32x4::splat(s, 0.0);

         for x_idx in 0..Tile::WIDTH {
             let x_idx_s = f32x4::splat(s, x_idx as f32);
             let px_left_x = x_idx_s;
             let px_right_x = 1.0 + x_idx_s;

             // The y-coordinate of the intersections between the line and the pixel's left and
             // right edges respectively.
             //
             // There is some subtlety going on here: `y_slope` will usually be finite, but will
             // be `inf` for purely vertical lines (`p0_x == p1_x`).
             //
             // In the case of `inf`, the resulting slope calculation will be `-inf` or `inf`
             // depending on whether the pixel edge is left or right of the line, respectively
             // (from the viewport's coordinate system perspective). The `min` and `max`
             // y-clamping logic generalizes nicely, as a pixel edge to the left of the line is
             // clamped to `ymin`, and a pixel edge to the right is clamped to `ymax`.
             //
             // In the special case where a vertical line and pixel edge are at the exact same
             // x-position (collinear), the line belongs to the pixel on whose _left_ edge it is
             // situated. The resulting slope calculation for the edge the line is situated on
             // will be NaN, as `0 * inf` results in NaN. This is true for both the left and
             // right edge. In both cases, the call to `f32::max` will set this to `ymin`.
             let line_px_left_y = (px_left_x - line_top_x)
                 .madd(y_slope, line_top_y)
                 .max_precise(ymin)
                 .min_precise(ymax);
             let line_px_right_y = (px_right_x - line_top_x)
                 .madd(y_slope, line_top_y)
                 .max_precise(ymin)
                 .min_precise(ymax);

             // `x_slope` is always finite, as horizontal geometry is elided.
             let line_px_left_yx =
                 (line_px_left_y - line_top_y).madd(x_slope, f32x4::splat(s, line_top_x));
             let line_px_right_yx =
                 (line_px_right_y - line_top_y).madd(x_slope, f32x4::splat(s, line_top_x));
             let h = (line_px_right_y - line_px_left_y).abs();

             // The trapezoidal area enclosed between the line and the right edge of the pixel
             // square.
             let area = 0.5 * h * (2. * px_right_x - line_px_right_yx - line_px_left_yx);
             location_winding[x_idx as usize] += area.madd(sign, acc);
             acc = h.madd(sign, acc);
         }

         accumulated_winding += acc;
     }
 }
	// Copyright 2025 the Vello Authors
	// SPDX-License-Identifier: Apache-2.0 OR MIT

	//! Rendering strips.

	use crate::flatten::Line;
	use crate::peniko::Fill;
	use crate::tile::{Tile, Tiles};
	use crate::util::f32_to_u8;
	use alloc::vec::Vec;
	use fearless_simd::*;

	/// A strip.
	#[derive(Debug, Clone, Copy)]
	pub struct Strip {
	/// The x coordinate of the strip, in user coordinates.
	pub x: u16,
	/// The y coordinate of the strip, in user coordinates.
	pub y: u16,
	/// Packed alpha index and fill gap flag.
	///
	/// Bit layout (u32):
	/// - bit 31: `fill_gap` (See `Strip::fill_gap()`).
	/// - bits 0..=30: `alpha_idx` (See `Strip::alpha_idx()`).
	packed_alpha_idx_fill_gap: u32,
	}

	impl Strip {
	/// The bit mask for `fill_gap` packed into `packed_alpha_idx_fill_gap`.
	const FILL_GAP_MASK: u32 = 1 << 31;

	/// Creates a new strip.
	pub fn new(x: u16, y: u16, alpha_idx: u32, fill_gap: bool) -> Self {
	// Ensure `alpha_idx` does not collide with the fill flag bit.
	assert!(
	alpha_idx & Self::FILL_GAP_MASK == 0,
	"`alpha_idx` too large"
	);
	let fill_gap = u32::from(fill_gap) << 31;
	Self {
	x,
	y,
	packed_alpha_idx_fill_gap: alpha_idx \| fill_gap,
	}
	}

	/// Returns the y coordinate of the strip, in strip units.
	pub fn strip_y(&self) -> u16 {
	self.y / Tile::HEIGHT
	}

	/// Returns the alpha index.
	#[inline(always)]
	pub fn alpha_idx(&self) -> u32 {
	self.packed_alpha_idx_fill_gap & !Self::FILL_GAP_MASK
	}

	/// Sets the alpha index.
	///
	/// Note that the largest value that can be stored in the alpha index is `u32::MAX << 1`, as the
	/// highest bit is reserved for `fill_gap`.
	#[inline(always)]
	pub fn set_alpha_idx(&mut self, alpha_idx: u32) {
	// Ensure `alpha_idx` does not collide with the fill flag bit.
	assert!(
	alpha_idx & Self::FILL_GAP_MASK == 0,
	"`alpha_idx` too large"
	);
	let fill_gap = self.packed_alpha_idx_fill_gap & Self::FILL_GAP_MASK;
	self.packed_alpha_idx_fill_gap = alpha_idx \| fill_gap;
	}

	/// Returns whether the gap that lies between this strip and the previous in the same row should be filled.
	#[inline(always)]
	pub fn fill_gap(&self) -> bool {
	(self.packed_alpha_idx_fill_gap & Self::FILL_GAP_MASK) != 0
	}

	/// Sets whether the gap that lies between this strip and the previous in the same row should be filled.
	#[inline(always)]
	pub fn set_fill_gap(&mut self, fill: bool) {
	let fill = u32::from(fill) << 31;
	self.packed_alpha_idx_fill_gap =
	(self.packed_alpha_idx_fill_gap & !Self::FILL_GAP_MASK) \| fill;
	}
	}

	/// Render the tiles stored in `tiles` into the strip and alpha buffer.
	pub fn render(
	level: Level,
	tiles: &Tiles,
	strip_buf: &mut Vec<Strip>,
	alpha_buf: &mut Vec<u8>,
	fill_rule: Fill,
	aliasing_threshold: Option<u8>,
	lines: &[Line],
	) {
	dispatch!(level, simd => render_impl(simd, tiles, strip_buf, alpha_buf, fill_rule, aliasing_threshold, lines));
	}

	fn render_impl<S: Simd>(
	s: S,
	tiles: &Tiles,
	strip_buf: &mut Vec<Strip>,
	alpha_buf: &mut Vec<u8>,
	fill_rule: Fill,
	aliasing_threshold: Option<u8>,
	lines: &[Line],
	) {
	if tiles.is_empty() {
	return;
	}

	let should_fill = \|winding: i32\| match fill_rule {
	Fill::NonZero => winding != 0,
	Fill::EvenOdd => winding % 2 != 0,
	};

	// The accumulated tile winding delta. A line that crosses the top edge of a tile
	// increments the delta if the line is directed upwards, and decrements it if goes
	// downwards. Horizontal lines leave it unchanged.
	let mut winding_delta: i32 = 0;

	// The previous tile visited.
	let mut prev_tile = *tiles.get(0);
	// The accumulated (fractional) winding of the tile-sized location we're currently at.
	// Note multiple tiles can be at the same location.
	// Note that we are also implicitly assuming here that the tile height exactly fits into a
	// SIMD vector (i.e. 128 bits).
	let mut location_winding = [f32x4::splat(s, 0.0); Tile::WIDTH as usize];
	// The accumulated (fractional) windings at this location's right edge. When we move to the
	// next location, this is splatted to that location's starting winding.
	let mut accumulated_winding = f32x4::splat(s, 0.0);

	/// A special tile to keep the logic below simple.
	const SENTINEL: Tile = Tile::new(u16::MAX, u16::MAX, 0, false);

	// The strip we're building.
	let mut strip = Strip::new(
	prev_tile.x * Tile::WIDTH,
	prev_tile.y * Tile::HEIGHT,
	alpha_buf.len() as u32,
	false,
	);

	for (tile_idx, tile) in tiles.iter().copied().chain([SENTINEL]).enumerate() {
	let line = lines[tile.line_idx() as usize];
	let tile_left_x = f32::from(tile.x) * f32::from(Tile::WIDTH);
	let tile_top_y = f32::from(tile.y) * f32::from(Tile::HEIGHT);
	let p0_x = line.p0.x - tile_left_x;
	let p0_y = line.p0.y - tile_top_y;
	let p1_x = line.p1.x - tile_left_x;
	let p1_y = line.p1.y - tile_top_y;

	// Push out the winding as an alpha mask when we move to the next location (i.e., a tile
	// without the same location).
	if !prev_tile.same_loc(&tile) {
	match fill_rule {
	Fill::NonZero => {
	let p1 = f32x4::splat(s, 0.5);
	let p2 = f32x4::splat(s, 255.0);

	#[expect(clippy::needless_range_loop, reason = "dimension clarity")]
	for x in 0..Tile::WIDTH as usize {
	let area = location_winding[x];
	let coverage = area.abs();
	let mulled = coverage.madd(p2, p1);
	// Note that we are not storing the location winding here but the actual
	// alpha value as f32, so we reuse the variable as a temporary storage.
	// Also note that we need the `min` here because the winding can be > 1
	// and thus the calculated alpha value need to be clamped to 255.
	location_winding[x] = mulled.min(p2);
	}
	}
	Fill::EvenOdd => {
	let p1 = f32x4::splat(s, 0.5);
	let p2 = f32x4::splat(s, -2.0);
	let p3 = f32x4::splat(s, 255.0);

	#[expect(clippy::needless_range_loop, reason = "dimension clarity")]
	for x in 0..Tile::WIDTH as usize {
	let area = location_winding[x];
	let im1 = area.madd(p1, p1).floor();
	let coverage = p2.madd(im1, area).abs();
	let mulled = p3.madd(coverage, p1);
	// TODO: It is possible that, unlike for `NonZero`, we don't need the `min`
	// here.
	location_winding[x] = mulled.min(p3);
	}
	}
	};

	let p1 = s.combine_f32x4(location_winding[0], location_winding[1]);
	let p2 = s.combine_f32x4(location_winding[2], location_winding[3]);

	let mut u8_vals = f32_to_u8(s.combine_f32x8(p1, p2));

	if let Some(aliasing_threshold) = aliasing_threshold {
	u8_vals = s.select_u8x16(
	u8_vals.simd_ge(u8x16::splat(s, aliasing_threshold)),
	u8x16::splat(s, 255),
	u8x16::splat(s, 0),
	);
	}

	alpha_buf.extend_from_slice(&u8_vals.val);

	#[expect(clippy::needless_range_loop, reason = "dimension clarity")]
	for x in 0..Tile::WIDTH as usize {
	location_winding[x] = accumulated_winding;
	}
	}

	// Push out the strip if we're moving to a next strip.
	if !prev_tile.same_loc(&tile) && !prev_tile.prev_loc(&tile) {
	debug_assert_eq!(
	(prev_tile.x + 1) * Tile::WIDTH - strip.x,
	((alpha_buf.len() - strip.alpha_idx() as usize) / usize::from(Tile::HEIGHT)) as u16,
	"The number of columns written to the alpha buffer should equal the number of columns spanned by this strip."
	);
	strip_buf.push(strip);

	let is_sentinel = tile_idx == tiles.len() as usize;
	if !prev_tile.same_row(&tile) {
	// Emit a final strip in the row if there is non-zero winding for the sparse fill,
	// or unconditionally if we've reached the sentinel tile to end the path (the
	// `alpha_idx` field is used for width calculations).
	if winding_delta != 0 \|\| is_sentinel {
	strip_buf.push(Strip::new(
	u16::MAX,
	prev_tile.y * Tile::HEIGHT,
	alpha_buf.len() as u32,
	should_fill(winding_delta),
	));
	}

	winding_delta = 0;
	accumulated_winding = f32x4::splat(s, 0.0);

	#[expect(clippy::needless_range_loop, reason = "dimension clarity")]
	for x in 0..Tile::WIDTH as usize {
	location_winding[x] = accumulated_winding;
	}
	}

	if is_sentinel {
	break;
	}

	strip = Strip::new(
	tile.x * Tile::WIDTH,
	tile.y * Tile::HEIGHT,
	alpha_buf.len() as u32,
	should_fill(winding_delta),
	);
	// Note: this fill is mathematically not necessary. It provides a way to reduce
	// accumulation of float rounding errors.
	accumulated_winding = f32x4::splat(s, winding_delta as f32);
	}
	prev_tile = tile;

	// TODO: horizontal geometry has no impact on winding. This branch will be removed when
	// horizontal geometry is culled at the tile-generation stage.
	if p0_y == p1_y {
	continue;
	}

	// Lines moving upwards (in a y-down coordinate system) add to winding; lines moving
	// downwards subtract from winding.
	let sign = (p0_y - p1_y).signum();

	// Calculate winding / pixel area coverage.
	//
	// Conceptually, horizontal rays are shot from left to right. Every time the ray crosses a
	// line that is directed upwards (decreasing `y`), the winding is incremented. Every time
	// the ray crosses a line moving downwards (increasing `y`), the winding is decremented.
	// The fractional area coverage of a pixel is the integral of the winding within it.
	//
	// Practically, to calculate this, each pixel is considered individually, and we determine
	// whether the line moves through this pixel. The line's y-delta within this pixel is
	// accumulated and added to the area coverage of pixels to the right. Within the pixel
	// itself, the area to the right of the line segment forms a trapezoid (or a triangle in
	// the degenerate case). The area of this trapezoid is added to the pixel's area coverage.
	//
	// For example, consider the following pixel square, with a line indicated by asterisks
	// starting inside the pixel and crossing its bottom edge. The area covered is the
	// trapezoid on the bottom-right enclosed by the line and the pixel square. The area is
	// positive if the line moves down, and negative otherwise.
	//
	// __________________
	// \| \|
	// \| *------\|
	// \| * \|
	// \| * \|
	// \| * \|
	// \| * \|
	// \| * \|
	// \|___*____________\|
	// *
	// *

	let (line_top_y, line_top_x, line_bottom_y, line_bottom_x) = if p0_y < p1_y {
	(p0_y, p0_x, p1_y, p1_x)
	} else {
	(p1_y, p1_x, p0_y, p0_x)
	};

	let (line_left_x, line_left_y, line_right_x) = if p0_x < p1_x {
	(p0_x, p0_y, p1_x)
	} else {
	(p1_x, p1_y, p0_x)
	};

	let y_slope = (line_bottom_y - line_top_y) / (line_bottom_x - line_top_x);
	let x_slope = 1. / y_slope;

	winding_delta += sign as i32 * i32::from(tile.winding());

	// TODO: this should be removed when out-of-viewport tiles are culled at the
	// tile-generation stage. That requires calculating and forwarding winding to strip
	// generation.
	if tile.x == 0 && line_left_x < 0. {
	let (ymin, ymax) = if line.p0.x == line.p1.x {
	(line_top_y, line_bottom_y)
	} else {
	let line_viewport_left_y = (line_top_y - line_top_x * y_slope)
	.max(line_top_y)
	.min(line_bottom_y);

	(
	f32::min(line_left_y, line_viewport_left_y),
	f32::max(line_left_y, line_viewport_left_y),
	)
	};

	let ymin: f32x4<_> = ymin.simd_into(s);
	let ymax: f32x4<_> = ymax.simd_into(s);

	let px_top_y: f32x4<_> = [0.0, 1.0, 2.0, 3.0].simd_into(s);
	let px_bottom_y = 1.0 + px_top_y;
	let ymin = px_top_y.max(ymin);
	let ymax = px_bottom_y.min(ymax);
	let h = (ymax - ymin).max(0.0);
	accumulated_winding = h.madd(sign, accumulated_winding);
	for x_idx in 0..Tile::WIDTH {
	location_winding[x_idx as usize] = h.madd(sign, location_winding[x_idx as usize]);
	}

	if line_right_x < 0. {
	// Early exit, as no part of the line is inside the tile.
	continue;
	}
	}

	let line_top_y = f32x4::splat(s, line_top_y);
	let line_bottom_y = f32x4::splat(s, line_bottom_y);

	let y_idx = f32x4::from_slice(s, &[0.0, 1.0, 2.0, 3.0]);
	let px_top_y = y_idx;
	let px_bottom_y = 1. + y_idx;

	let ymin = line_top_y.max(px_top_y);
	let ymax = line_bottom_y.min(px_bottom_y);

	let mut acc = f32x4::splat(s, 0.0);

	for x_idx in 0..Tile::WIDTH {
	let x_idx_s = f32x4::splat(s, x_idx as f32);
	let px_left_x = x_idx_s;
	let px_right_x = 1.0 + x_idx_s;

	// The y-coordinate of the intersections between the line and the pixel's left and
	// right edges respectively.
	//
	// There is some subtlety going on here: `y_slope` will usually be finite, but will
	// be `inf` for purely vertical lines (`p0_x == p1_x`).
	//
	// In the case of `inf`, the resulting slope calculation will be `-inf` or `inf`
	// depending on whether the pixel edge is left or right of the line, respectively
	// (from the viewport's coordinate system perspective). The `min` and `max`
	// y-clamping logic generalizes nicely, as a pixel edge to the left of the line is
	// clamped to `ymin`, and a pixel edge to the right is clamped to `ymax`.
	//
	// In the special case where a vertical line and pixel edge are at the exact same
	// x-position (collinear), the line belongs to the pixel on whose _left_ edge it is
	// situated. The resulting slope calculation for the edge the line is situated on
	// will be NaN, as `0 * inf` results in NaN. This is true for both the left and
	// right edge. In both cases, the call to `f32::max` will set this to `ymin`.
	let line_px_left_y = (px_left_x - line_top_x)
	.madd(y_slope, line_top_y)
	.max_precise(ymin)
	.min_precise(ymax);
	let line_px_right_y = (px_right_x - line_top_x)
	.madd(y_slope, line_top_y)
	.max_precise(ymin)
	.min_precise(ymax);

	// `x_slope` is always finite, as horizontal geometry is elided.
	let line_px_left_yx =
	(line_px_left_y - line_top_y).madd(x_slope, f32x4::splat(s, line_top_x));
	let line_px_right_yx =
	(line_px_right_y - line_top_y).madd(x_slope, f32x4::splat(s, line_top_x));
	let h = (line_px_right_y - line_px_left_y).abs();

	// The trapezoidal area enclosed between the line and the right edge of the pixel
	// square.
	let area = 0.5 * h * (2. * px_right_x - line_px_right_yx - line_px_left_yx);
	location_winding[x_idx as usize] += area.madd(sign, acc);
	acc = h.madd(sign, acc);
	}

	accumulated_winding += acc;
	}
	}