Oops, forgot scanline.rs
diff --git a/src/cpu_shader/scanline.rs b/src/cpu_shader/scanline.rs new file mode 100644 index 0000000..2fd76b0 --- /dev/null +++ b/src/cpu_shader/scanline.rs
@@ -0,0 +1,238 @@ +// Copyright 2023 The Vello authors +// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense + +use std::cmp::Ordering; + +use crate::{ + cpu_dispatch::CpuBinding, + cpu_shader::util::{ONE_MINUS_ULP, ROBUST_EPSILON}, +}; + +use vello_encoding::{BumpAllocators, LineSoup}; + +use super::util::{span, Vec2}; + +const TILE_WIDTH: u32 = 1; +const TILE_HEIGHT: u32 = 4; +const TILE_SCALE_X: f32 = 1.0 / (TILE_WIDTH as f32); +const TILE_SCALE_Y: f32 = 1.0 / (TILE_HEIGHT as f32); + +// This represents a small tile, analogous to a boundary fragment in Li +// et al. Ultimately we should have a packed representation as in the +// Scanline meets Vello doc. +#[derive(Clone, Debug)] +struct MiniTile { + path_id: u32, + // These coordinates are in tile units + x: u32, + y: u32, + delta_wind: i32, +} + +fn cmp_minitile(a: &MiniTile, b: &MiniTile) -> Ordering { + (a.path_id, a.y, a.x).cmp(&(b.path_id, b.y, b.x)) +} + +fn merge_tiles(tiles: &[MiniTile]) -> Vec<MiniTile> { + let mut result: Vec<MiniTile> = vec![]; + for tile in tiles { + if let Some(last) = result.last_mut() { + if last.path_id == tile.path_id && last.y == tile.y { + if last.x == tile.x { + last.delta_wind += tile.delta_wind; + } else { + let mut new_tile = tile.clone(); + new_tile.delta_wind += last.delta_wind; + result.push(new_tile); + } + } else { + result.push(tile.clone()); + } + } else { + result.push(tile.clone()); + } + } + result +} + +fn make_strips(merged: &[MiniTile]) { + let mut i = 0; + let mut n_strips = 0; + let mut n_li_prims = merged.len(); + while i < merged.len() { + let first = &merged[i]; + let mut j = i + 1; + let mut expected_x = first.x + 1; + let mut strip_end = 0; + let mut last_wind = first.delta_wind; + while j < merged.len() { + let this = &merged[j]; + if first.path_id != this.path_id || first.y != this.y || this.x != expected_x { + strip_end = if last_wind != 0 { this.x } else { expected_x }; + break; + } + j += 1; + expected_x += 1; + last_wind = this.delta_wind; + } + if strip_end != expected_x { + n_li_prims += 1; + } + println!( + "strip path_id = {}, y = {}, {}..{} {}", + first.path_id, first.y, first.x, expected_x, strip_end + ); + n_strips += 1; + i = j; + } + println!("n_strips = {n_strips}, n_li_prims = {n_li_prims}"); +} + +fn scanline_main(bump: &BumpAllocators, lines: &[LineSoup]) { + println!("running scanline, n_lines = {}", bump.lines); + let mut tiles = vec![]; + for line in &lines[..bump.lines as usize] { + // coarse rasterization logic + let p0 = Vec2::from_array(line.p0); + let p1 = Vec2::from_array(line.p1); + let is_down = p1.y >= p0.y; + let (xy0, xy1) = if is_down { (p0, p1) } else { (p1, p0) }; + let s0 = Vec2::new(xy0.x * TILE_SCALE_X, xy0.y * TILE_SCALE_Y); + let s1 = Vec2::new(xy1.x * TILE_SCALE_X, xy1.y * TILE_SCALE_Y); + let count_x = span(s0.x, s1.x) - 1; + let count = count_x + span(s0.y, s1.y); + println!("line {line:?}, count {count}"); + + let dx = (s1.x - s0.x).abs(); + let dy = s1.y - s0.y; + if dx + dy == 0.0 { + continue; + } + if dy == 0.0 && s0.y.floor() == s0.y { + continue; + } + let idxdy = 1.0 / (dx + dy); + let mut a = dx * idxdy; + let is_positive_slope = s1.x >= s0.x; + let sign = if is_positive_slope { 1.0 } else { -1.0 }; + let xt0 = (s0.x * sign).floor(); + let c = s0.x * sign - xt0; + let y0 = s0.y.floor(); + let ytop = if s0.y == s1.y { s0.y.ceil() } else { y0 + 1.0 }; + let b = ((dy * c + dx * (ytop - s0.y)) * idxdy).min(ONE_MINUS_ULP); + let robust_err = (a * (count as f32 - 1.0) + b).floor() - count_x as f32; + if robust_err != 0.0 { + a -= ROBUST_EPSILON.copysign(robust_err); + } + let x0 = xt0 * sign + if is_positive_slope { 0.0 } else { -1.0 }; + + // This is a hack and should be more principled + let bbox = [0, 0, 2048, 2110]; + let xmin = s0.x.min(s1.x); + let stride = bbox[2] - bbox[0]; + if s0.y >= bbox[3] as f32 || s1.y < bbox[1] as f32 || xmin >= bbox[2] as f32 || stride == 0 + { + continue; + } + // Clip line to bounding box. Clipping is done in "i" space. + let mut imin = 0; + if s0.y < bbox[1] as f32 { + let mut iminf = ((bbox[1] as f32 - y0 + b - a) / (1.0 - a)).round() - 1.0; + if y0 + iminf - (a * iminf + b).floor() < bbox[1] as f32 { + iminf += 1.0; + } + imin = iminf as u32; + } + let mut imax = count; + if s1.y > bbox[3] as f32 { + let mut imaxf = ((bbox[3] as f32 - y0 + b - a) / (1.0 - a)).round() - 1.0; + if y0 + imaxf - (a * imaxf + b).floor() < bbox[3] as f32 { + imaxf += 1.0; + } + imax = imaxf as u32; + } + let delta = if is_down { -1 } else { 1 }; + let mut ymin = 0; + let mut ymax = 0; + if s0.x.max(s1.x) < bbox[0] as f32 { + ymin = s0.y.ceil() as i32; + ymax = s1.y.ceil() as i32; + imax = imin; + } else { + let fudge = if is_positive_slope { 0.0 } else { 1.0 }; + if xmin < bbox[0] as f32 { + let mut f = ((sign * (bbox[0] as f32 - x0) - b + fudge) / a).round(); + if (x0 + sign * (a * f + b).floor() < bbox[0] as f32) == is_positive_slope { + f += 1.0; + } + let ynext = (y0 + f - (a * f + b).floor() + 1.0) as i32; + if is_positive_slope { + if f as u32 > imin { + ymin = (y0 + if y0 == s0.y { 0.0 } else { 1.0 }) as i32; + ymax = ynext; + imin = f as u32; + } + } else if (f as u32) < imax { + ymin = ynext; + ymax = s1.y.ceil() as i32; + imax = f as u32; + } + } + if s0.x.max(s1.x) > bbox[2] as f32 { + let mut f = ((sign * (bbox[2] as f32 - x0) - b + fudge) / a).round(); + if (x0 + sign * (a * f + b).floor() < bbox[2] as f32) == is_positive_slope { + f += 1.0; + } + if is_positive_slope { + imax = imax.min(f as u32); + } else { + imin = imin.max(f as u32); + } + } + } + imax = imin.max(imax); + ymin = ymin.max(bbox[1]); + ymax = ymax.min(bbox[3]); + for _y in ymin..ymax { + // TODO: handle delta bumps left of viewport + } + let mut last_z = (a * (imin as f32 - 1.0) + b).floor(); + for i in imin..imax { + let zf = a * i as f32 + b; + let z = zf.floor(); + let y = (y0 + i as f32 - z) as i32; + let x = (x0 + sign * z) as i32; + let top_edge = if i == 0 { y0 == s0.y } else { last_z == z }; + let delta_wind = if top_edge && x + 1 < bbox[2] { + delta + } else { + 0 + }; + let tile = MiniTile { + path_id: line.path_ix, + x: x as u32, + y: y as u32, + delta_wind, + }; + tiles.push(tile); + // let seg_within_slice = tile[(base + x) as usize].segment_count_or_ix; + // tile[(base + x) as usize].segment_count_or_ix += 1; + // let counts = (seg_within_slice << 16) | i; + // let seg_count = SegmentCount { line_ix, counts }; + // seg_counts[(seg_base + i - imin) as usize] = seg_count; + last_z = z; + } + } + tiles.sort_by(cmp_minitile); + //println!("{tiles:#?}"); + let merged = merge_tiles(&tiles); + println!("{merged:#?}"); + make_strips(&merged); + println!("n tiles {}, n merged {}", tiles.len(), merged.len()); +} + +pub fn scanline(_n_wg: u32, resources: &[CpuBinding]) { + let bump = resources[0].as_typed(); + let lines = resources[1].as_slice(); + scanline_main(&bump, &lines); +}