| // Copyright 2023 The Vello authors |
| // SPDX-License-Identifier: Apache-2.0 OR MIT |
| |
| use crate::SegmentCount; |
| |
| use super::{ |
| BinHeader, Clip, ClipBbox, ClipBic, ClipElement, Cubic, DrawBbox, DrawMonoid, Layout, LineSoup, |
| Path, PathBbox, PathMonoid, PathSegment, Tile, |
| }; |
| use bytemuck::{Pod, Zeroable}; |
| use std::mem; |
| |
/// Width of one tile, in pixels of the render target.
const TILE_WIDTH: u32 = 16;
/// Height of one tile, in pixels of the render target.
const TILE_HEIGHT: u32 = 16;

// TODO: Obtain these from the vello_shaders crate
//
// Each `*_WG` constant below is the divisor used when turning an element count
// into a number of workgroups to dispatch for the matching stage.

/// Divisor for the path tag reduce/scan dispatches.
pub(crate) const PATH_REDUCE_WG: u32 = 256;
/// Divisor for the per-path and per-draw-object dispatches.
const PATH_BBOX_WG: u32 = 256;
/// Divisor for the flatten dispatch.
const FLATTEN_WG: u32 = 256;
/// Divisor for the clip reduce/leaf dispatches.
const CLIP_REDUCE_WG: u32 = 256;
| |
| /// Counters for tracking dynamic allocation on the GPU. |
| /// |
| /// This must be kept in sync with the struct in shader/shared/bump.wgsl |
| #[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] |
| #[repr(C)] |
| pub struct BumpAllocators { |
| pub failed: u32, |
| // Final needed dynamic size of the buffers. If any of these are larger |
| // than the corresponding `_size` element reallocation needs to occur. |
| pub binning: u32, |
| pub ptcl: u32, |
| pub tile: u32, |
| pub seg_counts: u32, |
| pub segments: u32, |
| pub blend: u32, |
| pub lines: u32, |
| } |
| |
| /// Storage of indirect dispatch size values. |
| /// |
| /// The original plan was to reuse BumpAllocators, but the WebGPU compatible |
| /// usage list rules forbid that being used as indirect counts while also |
| /// bound as writable. |
| #[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] |
| #[repr(C)] |
| pub struct IndirectCount { |
| pub count_x: u32, |
| pub count_y: u32, |
| pub count_z: u32, |
| pub pad0: u32, |
| } |
| |
| /// Uniform render configuration data used by all GPU stages. |
| /// |
| /// This data structure must be kept in sync with the definition in |
| /// shaders/shared/config.wgsl. |
| #[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] |
| #[repr(C)] |
| pub struct ConfigUniform { |
| /// Width of the scene in tiles. |
| pub width_in_tiles: u32, |
| /// Height of the scene in tiles. |
| pub height_in_tiles: u32, |
| /// Width of the target in pixels. |
| pub target_width: u32, |
| /// Height of the target in pixels. |
| pub target_height: u32, |
| /// The base background color applied to the target before any blends. |
| pub base_color: u32, |
| /// Layout of packed scene data. |
| pub layout: Layout, |
| /// Size of binning buffer allocation (in u32s). |
| pub binning_size: u32, |
| /// Size of tile buffer allocation (in Tiles). |
| pub tiles_size: u32, |
| /// Size of segment buffer allocation (in PathSegments). |
| pub segments_size: u32, |
| /// Size of per-tile command list buffer allocation (in u32s). |
| pub ptcl_size: u32, |
| } |
| |
| /// CPU side setup and configuration. |
| #[derive(Default)] |
| pub struct RenderConfig { |
| /// GPU side configuration. |
| pub gpu: ConfigUniform, |
| /// Workgroup counts for all compute pipelines. |
| pub workgroup_counts: WorkgroupCounts, |
| /// Sizes of all buffer resources. |
| pub buffer_sizes: BufferSizes, |
| } |
| |
| impl RenderConfig { |
| pub fn new(layout: &Layout, width: u32, height: u32, base_color: &peniko::Color) -> Self { |
| let new_width = next_multiple_of(width, TILE_WIDTH); |
| let new_height = next_multiple_of(height, TILE_HEIGHT); |
| let width_in_tiles = new_width / TILE_WIDTH; |
| let height_in_tiles = new_height / TILE_HEIGHT; |
| let n_path_tags = layout.path_tags_size(); |
| let workgroup_counts = |
| WorkgroupCounts::new(layout, width_in_tiles, height_in_tiles, n_path_tags); |
| let buffer_sizes = BufferSizes::new(layout, &workgroup_counts, n_path_tags); |
| Self { |
| gpu: ConfigUniform { |
| width_in_tiles, |
| height_in_tiles, |
| target_width: width, |
| target_height: height, |
| base_color: base_color.to_premul_u32(), |
| binning_size: buffer_sizes.bin_data.len() - layout.bin_data_start, |
| tiles_size: buffer_sizes.tiles.len(), |
| segments_size: buffer_sizes.segments.len(), |
| ptcl_size: buffer_sizes.ptcl.len(), |
| layout: *layout, |
| }, |
| workgroup_counts, |
| buffer_sizes, |
| } |
| } |
| } |
| |
/// A workgroup size, expressed as an `(x, y, z)` triple.
pub type WorkgroupSize = (u32, u32, u32);

/// The dispatch size computed for every compute stage.
///
/// Each field other than `use_large_path_scan` is the dispatch size for the
/// stage of the same name.
#[derive(Debug, Default, Clone, Copy)]
pub struct WorkgroupCounts {
    /// Set when the number of path tag workgroups exceeds `PATH_REDUCE_WG`.
    pub use_large_path_scan: bool,
    pub path_reduce: WorkgroupSize,
    pub path_reduce2: WorkgroupSize,
    pub path_scan1: WorkgroupSize,
    pub path_scan: WorkgroupSize,
    pub bbox_clear: WorkgroupSize,
    pub flatten: WorkgroupSize,
    pub draw_reduce: WorkgroupSize,
    pub draw_leaf: WorkgroupSize,
    pub clip_reduce: WorkgroupSize,
    pub clip_leaf: WorkgroupSize,
    pub binning: WorkgroupSize,
    pub tile_alloc: WorkgroupSize,
    pub path_coarse: WorkgroupSize,
    pub backdrop: WorkgroupSize,
    pub coarse: WorkgroupSize,
    pub fine: WorkgroupSize,
}
| |
| impl WorkgroupCounts { |
| pub fn new( |
| layout: &Layout, |
| width_in_tiles: u32, |
| height_in_tiles: u32, |
| n_path_tags: u32, |
| ) -> Self { |
| let n_paths = layout.n_paths; |
| let n_draw_objects = layout.n_draw_objects; |
| let n_clips = layout.n_clips; |
| let path_tag_padded = align_up(n_path_tags, 4 * PATH_REDUCE_WG); |
| let path_tag_wgs = path_tag_padded / (4 * PATH_REDUCE_WG); |
| let use_large_path_scan = path_tag_wgs > PATH_REDUCE_WG; |
| let reduced_size = if use_large_path_scan { |
| align_up(path_tag_wgs, PATH_REDUCE_WG) |
| } else { |
| path_tag_wgs |
| }; |
| let draw_object_wgs = (n_draw_objects + PATH_BBOX_WG - 1) / PATH_BBOX_WG; |
| let flatten_wgs = (n_path_tags + FLATTEN_WG - 1) / FLATTEN_WG; |
| let clip_reduce_wgs = n_clips.saturating_sub(1) / CLIP_REDUCE_WG; |
| let clip_wgs = (n_clips + CLIP_REDUCE_WG - 1) / CLIP_REDUCE_WG; |
| let path_wgs = (n_paths + PATH_BBOX_WG - 1) / PATH_BBOX_WG; |
| let width_in_bins = (width_in_tiles + 15) / 16; |
| let height_in_bins = (height_in_tiles + 15) / 16; |
| Self { |
| use_large_path_scan, |
| path_reduce: (path_tag_wgs, 1, 1), |
| path_reduce2: (PATH_REDUCE_WG, 1, 1), |
| path_scan1: (reduced_size / PATH_REDUCE_WG, 1, 1), |
| path_scan: (path_tag_wgs, 1, 1), |
| bbox_clear: (draw_object_wgs, 1, 1), |
| flatten: (flatten_wgs, 1, 1), |
| draw_reduce: (draw_object_wgs, 1, 1), |
| draw_leaf: (draw_object_wgs, 1, 1), |
| clip_reduce: (clip_reduce_wgs, 1, 1), |
| clip_leaf: (clip_wgs, 1, 1), |
| binning: (draw_object_wgs, 1, 1), |
| tile_alloc: (path_wgs, 1, 1), |
| path_coarse: (flatten_wgs, 1, 1), |
| backdrop: (path_wgs, 1, 1), |
| coarse: (width_in_bins, height_in_bins, 1), |
| fine: (width_in_tiles, height_in_tiles, 1), |
| } |
| } |
| } |
| |
| /// Typed buffer size primitive. |
| #[derive(Copy, Clone, Eq, Default, Debug)] |
| pub struct BufferSize<T: Sized> { |
| len: u32, |
| _phantom: std::marker::PhantomData<T>, |
| } |
| |
| impl<T: Sized> BufferSize<T> { |
| /// Creates a new buffer size from number of elements. |
| pub const fn new(len: u32) -> Self { |
| Self { |
| // Each buffer binding must be large enough to hold at least one element to avoid |
| // triggering validation errors. |
| // |
| // Note: not using `Ord::max` here because it doesn't support const eval yet (except |
| // in nightly) |
| len: if len > 0 { len } else { 1 }, |
| _phantom: std::marker::PhantomData, |
| } |
| } |
| |
| /// Creates a new buffer size from size in bytes. |
| pub const fn from_size_in_bytes(size: u32) -> Self { |
| Self::new(size / mem::size_of::<T>() as u32) |
| } |
| |
| /// Returns the number of elements. |
| #[allow(clippy::len_without_is_empty)] |
| pub const fn len(self) -> u32 { |
| self.len |
| } |
| |
| /// Returns the size in bytes. |
| pub const fn size_in_bytes(self) -> u32 { |
| mem::size_of::<T>() as u32 * self.len |
| } |
| |
| /// Returns the size in bytes aligned up to the given value. |
| pub const fn aligned_in_bytes(self, alignment: u32) -> u32 { |
| align_up(self.size_in_bytes(), alignment) |
| } |
| } |
| |
| impl<T: Sized> PartialEq for BufferSize<T> { |
| fn eq(&self, other: &Self) -> bool { |
| self.len == other.len |
| } |
| } |
| |
| impl<T: Sized> PartialOrd for BufferSize<T> { |
| fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { |
| self.len.partial_cmp(&other.len) |
| } |
| } |
| |
| /// Computed sizes for all buffers. |
| #[derive(Copy, Clone, Debug, Default)] |
| pub struct BufferSizes { |
| // Known size buffers |
| pub path_reduced: BufferSize<PathMonoid>, |
| pub path_reduced2: BufferSize<PathMonoid>, |
| pub path_reduced_scan: BufferSize<PathMonoid>, |
| pub path_monoids: BufferSize<PathMonoid>, |
| pub path_bboxes: BufferSize<PathBbox>, |
| pub cubics: BufferSize<Cubic>, |
| pub draw_reduced: BufferSize<DrawMonoid>, |
| pub draw_monoids: BufferSize<DrawMonoid>, |
| pub info: BufferSize<u32>, |
| pub clip_inps: BufferSize<Clip>, |
| pub clip_els: BufferSize<ClipElement>, |
| pub clip_bics: BufferSize<ClipBic>, |
| pub clip_bboxes: BufferSize<ClipBbox>, |
| pub draw_bboxes: BufferSize<DrawBbox>, |
| pub bump_alloc: BufferSize<BumpAllocators>, |
| pub indirect_count: BufferSize<IndirectCount>, |
| pub bin_headers: BufferSize<BinHeader>, |
| pub paths: BufferSize<Path>, |
| // Bump allocated buffers |
| pub lines: BufferSize<LineSoup>, |
| pub bin_data: BufferSize<u32>, |
| pub tiles: BufferSize<Tile>, |
| pub seg_counts: BufferSize<SegmentCount>, |
| pub segments: BufferSize<PathSegment>, |
| pub ptcl: BufferSize<u32>, |
| } |
| |
| impl BufferSizes { |
| pub fn new(layout: &Layout, workgroups: &WorkgroupCounts, n_path_tags: u32) -> Self { |
| let n_paths = layout.n_paths; |
| let n_draw_objects = layout.n_draw_objects; |
| let n_clips = layout.n_clips; |
| let path_tag_wgs = workgroups.path_reduce.0; |
| let reduced_size = if workgroups.use_large_path_scan { |
| align_up(path_tag_wgs, PATH_REDUCE_WG) |
| } else { |
| path_tag_wgs |
| }; |
| let path_reduced = BufferSize::new(reduced_size); |
| let path_reduced2 = BufferSize::new(PATH_REDUCE_WG); |
| let path_reduced_scan = BufferSize::new(path_tag_wgs); |
| let path_monoids = BufferSize::new(path_tag_wgs * PATH_REDUCE_WG); |
| let path_bboxes = BufferSize::new(n_paths); |
| let cubics = BufferSize::new(n_path_tags); |
| let draw_object_wgs = workgroups.draw_reduce.0; |
| let draw_reduced = BufferSize::new(draw_object_wgs); |
| let draw_monoids = BufferSize::new(n_draw_objects); |
| let info = BufferSize::new(layout.bin_data_start); |
| let clip_inps = BufferSize::new(n_clips); |
| let clip_els = BufferSize::new(n_clips); |
| let clip_bics = BufferSize::new(n_clips / CLIP_REDUCE_WG); |
| let clip_bboxes = BufferSize::new(n_clips); |
| let draw_bboxes = BufferSize::new(n_paths); |
| let bump_alloc = BufferSize::new(1); |
| let indirect_count = BufferSize::new(1); |
| let bin_headers = BufferSize::new(draw_object_wgs * 256); |
| let n_paths_aligned = align_up(n_paths, 256); |
| let paths = BufferSize::new(n_paths_aligned); |
| |
| // The following buffer sizes have been hand picked to accommodate the vello test scenes as |
| // well as paris-30k. These should instead get derived from the scene layout using |
| // reasonable heuristics. |
| let bin_data = BufferSize::new(1 << 18); |
| let tiles = BufferSize::new(1 << 21); |
| let lines = BufferSize::new(1 << 21); |
| let seg_counts = BufferSize::new(1 << 21); |
| let segments = BufferSize::new(1 << 21); |
| let ptcl = BufferSize::new(1 << 23); |
| Self { |
| path_reduced, |
| path_reduced2, |
| path_reduced_scan, |
| path_monoids, |
| path_bboxes, |
| cubics, |
| draw_reduced, |
| draw_monoids, |
| info, |
| clip_inps, |
| clip_els, |
| clip_bics, |
| clip_bboxes, |
| draw_bboxes, |
| bump_alloc, |
| indirect_count, |
| lines, |
| bin_headers, |
| paths, |
| bin_data, |
| tiles, |
| seg_counts, |
| segments, |
| ptcl, |
| } |
| } |
| } |
| |
/// Rounds `len` up to the next multiple of `alignment`.
///
/// `alignment` must be a non-zero power of two: the padding is computed with a
/// bit mask, so for any other value the result is in general not a multiple of
/// `alignment`.
const fn align_up(len: u32, alignment: u32) -> u32 {
    // For a power of two, `alignment - 1` selects exactly the bits below it.
    let low_bits = alignment - 1;
    // The low bits of `-len` are the distance from `len` to the next multiple.
    let padding = len.wrapping_neg() & low_bits;
    len + padding
}
| |
/// Returns the smallest multiple of `rhs` that is greater than or equal to `val`.
///
/// Unlike a mask-based round-up, `rhs` does not have to be a power of two.
///
/// # Panics
///
/// Panics if `rhs` is zero, because the remainder is computed with `%`.
const fn next_multiple_of(val: u32, rhs: u32) -> u32 {
    let remainder = val % rhs;
    if remainder == 0 {
        val
    } else {
        val + (rhs - remainder)
    }
}