capture computation of workgroup and buffer sizes
diff --git a/Cargo.toml b/Cargo.toml index cdbde01..8ab85a5 100644 --- a/Cargo.toml +++ b/Cargo.toml
@@ -1,7 +1,7 @@ [workspace] resolver = "2" -members = ["examples/with_winit", "examples/with_bevy", "examples/run_wasm", "examples/usvg_viewer", "vello_shaders"] +members = ["examples/with_winit", "examples/with_bevy", "examples/run_wasm", "examples/usvg_viewer", "vello_shaders", "vello_native"] [workspace.package] edition = "2021"
diff --git a/src/encoding.rs b/src/encoding.rs index f24c3bf..9782ddd 100644 --- a/src/encoding.rs +++ b/src/encoding.rs
@@ -16,6 +16,9 @@ //! Raw scene encoding. +mod binning; +mod clip; +mod config; mod draw; mod encoding; mod math; @@ -25,12 +28,19 @@ pub mod resource; +pub use binning::BinHeader; +pub use clip::{ClipBbox, ClipBic, ClipElement, Clip}; +pub use config::{ + BufferSize, BufferSizes, BumpAllocators, CpuConfig, GpuConfig, WorkgroupSize, WorkgroupSizes, +}; pub use draw::{ - DrawBeginClip, DrawColor, DrawImage, DrawLinearGradient, DrawMonoid, DrawRadialGradient, - DrawTag, + DrawBbox, DrawBeginClip, DrawColor, DrawImage, DrawLinearGradient, DrawMonoid, + DrawRadialGradient, DrawTag, }; pub use encoding::Encoding; pub use math::Transform; pub use monoid::Monoid; -pub use packed::{Config, Layout, PackedEncoding}; -pub use path::{PathBbox, PathEncoder, PathMonoid, PathSegment, PathSegmentType, PathTag}; +pub use packed::{Layout, PackedEncoding}; +pub use path::{ + Cubic, Path, PathBbox, PathEncoder, PathMonoid, PathSegment, PathSegmentType, PathTag, Tile, +};
diff --git a/src/encoding/binning.rs b/src/encoding/binning.rs new file mode 100644 index 0000000..f9ec7d0 --- /dev/null +++ b/src/encoding/binning.rs
@@ -0,0 +1,12 @@ +// Copyright 2022 Google LLC +// SPDX-License-Identifier: Apache-2.0 OR MIT + +use bytemuck::{Pod, Zeroable}; + +/// Binning header. +#[derive(Copy, Clone, Pod, Zeroable, Debug, Default)] +#[repr(C)] +pub struct BinHeader { + pub element_count: u32, + pub chunk_offset: u32, +}
diff --git a/src/encoding/clip.rs b/src/encoding/clip.rs new file mode 100644 index 0000000..b621630 --- /dev/null +++ b/src/encoding/clip.rs
@@ -0,0 +1,42 @@ +// Copyright 2022 Google LLC +// SPDX-License-Identifier: Apache-2.0 OR MIT + +use bytemuck::{Pod, Zeroable}; + +/// Clip stack element. +#[derive(Copy, Clone, Pod, Zeroable, Debug, Default)] +#[repr(C)] +pub struct ClipBic { + pub a: u32, + pub b: u32, +} + +/// Clip element. +#[derive(Copy, Clone, Pod, Zeroable, Debug, Default)] +#[repr(C)] +pub struct ClipElement { + pub parent_ix: u32, + pub bbox: [f32; 4], +} + +/// Clip resolution. +/// +/// This is an intermediate element used to match clips to associated paths +/// and is also used to connect begin and end clip pairs. +#[derive(Copy, Clone, Pod, Zeroable, Debug, Default)] +#[repr(C)] +pub struct Clip { + // Index of the draw object. + pub ix: u32, + /// This is a packed encoding of an enum with the sign bit as the tag. If positive, + /// this entry is a BeginClip and contains the associated path index. If negative, + /// it is an EndClip and contains the bitwise-not of the EndClip draw object index. + pub path_ix: i32, +} + +/// Clip bounding box. +#[derive(Copy, Clone, Pod, Zeroable, Debug, Default)] +#[repr(C)] +pub struct ClipBbox { + pub bbox: [f32; 4], +}
diff --git a/src/encoding/config.rs b/src/encoding/config.rs new file mode 100644 index 0000000..dfe894b --- /dev/null +++ b/src/encoding/config.rs
@@ -0,0 +1,301 @@ +use super::{ + BinHeader, ClipBbox, ClipBic, ClipElement, Clip, Cubic, DrawBbox, DrawMonoid, Layout, + PackedEncoding, Path, PathBbox, PathMonoid, PathSegment, Tile, +}; +use bytemuck::{Pod, Zeroable}; +use std::mem; + +/// Counters for tracking dynamic allocation on the GPU. +/// +/// This must be kept in sync with the struct in shader/shared/bump.wgsl +#[repr(C)] +#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] +pub struct BumpAllocators { + pub failed: u32, + // Final needed dynamic size of the buffers. If any of these are larger + // than the corresponding `_size` element reallocation needs to occur. + pub binning: u32, + pub ptcl: u32, + pub tile: u32, + pub segments: u32, + pub blend: u32, +} + +/// GPU side configuration. +#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] +#[repr(C)] +pub struct GpuConfig { + /// Width of the scene in tiles. + pub width_in_tiles: u32, + /// Height of the scene in tiles. + pub height_in_tiles: u32, + /// Width of the target in pixels. + pub target_width: u32, + /// Height of the target in pixels. + pub target_height: u32, + /// Layout of packed scene data. + pub layout: Layout, + /// Size of binning buffer allocation (in u32s). + pub binning_size: u32, + /// Size of tile buffer allocation (in Tiles). + pub tiles_size: u32, + /// Size of segment buffer allocation (in PathSegments). + pub segments_size: u32, + /// Size of per-tile command list buffer allocation (in u32s). + pub ptcl_size: u32, +} + +/// CPU side setup and configuration. +#[derive(Default)] +pub struct CpuConfig { + /// GPU side configuration. + pub gpu: GpuConfig, + /// Workgroup sizes for all compute pipelines. + pub workgroup_sizes: WorkgroupSizes, + /// Sizes of all buffer resources. + pub buffer_sizes: BufferSizes, +} + +impl CpuConfig { + pub fn new(encoding: &PackedEncoding, width: u32, height: u32) -> Self { + let new_width = next_multiple_of(width, 16); + let new_height = next_multiple_of(height, 16); + let mut config = GpuConfig { + width_in_tiles: new_width / 16, + height_in_tiles: new_height / 16, + target_width: width, + target_height: height, + binning_size: 0, + tiles_size: 0, + segments_size: 0, + ptcl_size: 0, + layout: encoding.layout, + }; + let n_path_tags = encoding.path_tags().len() as u32; + let workgroup_sizes = WorkgroupSizes::new(&config, n_path_tags); + let buffer_sizes = BufferSizes::new(&config, &workgroup_sizes, n_path_tags); + config.binning_size = buffer_sizes.bin_data.len(); + config.tiles_size = buffer_sizes.tiles.len(); + config.segments_size = buffer_sizes.tiles.len(); + config.ptcl_size = buffer_sizes.tiles.len(); + Self { + gpu: config, + workgroup_sizes, + buffer_sizes, + } + } +} + +const PATH_REDUCE_WG: u32 = 256; +const PATH_BBOX_WG: u32 = 256; +const PATH_COARSE_WG: u32 = 256; +const CLIP_REDUCE_WG: u32 = 256; + +/// Type alias for a workgroup size. +pub type WorkgroupSize = (u32, u32, u32); + +/// Computed sizes for all dispatches. +#[derive(Copy, Clone, Debug, Default)] +pub struct WorkgroupSizes { + pub use_large_path_scan: bool, + pub path_reduce: WorkgroupSize, + pub path_reduce2: WorkgroupSize, + pub path_scan1: WorkgroupSize, + pub path_scan: WorkgroupSize, + pub bbox_clear: WorkgroupSize, + pub path_seg: WorkgroupSize, + pub draw_reduce: WorkgroupSize, + pub draw_leaf: WorkgroupSize, + pub clip_reduce: WorkgroupSize, + pub clip_leaf: WorkgroupSize, + pub binning: WorkgroupSize, + pub tile_alloc: WorkgroupSize, + pub path_coarse: WorkgroupSize, + pub backdrop: WorkgroupSize, + pub coarse: WorkgroupSize, + pub fine: WorkgroupSize, +} + +impl WorkgroupSizes { + pub fn new(config: &GpuConfig, n_path_tags: u32) -> Self { + let n_paths = config.layout.n_paths; + let n_draw_objects = config.layout.n_draw_objects; + let n_clips = config.layout.n_clips; + let path_tag_padded = align_up(n_path_tags, 4 * PATH_REDUCE_WG); + let path_tag_wgs = path_tag_padded / (4 * PATH_REDUCE_WG); + let use_large_path_scan = path_tag_wgs > PATH_REDUCE_WG; + let path_reduce_wgs = if use_large_path_scan { + align_up(path_tag_wgs, PATH_REDUCE_WG) + } else { + path_tag_wgs + }; + let draw_object_wgs = (n_draw_objects + PATH_BBOX_WG - 1) / PATH_BBOX_WG; + let path_coarse_wgs = (n_path_tags + PATH_COARSE_WG - 1) / PATH_COARSE_WG; + let clip_reduce_wgs = n_clips.saturating_sub(1) / CLIP_REDUCE_WG; + let clip_wgs = (n_clips + CLIP_REDUCE_WG - 1) / CLIP_REDUCE_WG; + let path_wgs = (n_paths + PATH_BBOX_WG - 1) / PATH_BBOX_WG; + let width_in_bins = (config.width_in_tiles + 15) / 16; + let height_in_bins = (config.height_in_tiles + 15) / 16; + Self { + use_large_path_scan, + path_reduce: (path_reduce_wgs, 1, 1), + path_reduce2: (PATH_REDUCE_WG, 1, 1), + path_scan1: (path_reduce_wgs / PATH_REDUCE_WG, 1, 1), + path_scan: (path_tag_wgs, 1, 1), + bbox_clear: (draw_object_wgs, 1, 1), + path_seg: (path_coarse_wgs, 1, 1), + draw_reduce: (draw_object_wgs, 1, 1), + draw_leaf: (draw_object_wgs, 1, 1), + clip_reduce: (clip_reduce_wgs, 1, 1), + clip_leaf: (clip_wgs, 1, 1), + binning: (draw_object_wgs, 1, 1), + tile_alloc: (path_wgs, 1, 1), + path_coarse: (path_coarse_wgs, 1, 1), + backdrop: (path_wgs, 1, 1), + coarse: (width_in_bins, height_in_bins, 1), + fine: (config.width_in_tiles, config.height_in_tiles, 1), + } + } +} + +/// Typed buffer size primitive. +#[derive(Copy, Clone, Eq, Ord, Default, Debug)] +pub struct BufferSize<T: Sized> { + len: u32, + _phantom: std::marker::PhantomData<T>, +} + +impl<T: Sized> BufferSize<T> { + /// Creates a new buffer size from number of elements. + pub const fn new(len: u32) -> Self { + Self { + len, + _phantom: std::marker::PhantomData, + } + } + + /// Creates a new buffer size from size in bytes. + pub const fn from_size_in_bytes(size: u32) -> Self { + Self::new(size / mem::size_of::<T>() as u32) + } + + /// Returns the number of elements. + pub const fn len(self) -> u32 { + self.len + } + + /// Returns the size in bytes. + pub const fn size_in_bytes(self) -> u32 { + mem::size_of::<T>() as u32 * self.len + } + + /// Returns the size in bytes aligned up to the given value. + pub const fn aligned_in_bytes(self, alignment: u32) -> u32 { + align_up(self.size_in_bytes(), alignment) + } +} + +impl<T: Sized> PartialEq for BufferSize<T> { + fn eq(&self, other: &Self) -> bool { + self.len == other.len + } +} + +impl<T: Sized> PartialOrd for BufferSize<T> { + fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { + self.len.partial_cmp(&other.len) + } +} + +/// Computed sizes for all buffers. +#[derive(Copy, Clone, Debug, Default)] +pub struct BufferSizes { + // Known size buffers + pub path_reduced: BufferSize<PathMonoid>, + pub path_reduced2: BufferSize<PathMonoid>, + pub path_reduced_scan: BufferSize<PathMonoid>, + pub path_monoids: BufferSize<PathMonoid>, + pub path_bboxes: BufferSize<PathBbox>, + pub cubics: BufferSize<Cubic>, + pub draw_reduced: BufferSize<DrawMonoid>, + pub draw_monoids: BufferSize<DrawMonoid>, + pub info: BufferSize<u32>, + pub clip_inps: BufferSize<Clip>, + pub clip_els: BufferSize<ClipElement>, + pub clip_bics: BufferSize<ClipBic>, + pub clip_bboxes: BufferSize<ClipBbox>, + pub draw_bboxes: BufferSize<DrawBbox>, + pub bin_headers: BufferSize<BinHeader>, + pub paths: BufferSize<Path>, + // Bump allocated buffers + pub bin_data: BufferSize<u32>, + pub tiles: BufferSize<Tile>, + pub segments: BufferSize<PathSegment>, + pub ptcl: BufferSize<u32>, +} + +impl BufferSizes { + pub fn new(config: &GpuConfig, workgroups: &WorkgroupSizes, n_path_tags: u32) -> Self { + let n_paths = config.layout.n_paths; + let n_draw_objects = config.layout.n_draw_objects; + let n_clips = config.layout.n_clips; + let path_tag_wgs = workgroups.path_reduce.0; + let path_reduced = BufferSize::new(path_tag_wgs); + let path_reduced2 = BufferSize::new(PATH_REDUCE_WG); + let path_reduced_scan = BufferSize::new(path_tag_wgs); + let path_monoids = BufferSize::new(path_tag_wgs * PATH_REDUCE_WG); + let path_bboxes = BufferSize::new(n_paths); + let cubics = BufferSize::new(n_path_tags); + let draw_object_wgs = workgroups.draw_reduce.0; + let draw_reduced = BufferSize::new(draw_object_wgs); + let draw_monoids = BufferSize::new(n_draw_objects); + let info = BufferSize::new(config.layout.bin_data_start); + let clip_inps = BufferSize::new(n_clips); + let clip_els = BufferSize::new(n_clips); + let clip_bics = BufferSize::new(n_clips / CLIP_REDUCE_WG); + let clip_bboxes = BufferSize::new(n_clips); + let draw_bboxes = BufferSize::new(n_paths); + let bin_headers = BufferSize::new(draw_object_wgs * 256); + let n_paths_aligned = align_up(n_paths, 256); + let paths = BufferSize::new(n_paths_aligned); + // TODO: better heuristics. Just use 128k for now + let initial_bump_size = 128 * 1024; + let bin_data = BufferSize::from_size_in_bytes(initial_bump_size); + let tiles = BufferSize::from_size_in_bytes(initial_bump_size); + let segments = BufferSize::from_size_in_bytes(initial_bump_size); + let ptcl = BufferSize::from_size_in_bytes(initial_bump_size); + Self { + path_reduced, + path_reduced2, + path_reduced_scan, + path_monoids, + path_bboxes, + cubics, + draw_reduced, + draw_monoids, + info, + clip_inps, + clip_els, + clip_bics, + clip_bboxes, + draw_bboxes, + bin_headers, + paths, + bin_data, + tiles, + segments, + ptcl, + } + } +} + +const fn align_up(len: u32, alignment: u32) -> u32 { + len + (len.wrapping_neg() & (alignment - 1)) +} + +const fn next_multiple_of(val: u32, rhs: u32) -> u32 { + match val % rhs { + 0 => val, + r => val + (rhs - r), + } +}
diff --git a/src/encoding/draw.rs b/src/encoding/draw.rs index 1ddaead..5a9b07a 100644 --- a/src/encoding/draw.rs +++ b/src/encoding/draw.rs
@@ -54,6 +54,13 @@ } } +/// Draw object bounding box. +#[derive(Copy, Clone, Pod, Zeroable, Debug, Default)] +#[repr(C)] +pub struct DrawBbox { + pub bbox: [f32; 4], +} + /// Draw data for a solid color. #[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] #[repr(C)] @@ -131,7 +138,7 @@ } /// Monoid for the draw tag stream. -#[derive(Copy, Clone, PartialEq, Eq, Pod, Zeroable, Default)] +#[derive(Copy, Clone, PartialEq, Eq, Pod, Zeroable, Default, Debug)] #[repr(C)] pub struct DrawMonoid { // The number of paths preceding this draw object.
diff --git a/src/encoding/packed.rs b/src/encoding/packed.rs index 55ffc17..00a9e3a 100644 --- a/src/encoding/packed.rs +++ b/src/encoding/packed.rs
@@ -48,30 +48,6 @@ pub linewidth_base: u32, } -/// Scene configuration. -#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] -#[repr(C)] -pub struct Config { - /// Width of the scene in tiles. - pub width_in_tiles: u32, - /// Height of the scene in tiles. - pub height_in_tiles: u32, - /// Width of the target in pixels. - pub target_width: u32, - /// Height of the target in pixels. - pub target_height: u32, - /// Layout of packed scene data. - pub layout: Layout, - /// Size of binning buffer allocation (in u32s). - pub binning_size: u32, - /// Size of tile buffer allocation (in Tiles). - pub tiles_size: u32, - /// Size of segment buffer allocation (in PathSegments). - pub segments_size: u32, - /// Size of per-tile command list buffer allocation (in u32s). - pub ptcl_size: u32, -} - /// Packed encoding of scene data. #[derive(Default)] pub struct PackedEncoding {
diff --git a/src/encoding/path.rs b/src/encoding/path.rs index 760eb32..0ebfcda 100644 --- a/src/encoding/path.rs +++ b/src/encoding/path.rs
@@ -20,7 +20,7 @@ use super::Monoid; /// Path segment. -#[derive(Clone, Copy, Debug, Zeroable, Pod)] +#[derive(Clone, Copy, Debug, Zeroable, Pod, Default)] #[repr(C)] pub struct PathSegment { pub origin: [f32; 2], @@ -170,6 +170,19 @@ } } +/// Cubic path segment. +#[derive(Copy, Clone, Pod, Zeroable, Debug, Default)] +#[repr(C)] +pub struct Cubic { + pub p0: [f32; 2], + pub p1: [f32; 2], + pub p2: [f32; 2], + pub p3: [f32; 2], + pub stroke: [f32; 2], + pub path_ix: u32, + pub flags: u32, +} + /// Path bounding box. #[derive(Copy, Clone, Pod, Zeroable, Default, Debug)] #[repr(C)] @@ -188,6 +201,26 @@ pub trans_ix: u32, } +/// Tiled path object. +#[derive(Copy, Clone, Pod, Zeroable, Debug, Default)] +#[repr(C)] +pub struct Path { + /// Bounding box in tiles. + pub bbox: [f32; 4], + /// Offset (in u32s) to tile rectangle. + pub tiles: u32, +} + +/// Tile object. +#[derive(Copy, Clone, Pod, Zeroable, Debug, Default)] +#[repr(C)] +pub struct Tile { + /// Accumulated backdrop at the left edge of the tile. + pub backdrop: i32, + /// Index of first path segment. + pub segments: u32, +} + /// Encoder for path segments. pub struct PathEncoder<'a> { tags: &'a mut Vec<PathTag>,
diff --git a/src/render.rs b/src/render.rs index 9136931..8d6f627 100644 --- a/src/render.rs +++ b/src/render.rs
@@ -191,7 +191,7 @@ let new_height = next_multiple_of(height, 16); let info_size = packed.layout.bin_data_start; - let config = crate::encoding::Config { + let config = crate::encoding::GpuConfig { width_in_tiles: new_width / 16, height_in_tiles: new_height / 16, target_width: width,
diff --git a/vello_native/Cargo.toml b/vello_native/Cargo.toml new file mode 100644 index 0000000..233ed7d --- /dev/null +++ b/vello_native/Cargo.toml
@@ -0,0 +1,20 @@ +[package] +name = "vello_native" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +vello = { path = "../" } +vello_shaders = { path = "../vello_shaders" } +bytemuck = { version = "1.12.1", features = ["derive"] } + +[target.'cfg(target_os="macos")'.dependencies] +metal = "0.24" +objc = "0.2" +block = "0.1" +cocoa-foundation = "0.1" +# Note: foreign-types is up to 0.5 but metal hasn't upgraded to it +foreign-types = "0.3" +core-graphics-types = "0.1"
diff --git a/vello_native/src/lib.rs b/vello_native/src/lib.rs new file mode 100644 index 0000000..0367c24 --- /dev/null +++ b/vello_native/src/lib.rs
@@ -0,0 +1 @@ +pub mod metal;
diff --git a/vello_native/src/metal.rs b/vello_native/src/metal.rs new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/vello_native/src/metal.rs
@@ -0,0 +1 @@ +
diff --git a/vello_native/src/metal/util.rs b/vello_native/src/metal/util.rs new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/vello_native/src/metal/util.rs