Allow deeper blend stacks by spilling to a buffer (#657)
This brings in support for blend spilling (which was supported in the
old piet-gpu).
I don't have a good heuristic for how big to make the buffer. That is
something which will need to be addressed in #606 (or its successors). I
just guessed that 4096 spills would be fine. I think this is probably too
small - I suspect we'll get feedback from @TrueDoctor about this.
I have confirmed that the robustness works as expected with the GPU
shaders.
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9a0f9b2..76dfa8f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,6 +17,8 @@
### Added
+- Support blends more than four layers deep ([#657][] by [@DJMcNab][])
+
### Changed
- Breaking: Updated `wgpu` to 22.1.0. ([#635] by [@waywardmonkeys])
@@ -119,6 +121,7 @@
[#630]: https://github.com/linebender/vello/pull/630
[#631]: https://github.com/linebender/vello/pull/631
[#635]: https://github.com/linebender/vello/pull/635
+[#657]: https://github.com/linebender/vello/pull/657
<!-- Note that this still comparing against 0.2.0, because 0.2.1 is a cherry-picked patch -->
[Unreleased]: https://github.com/linebender/vello/compare/v0.2.0...HEAD
diff --git a/examples/scenes/src/test_scenes.rs b/examples/scenes/src/test_scenes.rs
index 93e59c7..6fc9189 100644
--- a/examples/scenes/src/test_scenes.rs
+++ b/examples/scenes/src/test_scenes.rs
@@ -67,6 +67,7 @@
two_point_radial(two_point_radial),
brush_transform(brush_transform: animated),
blend_grid(blend_grid),
+ deep_blend(deep_blend),
conflation_artifacts(conflation_artifacts),
labyrinth(labyrinth),
robust_paths(robust_paths),
@@ -1057,6 +1058,42 @@
}
}
+ pub(super) fn deep_blend(scene: &mut Scene, params: &mut SceneParams) { // Scene of nested `Mix::Normal` layers for exercising deep blend stacks.
+ params.resolution = Some(Vec2::new(1000., 1000.)); // Request a 1000 x 1000 resolution.
+ let main_rect = Rect::from_origin_size((10., 10.), (900., 900.)); // Shape used by every fill below.
+ scene.fill( // Base red fill, drawn before any layer is pushed.
+ Fill::EvenOdd,
+ Affine::IDENTITY,
+ Color::RED,
+ None,
+ &main_rect,
+ );
+ let options = [ // (size of the square passed to `push_layer`, fill colour), in push order.
+ (800., Color::AQUA),
+ (700., Color::RED),
+ (600., Color::ALICE_BLUE),
+ (500., Color::YELLOW),
+ (400., Color::GREEN),
+ (300., Color::BLUE),
+ (200., Color::ORANGE),
+ (100., Color::WHITE),
+ ];
+ let mut depth = 0; // Number of layers pushed so far.
+ for (width, colour) in &options[..params.complexity.min(options.len() - 1)] { // Capped at `options.len() - 1` entries, so the last entry is never used.
+ scene.push_layer(
+ Mix::Normal,
+ 0.9,
+ Affine::IDENTITY,
+ &Rect::from_origin_size((10., 10.), (*width, *width)), // Square anchored at the same origin as `main_rect`.
+ );
+ scene.fill(Fill::EvenOdd, Affine::IDENTITY, colour, None, &main_rect); // Fill `main_rect` inside the layer just pushed.
+ depth += 1;
+ }
+ for _ in 0..depth { // Pop every layer that was pushed above.
+ scene.pop_layer();
+ }
+ }
+
// Support functions
pub(super) fn render_cardioid(scene: &mut Scene) {
diff --git a/vello/src/render.rs b/vello/src/render.rs
index bbd1c8c..952ef49 100644
--- a/vello/src/render.rs
+++ b/vello/src/render.rs
@@ -46,6 +46,7 @@
gradient_image: ResourceProxy,
info_bin_data_buf: ResourceProxy,
image_atlas: ResourceProxy,
+ blend_spill_buf: ResourceProxy,
out_image: ImageProxy,
}
@@ -450,6 +451,10 @@
recording.free_resource(bin_header_buf);
recording.free_resource(path_buf);
let out_image = ImageProxy::new(params.width, params.height, ImageFormat::Rgba8);
+ let blend_spill_buf = BufferProxy::new(
+ buffer_sizes.blend_spill.size_in_bytes().into(),
+ "blend_spill",
+ );
self.fine_wg_count = Some(wg_counts.fine);
self.fine_resources = Some(FineResources {
aa_config: params.antialiasing_method,
@@ -460,6 +465,7 @@
ptcl_buf,
gradient_image,
info_bin_data_buf,
+ blend_spill_buf: ResourceProxy::Buffer(blend_spill_buf),
image_atlas: ResourceProxy::Image(image_atlas),
out_image,
});
@@ -510,6 +516,7 @@
fine.segments_buf,
fine.ptcl_buf,
fine.info_bin_data_buf,
+ fine.blend_spill_buf,
ResourceProxy::Image(fine.out_image),
fine.gradient_image,
fine.image_atlas,
@@ -543,6 +550,7 @@
fine.segments_buf,
fine.ptcl_buf,
fine.info_bin_data_buf,
+ fine.blend_spill_buf,
ResourceProxy::Image(fine.out_image),
fine.gradient_image,
fine.image_atlas,
diff --git a/vello/src/shaders.rs b/vello/src/shaders.rs
index bf34bad..a58e0ed 100644
--- a/vello/src/shaders.rs
+++ b/vello/src/shaders.rs
@@ -211,6 +211,7 @@
BindType::BufReadOnly,
BindType::BufReadOnly,
BindType::BufReadOnly,
+ BindType::Buffer,
BindType::Image(ImageFormat::Rgba8),
BindType::ImageRead(ImageFormat::Rgba8),
BindType::ImageRead(ImageFormat::Rgba8),
diff --git a/vello_encoding/src/config.rs b/vello_encoding/src/config.rs
index 8b44bb9..88da7fd 100644
--- a/vello_encoding/src/config.rs
+++ b/vello_encoding/src/config.rs
@@ -147,6 +147,9 @@
pub seg_counts_size: u32,
/// Size of segment buffer allocation (in [`PathSegment`]s).
pub segments_size: u32,
+ /// Size of blend spill buffer (in `u32` pixels).
+ // TODO: Maybe store in TILE_WIDTH * TILE_HEIGHT blocks of pixels instead?
+ pub blend_size: u32,
/// Size of per-tile command list buffer allocation (in `u32`s).
pub ptcl_size: u32,
}
@@ -184,6 +187,7 @@
tiles_size: buffer_sizes.tiles.len(),
seg_counts_size: buffer_sizes.seg_counts.len(),
segments_size: buffer_sizes.segments.len(),
+ blend_size: buffer_sizes.blend_spill.len(),
ptcl_size: buffer_sizes.ptcl.len(),
layout: *layout,
},
@@ -352,6 +356,7 @@
pub tiles: BufferSize<Tile>,
pub seg_counts: BufferSize<SegmentCount>,
pub segments: BufferSize<PathSegment>,
+ pub blend_spill: BufferSize<u32>,
pub ptcl: BufferSize<u32>,
}
@@ -395,6 +400,8 @@
let lines = BufferSize::new(1 << 21);
let seg_counts = BufferSize::new(1 << 21);
let segments = BufferSize::new(1 << 21);
+ // 16 * 16 (1 << 8) is one blend spill, so this allows for 4096 spills.
+ let blend_spill = BufferSize::new(1 << 20);
let ptcl = BufferSize::new(1 << 23);
Self {
path_reduced,
@@ -419,6 +426,7 @@
tiles,
seg_counts,
segments,
+ blend_spill,
ptcl,
}
}
diff --git a/vello_shaders/shader/coarse.wgsl b/vello_shaders/shader/coarse.wgsl
index c28f8d2..6856396 100644
--- a/vello_shaders/shader/coarse.wgsl
+++ b/vello_shaders/shader/coarse.wgsl
@@ -444,8 +444,11 @@
ptcl[cmd_offset] = CMD_END;
var blend_ix = 0u;
if max_blend_depth > BLEND_STACK_SPLIT {
- let scratch_size = max_blend_depth * TILE_WIDTH * TILE_HEIGHT;
+ let scratch_size = (max_blend_depth - BLEND_STACK_SPLIT) * TILE_WIDTH * TILE_HEIGHT;
blend_ix = atomicAdd(&bump.blend, scratch_size);
+ if blend_ix + scratch_size > config.blend_size {
+ atomicOr(&bump.failed, STAGE_COARSE);
+ }
}
ptcl[blend_offset] = blend_ix;
}
diff --git a/vello_shaders/shader/fine.wgsl b/vello_shaders/shader/fine.wgsl
index 5af82f4..810f416 100644
--- a/vello_shaders/shader/fine.wgsl
+++ b/vello_shaders/shader/fine.wgsl
@@ -39,6 +39,9 @@
var<storage> info: array<u32>;
@group(0) @binding(4)
+var<storage, read_write> blend_spill: array<u32>;
+
+@group(0) @binding(5)
#ifdef r8
var output: texture_storage_2d<r8unorm, write>;
#else
@@ -46,10 +49,10 @@
#endif
#ifdef full
-@group(0) @binding(5)
+@group(0) @binding(6)
var gradients: texture_2d<f32>;
-@group(0) @binding(6)
+@group(0) @binding(7)
var image_atlas: texture_2d<f32>;
#endif
@@ -57,9 +60,9 @@
#ifdef msaa
#ifdef full
-const MASK_LUT_INDEX: u32 = 7;
+const MASK_LUT_INDEX: u32 = 8;
#else
-const MASK_LUT_INDEX: u32 = 5;
+const MASK_LUT_INDEX: u32 = 6;
#endif
#ifdef msaa8
@@ -947,7 +950,13 @@
rgba[i] = vec4(0.0);
}
} else {
- // TODO: spill to memory
+ let blend_in_scratch = clip_depth - BLEND_STACK_SPLIT;
+ let local_tile_ix = local_id.x * PIXELS_PER_THREAD + local_id.y * TILE_WIDTH;
+ let local_blend_start = blend_offset + blend_in_scratch * TILE_WIDTH * TILE_HEIGHT + local_tile_ix;
+ for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
+ blend_spill[local_blend_start + i] = pack4x8unorm(rgba[i]);
+ rgba[i] = vec4(0.0);
+ }
}
clip_depth += 1u;
cmd_ix += 1u;
@@ -960,7 +969,10 @@
if clip_depth < BLEND_STACK_SPLIT {
bg_rgba = blend_stack[clip_depth][i];
} else {
- // load from memory
+ let blend_in_scratch = clip_depth - BLEND_STACK_SPLIT;
+ let local_tile_ix = local_id.x * PIXELS_PER_THREAD + local_id.y * TILE_WIDTH;
+ let local_blend_start = blend_offset + blend_in_scratch * TILE_WIDTH * TILE_HEIGHT + local_tile_ix;
+ bg_rgba = blend_spill[local_blend_start + i];
}
let bg = unpack4x8unorm(bg_rgba);
let fg = rgba[i] * area[i] * end_clip.alpha;
diff --git a/vello_shaders/shader/shared/config.wgsl b/vello_shaders/shader/shared/config.wgsl
index ef7b928..3391afd 100644
--- a/vello_shaders/shader/shared/config.wgsl
+++ b/vello_shaders/shader/shared/config.wgsl
@@ -1,7 +1,7 @@
// Copyright 2022 the Vello Authors
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
-// This must be kept in sync with the struct in src/encoding/resolve.rs
+// This must be kept in sync with `ConfigUniform` in `vello_encoding/src/config.rs`
struct Config {
width_in_tiles: u32,
height_in_tiles: u32,
@@ -38,6 +38,7 @@
tiles_size: u32,
seg_counts_size: u32,
segments_size: u32,
+ blend_size: u32,
ptcl_size: u32,
}
@@ -54,6 +55,9 @@
// Not currently supporting non-square tiles
let TILE_SCALE = 0.0625;
+// The "split" point between using local memory in fine for the blend stack and spilling to the blend_spill buffer.
+// A higher value will increase vgpr ("register") pressure in fine, but decrease required dynamic memory allocation.
+// If changing, also change in vello_shaders/src/cpu/coarse.rs.
let BLEND_STACK_SPLIT = 4u;
// The following are computed in draw_leaf from the generic gradient parameters
diff --git a/vello_shaders/src/cpu/coarse.rs b/vello_shaders/src/cpu/coarse.rs
index dddcd4b..88ec603 100644
--- a/vello_shaders/src/cpu/coarse.rs
+++ b/vello_shaders/src/cpu/coarse.rs
@@ -1,6 +1,8 @@
// Copyright 2023 the Vello Authors
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
+use std::cmp::max;
+
use vello_encoding::{
BinHeader, BumpAllocators, ConfigUniform, DrawMonoid, DrawTag, Path, Tile,
DRAW_INFO_FLAGS_FILL_RULE_BIT,
@@ -11,10 +13,18 @@
CMD_LIN_GRAD, CMD_RAD_GRAD, CMD_SOLID, CMD_SWEEP_GRAD, PTCL_INITIAL_ALLOC,
};
+// Tiles per bin
const N_TILE_X: usize = 16;
const N_TILE_Y: usize = 16;
const N_TILE: usize = N_TILE_X * N_TILE_Y;
+// If changing, also change in vello_shaders/shader/shared/config.wgsl.
+const BLEND_STACK_SPLIT: u32 = 4;
+
+// Pixels per tile
+const TILE_WIDTH: u32 = 16;
+const TILE_HEIGHT: u32 = 16;
+
const PTCL_INCREMENT: u32 = 256;
const PTCL_HEADROOM: u32 = 2;
@@ -219,6 +229,8 @@
let blend_offset = tile_state.cmd_offset;
tile_state.cmd_offset += 1;
let mut clip_depth = 0;
+ let mut render_blend_depth = 0;
+ let mut max_blend_depth = 0_u32;
let mut clip_zero_depth = 0;
for drawobj_ix in &compacted[tile_ix] {
let drawtag = scene[(drawtag_base + drawobj_ix) as usize];
@@ -306,7 +318,10 @@
clip_zero_depth = clip_depth + 1;
} else {
tile_state.write_begin_clip(config, bump, ptcl);
- // TODO: update blend depth
+ // TODO: Do we need to track this separately? It seems like it
+ // is always the same as clip_depth in this code path.
+ render_blend_depth += 1;
+ max_blend_depth = max(render_blend_depth, max_blend_depth);
}
clip_depth += 1;
}
@@ -317,6 +332,7 @@
let blend = scene[dd as usize];
let alpha = f32::from_bits(scene[dd as usize + 1]);
tile_state.write_end_clip(config, bump, ptcl, blend, alpha);
+ render_blend_depth -= 1;
}
_ => todo!(),
}
@@ -338,7 +354,8 @@
if bin_tile_x + tile_x < width_in_tiles && bin_tile_y + tile_y < height_in_tiles {
ptcl[tile_state.cmd_offset as usize] = CMD_END;
- let scratch_size = 0; // TODO: actually compute blend depth
+ let scratch_size =
+ (max_blend_depth.saturating_sub(BLEND_STACK_SPLIT)) * TILE_WIDTH * TILE_HEIGHT;
ptcl[blend_offset as usize] = bump.blend;
bump.blend += scratch_size;
}
diff --git a/vello_tests/snapshots/deep_blend.png b/vello_tests/snapshots/deep_blend.png
new file mode 100644
index 0000000..8375462
--- /dev/null
+++ b/vello_tests/snapshots/deep_blend.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69f822f9fbe2de48420f5dee7ab0a502e022e65e08f99f2f16823927e8b63f95
+size 7000
diff --git a/vello_tests/tests/compare_gpu_cpu.rs b/vello_tests/tests/compare_gpu_cpu.rs
index e906f31..860728f 100644
--- a/vello_tests/tests/compare_gpu_cpu.rs
+++ b/vello_tests/tests/compare_gpu_cpu.rs
@@ -77,7 +77,17 @@
#[cfg_attr(skip_gpu_tests, ignore)]
fn compare_fill_types() {
let test_scene = test_scenes::fill_types();
assert_eq!(test_scene.config.name, "fill_types");
let params = TestParams::new("compare_fill_types", 1400, 700);
compare_test_scene(test_scene, params);
}
+
+/// Runs the `deep_blend` scene (which can nest more than `BLEND_STACK_SPLIT` layers) through `compare_test_scene`; the name assertion pins that the expected scene was selected.
+#[test]
+#[cfg_attr(skip_gpu_tests, ignore)]
+fn compare_deep_blend() {
+ let test_scene = test_scenes::deep_blend();
+ assert_eq!(test_scene.config.name, "deep_blend");
+ let params = TestParams::new("compare_deep_blend", 150, 150);
+ compare_test_scene(test_scene, params);
+}
diff --git a/vello_tests/tests/snapshots.rs b/vello_tests/tests/snapshots.rs
index 033af03..4abcc70 100644
--- a/vello_tests/tests/snapshots.rs
+++ b/vello_tests/tests/snapshots.rs
@@ -71,7 +71,17 @@
#[cfg_attr(skip_gpu_tests, ignore)]
fn snapshot_fill_types() {
let test_scene = test_scenes::fill_types();
assert_eq!(test_scene.config.name, "fill_types");
let params = TestParams::new("fill_types", 700, 350);
snapshot_test_scene(test_scene, params);
}
+
+/// Runs the `deep_blend` scene (which can nest more than `BLEND_STACK_SPLIT` layers) through `snapshot_test_scene`; the name assertion pins that the expected scene was selected.
+#[test]
+#[cfg_attr(skip_gpu_tests, ignore)]
+fn snapshot_deep_blend() {
+ let test_scene = test_scenes::deep_blend();
+ assert_eq!(test_scene.config.name, "deep_blend");
+ let params = TestParams::new("deep_blend", 200, 200);
+ snapshot_test_scene(test_scene, params);
+}