it's jank
diff --git a/sparse_strips/vello_common/src/coarse.rs b/sparse_strips/vello_common/src/coarse.rs index cbbc58f..66b1bdb 100644 --- a/sparse_strips/vello_common/src/coarse.rs +++ b/sparse_strips/vello_common/src/coarse.rs
@@ -899,25 +899,27 @@ // inline the blend mode instead. let (_, tail) = self.cmds.split_at(self.cmds.len() - 3); + // For vello_hybrid I am not sure how this is possible. + // Temporarily commented out so I can focus on the layers golden path. let updated = match tail { - [Cmd::PushBuf, Cmd::AlphaFill(a), Cmd::Blend(b)] => { - if !b.is_destructive() && a.blend_mode.is_none() { - let mut blended = a.clone(); - blended.blend_mode = Some(*b); - Some(Cmd::AlphaFill(blended)) - } else { - None - } - } - [Cmd::PushBuf, Cmd::Fill(a), Cmd::Blend(b)] => { - if !b.is_destructive() && a.blend_mode.is_none() { - let mut blended = a.clone(); - blended.blend_mode = Some(*b); - Some(Cmd::Fill(blended)) - } else { - None - } - } + // [Cmd::PushBuf, Cmd::AlphaFill(a), Cmd::Blend(b)] => { + // if !b.is_destructive() && a.blend_mode.is_none() { + // let mut blended = a.clone(); + // blended.blend_mode = Some(*b); + // Some(Cmd::AlphaFill(blended)) + // } else { + // None + // } + // } + // [Cmd::PushBuf, Cmd::Fill(a), Cmd::Blend(b)] => { + // if !b.is_destructive() && a.blend_mode.is_none() { + // let mut blended = a.clone(); + // blended.blend_mode = Some(*b); + // Some(Cmd::Fill(blended)) + // } else { + // None + // } + // } _ => None, };
diff --git a/sparse_strips/vello_hybrid/examples/scenes/src/lib.rs b/sparse_strips/vello_hybrid/examples/scenes/src/lib.rs index f9f5394..38a8478 100644 --- a/sparse_strips/vello_hybrid/examples/scenes/src/lib.rs +++ b/sparse_strips/vello_hybrid/examples/scenes/src/lib.rs
@@ -50,24 +50,9 @@ /// Get all available example scenes /// Unlike the Wasm version, this function allows for passing custom SVGs. #[cfg(not(target_arch = "wasm32"))] -pub fn get_example_scenes(svg_paths: Option<Vec<&str>>) -> Box<[AnyScene]> { +pub fn get_example_scenes() -> Box<[AnyScene]> { let mut scenes = Vec::new(); - - // Create SVG scenes for each provided path - if let Some(paths) = svg_paths { - for path in paths { - scenes.push(AnyScene::new( - svg::SvgScene::with_svg_file(path.into()).unwrap(), - )); - } - } else { - scenes.push(AnyScene::new(svg::SvgScene::tiger())); - } - - scenes.push(AnyScene::new(text::TextScene::new("Hello, Vello!"))); scenes.push(AnyScene::new(simple::SimpleScene::new())); - scenes.push(AnyScene::new(clip::ClipScene::new())); - scenes.push(AnyScene::new(image::ImageScene::new())); scenes.into_boxed_slice() }
diff --git a/sparse_strips/vello_hybrid/examples/scenes/src/simple.rs b/sparse_strips/vello_hybrid/examples/scenes/src/simple.rs index b8da87b..8f19b44 100644 --- a/sparse_strips/vello_hybrid/examples/scenes/src/simple.rs +++ b/sparse_strips/vello_hybrid/examples/scenes/src/simple.rs
@@ -3,8 +3,12 @@ //! Simple example scene with basic shapes. +use parley::Rect; +use vello_common::color::palette::css::{BLUE, WHITE, YELLOW}; use vello_common::kurbo::{Affine, BezPath, Stroke}; use vello_common::peniko::color::palette; +use vello_common::kurbo::Shape; +use vello_common::peniko::{BlendMode, Compose, Mix}; use vello_hybrid::Scene; use crate::ExampleScene; @@ -33,21 +37,32 @@ } /// Draws a simple scene with shapes -pub fn render(ctx: &mut Scene, root_transform: Affine) { - let mut path = BezPath::new(); - path.move_to((10.0, 10.0)); - path.line_to((180.0, 20.0)); - path.line_to((30.0, 40.0)); - path.close_path(); +pub fn render(ctx: &mut Scene, _root_transform: Affine) { + let path = Rect::new(0.0, 0.0, 100 as f64, 100 as f64).to_path(0.1); - // Use a combined transform that includes the root transform - let scene_transform = Affine::scale(5.0); - ctx.set_transform(root_transform * scene_transform); - - ctx.set_paint(palette::css::REBECCA_PURPLE); + ctx.set_paint(WHITE); ctx.fill_path(&path); - let stroke = Stroke::new(1.0); - ctx.set_paint(palette::css::DARK_BLUE); - ctx.set_stroke(stroke); - ctx.stroke_path(&path); + + ctx.push_layer( + None, + Some(BlendMode::new(Mix::Normal, Compose::SrcOver)), + None, + None, + ); + + // Draw the destination layer. + ctx.set_paint(YELLOW.with_alpha(1.0)); + ctx.fill_rect(&Rect::new(10.0, 10.0, 70.0, 70.0)); + // Draw the source layer. + ctx.push_layer( + None, + Some(BlendMode::new(Mix::Normal, Compose::Xor)), + None, + None, + ); + ctx.set_paint(BLUE.with_alpha(1.0)); + ctx.fill_rect(&Rect::new(30.0, 30.0, 90.0, 90.0)); + // Compose. + ctx.pop_layer(); + ctx.pop_layer(); }
diff --git a/sparse_strips/vello_hybrid/examples/winit/src/main.rs b/sparse_strips/vello_hybrid/examples/winit/src/main.rs index 2482073..fa5256f 100644 --- a/sparse_strips/vello_hybrid/examples/winit/src/main.rs +++ b/sparse_strips/vello_hybrid/examples/winit/src/main.rs
@@ -65,11 +65,8 @@ } } } - let scenes = if svg_paths.is_empty() { - get_example_scenes(None) - } else { - get_example_scenes(Some(svg_paths)) - }; + + let scenes = get_example_scenes(); start_scene_index = start_scene_index.min(scenes.len() - 1); (scenes, start_scene_index) @@ -83,7 +80,7 @@ scenes, current_scene: start_scene_index, render_state: RenderState::Suspended(None), - scene: Scene::new(1800, 1200), + scene: Scene::new(100, 100), transform: Affine::IDENTITY, mouse_down: false, last_cursor_position: None, @@ -119,8 +116,7 @@ let window = cached_window.take().unwrap_or_else(|| { create_winit_window( event_loop, - self.scene.width().into(), - self.scene.height().into(), + 800,600, true, ) }); @@ -166,10 +162,10 @@ WindowEvent::Resized(size) => { self.context .resize_surface(surface, size.width, size.height); - self.scene = Scene::new( - u16::try_from(size.width).unwrap(), - u16::try_from(size.height).unwrap(), - ); + // self.scene = Scene::new( + // u16::try_from(size.width).unwrap(), + // u16::try_from(size.height).unwrap(), + // ); } WindowEvent::KeyboardInput { event:
diff --git a/sparse_strips/vello_hybrid/src/lib.rs b/sparse_strips/vello_hybrid/src/lib.rs index 306acc3..04a6d02 100644 --- a/sparse_strips/vello_hybrid/src/lib.rs +++ b/sparse_strips/vello_hybrid/src/lib.rs
@@ -29,7 +29,8 @@ //! //! See the individual module documentation for more details on usage and implementation. -#![no_std] +// Commented out to allow dbg! and println. +// #![no_std] extern crate alloc;
diff --git a/sparse_strips/vello_hybrid/src/render/common.rs b/sparse_strips/vello_hybrid/src/render/common.rs index 1a811e7..f21c6d6 100644 --- a/sparse_strips/vello_hybrid/src/render/common.rs +++ b/sparse_strips/vello_hybrid/src/render/common.rs
@@ -74,6 +74,73 @@ pub _padding2: [u32; 2], } +/// Represents a GPU blend command for wide tile blending operations. +/// +/// This struct corresponds to the `BlendCommand` struct in the blend_wide_tile.wgsl shader. +#[repr(C)] +#[derive(Debug, Clone, Copy, Zeroable, Pod)] +pub struct GpuBlendCommand { + /// [x, y] packed as u16's - coordinates of the top left of the source wide tile + pub xy_src: u32, + /// [x, y] packed as u16's - coordinates of the top left of the destination wide tile + pub xy_dst: u32, + /// Bits 0-7: opacity + /// Bits 8-11: compose + /// Bits 12-15: mix + /// Bits 16: source texture (0 = slots of ix=0, 1 = slots of ix=1) + /// Bits 17-18: dest texture (0 = slots of ix=0, 1 = slots of ix=1, 2 = final target) + /// Bits 19-26: blend slot index + pub payload: u32, +} + +/// Represents a GPU copy command for copying slots between textures. +/// +/// This struct corresponds to the `CopyCommand` struct in the copy_slot.wgsl shader. +#[repr(C)] +#[derive(Debug, Clone, Copy, Zeroable, Pod)] +pub struct GpuCopyCommand { + /// [x, y] packed as u16's - coordinates of the top left of the target wide tile + pub xy_target: u32, + /// Slot index to identify the pixel position to sample from + pub slot_ix: u32, +} + +/// Configuration for the blend wide tile operations +#[repr(C)] +#[derive(Debug, Copy, Clone, Pod, Zeroable)] +pub struct BlendConfig { + /// Width of a wide tile (matching `WideTile::WIDTH`). + pub wide_tile_width: u32, + /// Height of a wide tile (matching `WideTile::HEIGHT`). + pub wide_tile_height: u32, + /// Height of the slot texture. + pub slot_texture_height: u32, + /// Height of the final target texture. + pub final_target_height: u32, + /// Height of the blend texture. 
+ pub blend_texture_height: u32, + /// Padding for 16-byte alignment + pub _padding: [u32; 3], +} + +/// Configuration for the copy slot operations +#[repr(C)] +#[derive(Debug, Copy, Clone, Pod, Zeroable)] +pub struct CopyConfig { + /// Width of a wide tile (matching `WideTile::WIDTH`). + pub wide_tile_width: u32, + /// Height of a wide tile (matching `WideTile::HEIGHT`). + pub wide_tile_height: u32, + /// Height of the slot texture (source). + pub slot_texture_height: u32, + /// Width of the target texture (destination). + pub target_texture_width: u32, + /// Height of the target texture (destination). + pub target_texture_height: u32, + /// Padding for 16-byte alignment + pub _padding: [u32; 3], +} + #[cfg(all(target_arch = "wasm32", feature = "webgl", feature = "wgpu"))] pub(crate) fn maybe_warn_about_webgl_feature_conflict() { use core::sync::atomic::{AtomicBool, Ordering};
diff --git a/sparse_strips/vello_hybrid/src/render/wgpu.rs b/sparse_strips/vello_hybrid/src/render/wgpu.rs index 6968e22..ed21bb7 100644 --- a/sparse_strips/vello_hybrid/src/render/wgpu.rs +++ b/sparse_strips/vello_hybrid/src/render/wgpu.rs
@@ -36,9 +36,12 @@ use crate::{ GpuStrip, RenderError, RenderSize, image_cache::{ImageCache, ImageResource}, - render::{Config, common::GpuEncodedImage}, + render::{ + Config, + common::{BlendConfig, CopyConfig, GpuBlendCommand, GpuCopyCommand, GpuEncodedImage}, + }, scene::Scene, - schedule::{LoadOp, RendererBackend, Scheduler}, + schedule::{BlendCommand, LoadOp, Location, RendererBackend, Scheduler}, }; /// Options for the renderer @@ -76,6 +79,14 @@ } } + pub fn get_slots_texture_views(&self) -> &[TextureView; 2] { + &self.programs.resources.slot_texture_views + } + + pub fn get_blend_texture_view(&self) -> &TextureView { + &self.programs.resources.blend_texture_view + } + /// Render `scene` into the provided command encoder. /// /// This method creates GPU resources as needed and schedules potentially multiple @@ -257,6 +268,16 @@ /// Pipeline for clearing slots in slot textures. clear_pipeline: RenderPipeline, + /// Pipeline for blending wide tiles. + blend_pipeline: RenderPipeline, + /// Bind group layout for blend operations + blend_bind_group_layout: BindGroupLayout, + + /// Pipeline for copying slots between textures. 
+ copy_pipeline: RenderPipeline, + /// Bind group layout for copy operations + copy_bind_group_layout: BindGroupLayout, + /// GPU resources for rendering (created during prepare) resources: GpuResources, /// Dimensions of the rendering target @@ -298,6 +319,26 @@ /// Bind group for clear slots operation clear_bind_group: BindGroup, + + /// Blend texture for wide tile blending operations + blend_texture: Texture, + /// Blend texture view + blend_texture_view: TextureView, + /// Config buffer for blend operations + blend_config_buffer: Buffer, + + /// Config buffer for copy operations to slots + copy_slot_config_buffer: Buffer, + /// Config buffer for copy operations to final target + copy_target_config_buffer: Buffer, + /// Bind group for copy operations to slots + copy_slot_bind_group: BindGroup, + /// Bind group for copy operations to final target + copy_target_bind_group: BindGroup, + + /// Buffers for blend and copy commands + blend_commands_buffer: Buffer, + copy_commands_buffer: Buffer, } const SIZE_OF_CONFIG: NonZeroU64 = NonZeroU64::new(size_of::<Config>() as u64).unwrap(); @@ -329,6 +370,27 @@ } } +impl GpuBlendCommand { + /// Vertex attributes for the blend command + pub fn vertex_attributes() -> [wgpu::VertexAttribute; 3] { + wgpu::vertex_attr_array![ + 0 => Uint32, // xy_src + 1 => Uint32, // xy_dst + 2 => Uint32, // payload + ] + } +} + +impl GpuCopyCommand { + /// Vertex attributes for the copy command + pub fn vertex_attributes() -> [wgpu::VertexAttribute; 2] { + wgpu::vertex_attr_array![ + 0 => Uint32, // xy_target + 1 => Uint32, // slot_ix + ] + } +} + impl Programs { fn new(device: &Device, render_target_config: &RenderTargetConfig, slot_count: usize) -> Self { let strip_bind_group_layout = @@ -414,6 +476,72 @@ }], }); + // Create bind group layout for blend operations + let blend_bind_group_layout = + device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("Blend Bind Group Layout"), + entries: &[ + 
wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::VERTEX, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + }, + wgpu::BindGroupLayoutEntry { + binding: 1, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: false }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + wgpu::BindGroupLayoutEntry { + binding: 2, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: false }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + ], + }); + + // Create bind group layout for copy operations + let copy_bind_group_layout = + device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("Copy Bind Group Layout"), + entries: &[ + wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::VERTEX | wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + }, + wgpu::BindGroupLayoutEntry { + binding: 1, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: false }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + ], + }); + let strip_shader = device.create_shader_module(wgpu::ShaderModuleDescriptor { label: Some("Strip Shader"), source: wgpu::ShaderSource::Wgsl(vello_sparse_shaders::wgsl::RENDER_STRIPS.into()), @@ -424,6 +552,16 @@ source: wgpu::ShaderSource::Wgsl(vello_sparse_shaders::wgsl::CLEAR_SLOTS.into()), }); + let blend_shader = device.create_shader_module(wgpu::ShaderModuleDescriptor { + 
label: Some("Blend Wide Tile Shader"), + source: wgpu::ShaderSource::Wgsl(vello_sparse_shaders::wgsl::BLEND_WIDE_TILE.into()), + }); + + let copy_shader = device.create_shader_module(wgpu::ShaderModuleDescriptor { + label: Some("Copy Slot Shader"), + source: wgpu::ShaderSource::Wgsl(vello_sparse_shaders::wgsl::COPY_SLOT.into()), + }); + let strip_pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { label: Some("Strip Pipeline Layout"), @@ -442,6 +580,19 @@ push_constant_ranges: &[], }); + let blend_pipeline_layout = + device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: Some("Blend Pipeline Layout"), + bind_group_layouts: &[&blend_bind_group_layout], + push_constant_ranges: &[], + }); + + let copy_pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: Some("Copy Pipeline Layout"), + bind_group_layouts: &[©_bind_group_layout], + push_constant_ranges: &[], + }); + let strip_pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor { label: Some("Strip Pipeline"), layout: Some(&strip_pipeline_layout), @@ -513,6 +664,72 @@ cache: None, }); + let blend_pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor { + label: Some("Blend Wide Tile Pipeline"), + layout: Some(&blend_pipeline_layout), + vertex: wgpu::VertexState { + module: &blend_shader, + entry_point: Some("vs_main"), + buffers: &[wgpu::VertexBufferLayout { + array_stride: size_of::<GpuBlendCommand>() as u64, + step_mode: wgpu::VertexStepMode::Instance, + attributes: &GpuBlendCommand::vertex_attributes(), + }], + compilation_options: PipelineCompilationOptions::default(), + }, + fragment: Some(wgpu::FragmentState { + module: &blend_shader, + entry_point: Some("fs_main"), + targets: &[Some(ColorTargetState { + format: render_target_config.format, + blend: Some(BlendState::REPLACE), + write_mask: ColorWrites::ALL, + })], + compilation_options: PipelineCompilationOptions::default(), + }), + 
primitive: wgpu::PrimitiveState { + topology: wgpu::PrimitiveTopology::TriangleStrip, + ..Default::default() + }, + depth_stencil: None, + multisample: wgpu::MultisampleState::default(), + multiview: None, + cache: None, + }); + + let copy_pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor { + label: Some("Copy Slot Pipeline"), + layout: Some(©_pipeline_layout), + vertex: wgpu::VertexState { + module: ©_shader, + entry_point: Some("vs_main"), + buffers: &[wgpu::VertexBufferLayout { + array_stride: size_of::<GpuCopyCommand>() as u64, + step_mode: wgpu::VertexStepMode::Instance, + attributes: &GpuCopyCommand::vertex_attributes(), + }], + compilation_options: PipelineCompilationOptions::default(), + }, + fragment: Some(wgpu::FragmentState { + module: ©_shader, + entry_point: Some("fs_main"), + targets: &[Some(ColorTargetState { + format: render_target_config.format, + blend: Some(BlendState::REPLACE), + write_mask: ColorWrites::ALL, + })], + compilation_options: PipelineCompilationOptions::default(), + }), + primitive: wgpu::PrimitiveState { + topology: wgpu::PrimitiveTopology::TriangleStrip, + ..Default::default() + }, + depth_stencil: None, + multisample: wgpu::MultisampleState::default(), + multiview: None, + cache: None, + }); + let slot_texture_views: [TextureView; 2] = core::array::from_fn(|_| { device .create_texture(&wgpu::TextureDescriptor { @@ -619,6 +836,103 @@ &slot_texture_views, ); + // Create blend texture + let blend_texture = device.create_texture(&wgpu::TextureDescriptor { + label: Some("Blend Texture"), + size: wgpu::Extent3d { + width: u32::from(WideTile::WIDTH), + height: u32::from(Tile::HEIGHT) * slot_count as u32, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: render_target_config.format, + usage: wgpu::TextureUsages::TEXTURE_BINDING + | wgpu::TextureUsages::RENDER_ATTACHMENT + | wgpu::TextureUsages::COPY_SRC, + view_formats: &[], + }); + let 
blend_texture_view = blend_texture.create_view(&wgpu::TextureViewDescriptor::default()); + + // Create blend config buffer + let blend_config_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { + label: Some("Blend Config"), + contents: bytemuck::bytes_of(&BlendConfig { + wide_tile_width: u32::from(WideTile::WIDTH), + wide_tile_height: u32::from(Tile::HEIGHT), + slot_texture_height: u32::from(Tile::HEIGHT) * slot_count as u32, + final_target_height: render_target_config.height, + blend_texture_height: u32::from(Tile::HEIGHT) * slot_count as u32, + _padding: [0; 3], + }), + usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST, + }); + + // Create copy config buffers for slots and final target + let copy_slot_config_buffer = + device.create_buffer_init(&wgpu::util::BufferInitDescriptor { + label: Some("Copy Slot Config"), + contents: bytemuck::bytes_of(&CopyConfig { + wide_tile_width: u32::from(WideTile::WIDTH), + wide_tile_height: u32::from(Tile::HEIGHT), + slot_texture_height: u32::from(Tile::HEIGHT) * slot_count as u32, + target_texture_width: u32::from(WideTile::WIDTH), + target_texture_height: u32::from(Tile::HEIGHT) * slot_count as u32, + _padding: [0; 3], + }), + usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST, + }); + + let copy_target_config_buffer = + device.create_buffer_init(&wgpu::util::BufferInitDescriptor { + label: Some("Copy Target Config"), + contents: bytemuck::bytes_of(&CopyConfig { + wide_tile_width: u32::from(WideTile::WIDTH), + wide_tile_height: u32::from(Tile::HEIGHT), + slot_texture_height: u32::from(Tile::HEIGHT) * slot_count as u32, + target_texture_width: render_target_config.width, + target_texture_height: render_target_config.height, + _padding: [0; 3], + }), + usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST, + }); + + // Create copy bind groups + let copy_slot_bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { + label: Some("Copy Slot Bind Group"), + 
layout: ©_bind_group_layout, + entries: &[ + wgpu::BindGroupEntry { + binding: 0, + resource: copy_slot_config_buffer.as_entire_binding(), + }, + wgpu::BindGroupEntry { + binding: 1, + resource: wgpu::BindingResource::TextureView(&blend_texture_view), + }, + ], + }); + + let copy_target_bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { + label: Some("Copy Target Bind Group"), + layout: ©_bind_group_layout, + entries: &[ + wgpu::BindGroupEntry { + binding: 0, + resource: copy_target_config_buffer.as_entire_binding(), + }, + wgpu::BindGroupEntry { + binding: 1, + resource: wgpu::BindingResource::TextureView(&blend_texture_view), + }, + ], + }); + + // Create command buffers + let blend_commands_buffer = Self::make_commands_buffer(device, 0); + let copy_commands_buffer = Self::make_commands_buffer(device, 0); + let resources = GpuResources { strips_buffer: Self::make_strips_buffer(device, 0), clear_slot_indices_buffer, @@ -632,12 +946,26 @@ encoded_paints_texture, encoded_paints_bind_group, view_config_buffer, + blend_texture, + blend_texture_view, + blend_config_buffer, + copy_slot_config_buffer, + copy_target_config_buffer, + copy_slot_bind_group, + copy_target_bind_group, + blend_commands_buffer, + copy_commands_buffer, }; Self { strip_pipeline, strip_bind_group_layout, encoded_paints_bind_group_layout, + clear_pipeline, + blend_pipeline, + blend_bind_group_layout, + copy_pipeline, + copy_bind_group_layout, resources, alpha_data, encoded_paints_data, @@ -645,8 +973,6 @@ width: render_target_config.width, height: render_target_config.height, }, - - clear_pipeline, } } @@ -668,6 +994,15 @@ }) } + fn make_commands_buffer(device: &Device, required_size: u64) -> Buffer { + device.create_buffer(&wgpu::BufferDescriptor { + label: Some("Commands Buffer"), + size: required_size, + usage: wgpu::BufferUsages::VERTEX | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }) + } + fn make_config_buffer( device: &Device, render_size: &RenderSize, @@ 
-1129,6 +1464,73 @@ render_pass.draw(0..4, 0..u32::try_from(slot_indices.len()).unwrap()); } } + + fn upload_blend_commands(&mut self, commands: &[crate::render::common::GpuBlendCommand]) { + let required_size = mem::size_of_val(commands) as u64; + self.programs.resources.blend_commands_buffer = + Programs::make_commands_buffer(self.device, required_size); + + let mut buffer = self + .queue + .write_buffer_with( + &self.programs.resources.blend_commands_buffer, + 0, + required_size.try_into().unwrap(), + ) + .expect("Capacity handled in creation"); + buffer.copy_from_slice(bytemuck::cast_slice(commands)); + } + + fn upload_copy_commands(&mut self, commands: &[crate::render::common::GpuCopyCommand]) { + let required_size = mem::size_of_val(commands) as u64; + self.programs.resources.copy_commands_buffer = + Programs::make_commands_buffer(self.device, required_size); + + let mut buffer = self + .queue + .write_buffer_with( + &self.programs.resources.copy_commands_buffer, + 0, + required_size.try_into().unwrap(), + ) + .expect("Capacity handled in creation"); + buffer.copy_from_slice(bytemuck::cast_slice(commands)); + } + + fn do_copy_render_pass( + &mut self, + commands: &[crate::render::common::GpuCopyCommand], + target_index: usize, + ) { + let (bind_group, target_view) = if target_index == 2 { + (&self.programs.resources.copy_target_bind_group, self.view) + } else { + ( + &self.programs.resources.copy_slot_bind_group, + &self.programs.resources.slot_texture_views[target_index], + ) + }; + + let mut render_pass = self.encoder.begin_render_pass(&RenderPassDescriptor { + label: Some("Copy Pass"), + color_attachments: &[Some(RenderPassColorAttachment { + view: target_view, + resolve_target: None, + ops: wgpu::Operations { + load: wgpu::LoadOp::Load, + store: wgpu::StoreOp::Store, + }, + })], + depth_stencil_attachment: None, + occlusion_query_set: None, + timestamp_writes: None, + }); + + render_pass.set_pipeline(&self.programs.copy_pipeline); + 
render_pass.set_bind_group(0, bind_group, &[]); + render_pass.set_vertex_buffer(0, self.programs.resources.copy_commands_buffer.slice(..)); + render_pass.draw(0..4, 0..commands.len() as u32); + } } impl RendererBackend for RendererContext<'_> { @@ -1151,6 +1553,169 @@ self.do_strip_render_pass(strips, target_index, wgpu_load_op); } + + fn blend_pass(&mut self, commands: &[BlendCommand]) { + use crate::render::common::{GpuBlendCommand, GpuCopyCommand}; + + // Process each blend command individually with its copy + for cmd in commands { + // Convert single blend command to GPU command + let src_xy = ((cmd.src_slot as u32 * Tile::HEIGHT as u32) << 16) | 0u32; // x=0 for slots + + let (dst_xy, copy_target) = match &cmd.dst_location { + Location::XY(x, y) => { + // Destination is final target + let xy = ((*y as u32) << 16) | (*x as u32); + (xy, None) + } + Location::Slot(slot) => { + // Destination is a slot + let xy = ((*slot as u32 * Tile::HEIGHT as u32) << 16) | 0u32; + (xy, Some(cmd.dst_texture as usize)) + } + }; + + // Encode payload: opacity (0-7), compose (8-11), mix (12-15), + // source texture (16), dest texture (17-18), blend slot (19-26) + let opacity = cmd.opacity as u32; + let compose = encode_compose_mode(cmd.mode.compose) << 8; + let mix = encode_mix_mode(cmd.mode.mix) << 12; + let src_texture = (cmd.src_texture as u32) << 16; + let dst_texture = (cmd.dst_texture as u32) << 17; + let blend_slot = (cmd.blend_slot as u32) << 19; + + let payload = opacity | compose | mix | src_texture | dst_texture | blend_slot; + + let blend_command = GpuBlendCommand { + xy_src: src_xy, + xy_dst: dst_xy, + payload, + }; + + println!("Processing blend command: {:?}", blend_command); + + // Upload this single blend command + self.upload_blend_commands(&[blend_command]); + + // Create blend bind group + let blend_bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor { + label: Some("Dynamic Blend Bind Group"), + layout: &self.programs.blend_bind_group_layout, 
+ entries: &[ + wgpu::BindGroupEntry { + binding: 0, + resource: self + .programs + .resources + .blend_config_buffer + .as_entire_binding(), + }, + wgpu::BindGroupEntry { + binding: 1, + resource: wgpu::BindingResource::TextureView( + &self.programs.resources.slot_texture_views[0], + ), + }, + wgpu::BindGroupEntry { + binding: 2, + resource: wgpu::BindingResource::TextureView( + &self.programs.resources.slot_texture_views[1], + ), + }, + ], + }); + + // Execute blend pass for this single command + { + let mut render_pass = self.encoder.begin_render_pass(&RenderPassDescriptor { + label: Some("Blend Pass"), + color_attachments: &[Some(RenderPassColorAttachment { + view: &self.programs.resources.blend_texture_view, + resolve_target: None, + ops: wgpu::Operations { + load: wgpu::LoadOp::Load, + store: wgpu::StoreOp::Store, + }, + })], + depth_stencil_attachment: None, + occlusion_query_set: None, + timestamp_writes: None, + }); + + render_pass.set_pipeline(&self.programs.blend_pipeline); + render_pass.set_bind_group(0, &blend_bind_group, &[]); + render_pass + .set_vertex_buffer(0, self.programs.resources.blend_commands_buffer.slice(..)); + render_pass.draw(0..4, 0..1); // Only 1 instance for this single command + } + + // Execute copy pass for this single command + let copy_command = GpuCopyCommand { + xy_target: dst_xy, + slot_ix: cmd.blend_slot as u32, + }; + + println!("Executing copy: {:?} to {:?}", copy_command, copy_target); + + self.upload_copy_commands(&[copy_command]); + + match copy_target { + None => { + // Copy to final target + self.do_copy_render_pass(&[copy_command], 2); + } + Some(slot_texture_ix) => { + // Copy to slot texture + self.do_copy_render_pass(&[copy_command], slot_texture_ix); + } + } + } + } +} + +/// Encode compose mode to u32 for shader +fn encode_compose_mode(compose: vello_common::peniko::Compose) -> u32 { + use vello_common::peniko::Compose; + match compose { + Compose::Clear => 0, + Compose::Copy => 1, + Compose::Dest => 2, + 
Compose::SrcOver => 3, + Compose::DestOver => 4, + Compose::SrcIn => 5, + Compose::DestIn => 6, + Compose::SrcOut => 7, + Compose::DestOut => 8, + Compose::SrcAtop => 9, + Compose::DestAtop => 10, + Compose::Xor => 11, + Compose::Plus => 12, + Compose::PlusLighter => 13, + } +} + +/// Encode mix mode to u32 for shader +fn encode_mix_mode(mix: vello_common::peniko::Mix) -> u32 { + use vello_common::peniko::Mix; + match mix { + Mix::Normal => 0, + Mix::Multiply => 1, + Mix::Screen => 2, + Mix::Overlay => 3, + Mix::Darken => 4, + Mix::Lighten => 5, + Mix::ColorDodge => 6, + Mix::ColorBurn => 7, + Mix::HardLight => 8, + Mix::SoftLight => 9, + Mix::Difference => 10, + Mix::Exclusion => 11, + Mix::Hue => 12, + Mix::Saturation => 13, + Mix::Color => 14, + Mix::Luminosity => 15, + Mix::Clip => 16, + } } /// Trait for types that can write image data directly to the atlas texture.
diff --git a/sparse_strips/vello_hybrid/src/scene.rs b/sparse_strips/vello_hybrid/src/scene.rs index 0be42a0..70849a9 100644 --- a/sparse_strips/vello_hybrid/src/scene.rs +++ b/sparse_strips/vello_hybrid/src/scene.rs
@@ -201,17 +201,13 @@ None }; - // Blend mode, opacity, and mask are not supported yet. - if blend_mode.is_some() { - unimplemented!() - } if mask.is_some() { unimplemented!() } self.wide.push_layer( clip, - BlendMode::new(Mix::Normal, Compose::SrcOver), + blend_mode.unwrap_or(BlendMode::new(Mix::Normal, Compose::SrcOver)), None, opacity.unwrap_or(1.), 0,
diff --git a/sparse_strips/vello_hybrid/src/schedule.rs b/sparse_strips/vello_hybrid/src/schedule.rs index 3d5665b..61d361c 100644 --- a/sparse_strips/vello_hybrid/src/schedule.rs +++ b/sparse_strips/vello_hybrid/src/schedule.rs
@@ -196,6 +196,9 @@
 
     /// Execute a render pass for strips.
     fn render_strips(&mut self, strips: &[GpuStrip], target_index: usize, load_op: LoadOp);
+
+    /// Execute a blend pass for `commands`; called once per round after its strips are rendered.
+    fn blend_pass(&mut self, commands: &[BlendCommand]);
 }
 
 /// Backend agnostic enum that specifies the operation to perform to the output attachment at the
@@ -208,6 +211,34 @@
 }
 
 #[derive(Debug)]
+/// A single blend operation: composite one slot onto a destination through the blend texture.
+pub(crate) struct BlendCommand {
+    /// Slot texture holding the source (foreground): 0 or 1.
+    pub src_texture: u8,
+    /// Slot index of the source within `src_texture`.
+    pub src_slot: u16,
+    /// Destination texture: 0 or 1 for the slot textures, 2 for the final target.
+    pub dst_texture: u8,
+    /// Where the destination lives: a slot index, or `(x, y)` in the final target.
+    pub dst_location: Location,
+    /// Blend mode (mix and compose) to apply.
+    pub mode: BlendMode,
+    /// Layer opacity, quantized to `0..=255`.
+    pub opacity: u8,
+    /// Index of the slot in the blend texture that receives the result.
+    pub blend_slot: usize,
+}
+
+/// Destination position of a [`BlendCommand`].
+#[derive(Debug)]
+pub enum Location {
+    /// Slot index, used when the destination is a slot texture.
+    Slot(u16),
+    /// `(x, y)` coordinates, used when the destination is the final target.
+    XY(u16, u16),
+}
+
+#[derive(Debug)]
 pub(crate) struct Scheduler {
     /// Index of the current round
     round: usize,
@@ -221,6 +252,8 @@
     rounds_queue: VecDeque<Round>,
     /// State for a single wide tile.
     tile_state: TileState,
+    /// Next free slot in the blend texture; reset to 0 once the rounds queue has drained.
+    next_blend_slot: usize,
 }
 
 /// A "round" is a coarse scheduling quantum.
@@ -233,6 +266,8 @@
     draws: [Draw; 3],
     /// Slots that will be freed after drawing into the two slot textures [0, 1].
     free: [Vec<usize>; 2],
+    /// Blend commands handed to the backend after this round's strips are rendered.
+    blend_commands: Vec<BlendCommand>,
 }
 
 /// State for a single wide tile.
@@ -264,6 +299,7 @@
             clear,
             rounds_queue: Default::default(),
             tile_state: Default::default(),
+            next_blend_slot: 0,
         }
     }
 
@@ -342,9 +378,13 @@
                 };
                 renderer.render_strips(&draw.0, i, load);
             }
+            renderer.blend_pass(&round.blend_commands);
             for i in 0..2 {
                 self.free[i].extend(&round.free[i]);
             }
+            if self.rounds_queue.is_empty() {
+                self.next_blend_slot = 0;
+            }
             self.round += 1;
         }
 
@@ -539,44 +579,35 @@
                     state.stack.last_mut().unwrap().opacity = *opacity;
                 }
                 Cmd::Blend(mode) => {
-                    // This blend mode is implicitly supported. Currently no other blend mode is
-                    // supported in `vello_hybrid`.
-                    assert!(
-                        matches!(
-                            mode,
-                            BlendMode {
-                                mix: Mix::Normal,
-                                compose: Compose::SrcOver
-                            }
-                        ),
-                        "Changing blend mode is unsupported"
-                    );
-
                     let tos = state.stack.last().unwrap();
                     let nos = &state.stack[state.stack.len() - 2];
-                    let next_round = clip_depth % 2 == 0 && clip_depth > 2;
-                    let round = nos.round.max(tos.round + usize::from(next_round));
-                    let draw = self.draw_mut(round, clip_depth - 1);
-                    let (x, y) = if clip_depth <= 2 {
-                        (wide_tile_x, wide_tile_y)
+                    let opacity_u8 = (tos.opacity * 255.0) as u8;
+                    let blend_slot = self.next_blend_slot;
+                    self.next_blend_slot += 1;
+
+                    let (dst_texture, dst_location) = if clip_depth <= 2 {
+                        // Blending into the final target.
+                        (2, Location::XY(wide_tile_x, wide_tile_y))
                     } else {
-                        (0, nos.slot_ix as u16 * Tile::HEIGHT)
+                        // Blending between slot textures.
+                        ((clip_depth % 2) as u8, Location::Slot(nos.slot_ix as u16))
                     };
-                    // Opacity packed into the first 8 bits.
-                    let opacity_u8 = (tos.opacity * 255.0) as u32;
-                    let paint = (COLOR_SOURCE_SLOT << 31) | opacity_u8;
-
-                    draw.0.push(GpuStrip {
-                        x,
-                        y,
-                        width: WideTile::WIDTH,
-                        dense_width: 0,
-                        col_idx: 0,
-                        payload: tos.slot_ix as u32,
-                        paint,
-                    });
+                    let blend_command = BlendCommand {
+                        // The source lives in the slot texture of opposite parity to `clip_depth`.
+                        src_texture: (1 - clip_depth % 2) as u8,
+                        src_slot: tos.slot_ix as u16,
+                        dst_texture,
+                        dst_location,
+                        mode: *mode,
+                        opacity: opacity_u8,
+                        blend_slot,
+                    };
+                    // NOTE(review): the command is always queued on the front round, whereas the
+                    // strip-based path it replaces picked the round from `nos.round`/`tos.round`.
+                    // Confirm the front round exists here and that this ordering is intended.
+                    self.rounds_queue[0].blend_commands.push(blend_command);
                 }
                 _ => unimplemented!(),
             }
diff --git a/sparse_strips/vello_sparse_shaders/shaders/blend_wide_tile.wgsl b/sparse_strips/vello_sparse_shaders/shaders/blend_wide_tile.wgsl new file mode 100644 index 0000000..b347f12 --- /dev/null +++ b/sparse_strips/vello_sparse_shaders/shaders/blend_wide_tile.wgsl
@@ -0,0 +1,466 @@
+// Copyright 2025 the Vello Authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+struct Config {
+    // Width of a wide tile (matching `WideTile::WIDTH`).
+    wide_tile_width: u32,
+    // Height of a wide tile (matching `WideTile::HEIGHT`).
+    wide_tile_height: u32,
+    // Height of the slot texture.
+    slot_texture_height: u32,
+    // Height of the final target texture.
+    final_target_height: u32,
+    // Height of the blend texture.
+    blend_texture_height: u32,
+}
+
+struct BlendCommand {
+    // [x, y] packed as u16's
+    // x, y — coordinates of the top left of the source wide tile
+    @location(0) xy_src: u32,
+    // [x, y] packed as u16's
+    // x, y — coordinates of the top left of the destination wide tile
+    @location(1) xy_dst: u32,
+    // Bits 0-7: opacity
+    // Bits 8-11: compose
+    // Bits 12-15: mix
+    // Bits 16: source texture (TODO: Consider passing slot_ix alone)
+    //   0 = slots of ix=0
+    //   1 = slots of ix=1
+    // Bits 17-18: dest texture
+    //   0 = slots of ix=0
+    //   1 = slots of ix=1
+    //   2 = final target
+    // Bits 19-26: blend slot index
+    @location(2) payload: u32,
+}
+
+struct VertexOutput {
+    // Normalized device coordinates (NDC) for the current vertex
+    @builtin(position) position: vec4<f32>,
+    // Texture coordinates for the current fragment
+    @location(0) src_tex_coord: vec2<f32>,
+    @location(1) dst_tex_coord: vec2<f32>,
+    // See `BlendCommand` documentation. Integer inter-stage values must be flat-interpolated.
+    @location(2) @interpolate(flat) payload: u32,
+}
+
+@group(0) @binding(0)
+var<uniform> config: Config;
+
+@group(0) @binding(1)
+var slot_texture_0: texture_2d<f32>;
+
+@group(0) @binding(2)
+var slot_texture_1: texture_2d<f32>;
+
+@group(0) @binding(3)
+var final_target: texture_2d<f32>;
+
+@vertex
+fn vs_main(
+    @builtin(vertex_index) in_vertex_index: u32,
+    command: BlendCommand,
+) -> VertexOutput {
+    var out: VertexOutput;
+    out.payload = command.payload;
+
+    // Map vertex_index (0-3) to quad corners:
+    // 0 → (0,0), 1 → (1,0), 2 → (0,1), 3 → (1,1)
+    let x = f32(in_vertex_index & 1u);
+    let y = f32(in_vertex_index >> 1u);
+
+    // Calculate `position` for output.
+    {
+        // Extract bits 19-26 for blend slot index
+        let blend_slot_ix = (command.payload >> 19u) & 0xffu;
+
+        // Calculate the y-position based on the slot index
+        let slot_y_offset = f32(blend_slot_ix * config.wide_tile_height);
+
+        // Scale to match slot dimensions
+        let pix_x = x * f32(config.wide_tile_width);
+        let pix_y = slot_y_offset + y * f32(config.wide_tile_height);
+
+        // Convert to NDC
+        let ndc_x = pix_x * 2.0 / f32(config.wide_tile_width) - 1.0;
+        let ndc_y = 1.0 - pix_y * 2.0 / f32(config.blend_texture_height);
+
+        out.position = vec4(ndc_x, ndc_y, 0.0, 1.0);
+    }
+
+    // Calculate `src_tex_coord` for the source texture.
+    {
+        let src_x0 = f32(command.xy_src & 0xffffu);
+        let src_y0 = f32(command.xy_src >> 16u);
+
+        let src_x = src_x0 + x * f32(config.wide_tile_width);
+        let src_y = src_y0 + y * f32(config.wide_tile_height);
+
+        out.src_tex_coord = vec2f(src_x, src_y);
+    }
+
+    // Calculate `dst_tex_coord` for the destination texture.
+    {
+        let dst_x0 = f32(command.xy_dst & 0xffffu);
+        let dst_y0 = f32(command.xy_dst >> 16u);
+
+        // The quad spans exactly one wide tile whichever texture holds the destination, so
+        // the extent is always the wide tile size, never the height of the whole final
+        // target.
+
+        let dst_x = dst_x0 + x * f32(config.wide_tile_width);
+        let dst_y = dst_y0 + y * f32(config.wide_tile_height);
+
+        out.dst_tex_coord = vec2f(dst_x, dst_y);
+    }
+
+    return out;
+}
+
+@fragment
+fn fs_main(in: VertexOutput) -> @location(0) vec4<f32> {
+    var bg_color: vec4<f32>;
+    var fg_color: vec4<f32>;
+
+    // Calculate `fg_color` of the foreground texture.
+    {
+        let src_texture_ix = (in.payload >> 16u) & 1u;
+        let src = vec2u(u32(floor(in.src_tex_coord.x)), u32(floor(in.src_tex_coord.y)));
+
+        if src_texture_ix == 0u {
+            fg_color = textureLoad(slot_texture_0, src, 0);
+        } else {
+            fg_color = textureLoad(slot_texture_1, src, 0);
+        }
+    }
+
+    // Calculate `bg_color` of the background texture.
+    {
+        let dst_texture_ix = (in.payload >> 17u) & 3u;
+        let dst = vec2u(u32(floor(in.dst_tex_coord.x)), u32(floor(in.dst_tex_coord.y)));
+
+        if dst_texture_ix == 0u {
+            bg_color = textureLoad(slot_texture_0, dst, 0);
+        } else if dst_texture_ix == 1u {
+            bg_color = textureLoad(slot_texture_1, dst, 0);
+        } else {
+            // Encode a special marker in the red channel to indicate "untouched" background
+            // Use a small non-zero value that won't affect visual output but can be detected.
+            // This is so janky it isn't funny. This "special value" is used in the copy shader.
+            bg_color = vec4(1.0 / 255.0, 0.0, 0.0, 0.0);
+        }
+    }
+
+    // Layer opacity scales the premultiplied source before blending, not the blended result.
+    let opacity = f32(in.payload & 0xFFu) / 255.0;
+    return blend_mix_compose(bg_color, fg_color * opacity, in.payload >> 8u);
+}
+
+// Color mixing modes
+
+const MIX_NORMAL = 0u;
+const MIX_MULTIPLY = 1u;
+const MIX_SCREEN = 2u;
+const MIX_OVERLAY = 3u;
+const MIX_DARKEN = 4u;
+const MIX_LIGHTEN = 5u;
+const MIX_COLOR_DODGE = 6u;
+const MIX_COLOR_BURN = 7u;
+const MIX_HARD_LIGHT = 8u;
+const MIX_SOFT_LIGHT = 9u;
+const MIX_DIFFERENCE = 10u;
+const MIX_EXCLUSION = 11u;
+const MIX_HUE = 12u;
+const MIX_SATURATION = 13u;
+const MIX_COLOR = 14u;
+const MIX_LUMINOSITY = 15u;
+const MIX_CLIP = 16u;
+
+fn screen(cb: vec3<f32>, cs: vec3<f32>) -> vec3<f32> {
+    return cb + cs - (cb * cs);
+}
+
+fn color_dodge(cb: f32, cs: f32) -> f32 {
+    if cb == 0.0 {
+        return 0.0;
+    } else if cs == 1.0 {
+        return 1.0;
+    } else {
+        return min(1.0, cb / (1.0 - cs));
+    }
+}
+
+fn color_burn(cb: f32, cs: f32) -> f32 {
+    if cb == 1.0 {
+        return 1.0;
+    } else if cs == 0.0 {
+        return 0.0;
+    } else {
+        return 1.0 - min(1.0, (1.0 - cb) / cs);
+    }
+}
+
+fn hard_light(cb: vec3<f32>, cs: vec3<f32>) -> vec3<f32> {
+    return select(
+        screen(cb, 2.0 * cs - 1.0),
+        cb * 2.0 * cs,
+        cs <= vec3(0.5)
+    );
+}
+
+fn soft_light(cb: vec3<f32>, cs: vec3<f32>) -> vec3<f32> {
+    let d = select(
+        sqrt(cb),
+        ((16.0 * cb - 12.0) * cb + 4.0) * cb,
+        cb <= vec3(0.25)
+    );
+    return select(
+        cb + (2.0 * cs - 1.0) * (d - cb),
+        cb - (1.0 - 2.0 * cs) * cb * (1.0 - cb),
+        cs <= vec3(0.5)
+    );
+}
+
+fn sat(c: vec3<f32>) -> f32 {
+    return max(c.x, max(c.y, c.z)) - min(c.x, min(c.y, c.z));
+}
+
+fn lum(c: vec3<f32>) -> f32 {
+    let f = vec3(0.3, 0.59, 0.11);
+    return dot(c, f);
+}
+
+fn clip_color(c_in: vec3<f32>) -> vec3<f32> {
+    var c = c_in;
+    let l = lum(c);
+    let n = min(c.x, min(c.y, c.z));
+    let x = max(c.x, max(c.y, c.z));
+    if n < 0.0 {
+        c = l + (((c - l) * l) / (l - n));
+    }
+    if x > 1.0 {
+        c = l + (((c - l) * (1.0 - l)) / (x - l));
+    }
+    return c;
+}
+
+fn set_lum(c: vec3<f32>, l: f32) -> vec3<f32> {
+    return clip_color(c + (l - lum(c)));
+}
+
+fn set_sat_inner(
+    cmin: ptr<function, f32>,
+    cmid: ptr<function, f32>,
+    cmax: ptr<function, f32>,
+    s: f32
+) {
+    if *cmax > *cmin {
+        *cmid = ((*cmid - *cmin) * s) / (*cmax - *cmin);
+        *cmax = s;
+    } else {
+        *cmid = 0.0;
+        *cmax = 0.0;
+    }
+    *cmin = 0.0;
+}
+
+fn set_sat(c: vec3<f32>, s: f32) -> vec3<f32> {
+    var r = c.r;
+    var g = c.g;
+    var b = c.b;
+    if r <= g {
+        if g <= b {
+            set_sat_inner(&r, &g, &b, s);
+        } else {
+            if r <= b {
+                set_sat_inner(&r, &b, &g, s);
+            } else {
+                set_sat_inner(&b, &r, &g, s);
+            }
+        }
+    } else {
+        if r <= b {
+            set_sat_inner(&g, &r, &b, s);
+        } else {
+            if g <= b {
+                set_sat_inner(&g, &b, &r, s);
+            } else {
+                set_sat_inner(&b, &g, &r, s);
+            }
+        }
+    }
+    return vec3(r, g, b);
+}
+
+// Blends two RGB colors together. The colors are assumed to be in sRGB
+// color space, and this function does not take alpha into account.
+fn blend_mix(cb: vec3<f32>, cs: vec3<f32>, mode: u32) -> vec3<f32> {
+    var b = vec3(0.0);
+    switch mode {
+        case MIX_MULTIPLY: {
+            b = cb * cs;
+        }
+        case MIX_SCREEN: {
+            b = screen(cb, cs);
+        }
+        case MIX_OVERLAY: {
+            b = hard_light(cs, cb);
+        }
+        case MIX_DARKEN: {
+            b = min(cb, cs);
+        }
+        case MIX_LIGHTEN: {
+            b = max(cb, cs);
+        }
+        case MIX_COLOR_DODGE: {
+            b = vec3(color_dodge(cb.x, cs.x), color_dodge(cb.y, cs.y), color_dodge(cb.z, cs.z));
+        }
+        case MIX_COLOR_BURN: {
+            b = vec3(color_burn(cb.x, cs.x), color_burn(cb.y, cs.y), color_burn(cb.z, cs.z));
+        }
+        case MIX_HARD_LIGHT: {
+            b = hard_light(cb, cs);
+        }
+        case MIX_SOFT_LIGHT: {
+            b = soft_light(cb, cs);
+        }
+        case MIX_DIFFERENCE: {
+            b = abs(cb - cs);
+        }
+        case MIX_EXCLUSION: {
+            b = cb + cs - 2.0 * cb * cs;
+        }
+        case MIX_HUE: {
+            b = set_lum(set_sat(cs, sat(cb)), lum(cb));
+        }
+        case MIX_SATURATION: {
+            b = set_lum(set_sat(cb, sat(cs)), lum(cb));
+        }
+        case MIX_COLOR: {
+            b = set_lum(cs, lum(cb));
+        }
+        case MIX_LUMINOSITY: {
+            b = set_lum(cb, lum(cs));
+        }
+        default: {
+            b = cs;
+        }
+    }
+    return b;
+}
+
+// Composition modes
+
+const COMPOSE_CLEAR = 0u;
+const COMPOSE_COPY = 1u;
+const COMPOSE_DEST = 2u;
+const COMPOSE_SRC_OVER = 3u;
+const COMPOSE_DEST_OVER = 4u;
+const COMPOSE_SRC_IN = 5u;
+const COMPOSE_DEST_IN = 6u;
+const COMPOSE_SRC_OUT = 7u;
+const COMPOSE_DEST_OUT = 8u;
+const COMPOSE_SRC_ATOP = 9u;
+const COMPOSE_DEST_ATOP = 10u;
+const COMPOSE_XOR = 11u;
+const COMPOSE_PLUS = 12u;
+const COMPOSE_PLUS_LIGHTER = 13u;
+
+// Apply general compositing operation.
+// Inputs are separated colors and alpha, output is premultiplied.
+fn blend_compose(
+    cb: vec3<f32>,
+    cs: vec3<f32>,
+    ab: f32,
+    as_: f32,
+    mode: u32
+) -> vec4<f32> {
+    var fa = 0.0;
+    var fb = 0.0;
+    switch mode {
+        case COMPOSE_COPY: {
+            fa = 1.0;
+            fb = 0.0;
+        }
+        case COMPOSE_DEST: {
+            fa = 0.0;
+            fb = 1.0;
+        }
+        case COMPOSE_SRC_OVER: {
+            fa = 1.0;
+            fb = 1.0 - as_;
+        }
+        case COMPOSE_DEST_OVER: {
+            fa = 1.0 - ab;
+            fb = 1.0;
+        }
+        case COMPOSE_SRC_IN: {
+            fa = ab;
+            fb = 0.0;
+        }
+        case COMPOSE_DEST_IN: {
+            fa = 0.0;
+            fb = as_;
+        }
+        case COMPOSE_SRC_OUT: {
+            fa = 1.0 - ab;
+            fb = 0.0;
+        }
+        case COMPOSE_DEST_OUT: {
+            fa = 0.0;
+            fb = 1.0 - as_;
+        }
+        case COMPOSE_SRC_ATOP: {
+            fa = ab;
+            fb = 1.0 - as_;
+        }
+        case COMPOSE_DEST_ATOP: {
+            fa = 1.0 - ab;
+            fb = as_;
+        }
+        case COMPOSE_XOR: {
+            fa = 1.0 - ab;
+            fb = 1.0 - as_;
+        }
+        case COMPOSE_PLUS: {
+            fa = 1.0;
+            fb = 1.0;
+        }
+        case COMPOSE_PLUS_LIGHTER: {
+            return min(vec4(1.0), vec4(as_ * cs + ab * cb, as_ + ab));
+        }
+        default: {}
+    }
+    let as_fa = as_ * fa;
+    let ab_fb = ab * fb;
+    let co = as_fa * cs + ab_fb * cb;
+    // Modes like COMPOSE_PLUS can generate alpha > 1.0, so clamp.
+    return vec4(co, min(as_fa + ab_fb, 1.0));
+}
+
+// Apply color mixing and composition. Both input and output colors are
+// premultiplied RGB.
+fn blend_mix_compose(backdrop: vec4<f32>, src: vec4<f32>, mode: u32) -> vec4<f32> {
+    let BLEND_DEFAULT = ((MIX_NORMAL << 4u) | COMPOSE_SRC_OVER);
+    let EPSILON = 1e-15;
+    if (mode & 0xffu) == BLEND_DEFAULT {
+        // Both normal+src_over blend and clip case
+        return backdrop * (1.0 - src.a) + src;
+    }
+    // Un-premultiply colors for blending. Max with a small epsilon to avoid NaNs.
+    let inv_src_a = 1.0 / max(src.a, EPSILON);
+    var cs = src.rgb * inv_src_a;
+    let inv_backdrop_a = 1.0 / max(backdrop.a, EPSILON);
+    let cb = backdrop.rgb * inv_backdrop_a;
+    let mix_mode = (mode >> 4u) & 0xfu;
+    let mixed = blend_mix(cb, cs, mix_mode);
+    cs = mix(cs, mixed, backdrop.a);
+    let compose_mode = mode & 0xfu;
+    if compose_mode == COMPOSE_SRC_OVER {
+        let co = mix(backdrop.rgb, cs, src.a);
+        return vec4(co, src.a + backdrop.a * (1.0 - src.a));
+    } else {
+        return blend_compose(cb, cs, backdrop.a, src.a, compose_mode);
+    }
+}
diff --git a/sparse_strips/vello_sparse_shaders/shaders/copy_slot.wgsl b/sparse_strips/vello_sparse_shaders/shaders/copy_slot.wgsl new file mode 100644 index 0000000..1d88e3a --- /dev/null +++ b/sparse_strips/vello_sparse_shaders/shaders/copy_slot.wgsl
@@ -0,0 +1,82 @@
+// Copyright 2025 the Vello Authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// This shader copies a wide tile slot from a slot texture to a target location.
+
+struct Config {
+    // Width of a wide tile (matching `WideTile::WIDTH`).
+    wide_tile_width: u32,
+    // Height of a wide tile (matching `WideTile::HEIGHT`).
+    wide_tile_height: u32,
+    // Height of the slot texture (source).
+    slot_texture_height: u32,
+    // Width of the target texture (destination).
+    target_texture_width: u32,
+    // Height of the target texture (destination).
+    target_texture_height: u32,
+}
+
+struct CopyCommand {
+    // [x, y] packed as u16's
+    // x, y — coordinates of the top left of the target wide tile
+    @location(0) xy_target: u32,
+    // Slot index to identify the pixel position to sample from
+    @location(1) slot_ix: u32,
+}
+
+struct VertexOutput {
+    // Normalized device coordinates (NDC) for the current vertex
+    @builtin(position) position: vec4<f32>,
+    // Slot index passed to the fragment shader
+    @location(0) @interpolate(flat) slot_ix: u32,
+}
+
+@group(0) @binding(0)
+var<uniform> config: Config;
+
+@group(0) @binding(1)
+var slot_texture: texture_2d<f32>;
+
+@vertex
+fn vs_main(
+    @builtin(vertex_index) vertex_index: u32,
+    command: CopyCommand,
+) -> VertexOutput {
+    var out: VertexOutput;
+    out.slot_ix = command.slot_ix;
+
+    // Map vertex_index (0-3) to quad corners:
+    // 0 → (0,0), 1 → (1,0), 2 → (0,1), 3 → (1,1)
+    let x = f32(vertex_index & 1u);
+    let y = f32(vertex_index >> 1u);
+
+    // Unpack target coordinates
+    let target_x0 = command.xy_target & 0xffffu;
+    let target_y0 = command.xy_target >> 16u;
+
+    // Calculate pixel coordinates of the current vertex within the wide tile
+    let pix_x = f32(target_x0) + x * f32(config.wide_tile_width);
+    let pix_y = f32(target_y0) + y * f32(config.wide_tile_height);
+
+    // Convert to NDC for the target texture
+    let ndc_x = pix_x * 2.0 / f32(config.target_texture_width) - 1.0;
+    let ndc_y = 1.0 - pix_y * 2.0 / f32(config.target_texture_height);
+
+    out.position = vec4<f32>(ndc_x, ndc_y, 0.0, 1.0);
+
+    return out;
+}
+
+@fragment
+fn fs_main(in: VertexOutput) -> @location(0) vec4<f32> {
+    // Position inside the wide tile, from the configured tile size rather than fixed bit masks.
+    let slot_x = u32(in.position.x) % config.wide_tile_width;
+    let slot_y = (u32(in.position.y) % config.wide_tile_height) + in.slot_ix * config.wide_tile_height;
+
+    let color = textureLoad(slot_texture, vec2u(slot_x, slot_y), 0);
+
+    if color.a == 0.0 && color.r > 0.0 && color.r <= (1.0 / 255.0) {
+        discard;
+    }
+    return color;
+}
diff --git a/sparse_strips/vello_sparse_tests/tests/renderer.rs b/sparse_strips/vello_sparse_tests/tests/renderer.rs index c71cd49..fac25d7 100644 --- a/sparse_strips/vello_sparse_tests/tests/renderer.rs +++ b/sparse_strips/vello_sparse_tests/tests/renderer.rs
@@ -275,8 +275,8 @@ self.scene.push_clip_layer(path); } - fn push_blend_layer(&mut self, _: BlendMode) { - unimplemented!() + fn push_blend_layer(&mut self, blend_mode: BlendMode) { + self.scene.push_layer(None, Some(blend_mode), None, None); } fn push_opacity_layer(&mut self, opacity: f32) {