scratch pad
diff --git a/sparse_strips/vello_dev_macros/src/test.rs b/sparse_strips/vello_dev_macros/src/test.rs
index 9c57280..4fca01c 100644
--- a/sparse_strips/vello_dev_macros/src/test.rs
+++ b/sparse_strips/vello_dev_macros/src/test.rs
@@ -161,8 +161,7 @@
 
     // These tests currently don't work with `vello_hybrid`.
     skip_hybrid |= {
-        input_fn_name_str.contains("compose")
-            || input_fn_name_str.contains("gradient")
+            input_fn_name_str.contains("gradient")
             || input_fn_name_str.contains("layer_multiple_properties")
             || input_fn_name_str.contains("mask")
             || input_fn_name_str.contains("mix")
diff --git a/sparse_strips/vello_hybrid/src/lib.rs b/sparse_strips/vello_hybrid/src/lib.rs
index 306acc3..eb78ac6 100644
--- a/sparse_strips/vello_hybrid/src/lib.rs
+++ b/sparse_strips/vello_hybrid/src/lib.rs
@@ -29,7 +29,8 @@
 //!
 //! See the individual module documentation for more details on usage and implementation.
 
-#![no_std]
+//  DO NOT COMMIT - for println debugging.
+// #![no_std]
 
 extern crate alloc;
 
diff --git a/sparse_strips/vello_hybrid/src/render/common.rs b/sparse_strips/vello_hybrid/src/render/common.rs
index bb89822..fb38efb 100644
--- a/sparse_strips/vello_hybrid/src/render/common.rs
+++ b/sparse_strips/vello_hybrid/src/render/common.rs
@@ -60,37 +60,40 @@
 const COLOR_SOURCE_BLEND: u32 = 2;
 const PAINT_TYPE_SOLID: u32 = 0;
 const PAINT_TYPE_IMAGE: u32 = 1;
-
 impl fmt::Debug for GpuStrip {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        let color_source = (self.paint >> 30) & 0x3;  // Changed to 2 bits for 3 source types
+        let color_source = (self.paint >> 30) & 0x3;  // Bits 30-31 of `paint`: color source (0 = payload, 1 = slot, 2 = blend)
+        let paint_type = (self.paint >> 28) & 0x3;    // Bits 28-29 of `paint`: paint type (0 = solid, 1 = image)
         
         let mut debug_struct = f.debug_struct("GpuStrip");
         
+        // Add tile information based on y coordinate
+        let tile_index = self.y / 4 as u16;
+        let tile_local_y = self.y % 4 as u16;
+        
         debug_struct
             .field("x", &self.x)
-            .field("y", &self.y)
+            .field("y", &format!("{} (tile={}, local_y={})", self.y, tile_index, tile_local_y))
             .field("width", &self.width)
             .field("dense_width", &self.dense_width)
             .field("col_idx", &self.col_idx);
         
         let paint_info = match color_source {
-            COLOR_SOURCE_PAYLOAD => {
-                let paint_type = (self.paint >> 28) & 0x3;  // Adjusted bit position
-                if paint_type == PAINT_TYPE_SOLID {
-                    format!("Solid(color_source=payload)")
-                } else if paint_type == PAINT_TYPE_IMAGE {
-                    let paint_tex_id = self.paint & 0x0FFFFFFF;  // Adjusted mask
+            0 => { // COLOR_SOURCE_PAYLOAD
+                if paint_type == 0 { // PAINT_TYPE_SOLID
+                    "Solid(color_source=payload)".to_string()
+                } else if paint_type == 1 { // PAINT_TYPE_IMAGE
+                    let paint_tex_id = self.paint & 0x0FFFFFFF;
                     format!("Image(color_source=payload, texture_id={})", paint_tex_id)
                 } else {
                     format!("Unknown(color_source=payload, type={})", paint_type)
                 }
             }
-            COLOR_SOURCE_SLOT => {
+            1 => { // COLOR_SOURCE_SLOT
                 let opacity = self.paint & 0xFF;
                 format!("Slot(color_source=slot, opacity={})", opacity)
             }
-            COLOR_SOURCE_BLEND => {
+            2 => { // COLOR_SOURCE_BLEND
                 let dest_slot = (self.paint >> 16) & 0x3FFF;
                 let mix = (self.paint >> 8) & 0xFF;
                 let compose = self.paint & 0xFF;
@@ -101,17 +104,16 @@
         
         debug_struct.field("paint", &paint_info);
         
-        // Decode payload based on paint configuration
+        // Enhanced payload info
         let payload_info = match color_source {
-            COLOR_SOURCE_PAYLOAD => {
-                let paint_type = (self.paint >> 28) & 0x3;
-                if paint_type == PAINT_TYPE_SOLID {
+            0 => { // COLOR_SOURCE_PAYLOAD
+                if paint_type == 0 { // PAINT_TYPE_SOLID
                     let r = (self.payload >> 0) & 0xFF;
                     let g = (self.payload >> 8) & 0xFF;
                     let b = (self.payload >> 16) & 0xFF;
                     let a = (self.payload >> 24) & 0xFF;
                     format!("Color(r={}, g={}, b={}, a={})", r, g, b, a)
-                } else if paint_type == PAINT_TYPE_IMAGE {
+                } else if paint_type == 1 { // PAINT_TYPE_IMAGE
                     let x = self.payload & 0xFFFF;
                     let y = self.payload >> 16;
                     format!("ImageCoords(x={}, y={})", x, y)
@@ -119,11 +121,10 @@
                     format!("Unknown(raw=0x{:08x})", self.payload)
                 }
             }
-            COLOR_SOURCE_SLOT => {
-                format!("SlotIndex({})", self.payload)
-            }
-            COLOR_SOURCE_BLEND => {
-                format!("SourceSlot({})", self.payload)
+            1 | 2 => { // COLOR_SOURCE_SLOT or COLOR_SOURCE_BLEND
+                let slot = self.payload;
+                let slot_y = u64::from(slot) * 4; // Widen first: `slot as u16 * 4` truncates the u32 payload and can overflow u16 while formatting.
+                format!("SourceSlot({}) [maps to y={}]", slot, slot_y)
             }
             _ => format!("Unknown(raw=0x{:08x})", self.payload)
         };
diff --git a/sparse_strips/vello_hybrid/src/schedule.rs b/sparse_strips/vello_hybrid/src/schedule.rs
index 67ed833..29f33e0 100644
--- a/sparse_strips/vello_hybrid/src/schedule.rs
+++ b/sparse_strips/vello_hybrid/src/schedule.rs
@@ -216,15 +216,21 @@
     /// The total number of slots in each slot texture.
     total_slots: usize,
     /// The slots that are free to use in each slot texture.
-    free: [Vec<usize>; 2],
+    free: [Vec<SlotOccupation>; 2],
     /// Slots that require clearing before subsequent draws for each slot texture.
-    clear: [Vec<u32>; 2],
+    clear: [Vec<SlotOccupation>; 2],
     /// Rounds are enqueued on push clip commands and dequeued on flush.
     rounds_queue: VecDeque<Round>,
     /// State for a single wide tile.
     tile_state: TileState,
 }
 
+#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)]
+struct SlotOccupation { // A slot paired with the slot texture it belongs to.
+    slot_idx: usize, // Index of the slot inside its texture.
+    texture: u8, // Which slot texture holds the slot; `Scheduler::new` populates 0 and 1.
+}
+
 /// A "round" is a coarse scheduling quantum.
 ///
 /// It represents draws in up to three render targets; two for intermediate
@@ -234,7 +240,7 @@
     /// Draw calls scheduled into the two slot textures (0, 1) and the final target (2).
     draws: [Draw; 3],
     /// Slots that will be freed after drawing into the two slot textures [0, 1].
-    free: [Vec<usize>; 2],
+    free: [Vec<SlotOccupation>; 2],
 }
 
 /// State for a single wide tile.
@@ -245,8 +251,8 @@
 
 #[derive(Clone, Copy, Debug)]
 struct TileEl {
-    slot_ix: usize,
-    slot_prime_ix: Option<usize>,
+    slot_ix: SlotOccupation,
+    slot_prime_ix: Option<SlotOccupation>,
     round: usize,
     opacity: f32,
 }
@@ -256,8 +262,20 @@
 
 impl Scheduler {
     pub(crate) fn new(total_slots: usize) -> Self {
-        let free0: Vec<_> = (0..total_slots).collect();
-        let free1 = free0.clone();
+        let free0: Vec<_> = (0..total_slots)
+            .map(|idx| SlotOccupation {
+                slot_idx: idx,
+                texture: 0,
+            })
+            .collect();
+        let free1 = free0
+            .clone()
+            .into_iter()
+            .map(|SlotOccupation { slot_idx, .. }| SlotOccupation {
+                slot_idx,
+                texture: 1,
+            })
+            .collect();
         let free = [free0, free1];
         let clear = [Vec::new(), Vec::new()];
         Self {
@@ -270,6 +288,7 @@
         }
     }
 
+    /// Looks ahead past `current_idx` to see whether another `PushBuf` is incoming; if so, we will be "blending".
     fn should_allocate_prime(cmds: &[Cmd], current_idx: usize) -> bool {
         for cmd in &cmds[current_idx + 1..] {
             match cmd {
@@ -295,7 +314,7 @@
         // Left to right, top to bottom iteration over wide tiles.
         for wide_tile_row in 0..wide_tiles_per_col {
             for wide_tile_col in 0..wide_tiles_per_row {
-                let wide_tile = dbg!(scene.wide.get(wide_tile_col, wide_tile_row));
+                let wide_tile = scene.wide.get(wide_tile_col, wide_tile_row);
                 let wide_tile_x = wide_tile_col * WideTile::WIDTH;
                 let wide_tile_y = wide_tile_row * Tile::HEIGHT;
                 self.do_tile(
@@ -321,8 +340,20 @@
         #[cfg(debug_assertions)]
         {
             for i in 0..self.total_slots {
-                debug_assert!(self.free[0].contains(&i), "free[0] is missing slot {i}");
-                debug_assert!(self.free[1].contains(&i), "free[1] is missing slot {i}");
+                debug_assert!(
+                    self.free[0].contains(&SlotOccupation {
+                        slot_idx: i,
+                        texture: 0
+                    }),
+                    "free[0] is missing slot {i}"
+                );
+                debug_assert!(
+                    self.free[1].contains(&SlotOccupation {
+                        slot_idx: i,
+                        texture: 1
+                    }),
+                    "free[1] is missing slot {i}"
+                );
             }
         }
         debug_assert!(self.rounds_queue.is_empty(), "rounds_queue is not empty");
@@ -353,7 +384,14 @@
                     LoadOp::Clear
                 } else {
                     // Some slots need to be preserved, so only clear the dirty slots.
-                    renderer.clear_slots(i, self.clear[i].as_slice());
+                    renderer.clear_slots(
+                        i,
+                        self.clear[i]
+                            .iter()
+                            .map(|slot| slot.slot_idx as u32)
+                            .collect::<Vec<u32>>()
+                            .as_slice(),
+                    );
                     self.clear[i].clear();
                     LoadOp::Load
                 }
@@ -396,7 +434,7 @@
         // Sentinel `TileEl` to indicate the end of the stack where we draw all
         // commands to the final target.
         state.stack.push(TileEl {
-            slot_ix: usize::MAX,
+            slot_ix: SlotOccupation { slot_idx: usize::MAX, texture: 0 },
             slot_prime_ix: None,
             round: self.round,
             opacity: 1.,
@@ -417,7 +455,7 @@
                 });
             }
         }
-        for (cmd_idx, cmd) in tile.cmds.iter().enumerate() {
+        for (cmd_idx, cmd) in dbg!(tile).cmds.iter().enumerate() {
             // Note: this starts at 1 (for the final target)
             let clip_depth = state.stack.len();
             match cmd {
@@ -442,14 +480,14 @@
                         if effective_depth == 1 {
                             scene_strip_y
                         } else {
-                            prime_slot as u16 * Tile::HEIGHT
+                            prime_slot.slot_idx as u16 * Tile::HEIGHT
                         }
                     } else {
                         // Write to main slot
                         if clip_depth == 1 {
                             scene_strip_y
                         } else {
-                            el.slot_ix as u16 * Tile::HEIGHT
+                            el.slot_ix.slot_idx as u16 * Tile::HEIGHT
                         }
                     };
 
@@ -495,14 +533,13 @@
                         if effective_depth == 1 {
                             scene_strip_y
                         } else {
-                            prime_slot as u16 * Tile::HEIGHT
+                            prime_slot.slot_idx as u16 * Tile::HEIGHT
                         }
                     } else {
-                        // Write to main slot
-                        if clip_depth == 1 {
+                        if effective_depth == 1 {
                             scene_strip_y
                         } else {
-                            el.slot_ix as u16 * Tile::HEIGHT
+                            el.slot_ix.slot_idx as u16 * Tile::HEIGHT
                         }
                     };
 
@@ -524,26 +561,26 @@
                 }
                 Cmd::PushBuf => {
                     if Self::should_allocate_prime(&tile.cmds, cmd_idx) {
-                        let dest_ix = 1 - clip_depth % 2; // Destination texture
+                        let dest_ix = 1 - clip_depth % 2; // Destination texture (prime slot); reduce mod 2 before subtracting so the `usize` cannot underflow when clip_depth >= 2.
                         let temp_ix = clip_depth % 2; // Temporary texture
 
-                        while self.free[dest_ix].is_empty() || self.free[temp_ix].is_empty() {
-                            if self.rounds_queue.is_empty() {
-                                return Err(RenderError::SlotsExhausted);
-                            }
-                            self.flush(renderer);
-                        }
+                        // while self.free[dest_ix].is_empty() || self.free[temp_ix].is_empty() {
+                        //     if self.rounds_queue.is_empty() {
+                        //         return Err(RenderError::SlotsExhausted);
+                        //     }
+                        //     self.flush(renderer);
+                        // }
 
                         // Allocate both
                         let dest_slot = self.free[dest_ix].pop().unwrap();
                         let temp_slot = self.free[temp_ix].pop().unwrap();
 
-                        self.clear[dest_ix].push(dest_slot as u32);
-                        self.clear[temp_ix].push(temp_slot as u32);
+                        self.clear[dest_ix].push(dest_slot);
+                        self.clear[temp_ix].push(temp_slot);
 
                         state.stack.push(TileEl {
                             slot_ix: dest_slot,             // Destination (prime) - blend result.
-                            slot_prime_ix: Some(temp_slot), // Temporary accumulation
+                            slot_prime_ix: Some(temp_slot), // Temporary accumulation of fills before blending...
                             round: self.round,
                             opacity: 1.,
                         });
@@ -556,7 +593,7 @@
                             self.flush(renderer);
                         }
                         let slot_ix = self.free[ix].pop().unwrap();
-                        self.clear[ix].push(slot_ix as u32);
+                        self.clear[ix].push(slot_ix);
 
                         state.stack.push(TileEl {
                             slot_ix,
@@ -582,12 +619,13 @@
                         "round must be in queue"
                     );
 
-                    let main_tex_ix = 1 - clip_depth % 2;
-                    self.rounds_queue[round - self.round].free[main_tex_ix]
+
+
+                    self.rounds_queue[round - self.round].free[popped_buffer.slot_ix.texture as usize]
                         .push(popped_buffer.slot_ix);
+
                     if let Some(slot_prime_ix) = popped_buffer.slot_prime_ix {
-                        let prime_tex_ix = 1 - main_tex_ix; // Opposite texture
-                        self.rounds_queue[round - self.round].free[prime_tex_ix]
+                        self.rounds_queue[round - self.round].free[slot_prime_ix.texture as usize]
                             .push(slot_prime_ix);
                     }
                 }
@@ -604,7 +642,7 @@
                     } else {
                         (
                             clip_fill.x as u16,
-                            clip_target.slot_ix as u16 * Tile::HEIGHT,
+                            clip_target.slot_ix.slot_idx as u16 * Tile::HEIGHT,
                         )
                     };
                     // Opacity packed into the first 8 bits – pack full opacity (0xFF).
@@ -615,7 +653,7 @@
                         width: clip_fill.width as u16,
                         dense_width: 0,
                         col_idx: 0,
-                        payload: clip_source.slot_ix as u32,
+                        payload: clip_source.slot_ix.slot_idx as u32,
                         paint,
                     });
                 }
@@ -632,7 +670,7 @@
                     } else {
                         (
                             clip_alpha_fill.x as u16,
-                            clip_target.slot_ix as u16 * Tile::HEIGHT,
+                            clip_target.slot_ix.slot_idx as u16 * Tile::HEIGHT,
                         )
                     };
                     // Opacity packed into the first 8 bits – pack full opacity (0xFF).
@@ -645,7 +683,7 @@
                         col_idx: (clip_alpha_fill.alpha_idx / usize::from(Tile::HEIGHT))
                             .try_into()
                             .expect("Sparse strips are bound to u32 range"),
-                        payload: clip_source.slot_ix as u32,
+                        payload: clip_source.slot_ix.slot_idx as u32,
                         paint,
                     });
                 }
@@ -654,6 +692,7 @@
                 }
                 Cmd::Blend(mode) => {
                     let blend_source = state.stack.last().unwrap();
+
                     let blend_target = &state.stack[state.stack.len() - 2];
 
                     // Determine where the actual content is for source
@@ -675,16 +714,18 @@
                         let (x, y) = if clip_depth <= 2 {
                             (wide_tile_x, wide_tile_y)
                         } else {
-                            (0, blend_target.slot_ix as u16 * Tile::HEIGHT) // Write to destination
+                            (0, blend_target.slot_ix.slot_idx as u16 * Tile::HEIGHT) // Write to destination
                         };
 
+                        // debug_assert_ne!(dest_temp_slot, source_content_slot, "cmd_idx: {cmd_idx}");
+
                         // Encode blend operation
                         let paint = (COLOR_SOURCE_BLEND << 30)
-                            | ((dest_temp_slot as u32 & 0x3FFF) << 16)  // Where dest content is
+                            | ((dest_temp_slot.slot_idx as u32 & 0x3FFF) << 16)
                             | ((mode.mix as u32) << 8)
                             | (mode.compose as u32);
 
-                        let payload = source_content_slot as u32; // Where source content is
+                        let payload = source_content_slot.slot_idx as u32; // Where source content is
 
                         draw.0.push(GpuStrip {
                             x,
@@ -706,7 +747,7 @@
                         let (x, y) = if clip_depth <= 2 {
                             (wide_tile_x, wide_tile_y)
                         } else {
-                            (0, blend_target.slot_ix as u16 * Tile::HEIGHT)
+                            (0, blend_target.slot_ix.slot_idx as u16 * Tile::HEIGHT)
                         };
 
                         let opacity_u8 = (blend_source.opacity * 255.0) as u32;
@@ -718,7 +759,7 @@
                             width: WideTile::WIDTH,
                             dense_width: 0,
                             col_idx: 0,
-                            payload: source_content_slot as u32, // Use actual content location
+                            payload: source_content_slot.slot_idx as u32, // Use actual content location
                             paint,
                         });
                     }
@@ -727,6 +768,14 @@
             }
         }
 
+        if state.stack.len() > 1 {
+            // Had nested operations
+            // Force a new round for the next tile
+            while !self.rounds_queue.is_empty() {
+                self.flush(renderer);
+            }
+        }
+
         Ok(())
     }
 
@@ -756,7 +805,7 @@
                         ImageSource::OpaqueId(_) => {
                             let paint_packed = (COLOR_SOURCE_PAYLOAD << 30)
                                 | (PAINT_TYPE_IMAGE << 28)
-                                | (paint_tex_id & 0x1FFFFFFF);
+                                | (paint_tex_id & 0x0FFFFFFF);
                             let scene_strip_xy =
                                 ((scene_strip_y as u32) << 16) | (scene_strip_x as u32);
                             (scene_strip_xy, paint_packed)
diff --git a/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl b/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl
index 1a87d70..fbde354 100644
--- a/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl
+++ b/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl
@@ -42,7 +42,22 @@
 
 // Blend modes
 const MIX_NORMAL: u32 = 0u;
+
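+// Composite modes - values must match the `mode.compose as u32` that the scheduler packs into the low 8 bits of a blend strip's `paint`.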
+const COMPOSE_CLEAR: u32 = 0u;
+const COMPOSE_COPY: u32 = 1u;
+const COMPOSE_DEST: u32 = 2u;
 const COMPOSE_SRC_OVER: u32 = 3u;
+const COMPOSE_DEST_OVER: u32 = 4u;
+const COMPOSE_SRC_IN: u32 = 5u;
+const COMPOSE_DEST_IN: u32 = 6u;
+const COMPOSE_SRC_OUT: u32 = 7u;
+const COMPOSE_DEST_OUT: u32 = 8u;
+const COMPOSE_SRC_ATOP: u32 = 9u;
+const COMPOSE_DEST_ATOP: u32 = 10u;
+const COMPOSE_XOR: u32 = 11u;
+const COMPOSE_PLUS: u32 = 12u;
+const COMPOSE_PLUS_LIGHTER: u32 = 13u;
 
 struct Config {
     // Width of the rendering target
@@ -150,7 +165,7 @@
     // NDC ranges from -1 to 1, with (0,0) at the center of the viewport
     let ndc_x = pix_x * 2.0 / f32(config.width) - 1.0;
     let ndc_y = 1.0 - pix_y * 2.0 / f32(config.height);
-    let paint_type = (instance.paint >> 29u) & 0x3u;
+    let paint_type = (instance.paint >> 28u) & 0x3u;
 
     if paint_type == PAINT_TYPE_IMAGE {
         let paint_tex_id = instance.paint & 0x1FFFFFFF;
@@ -226,7 +241,7 @@
         if paint_type == PAINT_TYPE_SOLID {
             final_color = alpha * unpack4x8unorm(in.payload);
         } else if paint_type == PAINT_TYPE_IMAGE {
-            let paint_tex_id = in.paint & 0x1FFFFFFF;
+            let paint_tex_id = in.paint & 0x0FFFFFFF;
             let encoded_image = unpack_encoded_image(paint_tex_id);
             let image_offset = encoded_image.image_offset;
             let image_size = encoded_image.image_size;
@@ -287,16 +302,70 @@
         let dest_y = (u32(in.position.y) & 3u) + dest_slot * config.strip_height;
         let dest_color = textureLoad(clip_input_texture, vec2(clip_x, dest_y), 0);
         
-        // Can if or switch over the compose modes....
-        // if compose_mode == COMPOSE_SRC_OVER {
-        //     // SrcOver: result = src + dest * (1 - src.a)
-        // }
+        switch compose_mode {
+            case COMPOSE_CLEAR: {
+                // Clear: result = 0
+                final_color = vec4<f32>(0.0, 0.0, 0.0, 0.0);
+            }
+            case COMPOSE_COPY: {
+                // Copy: result = src
+                final_color = src_color;
+            }
+            case COMPOSE_DEST: {
+                // Dest: result = dest
+                final_color = dest_color;
+            }
+            case COMPOSE_SRC_OVER: {
+                // SrcOver: result = src + dest * (1 - src.a)
+                final_color = src_color + dest_color * (1.0 - src_color.a);
+            }
+            case COMPOSE_DEST_OVER: {
+                // DestOver: result = dest + src * (1 - dest.a)
+                final_color = dest_color + src_color * (1.0 - dest_color.a);
+            }
+            case COMPOSE_SRC_IN: {
+                // SrcIn: result = src * dest.a
+                final_color = src_color * dest_color.a;
+            }
+            case COMPOSE_DEST_IN: {
+                // DestIn: result = dest * src.a
+                final_color = dest_color * src_color.a;
+            }
+            case COMPOSE_SRC_OUT: {
+                // SrcOut: result = src * (1 - dest.a)
+                final_color = src_color * (1.0 - dest_color.a);
+            }
+            case COMPOSE_DEST_OUT: {
+                // DestOut: result = dest * (1 - src.a)
+                final_color = dest_color * (1.0 - src_color.a);
+            }
+            case COMPOSE_SRC_ATOP: {
+                // SrcAtop: result = src * dest.a + dest * (1 - src.a)
+                final_color = src_color * dest_color.a + dest_color * (1.0 - src_color.a);
+            }
+            case COMPOSE_DEST_ATOP: {
+                // DestAtop: result = dest * src.a + src * (1 - dest.a)
+                final_color = dest_color * src_color.a + src_color * (1.0 - dest_color.a);
+            }
+            case COMPOSE_XOR: {
+                // Xor: result = src * (1 - dest.a) + dest * (1 - src.a)
+                final_color = src_color * (1.0 - dest_color.a) + dest_color * (1.0 - src_color.a);
+            }
+            case COMPOSE_PLUS: {
+                // Plus: result = min(src + dest, 1)
+                final_color = clamp(src_color + dest_color, vec4<f32>(0.0), vec4<f32>(1.0));
+            }
+            case COMPOSE_PLUS_LIGHTER: {
+                // PlusLighter: result = src + dest (unclamped)
+                final_color = src_color + dest_color;
+            }
+            default: {
+                // Fallback to SrcOver
+                final_color = src_color + dest_color * (1.0 - src_color.a);
+            }
+        }
 
-        // Hard coded SrcOver...
-        final_color = src_color + dest_color * (1.0 - src_color.a);
         final_color = alpha * final_color;
-
-
     }
 
     return final_color;
diff --git a/sparse_strips/vello_sparse_tests/log b/sparse_strips/vello_sparse_tests/log
new file mode 100644
index 0000000..24a665f
--- /dev/null
+++ b/sparse_strips/vello_sparse_tests/log
@@ -0,0 +1,654 @@
+
+running 6 tests
+test compose::compose_wide_tile_nested_cpu_u8_scalar ... FAILED
+test compose::compose_wide_tile_nested_cpu_u8_neon ... FAILED
+test compose::compose_wide_tile_nested_cpu_multithreaded ... FAILED
+test compose::compose_wide_tile_nested_cpu_f32_scalar ... FAILED
+test compose::compose_wide_tile_nested_cpu_f32_neon ... FAILED
+test compose::compose_wide_tile_nested_hybrid ... FAILED
+
+failures:
+
+---- compose::compose_wide_tile_nested_cpu_u8_scalar stdout ----
+
+thread 'compose::compose_wide_tile_nested_cpu_u8_scalar' panicked at sparse_strips/vello_sparse_tests/tests/util.rs:314:9:
+test didnt match reference image
+
+---- compose::compose_wide_tile_nested_cpu_u8_neon stdout ----
+
+thread 'compose::compose_wide_tile_nested_cpu_u8_neon' panicked at sparse_strips/vello_sparse_tests/tests/util.rs:314:9:
+test didnt match reference image
+note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
+
+---- compose::compose_wide_tile_nested_cpu_multithreaded stdout ----
+
+thread 'compose::compose_wide_tile_nested_cpu_multithreaded' panicked at sparse_strips/vello_sparse_tests/tests/util.rs:314:9:
+test didnt match reference image
+
+---- compose::compose_wide_tile_nested_cpu_f32_scalar stdout ----
+
+thread 'compose::compose_wide_tile_nested_cpu_f32_scalar' panicked at sparse_strips/vello_sparse_tests/tests/util.rs:314:9:
+test didnt match reference image
+
+---- compose::compose_wide_tile_nested_cpu_f32_neon stdout ----
+
+thread 'compose::compose_wide_tile_nested_cpu_f32_neon' panicked at sparse_strips/vello_sparse_tests/tests/util.rs:314:9:
+test didnt match reference image
+
+---- compose::compose_wide_tile_nested_hybrid stdout ----
+do_scene - tile: {width: 1, height: 2 }
+self.tile_state len: TileState { stack: [] }
+[sparse_strips/vello_hybrid/src/schedule.rs:458:31] tile = WideTile {
+    x: 0,
+    y: 0,
+    bg: PremulColor {
+        premul_u8: PremulRgba8 {
+            r: 0,
+            g: 0,
+            b: 0,
+            a: 0,
+        },
+        premul_f32: PremulColor {
+            components: [
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+            ],
+            cs: PhantomData<color::colorspace::Srgb>,
+        },
+    },
+    cmds: [
+        AlphaFill(
+            CmdAlphaFill {
+                x: 0,
+                width: 4,
+                alpha_idx: 0,
+                thread_idx: 0,
+                paint: Solid(
+                    PremulColor {
+                        premul_u8: PremulRgba8 {
+                            r: 255,
+                            g: 255,
+                            b: 255,
+                            a: 255,
+                        },
+                        premul_f32: PremulColor {
+                            components: [
+                                1.0,
+                                1.0,
+                                1.0,
+                                1.0,
+                            ],
+                            cs: PhantomData<color::colorspace::Srgb>,
+                        },
+                    },
+                ),
+                blend_mode: None,
+            },
+        ),
+        Fill(
+            CmdFill {
+                x: 4,
+                width: 252,
+                paint: Solid(
+                    PremulColor {
+                        premul_u8: PremulRgba8 {
+                            r: 255,
+                            g: 255,
+                            b: 255,
+                            a: 255,
+                        },
+                        premul_f32: PremulColor {
+                            components: [
+                                1.0,
+                                1.0,
+                                1.0,
+                                1.0,
+                            ],
+                            cs: PhantomData<color::colorspace::Srgb>,
+                        },
+                    },
+                ),
+                blend_mode: None,
+            },
+        ),
+        PushBuf,
+        AlphaFill(
+            CmdAlphaFill {
+                x: 0,
+                width: 4,
+                alpha_idx: 32,
+                thread_idx: 0,
+                paint: Solid(
+                    PremulColor {
+                        premul_u8: PremulRgba8 {
+                            r: 0,
+                            g: 0,
+                            b: 255,
+                            a: 255,
+                        },
+                        premul_f32: PremulColor {
+                            components: [
+                                0.0,
+                                0.0,
+                                1.0,
+                                1.0,
+                            ],
+                            cs: PhantomData<color::colorspace::Srgb>,
+                        },
+                    },
+                ),
+                blend_mode: None,
+            },
+        ),
+        Fill(
+            CmdFill {
+                x: 4,
+                width: 252,
+                paint: Solid(
+                    PremulColor {
+                        premul_u8: PremulRgba8 {
+                            r: 0,
+                            g: 0,
+                            b: 255,
+                            a: 255,
+                        },
+                        premul_f32: PremulColor {
+                            components: [
+                                0.0,
+                                0.0,
+                                1.0,
+                                1.0,
+                            ],
+                            cs: PhantomData<color::colorspace::Srgb>,
+                        },
+                    },
+                ),
+                blend_mode: None,
+            },
+        ),
+        PushBuf,
+        AlphaFill(
+            CmdAlphaFill {
+                x: 48,
+                width: 4,
+                alpha_idx: 64,
+                thread_idx: 0,
+                paint: Solid(
+                    PremulColor {
+                        premul_u8: PremulRgba8 {
+                            r: 0,
+                            g: 255,
+                            b: 0,
+                            a: 255,
+                        },
+                        premul_f32: PremulColor {
+                            components: [
+                                0.0,
+                                1.0,
+                                0.0,
+                                1.0,
+                            ],
+                            cs: PhantomData<color::colorspace::Srgb>,
+                        },
+                    },
+                ),
+                blend_mode: None,
+            },
+        ),
+        Fill(
+            CmdFill {
+                x: 52,
+                width: 204,
+                paint: Solid(
+                    PremulColor {
+                        premul_u8: PremulRgba8 {
+                            r: 0,
+                            g: 255,
+                            b: 0,
+                            a: 255,
+                        },
+                        premul_f32: PremulColor {
+                            components: [
+                                0.0,
+                                1.0,
+                                0.0,
+                                1.0,
+                            ],
+                            cs: PhantomData<color::colorspace::Srgb>,
+                        },
+                    },
+                ),
+                blend_mode: None,
+            },
+        ),
+        Blend(
+            BlendMode {
+                mix: Normal,
+                compose: SrcOver,
+            },
+        ),
+        PopBuf,
+        Blend(
+            BlendMode {
+                mix: Normal,
+                compose: SrcOver,
+            },
+        ),
+        PopBuf,
+    ],
+    n_zero_clip: 0,
+    n_clip: 0,
+    n_bufs: 0,
+}
+[sparse_strips/vello_hybrid/src/schedule.rs:458:31] tile = WideTile {
+    x: 0,
+    y: 4,
+    bg: PremulColor {
+        premul_u8: PremulRgba8 {
+            r: 0,
+            g: 0,
+            b: 0,
+            a: 0,
+        },
+        premul_f32: PremulColor {
+            components: [
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+            ],
+            cs: PhantomData<color::colorspace::Srgb>,
+        },
+    },
+    cmds: [
+        AlphaFill(
+            CmdAlphaFill {
+                x: 0,
+                width: 4,
+                alpha_idx: 16,
+                thread_idx: 0,
+                paint: Solid(
+                    PremulColor {
+                        premul_u8: PremulRgba8 {
+                            r: 255,
+                            g: 255,
+                            b: 255,
+                            a: 255,
+                        },
+                        premul_f32: PremulColor {
+                            components: [
+                                1.0,
+                                1.0,
+                                1.0,
+                                1.0,
+                            ],
+                            cs: PhantomData<color::colorspace::Srgb>,
+                        },
+                    },
+                ),
+                blend_mode: None,
+            },
+        ),
+        Fill(
+            CmdFill {
+                x: 4,
+                width: 252,
+                paint: Solid(
+                    PremulColor {
+                        premul_u8: PremulRgba8 {
+                            r: 255,
+                            g: 255,
+                            b: 255,
+                            a: 255,
+                        },
+                        premul_f32: PremulColor {
+                            components: [
+                                1.0,
+                                1.0,
+                                1.0,
+                                1.0,
+                            ],
+                            cs: PhantomData<color::colorspace::Srgb>,
+                        },
+                    },
+                ),
+                blend_mode: None,
+            },
+        ),
+        PushBuf,
+        AlphaFill(
+            CmdAlphaFill {
+                x: 0,
+                width: 4,
+                alpha_idx: 48,
+                thread_idx: 0,
+                paint: Solid(
+                    PremulColor {
+                        premul_u8: PremulRgba8 {
+                            r: 0,
+                            g: 0,
+                            b: 255,
+                            a: 255,
+                        },
+                        premul_f32: PremulColor {
+                            components: [
+                                0.0,
+                                0.0,
+                                1.0,
+                                1.0,
+                            ],
+                            cs: PhantomData<color::colorspace::Srgb>,
+                        },
+                    },
+                ),
+                blend_mode: None,
+            },
+        ),
+        Fill(
+            CmdFill {
+                x: 4,
+                width: 252,
+                paint: Solid(
+                    PremulColor {
+                        premul_u8: PremulRgba8 {
+                            r: 0,
+                            g: 0,
+                            b: 255,
+                            a: 255,
+                        },
+                        premul_f32: PremulColor {
+                            components: [
+                                0.0,
+                                0.0,
+                                1.0,
+                                1.0,
+                            ],
+                            cs: PhantomData<color::colorspace::Srgb>,
+                        },
+                    },
+                ),
+                blend_mode: None,
+            },
+        ),
+        PushBuf,
+        AlphaFill(
+            CmdAlphaFill {
+                x: 48,
+                width: 4,
+                alpha_idx: 80,
+                thread_idx: 0,
+                paint: Solid(
+                    PremulColor {
+                        premul_u8: PremulRgba8 {
+                            r: 0,
+                            g: 255,
+                            b: 0,
+                            a: 255,
+                        },
+                        premul_f32: PremulColor {
+                            components: [
+                                0.0,
+                                1.0,
+                                0.0,
+                                1.0,
+                            ],
+                            cs: PhantomData<color::colorspace::Srgb>,
+                        },
+                    },
+                ),
+                blend_mode: None,
+            },
+        ),
+        Fill(
+            CmdFill {
+                x: 52,
+                width: 204,
+                paint: Solid(
+                    PremulColor {
+                        premul_u8: PremulRgba8 {
+                            r: 0,
+                            g: 255,
+                            b: 0,
+                            a: 255,
+                        },
+                        premul_f32: PremulColor {
+                            components: [
+                                0.0,
+                                1.0,
+                                0.0,
+                                1.0,
+                            ],
+                            cs: PhantomData<color::colorspace::Srgb>,
+                        },
+                    },
+                ),
+                blend_mode: None,
+            },
+        ),
+        Blend(
+            BlendMode {
+                mix: Normal,
+                compose: SrcOver,
+            },
+        ),
+        PopBuf,
+        Blend(
+            BlendMode {
+                mix: Normal,
+                compose: SrcOver,
+            },
+        ),
+        PopBuf,
+    ],
+    n_zero_clip: 0,
+    n_clip: 0,
+    n_bufs: 0,
+}
+Drawing a round:
+[sparse_strips/vello_hybrid/src/schedule.rs:370:9] &round = Round {
+    draws: [
+        Draw(
+            [
+                GpuStrip {
+                    x: 0,
+                    y: "8188 (tile=2047, local_y=0)",
+                    width: 4,
+                    dense_width: 4,
+                    col_idx: 8,
+                    paint: "Solid(color_source=payload)",
+                    payload: "Color(r=0, g=0, b=255, a=255)",
+                },
+                GpuStrip {
+                    x: 4,
+                    y: "8188 (tile=2047, local_y=0)",
+                    width: 252,
+                    dense_width: 0,
+                    col_idx: 0,
+                    paint: "Solid(color_source=payload)",
+                    payload: "Color(r=0, g=0, b=255, a=255)",
+                },
+                GpuStrip {
+                    x: 48,
+                    y: "8184 (tile=2046, local_y=0)",
+                    width: 4,
+                    dense_width: 4,
+                    col_idx: 16,
+                    paint: "Solid(color_source=payload)",
+                    payload: "Color(r=0, g=255, b=0, a=255)",
+                },
+                GpuStrip {
+                    x: 52,
+                    y: "8184 (tile=2046, local_y=0)",
+                    width: 204,
+                    dense_width: 0,
+                    col_idx: 0,
+                    paint: "Solid(color_source=payload)",
+                    payload: "Color(r=0, g=255, b=0, a=255)",
+                },
+                GpuStrip {
+                    x: 0,
+                    y: "8184 (tile=2046, local_y=0)",
+                    width: 4,
+                    dense_width: 4,
+                    col_idx: 12,
+                    paint: "Solid(color_source=payload)",
+                    payload: "Color(r=0, g=0, b=255, a=255)",
+                },
+                GpuStrip {
+                    x: 4,
+                    y: "8184 (tile=2046, local_y=0)",
+                    width: 252,
+                    dense_width: 0,
+                    col_idx: 0,
+                    paint: "Solid(color_source=payload)",
+                    payload: "Color(r=0, g=0, b=255, a=255)",
+                },
+                GpuStrip {
+                    x: 48,
+                    y: "8176 (tile=2044, local_y=0)",
+                    width: 4,
+                    dense_width: 4,
+                    col_idx: 20,
+                    paint: "Solid(color_source=payload)",
+                    payload: "Color(r=0, g=255, b=0, a=255)",
+                },
+                GpuStrip {
+                    x: 52,
+                    y: "8176 (tile=2044, local_y=0)",
+                    width: 204,
+                    dense_width: 0,
+                    col_idx: 0,
+                    paint: "Solid(color_source=payload)",
+                    payload: "Color(r=0, g=255, b=0, a=255)",
+                },
+            ],
+        ),
+        Draw(
+            [
+                GpuStrip {
+                    x: 0,
+                    y: "8188 (tile=2047, local_y=0)",
+                    width: 256,
+                    dense_width: 0,
+                    col_idx: 0,
+                    paint: "Blend(dest_slot=2047, mix=0, compose=3)",
+                    payload: "SourceSlot(2046) [maps to y=8184]",
+                },
+                GpuStrip {
+                    x: 0,
+                    y: "8180 (tile=2045, local_y=0)",
+                    width: 256,
+                    dense_width: 0,
+                    col_idx: 0,
+                    paint: "Blend(dest_slot=2046, mix=0, compose=3)",
+                    payload: "SourceSlot(2044) [maps to y=8176]",
+                },
+            ],
+        ),
+        Draw(
+            [
+                GpuStrip {
+                    x: 0,
+                    y: "0 (tile=0, local_y=0)",
+                    width: 4,
+                    dense_width: 4,
+                    col_idx: 0,
+                    paint: "Solid(color_source=payload)",
+                    payload: "Color(r=255, g=255, b=255, a=255)",
+                },
+                GpuStrip {
+                    x: 4,
+                    y: "0 (tile=0, local_y=0)",
+                    width: 252,
+                    dense_width: 0,
+                    col_idx: 0,
+                    paint: "Solid(color_source=payload)",
+                    payload: "Color(r=255, g=255, b=255, a=255)",
+                },
+                GpuStrip {
+                    x: 0,
+                    y: "0 (tile=0, local_y=0)",
+                    width: 256,
+                    dense_width: 0,
+                    col_idx: 0,
+                    paint: "Slot(color_source=slot, opacity=255)",
+                    payload: "SourceSlot(2047) [maps to y=8188]",
+                },
+                GpuStrip {
+                    x: 0,
+                    y: "4 (tile=1, local_y=0)",
+                    width: 4,
+                    dense_width: 4,
+                    col_idx: 4,
+                    paint: "Solid(color_source=payload)",
+                    payload: "Color(r=255, g=255, b=255, a=255)",
+                },
+                GpuStrip {
+                    x: 4,
+                    y: "4 (tile=1, local_y=0)",
+                    width: 252,
+                    dense_width: 0,
+                    col_idx: 0,
+                    paint: "Solid(color_source=payload)",
+                    payload: "Color(r=255, g=255, b=255, a=255)",
+                },
+                GpuStrip {
+                    x: 0,
+                    y: "4 (tile=1, local_y=0)",
+                    width: 256,
+                    dense_width: 0,
+                    col_idx: 0,
+                    paint: "Slot(color_source=slot, opacity=255)",
+                    payload: "SourceSlot(2046) [maps to y=8184]",
+                },
+            ],
+        ),
+    ],
+    free: [
+        [
+            SlotOccupation {
+                slot_idx: 2046,
+                texture: 0,
+            },
+            SlotOccupation {
+                slot_idx: 2047,
+                texture: 0,
+            },
+            SlotOccupation {
+                slot_idx: 2044,
+                texture: 0,
+            },
+            SlotOccupation {
+                slot_idx: 2045,
+                texture: 0,
+            },
+        ],
+        [
+            SlotOccupation {
+                slot_idx: 2047,
+                texture: 1,
+            },
+            SlotOccupation {
+                slot_idx: 2046,
+                texture: 1,
+            },
+        ],
+    ],
+}
+
+thread 'compose::compose_wide_tile_nested_hybrid' panicked at sparse_strips/vello_sparse_tests/tests/util.rs:314:9:
+test didnt match reference image
+
+
+failures:
+    compose::compose_wide_tile_nested_cpu_f32_neon
+    compose::compose_wide_tile_nested_cpu_f32_scalar
+    compose::compose_wide_tile_nested_cpu_multithreaded
+    compose::compose_wide_tile_nested_cpu_u8_neon
+    compose::compose_wide_tile_nested_cpu_u8_scalar
+    compose::compose_wide_tile_nested_hybrid
+
+test result: FAILED. 0 passed; 6 failed; 0 ignored; 0 measured; 1584 filtered out; finished in 0.14s
+
diff --git a/sparse_strips/vello_sparse_tests/snapshots/compose_wide_tile_nested.png b/sparse_strips/vello_sparse_tests/snapshots/compose_wide_tile_nested.png
index 00f5e7c..35b9641 100644
--- a/sparse_strips/vello_sparse_tests/snapshots/compose_wide_tile_nested.png
+++ b/sparse_strips/vello_sparse_tests/snapshots/compose_wide_tile_nested.png
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ab11bef3cf105e5117d115622f9463fbcd7752643e0e9e2b4f8add92c79c0846
-size 101
+oid sha256:a020f1465bb609cb18276a01e2ebcdaedb3bf674d048a527859302cb5deabead
+size 99
diff --git a/sparse_strips/vello_sparse_tests/tests/compose.rs b/sparse_strips/vello_sparse_tests/tests/compose.rs
index 0c46eed..0e15490 100644
--- a/sparse_strips/vello_sparse_tests/tests/compose.rs
+++ b/sparse_strips/vello_sparse_tests/tests/compose.rs
@@ -26,41 +26,41 @@
 #[vello_test(height = 8)]
 fn compose_wide_tile_nested(ctx: &mut impl Renderer) {
     const WIDTH: f64 = 100.0;
-    const HEIGHT: f64 = 4.0;
-    const OFFSET: f64 = 20.0;
-    
+    const HEIGHT: f64 = 8.0;
+    const OFFSET: f64 = 50.0;
+
     // Pure colors: max values only
-    let red = Color::from_rgb8(255, 0, 0);
-    let green = Color::from_rgb8(0, 255, 0);
     let blue = Color::from_rgb8(0, 0, 255);
-    let cyan = Color::from_rgb8(0, 255, 255); // Green + Blue maxed
-    
+    let green = Color::from_rgb8(0, 255, 0);
+    // let blue = Color::from_rgb8(0, 0, 255);
+    // let cyan = Color::from_rgb8(0, 255, 255); // Green + Blue maxed
+
     ctx.push_blend_layer(BlendMode::new(Mix::Normal, Compose::SrcOver));
-    
-    // Draw base red rectangle
-    ctx.set_paint(red);
+
+    // Draw base blue rectangle
+    ctx.set_paint(blue);
     ctx.fill_rect(&Rect::new(0.0, 0.0, WIDTH, HEIGHT));
-    
+
-    // Start nesting - green with Xor
+    // Start nesting - green with SrcOver
     ctx.push_blend_layer(BlendMode::new(Mix::Normal, Compose::SrcOver));
     ctx.set_paint(green);
     ctx.fill_rect(&Rect::new(OFFSET, 0.0, OFFSET + WIDTH, HEIGHT));
-    
-        // Nest blue with Plus
-        ctx.push_blend_layer(BlendMode::new(Mix::Normal, Compose::SrcOver));
-        ctx.set_paint(blue);
-        ctx.fill_rect(&Rect::new(OFFSET * 2.0, 0.0, OFFSET * 2.0 + WIDTH, HEIGHT));
-        
-            // Nest cyan with Multiply
-            ctx.push_blend_layer(BlendMode::new(Mix::Normal, Compose::SrcOver));
-            ctx.set_paint(cyan);
-            ctx.fill_rect(&Rect::new(OFFSET * 3.0, 0.0, OFFSET * 3.0 + WIDTH, HEIGHT));
-            ctx.pop_layer(); // cyan
-            
-        ctx.pop_layer(); // blue
-        
+
+    // Nest blue with SrcOver
+    // ctx.push_blend_layer(BlendMode::new(Mix::Normal, Compose::SrcOver));
+    // ctx.set_paint(blue);
+    // ctx.fill_rect(&Rect::new(OFFSET * 2.0, 0.0, OFFSET * 2.0 + WIDTH, HEIGHT));
+
+    //     // Nest cyan with SrcOver
+    //     ctx.push_blend_layer(BlendMode::new(Mix::Normal, Compose::SrcOver));
+    //     ctx.set_paint(cyan);
+    //     ctx.fill_rect(&Rect::new(OFFSET * 3.0, 0.0, OFFSET * 3.0 + WIDTH, HEIGHT));
+    //     ctx.pop_layer(); // cyan
+
+    // ctx.pop_layer(); // blue
+
     ctx.pop_layer(); // green
-    
+
     ctx.pop_layer(); // base
 }