Address review feedback
diff --git a/src/cpu_shader/binning.rs b/src/cpu_shader/binning.rs
index 136e333..5ace850 100644
--- a/src/cpu_shader/binning.rs
+++ b/src/cpu_shader/binning.rs
@@ -50,6 +50,7 @@
                 let draw_monoid = draw_monoids[element_ix];
                 let mut clip_bbox = [-1e9, -1e9, 1e9, 1e9];
                 if draw_monoid.clip_ix > 0 {
+                    assert!(draw_monoid.clip_ix - 1 < config.layout.n_clips);
                     clip_bbox = clip_bbox_buf[draw_monoid.clip_ix as usize - 1];
                 }
                 let path_bbox = path_bbox_buf[draw_monoid.path_ix as usize];
diff --git a/src/cpu_shader/clip_reduce.rs b/src/cpu_shader/clip_reduce.rs
index 96bc358..fc30661 100644
--- a/src/cpu_shader/clip_reduce.rs
+++ b/src/cpu_shader/clip_reduce.rs
@@ -48,7 +48,6 @@
 }
 
 pub fn clip_reduce(n_wg: u32, resources: &[CpuBinding]) {
-    // TODO: probably remove config, it's not needed
     let clip_inp = resources[0].as_slice();
     let path_bboxes = resources[1].as_slice();
     let mut reduced = resources[2].as_slice_mut();
diff --git a/src/cpu_shader/coarse.rs b/src/cpu_shader/coarse.rs
index c54aa10..390df7f 100644
--- a/src/cpu_shader/coarse.rs
+++ b/src/cpu_shader/coarse.rs
@@ -57,7 +57,6 @@
         ptcl[(self.cmd_offset + offset) as usize] = value;
     }
 
-    // TODO: handle even/odd winding rule
     fn write_path(
         &mut self,
         config: &ConfigUniform,
@@ -217,7 +216,6 @@
             let mut tile_state = TileState::new(this_tile_ix);
             let blend_offset = tile_state.cmd_offset;
             tile_state.cmd_offset += 1;
-            // Discussion question: do these belong in tile state?
             let mut clip_depth = 0;
             let mut clip_zero_depth = 0;
             for drawobj_ix in &compacted[tile_ix] {
@@ -314,7 +312,7 @@
 
             if bin_tile_x + tile_x < width_in_tiles && bin_tile_y + tile_y < height_in_tiles {
                 ptcl[tile_state.cmd_offset as usize] = CMD_END;
-                let scratch_size = 0; // TODO: actually compute
+                let scratch_size = 0; // TODO: actually compute blend depth
                 ptcl[blend_offset as usize] = bump.blend;
                 bump.blend += scratch_size;
             }
diff --git a/src/cpu_shader/draw_leaf.rs b/src/cpu_shader/draw_leaf.rs
index 1d69930..0aa779e 100644
--- a/src/cpu_shader/draw_leaf.rs
+++ b/src/cpu_shader/draw_leaf.rs
@@ -5,7 +5,7 @@
 
 use crate::cpu_dispatch::CpuBinding;
 
-use super::util::{Transform, Vec2};
+use super::util::{read_draw_tag_from_scene, Transform, Vec2};
 
 const WG_SIZE: usize = 256;
 
@@ -19,22 +19,18 @@
     info: &mut [u32],
     clip_inp: &mut [Clip],
 ) {
-    let drawtag_base = config.layout.draw_tag_base;
     let mut prefix = DrawMonoid::default();
     for i in 0..n_wg {
         let mut m = prefix;
         for j in 0..WG_SIZE {
             let ix = i * WG_SIZE as u32 + j as u32;
-            let tag_raw = if ix < config.layout.n_draw_objects {
-                scene[(drawtag_base + ix) as usize]
-            } else {
-                0
-            };
+            let tag_raw = read_draw_tag_from_scene(config, scene, ix);
             let tag_word = DrawTag(tag_raw);
             // store exclusive prefix sum
             if ix < config.layout.n_draw_objects {
                 draw_monoid[ix as usize] = m;
             }
+            let m_next = m.combine(&DrawMonoid::new(tag_word));
             let dd = config.layout.draw_data_base + m.scene_offset;
             let di = m.info_offset as usize;
             if tag_word == DrawTag::COLOR
@@ -145,7 +141,7 @@
                 let path_ix = !ix as i32;
                 clip_inp[m.clip_ix as usize] = Clip { ix, path_ix };
             }
-            m = m.combine(&DrawMonoid::new(tag_word));
+            m = m_next;
         }
         prefix = prefix.combine(&reduced[i as usize]);
     }
diff --git a/src/cpu_shader/draw_reduce.rs b/src/cpu_shader/draw_reduce.rs
index 019b941..61c338c 100644
--- a/src/cpu_shader/draw_reduce.rs
+++ b/src/cpu_shader/draw_reduce.rs
@@ -5,19 +5,16 @@
 
 use crate::cpu_dispatch::CpuBinding;
 
+use super::util::read_draw_tag_from_scene;
+
 const WG_SIZE: usize = 256;
 
 fn draw_reduce_main(n_wg: u32, config: &ConfigUniform, scene: &[u32], reduced: &mut [DrawMonoid]) {
-    let drawtag_base = config.layout.draw_tag_base;
     for i in 0..n_wg {
         let mut m = DrawMonoid::default();
         for j in 0..WG_SIZE {
             let ix = i * WG_SIZE as u32 + j as u32;
-            let tag = if ix < config.layout.n_draw_objects {
-                scene[(drawtag_base + ix) as usize]
-            } else {
-                0
-            };
+            let tag = read_draw_tag_from_scene(config, scene, ix);
             m = m.combine(&DrawMonoid::new(DrawTag(tag)));
         }
         reduced[i as usize] = m;
diff --git a/src/cpu_shader/path_count.rs b/src/cpu_shader/path_count.rs
index b55cd12..2cee5b8 100644
--- a/src/cpu_shader/path_count.rs
+++ b/src/cpu_shader/path_count.rs
@@ -34,15 +34,15 @@
         if dy == 0.0 && s0.y.floor() == s0.y {
             continue;
         }
-        let dy_dxdy = dy / (dx + dy);
-        let a = 1.0 - dy_dxdy;
+        let idxdy = 1.0 / (dx + dy);
+        let a = dx * idxdy;
         let is_positive_slope = s1.x >= s0.x;
         let sign = if is_positive_slope { 1.0 } else { -1.0 };
         let xt0 = (s0.x * sign).floor();
         let c = s0.x * sign - xt0;
         let y0 = s0.y.floor();
         let ytop = if s0.y == s1.y { s0.y.ceil() } else { y0 + 1.0 };
-        let b = dy_dxdy * c + a * (ytop - s0.y);
+        let b = (dy * c + dx * (ytop - s0.y)) * idxdy;
         let x0 = xt0 * sign + if is_positive_slope { 0.0 } else { -1.0 };
 
         let path = paths[line.path_ix as usize];
diff --git a/src/cpu_shader/path_tiling.rs b/src/cpu_shader/path_tiling.rs
index 53f5cd9..41549bb 100644
--- a/src/cpu_shader/path_tiling.rs
+++ b/src/cpu_shader/path_tiling.rs
@@ -37,15 +37,15 @@
 
         let dx = (s1.x - s0.x).abs();
         let dy = s1.y - s0.y;
-        let dy_dxdy = dy / (dx + dy);
-        let a = 1.0 - dy_dxdy;
+        let idxdy = 1.0 / (dx + dy);
+        let a = dx * idxdy;
         let is_positive_slope = s1.x >= s0.x;
         let sign = if is_positive_slope { 1.0 } else { -1.0 };
         let xt0 = (s0.x * sign).floor();
         let c = s0.x * sign - xt0;
         let y0 = s0.y.floor();
         let ytop = if s0.y == s1.y { s0.y.ceil() } else { y0 + 1.0 };
-        let b = dy_dxdy * c + a * (ytop - s0.y);
+        let b = (dy * c + dx * (ytop - s0.y)) * idxdy;
         let x0 = xt0 * sign + if is_positive_slope { 0.0 } else { -1.0 };
         let z = (a * seg_within_line as f32 + b).floor();
         let x = x0 as i32 + (sign * z) as i32;
diff --git a/src/cpu_shader/util.rs b/src/cpu_shader/util.rs
index 7c32cdd..2bb3279 100644
--- a/src/cpu_shader/util.rs
+++ b/src/cpu_shader/util.rs
@@ -3,6 +3,8 @@
 
 //! Utility types
 
+use vello_encoding::ConfigUniform;
+
 #[derive(Clone, Copy, Default, Debug)]
 #[repr(C)]
 pub struct Vec2 {
@@ -94,3 +96,18 @@
 pub fn span(a: f32, b: f32) -> u32 {
     (a.max(b).ceil() - a.min(b).floor()).max(1.0) as u32
 }
+
+const DRAWTAG_NOP: u32 = 0;
+
+/// Fetch the draw tag for draw object `ix` from the scene buffer.
+///
+/// Returns `DRAWTAG_NOP` when `ix` is at or beyond `config.layout.n_draw_objects`,
+/// so callers may pass indices that run past the end without checking.
+pub fn read_draw_tag_from_scene(config: &ConfigUniform, scene: &[u32], ix: u32) -> u32 {
+    let layout = &config.layout;
+    if ix >= layout.n_draw_objects {
+        // Index is past the last draw object: report a no-op tag.
+        return DRAWTAG_NOP;
+    }
+    scene[(layout.draw_tag_base + ix) as usize]
+}