Address review feedback

Mostly comments, but also a little cleanup of function signatures in fine.
diff --git a/crates/encoding/src/config.rs b/crates/encoding/src/config.rs
index d853a08..6f24925 100644
--- a/crates/encoding/src/config.rs
+++ b/crates/encoding/src/config.rs
@@ -16,7 +16,7 @@
 // TODO: Obtain these from the vello_shaders crate
 pub(crate) const PATH_REDUCE_WG: u32 = 256;
 const PATH_BBOX_WG: u32 = 256;
-const PATH_COARSE_WG: u32 = 256;
+const FLATTEN_WG: u32 = 256;
 const CLIP_REDUCE_WG: u32 = 256;
 
 /// Counters for tracking dynamic allocation on the GPU.
@@ -164,7 +164,7 @@
             path_tag_wgs
         };
         let draw_object_wgs = (n_draw_objects + PATH_BBOX_WG - 1) / PATH_BBOX_WG;
-        let path_coarse_wgs = (n_path_tags + PATH_COARSE_WG - 1) / PATH_COARSE_WG;
+        let flatten_wgs = (n_path_tags + FLATTEN_WG - 1) / FLATTEN_WG;
         let clip_reduce_wgs = n_clips.saturating_sub(1) / CLIP_REDUCE_WG;
         let clip_wgs = (n_clips + CLIP_REDUCE_WG - 1) / CLIP_REDUCE_WG;
         let path_wgs = (n_paths + PATH_BBOX_WG - 1) / PATH_BBOX_WG;
@@ -177,14 +177,14 @@
             path_scan1: (reduced_size / PATH_REDUCE_WG, 1, 1),
             path_scan: (path_tag_wgs, 1, 1),
             bbox_clear: (draw_object_wgs, 1, 1),
-            flatten: (path_coarse_wgs, 1, 1),
+            flatten: (flatten_wgs, 1, 1),
             draw_reduce: (draw_object_wgs, 1, 1),
             draw_leaf: (draw_object_wgs, 1, 1),
             clip_reduce: (clip_reduce_wgs, 1, 1),
             clip_leaf: (clip_wgs, 1, 1),
             binning: (draw_object_wgs, 1, 1),
             tile_alloc: (path_wgs, 1, 1),
-            path_coarse: (path_coarse_wgs, 1, 1),
+            path_coarse: (flatten_wgs, 1, 1),
             backdrop: (path_wgs, 1, 1),
             coarse: (width_in_bins, height_in_bins, 1),
             fine: (width_in_tiles, height_in_tiles, 1),
diff --git a/shader/coarse.wgsl b/shader/coarse.wgsl
index 2d324c7..0635443 100644
--- a/shader/coarse.wgsl
+++ b/shader/coarse.wgsl
@@ -87,10 +87,10 @@
     // We overload the "segments" field to store both count (written by
     // path_count stage) and segment allocation (used by path_tiling and
     // fine).
-    let n_segs = tile.segments;
+    let n_segs = tile.segment_count_or_ix;
     if n_segs != 0u {
         var seg_ix = atomicAdd(&bump.segments, n_segs);
-        tiles[tile_ix].segments = ~seg_ix;
+        tiles[tile_ix].segment_count_or_ix = ~seg_ix;
         alloc_cmd(4u);
         ptcl[cmd_offset] = CMD_FILL;
         let size_and_rule = (n_segs << 1u) | u32(even_odd);
@@ -310,7 +310,7 @@
                 let blend = scene[dd];
                 is_blend = blend != BLEND_CLIP;
             }
-            let include_tile = tile.segments != 0u || (tile.backdrop == 0) == is_clip || is_blend;
+            let include_tile = tile.segment_count_or_ix != 0u || (tile.backdrop == 0) == is_clip || is_blend;
             if include_tile {
                 let el_slice = el_ix / 32u;
                 let el_mask = 1u << (el_ix & 31u);
@@ -383,7 +383,7 @@
                     }
                     // DRAWTAG_BEGIN_CLIP
                     case 0x9u: {
-                        if tile.segments == 0u && tile.backdrop == 0 {
+                        if tile.segment_count_or_ix == 0u && tile.backdrop == 0 {
                             clip_zero_depth = clip_depth + 1u;
                         } else {
                             write_begin_clip();
diff --git a/shader/fine.wgsl b/shader/fine.wgsl
index 7998459..108c88c 100644
--- a/shader/fine.wgsl
+++ b/shader/fine.wgsl
@@ -135,14 +135,16 @@
 
 let PIXELS_PER_THREAD = 4u;
 
-fn fill_path(seg_data: u32, n_segs: u32, backdrop: i32, xy: vec2<f32>, even_odd: bool) -> array<f32, PIXELS_PER_THREAD> {
+fn fill_path(fill: CmdFill, xy: vec2<f32>) -> array<f32, PIXELS_PER_THREAD> {
+    let n_segs = fill.size_and_rule >> 1u;
+    let even_odd = (fill.size_and_rule & 1u) != 0u;
     var area: array<f32, PIXELS_PER_THREAD>;
-    let backdrop_f = f32(backdrop);
+    let backdrop_f = f32(fill.backdrop);
     for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
         area[i] = backdrop_f;
     }
     for (var i = 0u; i < n_segs; i++) {
-        let seg_off = seg_data + i;
+        let seg_off = fill.seg_data + i;
         let segment = segments[seg_off];
         let y = segment.origin.y - xy.y;
         let y0 = clamp(y, 0.0, 1.0);
@@ -218,9 +220,7 @@
             // CMD_FILL
             case 1u: {
                 let fill = read_fill(cmd_ix);
-                let n_segs = fill.size_and_rule >> 1u;
-                let even_odd = (fill.size_and_rule & 1u) != 0u;
-                area = fill_path(fill.seg_data, n_segs, fill.backdrop, xy, even_odd);
+                area = fill_path(fill, xy);
                 cmd_ix += 4u;
             }
             // CMD_STROKE
diff --git a/shader/flatten.wgsl b/shader/flatten.wgsl
index b4197bc..e27bb6e 100644
--- a/shader/flatten.wgsl
+++ b/shader/flatten.wgsl
@@ -174,7 +174,7 @@
 }
 
 struct Transform {
-    matrx: vec4<f32>,
+    mat: vec4<f32>,
     translate: vec2<f32>,
 }
 
@@ -186,13 +186,13 @@
     let c3 = bitcast<f32>(scene[base + 3u]);
     let c4 = bitcast<f32>(scene[base + 4u]);
     let c5 = bitcast<f32>(scene[base + 5u]);
-    let matrx = vec4(c0, c1, c2, c3);
+    let mat = vec4(c0, c1, c2, c3);
     let translate = vec2(c4, c5);
-    return Transform(matrx, translate);
+    return Transform(mat, translate);
 }
 
 fn transform_apply(transform: Transform, p: vec2<f32>) -> vec2<f32> {
-    return transform.matrx.xy * p.x + transform.matrx.zw * p.y + transform.translate;
+    return transform.mat.xy * p.x + transform.mat.zw * p.y + transform.translate;
 }
 
 fn round_down(x: f32) -> i32 {
@@ -275,7 +275,7 @@
             // See https://www.iquilezles.org/www/articles/ellipses/ellipses.htm
             // This is the correct bounding box, but we're not handling rendering
             // in the isotropic case, so it may mismatch.
-            stroke = 0.5 * linewidth * vec2(length(transform.matrx.xz), length(transform.matrx.yw));
+            stroke = 0.5 * linewidth * vec2(length(transform.mat.xz), length(transform.mat.yw));
             bbox += vec4(-stroke, stroke);
         }
         let flags = u32(linewidth >= 0.0);
diff --git a/shader/path_count.wgsl b/shader/path_count.wgsl
index 3cf4e4c..e34ff7a 100644
--- a/shader/path_count.wgsl
+++ b/shader/path_count.wgsl
@@ -13,8 +13,7 @@
 // TODO: this is cut'n'pasted from path_coarse.
 struct AtomicTile {
     backdrop: atomic<i32>,
-    // This is "segments" but we're renaming
-    count: atomic<u32>,
+    segment_count_or_ix: atomic<u32>,
 }
 
 @group(0) @binding(1)
@@ -58,14 +57,15 @@
         let xy1 = select(line.p0, line.p1, is_down);
         let s0 = xy0 * TILE_SCALE;
         let s1 = xy1 * TILE_SCALE;
-        // TODO: clip line to bounding box
         count = span(s0.x, s1.x) + span(s0.y, s1.y) - 1u;
         let line_ix = global_id.x;
 
         let dx = abs(s1.x - s0.x);
         let dy = s1.y - s0.y;
         if dx + dy == 0.0 {
-            // Zero-length segment, drop it. Note, this should be culled earlier.
+            // Zero-length segment, drop it. Note, this could be culled in the
+            // flattening stage, but eliding the test here would be fragile, as
+            // it would be pretty bad to let it slip through.
             return;
         }
         if dy == 0.0 && floor(s0.y) == s0.y {
@@ -176,7 +176,7 @@
                 let x_bump = max(x + 1, bbox.x);
                 atomicAdd(&tile[base + x_bump].backdrop, delta);
             }
-            let seg_within_slice = atomicAdd(&tile[base + x].count, 1u);
+            let seg_within_slice = atomicAdd(&tile[base + x].segment_count_or_ix, 1u);
             // Pack two count values into a single u32
             let counts = (seg_within_slice << 16u) | subix;
             let seg_count = SegmentCount(line_ix, counts);
diff --git a/shader/path_tiling.wgsl b/shader/path_tiling.wgsl
index 444974e..ace01ab 100644
--- a/shader/path_tiling.wgsl
+++ b/shader/path_tiling.wgsl
@@ -52,6 +52,8 @@
         let count = span(s0.x, s1.x) + span(s0.y, s1.y) - 1u;
         let dx = abs(s1.x - s0.x);
         let dy = s1.y - s0.y;
+        // Division by zero can't happen because zero-length lines
+        // have already been discarded in the path_count stage.
         let idxdy = 1.0 / (dx + dy);
         let a = dx * idxdy;
         let is_positive_slope = s1.x >= s0.x;
@@ -71,7 +73,7 @@
         let stride = bbox.z - bbox.x;
         let tile_ix = i32(path.tiles) + (y - bbox.y) * stride + x - bbox.x;
         let tile = tiles[tile_ix];
-        let seg_start = ~tile.segments;
+        let seg_start = ~tile.segment_count_or_ix;
         if i32(seg_start) < 0 {
             return;
         }
diff --git a/shader/shared/tile.wgsl b/shader/shared/tile.wgsl
index 7af8a62..063a5bc 100644
--- a/shader/shared/tile.wgsl
+++ b/shader/shared/tile.wgsl
@@ -11,5 +11,10 @@
 
 struct Tile {
     backdrop: i32,
-    segments: u32,
+    // This is used for the count of the number of segments in the
+    // tile up to coarse rasterization, and the index afterwards.
+    // In the latter variant, the bits are inverted so that tiling
+    // can detect whether the tile was allocated; it's best to
+    // consider this an enum packed into a u32.
+    segment_count_or_ix: u32,
 }