merge from dev branch - dev: carry per-cubic stroke offsets and an is-stroke flag through path coarse rasterization
diff --git a/piet-wgsl/shader/path_coarse_full.wgsl b/piet-wgsl/shader/path_coarse_full.wgsl
index 81f1917..e37038f 100644
--- a/piet-wgsl/shader/path_coarse_full.wgsl
+++ b/piet-wgsl/shader/path_coarse_full.wgsl
@@ -113,6 +113,7 @@
         // classic memory vs ALU tradeoff.
         let cubic = cubics[global_id.x];
         let path = paths[cubic.path_ix];
+        let is_stroke = (cubic.flags & CUBIC_IS_STROKE) != 0u;
         let bbox = vec4<i32>(path.bbox);
         let p0 = cubic.p0;
         let p1 = cubic.p1;
@@ -169,15 +170,15 @@
                 }
 
                 // Output line segment lp0..lp1
-                let xymin = min(lp0, lp1);
-                let xymax = max(lp0, lp1);
+                let xymin = min(lp0, lp1) - cubic.stroke;
+                let xymax = max(lp0, lp1) + cubic.stroke;
                 let dp = lp1 - lp0;
                 let recip_dx = 1.0 / dp.x;
                 let invslope = select(dp.x / dp.y, 1.0e9, abs(dp.y) < 1.0e-9);
-                let c = 0.5 * abs(invslope);
-                let b = invslope;
                 let SX = 1.0 / f32(TILE_WIDTH);
                 let SY = 1.0 / f32(TILE_HEIGHT);
+                let c = (cubic.stroke.x + abs(invslope) * (0.5 * f32(TILE_HEIGHT) + cubic.stroke.y)) * SX;
+                let b = invslope;
                 let a = (lp0.x - (lp0.y - 0.5 * f32(TILE_HEIGHT)) * b) * SX;
                 var x0 = i32(floor(xymin.x * SX));
                 var x1 = i32(floor(xymax.x * SX) + 1.0);
@@ -200,7 +201,7 @@
                 for (var y = y0; y < y1; y += 1) {
                     let tile_y0 = f32(y) * f32(TILE_HEIGHT);
                     let xbackdrop = max(xray + 1, bbox.x);
-                    if xymin.y < tile_y0 && xbackdrop < bbox.z {
+                    if !is_stroke && xymin.y < tile_y0 && xbackdrop < bbox.z {
                         let backdrop = select(-1, 1, dp.y < 0.0);
                         let tile_ix = base + xbackdrop;
                         atomicAdd(&tiles[tile_ix].backdrop, backdrop);
@@ -226,22 +227,25 @@
                         let old = atomicExchange(&tiles[tile_ix].segments, seg_ix);
                         tile_seg.origin = lp0;
                         tile_seg.delta = dp;
-                        var y_edge = mix(lp0.y, lp1.y, (tile_x0 - lp0.x) * recip_dx);
-                        if xymin.x < tile_x0 {
-                            let p = vec2(tile_x0, y_edge);
-                            if dp.x < 0.0 {
-                                tile_seg.delta = p - lp0;
-                            } else {
-                                tile_seg.origin = p;
-                                tile_seg.delta = lp1 - p;
+                        var y_edge = 0.0;
+                        if !is_stroke {
+                            y_edge = mix(lp0.y, lp1.y, (tile_x0 - lp0.x) * recip_dx);
+                            if xymin.x < tile_x0 {
+                                let p = vec2(tile_x0, y_edge);
+                                if dp.x < 0.0 {
+                                    tile_seg.delta = p - lp0;
+                                } else {
+                                    tile_seg.origin = p;
+                                    tile_seg.delta = lp1 - p;
+                                }
+                                if tile_seg.delta.x == 0.0 {
+                                    tile_seg.delta.x = sign(dp.x) * 1e-9;
+                                }
                             }
-                            if tile_seg.delta.x == 0.0 {
-                                tile_seg.delta.x = sign(dp.x) * 1e-9;
+                            if x <= min_xray || max_xray < x {
+                                y_edge = 1e9;
                             }
                         }
-                        if x <= min_xray || max_xray < x {
-                            y_edge = 1e9;
-                        }
                         tile_seg.y_edge = y_edge;
                         tile_seg.next = old;
                         segments[seg_ix] = tile_seg;
diff --git a/piet-wgsl/shader/pathseg.wgsl b/piet-wgsl/shader/pathseg.wgsl
index acd0fca..79674d6 100644
--- a/piet-wgsl/shader/pathseg.wgsl
+++ b/piet-wgsl/shader/pathseg.wgsl
@@ -129,9 +129,8 @@
     var tag_byte = (tag_word >> shift) & 0xffu;
 
     let out = &path_bboxes[tm.path_ix];
-    var linewidth: f32;
+    let linewidth = bitcast<f32>(scene[config.linewidth_base + tm.linewidth_ix]);
     if (tag_byte & PATH_TAG_PATH) != 0u {
-        linewidth = bitcast<f32>(scene[config.linewidth_base + tm.linewidth_ix]);
         (*out).linewidth = linewidth;
         (*out).trans_ix = tm.trans_ix;
     }
@@ -182,14 +181,16 @@
                 p1 = mix(p1, p0, 1.0 / 3.0);
             }
         }
+        var stroke = vec2(0.0, 0.0);
         if linewidth >= 0.0 {
             // See https://www.iquilezles.org/www/articles/ellipses/ellipses.htm
             // This is the correct bounding box, but we're not handling rendering
             // in the isotropic case, so it may mismatch.
-            let stroke = 0.5 * linewidth * vec2(length(transform.matrx.xz), length(transform.matrx.yw));
+            stroke = 0.5 * linewidth * vec2(length(transform.matrx.xz), length(transform.matrx.yw));
             bbox += vec4(-stroke, stroke);
         }
-        cubics[global_id.x] = Cubic(p0, p1, p2, p3, tm.path_ix, 0u);
+        let flags = u32(linewidth >= 0.0);
+        cubics[global_id.x] = Cubic(p0, p1, p2, p3, stroke, tm.path_ix, flags);
         // Update bounding box using atomics only. Computing a monoid is a
         // potential future optimization.
         if bbox.z > bbox.x || bbox.w > bbox.y {
diff --git a/piet-wgsl/shader/shared/cubic.wgsl b/piet-wgsl/shader/shared/cubic.wgsl
index 5cbfd8b..72292a8 100644
--- a/piet-wgsl/shader/shared/cubic.wgsl
+++ b/piet-wgsl/shader/shared/cubic.wgsl
@@ -5,7 +5,9 @@
     p1: vec2<f32>,
     p2: vec2<f32>,
     p3: vec2<f32>,
+    stroke: vec2<f32>,
     path_ix: u32,
-    // Needed?
-    padding: u32,
+    flags: u32,
 }
+
+let CUBIC_IS_STROKE = 1u; // Bit in Cubic.flags: set when the segment's linewidth >= 0.0 (a stroke).
diff --git a/piet-wgsl/src/render.rs b/piet-wgsl/src/render.rs
index aef46c1..4ba0a9a 100644
--- a/piet-wgsl/src/render.rs
+++ b/piet-wgsl/src/render.rs
@@ -11,7 +11,7 @@
 const TAG_MONOID_SIZE: u64 = 12;
 const TAG_MONOID_FULL_SIZE: u64 = 20;
 const PATH_BBOX_SIZE: u64 = 24;
-const CUBIC_SIZE: u64 = 40;
+const CUBIC_SIZE: u64 = 48; // Keep in sync with WGSL `Cubic`: 5 x vec2<f32> + 2 x u32 (presumably bytes).
 const DRAWMONOID_SIZE: u64 = 16;
 const MAX_DRAWINFO_SIZE: u64 = 44;
 const CLIP_BIC_SIZE: u64 = 8;