add clips and blends
diff --git a/piet-wgsl/shader/clip_leaf.wgsl b/piet-wgsl/shader/clip_leaf.wgsl
index f294317..d935550 100644
--- a/piet-wgsl/shader/clip_leaf.wgsl
+++ b/piet-wgsl/shader/clip_leaf.wgsl
@@ -33,15 +33,17 @@
 var<workgroup> sh_link: array<i32, WG_SIZE>;
 
 fn search_link(bic: ptr<function, Bic>, ix: u32) -> i32 {
+    var ix = ix;
     var j = 0u;
     while j < firstTrailingBit(WG_SIZE) {
         let base = 2u * WG_SIZE - (2u << (firstTrailingBit(WG_SIZE) - j));
         if ((ix >> j) & 1u) != 0u {
             let test = bic_combine(sh_bic[base + (ix >> j) - 1u], *bic);
-            if test.b >= 0u {
-                *bic = test;
-                ix -= 1u << j;
+            if test.b > 0u {
+                break;
             }
+            *bic = test;
+            ix -= 1u << j;
         }
         j += 1u;
     }
@@ -59,7 +61,7 @@
     if ix > 0u {
         return i32(ix) - 1;
     } else {
-        return i32(~(*bic).a);
+        return i32(~0u - (*bic).a);
     }
 }
 
@@ -67,7 +69,9 @@
     if ix < config.n_clip {
         return clip_inp[ix];
     } else {
-        return i32(0x80000000);
+        return -2147483648;
+        // literal too large?
+        // return 0x80000000;
     }
 }
 
@@ -129,12 +133,12 @@
     sh_bic[local_id.x] = bic;
     if is_push {
         let path_bbox = path_bboxes[inp];
-        bbox = vec4<f32>(path_bbox.x0, path_bbox.y0, path_bbox.x1, path_bbox.y1);
+        bbox = vec4<f32>(f32(path_bbox.x0), f32(path_bbox.y0), f32(path_bbox.x1), f32(path_bbox.y1));
     } else {
         bbox = vec4<f32>(-1e9, -1e9, 1e9, 1e9);
     }
     var inbase = 0u;
-    for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
+    for (var i = 0u; i < firstTrailingBit(WG_SIZE) - 1u; i += 1u) {
         let outbase = 2u * WG_SIZE - (1u << (firstTrailingBit(WG_SIZE) - i));
         workgroupBarrier();
         if local_id.x < 1u << (firstTrailingBit(WG_SIZE) - 1u - i) {
@@ -191,5 +195,5 @@
             bbox = vec4<f32>(-1e9, -1e9, 1e9, 1e9);
         }
     }
-    clip_bboxes[global_id.x] = bbox
+    clip_bboxes[global_id.x] = bbox;
 }
diff --git a/piet-wgsl/shader/clip_reduce.wgsl b/piet-wgsl/shader/clip_reduce.wgsl
index 3288b07..50c6402 100644
--- a/piet-wgsl/shader/clip_reduce.wgsl
+++ b/piet-wgsl/shader/clip_reduce.wgsl
@@ -60,7 +60,7 @@
         let path_ix = sh_path_ix[local_id.x];
         let path_bbox = path_bboxes[path_ix];
         let parent_ix = sh_parent[local_id.x] + wg_id.x * WG_SIZE;
-        let bbox = vec4<f32>(path_bbox.x0, path_bbox.y0, path_bbox.x1, path_bbox.y1);
+        let bbox = vec4<f32>(f32(path_bbox.x0), f32(path_bbox.y0), f32(path_bbox.x1), f32(path_bbox.y1));
         clip_out[global_id.x] = ClipEl(parent_ix, bbox);
     }
 }
diff --git a/piet-wgsl/shader/coarse.wgsl b/piet-wgsl/shader/coarse.wgsl
index 5741ec3..b27a215 100644
--- a/piet-wgsl/shader/coarse.wgsl
+++ b/piet-wgsl/shader/coarse.wgsl
@@ -145,9 +145,32 @@
     alloc_cmd(12u);
     ptcl[cmd_offset] = CMD_RAD_GRAD;
     ptcl[cmd_offset + 1u] = rad.index;
+    ptcl[cmd_offset + 2u] = bitcast<u32>(rad.matrx.x);
+    ptcl[cmd_offset + 3u] = bitcast<u32>(rad.matrx.y);
+    ptcl[cmd_offset + 4u] = bitcast<u32>(rad.matrx.z);
+    ptcl[cmd_offset + 5u] = bitcast<u32>(rad.matrx.w);
+    ptcl[cmd_offset + 6u] = bitcast<u32>(rad.xlat.x);
+    ptcl[cmd_offset + 7u] = bitcast<u32>(rad.xlat.y);
+    ptcl[cmd_offset + 8u] = bitcast<u32>(rad.c1.x);
+    ptcl[cmd_offset + 9u] = bitcast<u32>(rad.c1.y);
+    ptcl[cmd_offset + 10u] = bitcast<u32>(rad.ra);
+    ptcl[cmd_offset + 11u] = bitcast<u32>(rad.roff);
     cmd_offset += 12u;
 }
 
+fn write_begin_clip() {
+    alloc_cmd(1u);
+    ptcl[cmd_offset] = CMD_BEGIN_CLIP;
+    cmd_offset += 1u;
+}
+
+fn write_end_clip(blend: u32) {
+    alloc_cmd(2u);
+    ptcl[cmd_offset] = CMD_END_CLIP;
+    ptcl[cmd_offset + 1u] = blend;
+    cmd_offset += 2u;
+}
+
 @compute @workgroup_size(256)
 fn main(
     @builtin(local_invocation_id) local_id: vec3<u32>,
@@ -166,15 +189,20 @@
     let this_tile_ix = (bin_tile_y + tile_y) * config.width_in_tiles + bin_tile_x + tile_x;
     cmd_offset = this_tile_ix * PTCL_INITIAL_ALLOC;
     cmd_limit = cmd_offset + (PTCL_INITIAL_ALLOC - PTCL_HEADROOM);
-    // TODO: clip state
-    let clip_zero_depth = 0u;
+
+    // clip state
+    var clip_zero_depth = 0u;
+    var clip_depth = 0u;
 
     var partition_ix = 0u;
     var rd_ix = 0u;
     var wr_ix = 0u;
     var part_start_ix = 0u;
     var ready_ix = 0u;
-    // TODO: blend state
+
+    // blend state
+    var render_blend_depth = 0u;
+    var max_blend_depth = 0u;
     
     while true {
         for (var i = 0u; i < N_SLICE; i += 1u) {
@@ -286,8 +314,16 @@
             let y = sh_tile_y0[el_ix] + seq_ix / width;
             let tile_ix = sh_tile_base[el_ix] + sh_tile_stride[el_ix] * y + x;
             let tile = tiles[tile_ix];
-            // TODO: this predicate becomes more interesting with clip
-            let include_tile = tile.segments != 0u || tile.backdrop != 0;
+            let is_clip = (tag & 1u) != 0u;
+            var is_blend = false;
+            if is_clip {
+                let BLEND_CLIP = (128u << 8u) | 3u;
+                let scene_offset = draw_monoids[drawobj_ix].scene_offset;
+                let dd = config.drawdata_base + scene_offset;
+                let blend = scene[dd];
+                is_blend = blend != BLEND_CLIP;
+            }
+            let include_tile = tile.segments != 0u || (tile.backdrop == 0) == is_clip || is_blend;
             if include_tile {
                 let el_slice = el_ix / 32u;
                 let el_mask = 1u << (el_ix & 31u);
@@ -324,16 +360,18 @@
             if clip_zero_depth == 0u {
                 let tile_ix = sh_tile_base[el_ix] + sh_tile_stride[el_ix] * tile_y + tile_x;
                 let tile = tiles[tile_ix];
-                let linewidth = bitcast<f32>(info[di]);
-                write_path(tile, linewidth);
                 switch drawtag {
                     // DRAWTAG_FILL_COLOR
                     case 0x44u: {
+                        let linewidth = bitcast<f32>(info[di]);
+                        write_path(tile, linewidth);
                         let rgba_color = scene[dd];
                         write_color(CmdColor(rgba_color));
                     }
                     // DRAWTAG_FILL_LIN_GRADIENT
                     case 0x114u: {
+                        let linewidth = bitcast<f32>(info[di]);
+                        write_path(tile, linewidth);
                         var lin: CmdLinGrad;
                         lin.index = scene[dd];
                         lin.line_x = bitcast<f32>(info[di + 1u]);
@@ -343,6 +381,8 @@
                     }
                     // DRAWTAG_FILL_RAD_GRADIENT
                     case 0x2dcu: {
+                        let linewidth = bitcast<f32>(info[di]);
+                        write_path(tile, linewidth);
                         var rad: CmdRadGrad;
                         rad.index = scene[dd];
                         let m0 = bitcast<f32>(info[di + 1u]);
@@ -356,6 +396,40 @@
                         rad.roff = bitcast<f32>(info[di + 10u]);
                         write_rad_grad(rad);
                     }
+                    // DRAWTAG_BEGIN_CLIP
+                    case 0x05u: {
+                        if tile.segments == 0u && tile.backdrop == 0 {
+                            clip_zero_depth = clip_depth + 1u;
+                        } else {
+                            write_begin_clip();
+                            render_blend_depth += 1u;
+                            max_blend_depth = max(max_blend_depth, render_blend_depth);
+                        }
+                        clip_depth += 1u;
+                    }
+                    // DRAWTAG_END_CLIP
+                    case 0x25u: {
+                        clip_depth -= 1u;
+                        write_path(tile, -1.0);
+                        write_end_clip(scene[dd]);
+                        render_blend_depth -= 1u;
+                    }
+                    default: {}
+                }
+            } else {
+                // In "clip zero" state, suppress all drawing
+                switch drawtag {
+                    // DRAWTAG_BEGIN_CLIP
+                    case 0x05u: {
+                        clip_depth += 1u;
+                    }
+                    // DRAWTAG_END_CLIP
+                    case 0x25u: {
+                        if clip_depth == clip_zero_depth {
+                            clip_zero_depth = 0u;
+                        }
+                        clip_depth -= 1u;
+                    }
                     default: {}
                 }
             }
diff --git a/piet-wgsl/shader/draw_leaf.wgsl b/piet-wgsl/shader/draw_leaf.wgsl
index 5909fdd..f5140a4 100644
--- a/piet-wgsl/shader/draw_leaf.wgsl
+++ b/piet-wgsl/shader/draw_leaf.wgsl
@@ -17,6 +17,7 @@
 // Finish prefix sum of drawtags, decode draw objects.
 
 #import config
+#import clip
 #import drawtag
 #import bbox
 
@@ -38,6 +39,9 @@
 @group(0) @binding(5)
 var<storage, read_write> info: array<u32>;
 
+@group(0) @binding(6)
+var<storage, read_write> clip_inp: array<i32>;
+
 let WG_SIZE = 256u;
 
 // Possibly dedup?
@@ -183,4 +187,11 @@
             default: {}
         }
     }
-}
\ No newline at end of file
+    if tag_word == DRAWTAG_BEGIN_CLIP || tag_word == DRAWTAG_END_CLIP {
+        var path_ix = ~ix;
+        if tag_word == DRAWTAG_BEGIN_CLIP {
+            path_ix = m.path_ix;
+        }
+        clip_inp[m.clip_ix] = i32(path_ix);
+    }
+}
diff --git a/piet-wgsl/shader/fine.wgsl b/piet-wgsl/shader/fine.wgsl
index d76edef..5488e01 100644
--- a/piet-wgsl/shader/fine.wgsl
+++ b/piet-wgsl/shader/fine.wgsl
@@ -40,6 +40,8 @@
 var<storage, read_write> output: array<u32>;
 
 #ifdef full
+
+#import blend
 #import ptcl
 
 let GRADIENT_WIDTH = 512;
@@ -90,10 +92,6 @@
     return CmdRadGrad(index, matrx, xlat, c1, ra, roff);
 }
 
-fn mix_blend_compose(backdrop: vec4<f32>, src: vec4<f32>, mode: u32) -> vec4<f32> {
-    // TODO: ALL the blend modes. This is just vanilla src-over.
-    return backdrop * (1.0 - src.a) + src;
-}
 #endif
 
 let PIXELS_PER_THREAD = 4u;
@@ -233,7 +231,7 @@
                     let fg_i = fg_rgba * area[i];
                     rgba[i] = rgba[i] * (1.0 - fg_i.a) + fg_i;
                 }
-                cmd_ix += 12u;
+                cmd_ix += 5u;
             }
             // CMD_RAD_GRAD
             case 7u: {
@@ -278,7 +276,7 @@
                     }
                     let bg = unpack4x8unorm(bg_rgba);
                     let fg = rgba[i] * area[i];
-                    rgba[i] = mix_blend_compose(bg, fg, blend);
+                    rgba[i] = blend_mix_compose(bg, fg, blend);
                 }
                 cmd_ix += 2u;
             }
diff --git a/piet-wgsl/shader/path_coarse_full.wgsl b/piet-wgsl/shader/path_coarse_full.wgsl
index fa3609e..d607bac 100644
--- a/piet-wgsl/shader/path_coarse_full.wgsl
+++ b/piet-wgsl/shader/path_coarse_full.wgsl
@@ -213,7 +213,7 @@
                 }
                 for (var y = y0; y < y1; y += 1) {
                     let tile_y0 = f32(y) * f32(TILE_HEIGHT);
-                    let xbackdrop = max(xray + 1, 0);
+                    let xbackdrop = max(xray + 1, bbox.x);
                     if xymin.y < tile_y0 && xbackdrop < bbox.z {
                         let backdrop = select(-1, 1, dp.y < 0.0);
                         let tile_ix = base + xbackdrop;
diff --git a/piet-wgsl/shader/shared/blend.wgsl b/piet-wgsl/shader/shared/blend.wgsl
new file mode 100644
index 0000000..34cb55b
--- /dev/null
+++ b/piet-wgsl/shader/shared/blend.wgsl
@@ -0,0 +1,351 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Also licensed under MIT license, at your choice.
+
+
+// Color mixing modes
+
+let MIX_NORMAL = 0u;
+let MIX_MULTIPLY = 1u;
+let MIX_SCREEN = 2u;
+let MIX_OVERLAY = 3u;
+let MIX_DARKEN = 4u;
+let MIX_LIGHTEN = 5u;
+let MIX_COLOR_DODGE = 6u;
+let MIX_COLOR_BURN = 7u;
+let MIX_HARD_LIGHT = 8u;
+let MIX_SOFT_LIGHT = 9u;
+let MIX_DIFFERENCE = 10u;
+let MIX_EXCLUSION = 11u;
+let MIX_HUE = 12u;
+let MIX_SATURATION = 13u;
+let MIX_COLOR = 14u;
+let MIX_LUMINOSITY = 15u;
+let MIX_CLIP = 128u;
+
+fn screen(cb: vec3<f32>, cs: vec3<f32>) -> vec3<f32> {
+    return cb + cs - (cb * cs);
+}
+
+fn color_dodge(cb: f32, cs: f32) -> f32 {
+    if cb == 0.0 {
+        return 0.0;
+    } else if cs == 1.0 {
+        return 1.0;
+    } else {
+        return min(1.0, cb / (1.0 - cs));
+    }
+}
+
+fn color_burn(cb: f32, cs: f32) -> f32 {
+    if cb == 1.0 {
+        return 1.0;
+    } else if cs == 0.0 {
+        return 0.0;
+    } else {
+        return 1.0 - min(1.0, (1.0 - cb) / cs);
+    }
+}
+
+fn hard_light(cb: vec3<f32>, cs: vec3<f32>) -> vec3<f32> {
+	return mix(
+		screen(cb, 2.0 * cs - 1.0),
+		cb * 2.0 * cs,
+        vec3<f32>(cs <= vec3<f32>(0.5))
+	);
+}
+
+fn soft_light(cb: vec3<f32>, cs: vec3<f32>) -> vec3<f32> {
+    let d = mix(
+        sqrt(cb),
+        ((16.0 * cb - vec3(12.0)) * cb + vec3(4.0)) * cb,
+        vec3<f32>(cb <= vec3<f32>(0.25))
+    );
+    return mix(
+        cb + (2.0 * cs - vec3(1.0)) * (d - cb),
+        cb - (vec3(1.0) - 2.0 * cs) * cb * (vec3(1.0) - cb),
+        vec3<f32>(cs <= vec3<f32>(0.5))
+    );
+}
+
+fn sat(c: vec3<f32>) -> f32 {
+    return max(c.x, max(c.y, c.z)) - min(c.x, min(c.y, c.z));
+}
+
+fn lum(c: vec3<f32>) -> f32 {
+    let f = vec3<f32>(0.3, 0.59, 0.11);
+    return dot(c, f);
+}
+
+fn clip_color(c: vec3<f32>) -> vec3<f32> {
+    var c = c;
+    let l = lum(c);
+    let n = min(c.x, min(c.y, c.z));
+    let x = max(c.x, max(c.y, c.z));
+    if n < 0.0 {
+        c = l + (((c - l) * l) / (l - n));
+    }
+    if x > 1.0 {
+        c = l + (((c - l) * (1.0 - l)) / (x - l));
+    }
+    return c;
+}
+
+fn set_lum(c: vec3<f32>, l: f32) -> vec3<f32> {
+    return clip_color(c + (l - lum(c)));
+}
+
+fn set_sat_inner(
+    cmin: ptr<function, f32>, 
+    cmid: ptr<function, f32>, 
+    cmax: ptr<function, f32>, 
+    s: f32
+) {
+    if *cmax > *cmin {
+        *cmid = ((*cmid - *cmin) * s) / (*cmax - *cmin);
+        *cmax = s;
+    } else {
+        *cmid = 0.0;
+        *cmax = 0.0;
+    }
+    *cmin = 0.0;
+}
+
+fn set_sat(c: vec3<f32>, s: f32) -> vec3<f32> {
+    var r = c.r;
+    var g = c.g;
+    var b = c.b;
+    if r <= g {
+        if g <= b {
+            set_sat_inner(&r, &g, &b, s);
+        } else {
+            if r <= b {
+                set_sat_inner(&r, &b, &g, s);
+            } else {
+                set_sat_inner(&b, &r, &g, s);
+            }
+        }
+    } else {
+        if r <= b {
+            set_sat_inner(&g, &r, &b, s);
+        } else {
+            if g <= b {
+                set_sat_inner(&g, &b, &r, s);
+            } else {
+                set_sat_inner(&b, &g, &r, s);
+            }
+        }
+    }
+    return vec3<f32>(r, g, b);
+}
+
+// Blends two RGB colors together. The colors are assumed to be in sRGB
+// color space, and this function does not take alpha into account.
+fn blend_mix(cb: vec3<f32>, cs: vec3<f32>, mode: u32) -> vec3<f32> {
+    var b = vec3<f32>(0.0);
+    switch mode {
+        // MIX_MULTIPLY
+        case 1u: {
+            b = cb * cs;
+        }
+        // MIX_SCREEN
+        case 2u: {
+            b = screen(cb, cs);
+        }
+        // MIX_OVERLAY
+        case 3u: {
+            b = hard_light(cs, cb);
+        }
+        // MIX_DARKEN
+        case 4u: {
+            b = min(cb, cs);
+        }
+        // MIX_LIGHTEN
+        case 5u: {
+            b = max(cb, cs);
+        }
+        // MIX_COLOR_DODGE
+        case 6u: {
+            b = vec3<f32>(color_dodge(cb.x, cs.x), color_dodge(cb.y, cs.y), color_dodge(cb.z, cs.z));
+        }
+        // MIX_COLOR_BURN
+        case 7u: {
+            b = vec3<f32>(color_burn(cb.x, cs.x), color_burn(cb.y, cs.y), color_burn(cb.z, cs.z));
+        }
+        // MIX_HARD_LIGHT
+        case 8u: {
+            b = hard_light(cb, cs);
+        }
+        // MIX_SOFT_LIGHT
+        case 9u: {
+            b = soft_light(cb, cs);
+        }
+        // MIX_DIFFERENCE
+        case 10u: {
+            b = abs(cb - cs);
+        }
+        // MIX_EXCLUSION
+        case 11u: {
+            b = cb + cs - 2.0 * cb * cs;
+        }
+        // MIX_HUE
+        case 12u: {
+            b = set_lum(set_sat(cs, sat(cb)), lum(cb));
+        }
+        // MIX_SATURATION
+        case 13u: {
+            b = set_lum(set_sat(cb, sat(cs)), lum(cb));
+        }
+        // MIX_COLOR
+        case 14u: {
+            b = set_lum(cs, lum(cb));
+        }
+        // MIX_LUMINOSITY
+        case 15u: {
+            b = set_lum(cb, lum(cs));
+        }
+        default: {
+            b = cs;
+        }
+    }
+    return b;
+}
+
+// Composition modes
+
+let COMPOSE_CLEAR = 0u;
+let COMPOSE_COPY = 1u;
+let COMPOSE_DEST = 2u;
+let COMPOSE_SRC_OVER = 3u;
+let COMPOSE_DEST_OVER = 4u;
+let COMPOSE_SRC_IN = 5u;
+let COMPOSE_DEST_IN = 6u;
+let COMPOSE_SRC_OUT = 7u;
+let COMPOSE_DEST_OUT = 8u;
+let COMPOSE_SRC_ATOP = 9u;
+let COMPOSE_DEST_ATOP = 10u;
+let COMPOSE_XOR = 11u;
+let COMPOSE_PLUS = 12u;
+let COMPOSE_PLUS_LIGHTER = 13u;
+
+// Apply general compositing operation.
+// Inputs are separated colors and alpha, output is premultiplied.
+fn blend_compose(
+    cb: vec3<f32>, 
+    cs: vec3<f32>, 
+    ab: f32, 
+    as_: f32, 
+    mode: u32
+) -> vec4<f32> {
+    var fa = 0.0;
+    var fb = 0.0;
+    switch mode {
+        // COMPOSE_COPY
+        case 1u: {
+            fa = 1.0;
+            fb = 0.0;
+        }
+        // COMPOSE_DEST
+        case 2u: {
+            fa = 0.0;
+            fb = 1.0;
+        }
+        // COMPOSE_SRC_OVER
+        case 3u: {
+            fa = 1.0;
+            fb = 1.0 - as_;
+        }
+        // COMPOSE_DEST_OVER
+        case 4u: {
+            fa = 1.0 - ab;
+            fb = 1.0;
+        }
+        // COMPOSE_SRC_IN
+        case 5u: {
+            fa = ab;
+            fb = 0.0;
+        }
+        // COMPOSE_DEST_IN
+        case 6u: {
+            fa = 0.0;
+            fb = as_;
+        }
+        // COMPOSE_SRC_OUT
+        case 7u: {
+            fa = 1.0 - ab;
+            fb = 0.0;
+        }
+        // COMPOSE_DEST_OUT
+        case 8u: {
+            fa = 0.0;
+            fb = 1.0 - as_;
+        }
+        // COMPOSE_SRC_ATOP
+        case 9u: {
+            fa = ab;
+            fb = 1.0 - as_;
+        }
+        // COMPOSE_DEST_ATOP
+        case 10u: {
+            fa = 1.0 - ab;
+            fb = as_;
+        }
+        // COMPOSE_XOR
+        case 11u: {
+            fa = 1.0 - ab;
+            fb = 1.0 - as_;
+        }
+        // COMPOSE_PLUS
+        case 12u: {
+            fa = 1.0;
+            fb = 1.0;
+        }
+        // COMPOSE_PLUS_LIGHTER
+        case 13u: {
+            return min(vec4<f32>(1.0), vec4<f32>(as_ * cs + ab * cb, as_ + ab));
+        }
+        default: {}
+    }
+    let as_fa = as_ * fa;
+    let ab_fb = ab * fb;
+    let co = as_fa * cs + ab_fb * cb;
+    return vec4<f32>(co, as_fa + ab_fb);
+}
+
+// Apply color mixing and composition. Both input and output colors are
+// premultiplied RGB.
+fn blend_mix_compose(backdrop: vec4<f32>, src: vec4<f32>, mode: u32) -> vec4<f32> {
+    let BLEND_DEFAULT = ((MIX_NORMAL << 8u) | COMPOSE_SRC_OVER);
+    let EPSILON = 1e-15;
+    if (mode & 0x7fffu) == BLEND_DEFAULT {
+        // Both normal+src_over blend and clip case
+        return backdrop * (1.0 - src.a) + src;
+    }
+    // Un-premultiply colors for blending
+    let inv_src_a = 1.0 / (src.a + EPSILON);
+    var cs = src.rgb * inv_src_a;
+    let inv_backdrop_a = 1.0 / (backdrop.a + EPSILON);
+    let cb = backdrop.rgb * inv_backdrop_a;
+    let mix_mode = mode >> 8u;
+    let mixed = blend_mix(cb, cs, mix_mode);
+    cs = mix(cs, mixed, backdrop.a);
+    let compose_mode = mode & 0xffu;
+    if compose_mode == COMPOSE_SRC_OVER {
+        let co = mix(backdrop.rgb, cs, src.a);
+        return vec4<f32>(co, src.a + backdrop.a * (1.0 - src.a));
+    } else {
+        return blend_compose(cb, cs, backdrop.a, src.a, compose_mode);
+    }
+}
diff --git a/piet-wgsl/src/debug.rs b/piet-wgsl/src/debug.rs
new file mode 100644
index 0000000..964c8ee
--- /dev/null
+++ b/piet-wgsl/src/debug.rs
@@ -0,0 +1,5 @@
+#![allow(dead_code)]
+
+pub mod clip;
+pub mod draw;
+pub mod fine;
diff --git a/piet-wgsl/src/debug/clip.rs b/piet-wgsl/src/debug/clip.rs
new file mode 100644
index 0000000..8b2a0e3
--- /dev/null
+++ b/piet-wgsl/src/debug/clip.rs
@@ -0,0 +1,13 @@
+use bytemuck::{Pod, Zeroable};
+
+#[derive(Copy, Clone, Debug, Zeroable, Pod)]
+#[repr(C)]
+pub struct ClipEl {
+    pub parent_ix: u32,
+    pub pad: [u32; 3],
+    pub bbox: [f32; 4],
+}
+
+pub fn parse_clip_els(data: &[u8]) -> Vec<ClipEl> {
+    Vec::from(bytemuck::cast_slice(data))
+}
diff --git a/piet-wgsl/src/debug/draw.rs b/piet-wgsl/src/debug/draw.rs
new file mode 100644
index 0000000..ab56ed5
--- /dev/null
+++ b/piet-wgsl/src/debug/draw.rs
@@ -0,0 +1,14 @@
+use bytemuck::{Pod, Zeroable};
+
+#[derive(Copy, Clone, Debug, Zeroable, Pod)]
+#[repr(C)]
+pub struct DrawMonoid {
+    pub path_ix: u32,
+    pub clip_ix: u32,
+    pub scene_offset: u32,
+    pub info_offset: u32,
+}
+
+pub fn parse_draw_monoids(data: &[u8]) -> Vec<DrawMonoid> {
+    Vec::from(bytemuck::cast_slice(data))
+}
diff --git a/piet-wgsl/src/debug/fine.rs b/piet-wgsl/src/debug/fine.rs
new file mode 100644
index 0000000..d9f05f0
--- /dev/null
+++ b/piet-wgsl/src/debug/fine.rs
@@ -0,0 +1,153 @@
+#[derive(Copy, Clone, Debug)]
+#[repr(C)]
+pub struct Fill {
+    pub tile: u32,
+    pub backdrop: i32,
+}
+
+#[derive(Copy, Clone, Debug)]
+#[repr(C)]
+pub struct Stroke {
+    pub tile: u32,
+    pub half_width: f32,
+}
+
+#[derive(Copy, Clone, Debug)]
+#[repr(C)]
+pub struct Color {
+    abgr: [u8; 4],
+}
+
+#[derive(Copy, Clone, Debug)]
+#[repr(C)]
+pub struct LinGrad {
+    pub index: u32,
+    pub line_x: f32,
+    pub line_y: f32,
+    pub line_c: f32,
+}
+
+#[derive(Copy, Clone, Debug)]
+#[repr(C)]
+pub struct RadGrad {
+    pub index: u32,
+    pub matrix: [f32; 4],
+    pub xlat: [f32; 2],
+    pub c1: [f32; 2],
+    pub ra: f32,
+    pub roff: f32,
+}
+
+#[derive(Copy, Clone, Debug)]
+pub enum Command {
+    Fill(Fill),
+    Stroke(Stroke),
+    Solid,
+    Color(Color),
+    LinGrad(LinGrad),
+    RadGrad(RadGrad),
+    BeginClip,
+    EndClip(u32),
+    End,
+}
+
+const PTCL_INITIAL_ALLOC: usize = 64;
+
+#[derive(Debug)]
+pub struct CommandList {
+    pub tiles: Vec<(u32, u32, Vec<Command>)>,
+}
+
+impl CommandList {
+    pub fn parse(width: usize, height: usize, ptcl: &[u8]) -> Self {
+        let mut tiles = vec![];
+        let width_tiles = width / 16;
+        let height_tiles = height / 16;
+        for y in 0..height_tiles {
+            for x in 0..width_tiles {
+                let tile_ix = y * width_tiles + x;
+                let ix = tile_ix * PTCL_INITIAL_ALLOC;
+                let commands = parse_commands(ptcl, ix);
+                if !commands.is_empty() {
+                    tiles.push((x as u32, y as u32, commands));
+                }
+            }
+        }
+        Self { tiles }
+    }
+}
+
+fn parse_commands(ptcl: &[u8], mut ix: usize) -> Vec<Command> {
+    let mut commands = vec![];
+    let words: &[u32] = bytemuck::cast_slice(ptcl);
+    while ix < words.len() {
+        let tag = words[ix];
+        ix += 1;
+        match tag {
+            0 => break,
+            1 => {
+                commands.push(Command::Fill(Fill {
+                    tile: words[ix],
+                    backdrop: words[ix + 1] as i32,
+                }));
+                ix += 2;
+            }
+            2 => {
+                commands.push(Command::Stroke(Stroke {
+                    tile: words[ix],
+                    half_width: bytemuck::cast(words[ix + 1]),
+                }));
+                ix += 2;
+            }
+            3 => {
+                commands.push(Command::Solid);
+            }
+            5 => {
+                commands.push(Command::Color(Color {
+                    abgr: bytemuck::cast(words[ix]),
+                }));
+                ix += 1;
+            }
+            6 => {
+                commands.push(Command::LinGrad(LinGrad {
+                    index: words[ix],
+                    line_x: bytemuck::cast(words[ix + 1]),
+                    line_y: bytemuck::cast(words[ix + 2]),
+                    line_c: bytemuck::cast(words[ix + 3]),
+                }));
+                ix += 4;
+            }
+            7 => {
+                let matrix = [
+                    bytemuck::cast(words[ix + 1]),
+                    bytemuck::cast(words[ix + 2]),
+                    bytemuck::cast(words[ix + 3]),
+                    bytemuck::cast(words[ix + 4]),
+                ];
+                let xlat = [bytemuck::cast(words[ix + 5]), bytemuck::cast(words[ix + 6])];
+                let c1 = [bytemuck::cast(words[ix + 7]), bytemuck::cast(words[ix + 8])];
+                commands.push(Command::RadGrad(RadGrad {
+                    index: words[ix],
+                    matrix,
+                    xlat,
+                    c1,
+                    ra: bytemuck::cast(words[ix + 9]),
+                    roff: bytemuck::cast(words[ix + 10]),
+                }));
+                ix += 11;
+            }
+            9 => {
+                commands.push(Command::BeginClip);
+            }
+            10 => {
+                commands.push(Command::EndClip(words[ix]));
+                ix += 1;
+            }
+            11 => {
+                ix = words[ix] as usize;
+            }
+            _ => {}
+        }
+    }
+    commands
+}
diff --git a/piet-wgsl/src/engine.rs b/piet-wgsl/src/engine.rs
index 9a3556c..049fc80 100644
--- a/piet-wgsl/src/engine.rs
+++ b/piet-wgsl/src/engine.rs
@@ -341,7 +341,10 @@
 impl BufProxy {
     pub fn new(size: u64) -> Self {
         let id = Id::next();
-        BufProxy { id, size }
+        BufProxy {
+            id,
+            size: size.max(16),
+        }
     }
 }
 
@@ -360,6 +363,20 @@
     pub fn new_image(width: u32, height: u32) -> Self {
         Self::Image(ImageProxy::new(width, height))
     }
+
+    pub fn as_buf(&self) -> Option<&BufProxy> {
+        match self {
+            Self::Buf(proxy) => Some(&proxy),
+            _ => None,
+        }
+    }
+
+    pub fn as_image(&self) -> Option<&ImageProxy> {
+        match self {
+            Self::Image(proxy) => Some(&proxy),
+            _ => None,
+        }
+    }
 }
 
 impl From<BufProxy> for ResourceProxy {
diff --git a/piet-wgsl/src/main.rs b/piet-wgsl/src/main.rs
index 2f1d885..4e10486 100644
--- a/piet-wgsl/src/main.rs
+++ b/piet-wgsl/src/main.rs
@@ -22,6 +22,7 @@
 
 use wgpu::{Device, Limits, Queue};
 
+mod debug;
 mod engine;
 mod pico_svg;
 mod ramp;
diff --git a/piet-wgsl/src/render.rs b/piet-wgsl/src/render.rs
index 4550927..ee13694 100644
--- a/piet-wgsl/src/render.rs
+++ b/piet-wgsl/src/render.rs
@@ -14,6 +14,10 @@
 const CUBIC_SIZE: u64 = 40;
 const DRAWMONOID_SIZE: u64 = 16;
 const MAX_DRAWINFO_SIZE: u64 = 44;
+const CLIP_BIC_SIZE: u64 = 8;
+const CLIP_EL_SIZE: u64 = 32;
+const CLIP_INP_SIZE: u64 = 4;
+const CLIP_BBOX_SIZE: u64 = 16;
 const PATH_SIZE: u64 = 32;
 const DRAW_BBOX_SIZE: u64 = 16;
 const BUMP_SIZE: u64 = 16;
@@ -187,7 +191,7 @@
     let n_path = data.n_path;
     // TODO: calculate for real when we do rectangles
     let n_drawobj = n_path;
-    let n_clip = 0; // TODO: wire up correctly
+    let n_clip = data.n_clip;
     let config = Config {
         width_in_tiles: 64,
         height_in_tiles: 64,
@@ -251,6 +255,7 @@
     );
     let draw_monoid_buf = ResourceProxy::new_buf(n_drawobj as u64 * DRAWMONOID_SIZE);
     let info_buf = ResourceProxy::new_buf(n_drawobj as u64 * MAX_DRAWINFO_SIZE);
+    let clip_inp_buf = ResourceProxy::new_buf(data.n_clip as u64 * CLIP_INP_SIZE);
     recording.dispatch(
         shaders.draw_leaf,
         (drawobj_wgs, 1, 1),
@@ -261,12 +266,45 @@
             path_bbox_buf,
             draw_monoid_buf,
             info_buf,
+            clip_inp_buf,
         ],
     );
+    let clip_el_buf = ResourceProxy::new_buf(data.n_clip as u64 * CLIP_EL_SIZE);
+    let clip_bic_buf =
+        ResourceProxy::new_buf((n_clip / shaders::CLIP_REDUCE_WG) as u64 * CLIP_BIC_SIZE);
+    let clip_wg_reduce = n_clip.saturating_sub(1) / shaders::CLIP_REDUCE_WG;
+    if clip_wg_reduce > 0 {
+        recording.dispatch(
+            shaders.clip_reduce,
+            (clip_wg_reduce, 1, 1),
+            [
+                config_buf,
+                clip_inp_buf,
+                path_bbox_buf,
+                clip_bic_buf,
+                clip_el_buf,
+            ],
+        );
+    }
+    let clip_wg = (n_clip + shaders::CLIP_REDUCE_WG - 1) / shaders::CLIP_REDUCE_WG;
+    let clip_bbox_buf = ResourceProxy::new_buf(n_clip as u64 * CLIP_BBOX_SIZE);
+    if clip_wg > 0 {
+        recording.dispatch(
+            shaders.clip_leaf,
+            (clip_wg, 1, 1),
+            [
+                config_buf,
+                clip_inp_buf,
+                path_bbox_buf,
+                clip_bic_buf,
+                clip_el_buf,
+                draw_monoid_buf,
+                clip_bbox_buf,
+            ],
+        );
+    }
     let draw_bbox_buf = ResourceProxy::new_buf(n_path as u64 * DRAW_BBOX_SIZE);
     let bump_buf = BufProxy::new(BUMP_SIZE);
-    // Not actually used yet.
-    let clip_bbox_buf = ResourceProxy::new_buf(1024);
     let bin_data_buf = ResourceProxy::new_buf(1 << 20);
     let width_in_bins = (config.width_in_tiles + 15) / 16;
     let height_in_bins = (config.height_in_tiles + 15) / 16;
diff --git a/piet-wgsl/src/shaders.rs b/piet-wgsl/src/shaders.rs
index 8e6e89f..0e61710 100644
--- a/piet-wgsl/src/shaders.rs
+++ b/piet-wgsl/src/shaders.rs
@@ -28,6 +28,7 @@
 pub const PATH_BBOX_WG: u32 = 256;
 pub const PATH_COARSE_WG: u32 = 256;
 pub const PATH_DRAWOBJ_WG: u32 = 256;
+pub const CLIP_REDUCE_WG: u32 = 256;
 
 pub struct Shaders {
     pub pathtag_reduce: ShaderId,
@@ -45,6 +46,8 @@
     pub pathseg: ShaderId,
     pub draw_reduce: ShaderId,
     pub draw_leaf: ShaderId,
+    pub clip_reduce: ShaderId,
+    pub clip_leaf: ShaderId,
     pub binning: ShaderId,
     pub tile_alloc: ShaderId,
     pub path_coarse: ShaderId,
@@ -178,6 +181,31 @@
             BindType::BufReadOnly,
             BindType::Buffer,
             BindType::Buffer,
+            BindType::Buffer,
+        ],
+    )?;
+    let clip_reduce = engine.add_shader(
+        device,
+        preprocess::preprocess(&read_shader("clip_reduce"), &empty, &imports).into(),
+        &[
+            BindType::BufReadOnly,
+            BindType::BufReadOnly,
+            BindType::BufReadOnly,
+            BindType::Buffer,
+            BindType::Buffer,
+        ],
+    )?;
+    let clip_leaf = engine.add_shader(
+        device,
+        preprocess::preprocess(&read_shader("clip_leaf"), &empty, &imports).into(),
+        &[
+            BindType::BufReadOnly,
+            BindType::BufReadOnly,
+            BindType::BufReadOnly,
+            BindType::BufReadOnly,
+            BindType::BufReadOnly,
+            BindType::Buffer,
+            BindType::Buffer,
         ],
     )?;
     let binning = engine.add_shader(
@@ -265,6 +293,8 @@
         pathseg,
         draw_reduce,
         draw_leaf,
+        clip_reduce,
+        clip_leaf,
         binning,
         tile_alloc,
         path_coarse,
diff --git a/piet-wgsl/src/test_scene.rs b/piet-wgsl/src/test_scene.rs
index bb6f7ad..861ac54 100644
--- a/piet-wgsl/src/test_scene.rs
+++ b/piet-wgsl/src/test_scene.rs
@@ -16,8 +16,8 @@
 
 use kurbo::BezPath;
 use piet_scene::{
-    Affine, Brush, Color, Fill, GradientStop, LinearGradient, PathElement, Point, Scene,
-    SceneBuilder, Stroke,
+    Affine, BlendMode, Brush, Color, Compose, ExtendMode, Fill, GradientStop, LinearGradient, Mix,
+    PathElement, Point, RadialGradient, Rect, Scene, SceneBuilder, SceneFragment, Stroke,
 };
 
 use crate::pico_svg::PicoSvg;
@@ -47,40 +47,7 @@
             builder.stroke(&style, transform, &brush, None, &path);
         }
         1 => {
-            let path = [
-                PathElement::MoveTo(Point::new(100.0, 100.0)),
-                PathElement::LineTo(Point::new(300.0, 100.0)),
-                PathElement::LineTo(Point::new(300.0, 300.0)),
-                PathElement::LineTo(Point::new(100.0, 300.0)),
-                PathElement::Close,
-            ];
-            let gradient = Brush::LinearGradient(LinearGradient {
-                start: Point::new(100.0, 100.0),
-                end: Point::new(300.0, 300.0),
-                extend: piet_scene::ExtendMode::Pad,
-                stops: vec![
-                    GradientStop {
-                        offset: 0.0,
-                        color: Color::rgb8(255, 0, 0),
-                    },
-                    GradientStop {
-                        offset: 0.5,
-                        color: Color::rgb8(0, 255, 0),
-                    },
-                    GradientStop {
-                        offset: 1.0,
-                        color: Color::rgb8(0, 0, 255),
-                    },
-                ]
-                .into(),
-            });
-            builder.fill(
-                Fill::NonZero,
-                Affine::scale(3.0, 3.0),
-                &gradient,
-                None,
-                &path,
-            );
+            render_blend_grid(&mut builder);
         }
         _ => {
             let xml_str =
@@ -90,6 +57,7 @@
             render_svg(&mut builder, &svg, false);
         }
     }
+    builder.finish();
     scene
 }
 
@@ -151,3 +119,163 @@
         scale: true,
     }
 }
+
+#[allow(unused)]
+pub fn render_blend_grid(sb: &mut SceneBuilder) {
+    const BLEND_MODES: &[Mix] = &[
+        Mix::Normal,
+        Mix::Multiply,
+        Mix::Darken,
+        Mix::Screen,
+        Mix::Lighten,
+        Mix::Overlay,
+        Mix::ColorDodge,
+        Mix::ColorBurn,
+        Mix::HardLight,
+        Mix::SoftLight,
+        Mix::Difference,
+        Mix::Exclusion,
+        Mix::Hue,
+        Mix::Saturation,
+        Mix::Color,
+        Mix::Luminosity,
+    ];
+    for (ix, &blend) in BLEND_MODES.iter().enumerate() {
+        let i = ix % 4;
+        let j = ix / 4;
+        let transform = Affine::translate(i as f32 * 225., j as f32 * 225.);
+        let square = blend_square(blend.into());
+        sb.append(&square, Some(transform));
+    }
+}
+
+#[allow(unused)]
+fn render_blend_square(sb: &mut SceneBuilder, blend: BlendMode, transform: Affine) {
+    // Inspired by https://developer.mozilla.org/en-US/docs/Web/CSS/mix-blend-mode
+    let rect = Rect::from_origin_size(Point::new(0., 0.), 200., 200.);
+    let stops = &[
+        GradientStop {
+            color: Color::rgb8(0, 0, 0),
+            offset: 0.0,
+        },
+        GradientStop {
+            color: Color::rgb8(255, 255, 255),
+            offset: 1.0,
+        },
+    ][..];
+    let linear = Brush::LinearGradient(LinearGradient {
+        start: Point::new(0.0, 0.0),
+        end: Point::new(200.0, 0.0),
+        stops: stops.into(),
+        extend: ExtendMode::Pad,
+    });
+    sb.fill(Fill::NonZero, transform, &linear, None, rect.elements());
+    const GRADIENTS: &[(f32, f32, Color)] = &[
+        (150., 0., Color::rgb8(255, 240, 64)),
+        (175., 100., Color::rgb8(255, 96, 240)),
+        (125., 200., Color::rgb8(64, 192, 255)),
+    ];
+    for (x, y, c) in GRADIENTS {
+        let mut color2 = c.clone();
+        color2.a = 0;
+        let stops = &[
+            GradientStop {
+                color: c.clone(),
+                offset: 0.0,
+            },
+            GradientStop {
+                color: color2,
+                offset: 1.0,
+            },
+        ][..];
+        let rad = Brush::RadialGradient(RadialGradient {
+            center0: Point::new(*x, *y),
+            center1: Point::new(*x, *y),
+            radius0: 0.0,
+            radius1: 100.0,
+            stops: stops.into(),
+            extend: ExtendMode::Pad,
+        });
+        sb.fill(Fill::NonZero, transform, &rad, None, rect.elements());
+    }
+    const COLORS: &[Color] = &[
+        Color::rgb8(255, 0, 0),
+        Color::rgb8(0, 255, 0),
+        Color::rgb8(0, 0, 255),
+    ];
+    sb.push_layer(Mix::Normal.into(), transform, rect.elements());
+    for (i, c) in COLORS.iter().enumerate() {
+        let stops = &[
+            GradientStop {
+                color: Color::rgb8(255, 255, 255),
+                offset: 0.0,
+            },
+            GradientStop {
+                color: c.clone(),
+                offset: 1.0,
+            },
+        ][..];
+        let linear = Brush::LinearGradient(LinearGradient {
+            start: Point::new(0.0, 0.0),
+            end: Point::new(0.0, 200.0),
+            stops: stops.into(),
+            extend: ExtendMode::Pad,
+        });
+        sb.push_layer(blend, transform, rect.elements());
+        // squash the ellipse
+        let a = transform
+            * Affine::translate(100., 100.)
+            * Affine::rotate(std::f32::consts::FRAC_PI_3 * (i * 2 + 1) as f32)
+            * Affine::scale(1.0, 0.357)
+            * Affine::translate(-100., -100.);
+        sb.fill(
+            Fill::NonZero,
+            a,
+            &linear,
+            None,
+            make_ellipse(100., 100., 90., 90.),
+        );
+        sb.pop_layer();
+    }
+    sb.pop_layer();
+}
+
+#[allow(unused)]
+fn blend_square(blend: BlendMode) -> SceneFragment {
+    let mut fragment = SceneFragment::default();
+    let mut sb = SceneBuilder::for_fragment(&mut fragment);
+    render_blend_square(&mut sb, blend, Affine::IDENTITY);
+    sb.finish();
+    fragment
+}
+
+fn make_ellipse(cx: f32, cy: f32, rx: f32, ry: f32) -> impl Iterator<Item = PathElement> + Clone {
+    let a = 0.551915024494;
+    let arx = a * rx;
+    let ary = a * ry;
+    let elements = [
+        PathElement::MoveTo(Point::new(cx + rx, cy)),
+        PathElement::CurveTo(
+            Point::new(cx + rx, cy + ary),
+            Point::new(cx + arx, cy + ry),
+            Point::new(cx, cy + ry),
+        ),
+        PathElement::CurveTo(
+            Point::new(cx - arx, cy + ry),
+            Point::new(cx - rx, cy + ary),
+            Point::new(cx - rx, cy),
+        ),
+        PathElement::CurveTo(
+            Point::new(cx - rx, cy - ary),
+            Point::new(cx - arx, cy - ry),
+            Point::new(cx, cy - ry),
+        ),
+        PathElement::CurveTo(
+            Point::new(cx + arx, cy - ry),
+            Point::new(cx + rx, cy - ary),
+            Point::new(cx + rx, cy),
+        ),
+        PathElement::Close,
+    ];
+    (0..elements.len()).map(move |i| elements[i])
+}