Fix missing blend/clip logic

Always emit BeginClip/EndClip for a solid tile when the blend mode is
not the default.

Also fix missing entry in pipeline layout (affects Vulkan but not Metal).
diff --git a/piet-gpu/shader/build.ninja b/piet-gpu/shader/build.ninja
index 785e016..60e5582 100644
--- a/piet-gpu/shader/build.ninja
+++ b/piet-gpu/shader/build.ninja
@@ -48,7 +48,7 @@
 build gen/backdrop_lg.dxil: dxil gen/backdrop_lg.hlsl
 build gen/backdrop_lg.msl: msl gen/backdrop_lg.spv
 
-build gen/coarse.spv: glsl coarse.comp | drawtag.h bins.h ptcl.h setup.h mem.h
+build gen/coarse.spv: glsl coarse.comp | drawtag.h bins.h ptcl.h blend.h setup.h mem.h
 build gen/coarse.hlsl: hlsl gen/coarse.spv
 build gen/coarse.dxil: dxil gen/coarse.hlsl
 build gen/coarse.msl: msl gen/coarse.spv
diff --git a/piet-gpu/shader/coarse.comp b/piet-gpu/shader/coarse.comp
index 961fc99..aec2936 100644
--- a/piet-gpu/shader/coarse.comp
+++ b/piet-gpu/shader/coarse.comp
@@ -31,6 +31,7 @@
 #include "bins.h"
 #include "tile.h"
 #include "ptcl.h"
+#include "blend.h"
 
 #define LG_N_PART_READ (7 + LG_WG_FACTOR)
 #define N_PART_READ (1 << LG_N_PART_READ)
@@ -278,7 +279,8 @@
                     el_ix = probe;
                 }
             }
-            uint tag = scene[drawtag_start + sh_elements[el_ix]];
+            uint element_ix = sh_elements[el_ix];
+            uint tag = scene[drawtag_start + element_ix];
             uint seq_ix = ix - (el_ix > 0 ? sh_tile_count[el_ix - 1] : 0);
             uint width = sh_tile_width[el_ix];
             uint x = sh_tile_x0[el_ix] + seq_ix % width;
@@ -294,7 +296,14 @@
                 // below will suppress the drawing of inner elements.
                 // For blends, include the tile if
                 // (blend_mode, composition_mode) != (Normal, SrcOver)
-                bool is_blend = false; // TODO
+                bool is_blend = false;
+                if (is_clip) {
+                    uint drawmonoid_base = drawmonoid_start + 4 * element_ix;
+                    uint scene_offset = memory[drawmonoid_base + 2];
+                    uint dd = drawdata_start + (scene_offset >> 2);
+                    uint blend = scene[dd];
+                    is_blend = (blend != BlendComp_default);
+                }
                 include_tile = tile.tile.offset != 0 || (tile.backdrop == 0) == is_clip
                     || (is_clip && is_blend);
             }
diff --git a/piet-gpu/shader/gen/backdrop.dxil b/piet-gpu/shader/gen/backdrop.dxil
index df2be88..0fb9622 100644
--- a/piet-gpu/shader/gen/backdrop.dxil
+++ b/piet-gpu/shader/gen/backdrop.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/backdrop_lg.dxil b/piet-gpu/shader/gen/backdrop_lg.dxil
index 81f9b65..e24a6d3 100644
--- a/piet-gpu/shader/gen/backdrop_lg.dxil
+++ b/piet-gpu/shader/gen/backdrop_lg.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/bbox_clear.dxil b/piet-gpu/shader/gen/bbox_clear.dxil
index 6b3efaf..6655b7f 100644
--- a/piet-gpu/shader/gen/bbox_clear.dxil
+++ b/piet-gpu/shader/gen/bbox_clear.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/binning.dxil b/piet-gpu/shader/gen/binning.dxil
index 4a4f073..3050aa8 100644
--- a/piet-gpu/shader/gen/binning.dxil
+++ b/piet-gpu/shader/gen/binning.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/clip_leaf.dxil b/piet-gpu/shader/gen/clip_leaf.dxil
index b681a65..29a158e 100644
--- a/piet-gpu/shader/gen/clip_leaf.dxil
+++ b/piet-gpu/shader/gen/clip_leaf.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/clip_reduce.dxil b/piet-gpu/shader/gen/clip_reduce.dxil
index 0ccaac9..0dff71b 100644
--- a/piet-gpu/shader/gen/clip_reduce.dxil
+++ b/piet-gpu/shader/gen/clip_reduce.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/coarse.dxil b/piet-gpu/shader/gen/coarse.dxil
index 2339e7d..5770e6f 100644
--- a/piet-gpu/shader/gen/coarse.dxil
+++ b/piet-gpu/shader/gen/coarse.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/coarse.hlsl b/piet-gpu/shader/gen/coarse.hlsl
index db1e496..57b400e 100644
--- a/piet-gpu/shader/gen/coarse.hlsl
+++ b/piet-gpu/shader/gen/coarse.hlsl
@@ -615,7 +615,7 @@
     uint element_ix;
     Alloc param_14;
     uint tile_count;
-    uint _1453;
+    uint _1455;
     float linewidth;
     CmdLinGrad cmd_lin;
     while (true)
@@ -809,16 +809,17 @@
                     el_ix = probe_1;
                 }
             }
-            uint tag_1 = _1222.Load((drawtag_start + sh_elements[el_ix]) * 4 + 0);
+            uint element_ix_1 = sh_elements[el_ix];
+            uint tag_1 = _1222.Load((drawtag_start + element_ix_1) * 4 + 0);
             if (el_ix > 0u)
             {
-                _1453 = sh_tile_count[el_ix - 1u];
+                _1455 = sh_tile_count[el_ix - 1u];
             }
             else
             {
-                _1453 = 0u;
+                _1455 = 0u;
             }
-            uint seq_ix = ix_1 - _1453;
+            uint seq_ix = ix_1 - _1455;
             uint width = sh_tile_width[el_ix];
             uint x = sh_tile_x0[el_ix] + (seq_ix % width);
             uint y = sh_tile_y0[el_ix] + (seq_ix / width);
@@ -827,39 +828,47 @@
             {
                 uint param_21 = el_ix;
                 bool param_22 = mem_ok;
-                TileRef _1505 = { sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) };
+                TileRef _1507 = { sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) };
                 Alloc param_23 = read_tile_alloc(param_21, param_22);
-                TileRef param_24 = _1505;
+                TileRef param_24 = _1507;
                 Tile tile = Tile_read(param_23, param_24);
                 bool is_clip = (tag_1 & 1u) != 0u;
                 bool is_blend = false;
-                bool _1516 = tile.tile.offset != 0u;
-                bool _1525;
-                if (!_1516)
+                if (is_clip)
                 {
-                    _1525 = (tile.backdrop == 0) == is_clip;
+                    uint drawmonoid_base_1 = drawmonoid_start + (4u * element_ix_1);
+                    uint scene_offset = _242.Load((drawmonoid_base_1 + 2u) * 4 + 8);
+                    uint dd = drawdata_start + (scene_offset >> uint(2));
+                    uint blend = _1222.Load(dd * 4 + 0);
+                    is_blend = blend != 3u;
+                }
+                bool _1542 = tile.tile.offset != 0u;
+                bool _1551;
+                if (!_1542)
+                {
+                    _1551 = (tile.backdrop == 0) == is_clip;
                 }
                 else
                 {
-                    _1525 = _1516;
+                    _1551 = _1542;
                 }
-                bool _1532;
-                if (!_1525)
+                bool _1558;
+                if (!_1551)
                 {
-                    _1532 = is_clip && is_blend;
+                    _1558 = is_clip && is_blend;
                 }
                 else
                 {
-                    _1532 = _1525;
+                    _1558 = _1551;
                 }
-                include_tile = _1532;
+                include_tile = _1558;
             }
             if (include_tile)
             {
                 uint el_slice = el_ix / 32u;
                 uint el_mask = 1u << (el_ix & 31u);
-                uint _1552;
-                InterlockedOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask, _1552);
+                uint _1578;
+                InterlockedOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask, _1578);
             }
         }
         GroupMemoryBarrierWithGroupSync();
@@ -881,21 +890,21 @@
                 }
             }
             uint element_ref_ix = (slice_ix * 32u) + uint(int(firstbitlow(bitmap)));
-            uint element_ix_1 = sh_elements[element_ref_ix];
+            uint element_ix_2 = sh_elements[element_ref_ix];
             bitmap &= (bitmap - 1u);
-            uint drawtag = _1222.Load((drawtag_start + element_ix_1) * 4 + 0);
+            uint drawtag = _1222.Load((drawtag_start + element_ix_2) * 4 + 0);
             if (clip_zero_depth == 0u)
             {
                 uint param_25 = element_ref_ix;
                 bool param_26 = mem_ok;
-                TileRef _1629 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
+                TileRef _1655 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
                 Alloc param_27 = read_tile_alloc(param_25, param_26);
-                TileRef param_28 = _1629;
+                TileRef param_28 = _1655;
                 Tile tile_1 = Tile_read(param_27, param_28);
-                uint drawmonoid_base_1 = drawmonoid_start + (4u * element_ix_1);
-                uint scene_offset = _242.Load((drawmonoid_base_1 + 2u) * 4 + 8);
-                uint info_offset = _242.Load((drawmonoid_base_1 + 3u) * 4 + 8);
-                uint dd = drawdata_start + (scene_offset >> uint(2));
+                uint drawmonoid_base_2 = drawmonoid_start + (4u * element_ix_2);
+                uint scene_offset_1 = _242.Load((drawmonoid_base_2 + 2u) * 4 + 8);
+                uint info_offset = _242.Load((drawmonoid_base_2 + 3u) * 4 + 8);
+                uint dd_1 = drawdata_start + (scene_offset_1 >> uint(2));
                 uint di = drawinfo_start + (info_offset >> uint(2));
                 switch (drawtag)
                 {
@@ -905,11 +914,11 @@
                         Alloc param_29 = cmd_alloc;
                         CmdRef param_30 = cmd_ref;
                         uint param_31 = cmd_limit;
-                        bool _1676 = alloc_cmd(param_29, param_30, param_31);
+                        bool _1702 = alloc_cmd(param_29, param_30, param_31);
                         cmd_alloc = param_29;
                         cmd_ref = param_30;
                         cmd_limit = param_31;
-                        if (!_1676)
+                        if (!_1702)
                         {
                             break;
                         }
@@ -919,11 +928,11 @@
                         float param_35 = linewidth;
                         write_fill(param_32, param_33, param_34, param_35);
                         cmd_ref = param_33;
-                        uint rgba = _1222.Load(dd * 4 + 0);
-                        CmdColor _1699 = { rgba };
+                        uint rgba = _1222.Load(dd_1 * 4 + 0);
+                        CmdColor _1725 = { rgba };
                         Alloc param_36 = cmd_alloc;
                         CmdRef param_37 = cmd_ref;
-                        CmdColor param_38 = _1699;
+                        CmdColor param_38 = _1725;
                         Cmd_Color_write(param_36, param_37, param_38);
                         cmd_ref.offset += 8u;
                         break;
@@ -933,11 +942,11 @@
                         Alloc param_39 = cmd_alloc;
                         CmdRef param_40 = cmd_ref;
                         uint param_41 = cmd_limit;
-                        bool _1717 = alloc_cmd(param_39, param_40, param_41);
+                        bool _1743 = alloc_cmd(param_39, param_40, param_41);
                         cmd_alloc = param_39;
                         cmd_ref = param_40;
                         cmd_limit = param_41;
-                        if (!_1717)
+                        if (!_1743)
                         {
                             break;
                         }
@@ -948,7 +957,7 @@
                         float param_45 = linewidth;
                         write_fill(param_42, param_43, param_44, param_45);
                         cmd_ref = param_43;
-                        cmd_lin.index = _1222.Load(dd * 4 + 0);
+                        cmd_lin.index = _1222.Load(dd_1 * 4 + 0);
                         cmd_lin.line_x = asfloat(_242.Load((di + 1u) * 4 + 8));
                         cmd_lin.line_y = asfloat(_242.Load((di + 2u) * 4 + 8));
                         cmd_lin.line_c = asfloat(_242.Load((di + 3u) * 4 + 8));
@@ -965,11 +974,11 @@
                         Alloc param_49 = cmd_alloc;
                         CmdRef param_50 = cmd_ref;
                         uint param_51 = cmd_limit;
-                        bool _1785 = alloc_cmd(param_49, param_50, param_51);
+                        bool _1811 = alloc_cmd(param_49, param_50, param_51);
                         cmd_alloc = param_49;
                         cmd_ref = param_50;
                         cmd_limit = param_51;
-                        if (!_1785)
+                        if (!_1811)
                         {
                             break;
                         }
@@ -979,30 +988,30 @@
                         float param_55 = linewidth;
                         write_fill(param_52, param_53, param_54, param_55);
                         cmd_ref = param_53;
-                        uint index = _1222.Load(dd * 4 + 0);
-                        uint raw1 = _1222.Load((dd + 1u) * 4 + 0);
+                        uint index = _1222.Load(dd_1 * 4 + 0);
+                        uint raw1 = _1222.Load((dd_1 + 1u) * 4 + 0);
                         int2 offset_1 = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16);
-                        CmdImage _1824 = { index, offset_1 };
+                        CmdImage _1850 = { index, offset_1 };
                         Alloc param_56 = cmd_alloc;
                         CmdRef param_57 = cmd_ref;
-                        CmdImage param_58 = _1824;
+                        CmdImage param_58 = _1850;
                         Cmd_Image_write(param_56, param_57, param_58);
                         cmd_ref.offset += 12u;
                         break;
                     }
                     case 5u:
                     {
-                        bool _1838 = tile_1.tile.offset == 0u;
-                        bool _1844;
-                        if (_1838)
+                        bool _1864 = tile_1.tile.offset == 0u;
+                        bool _1870;
+                        if (_1864)
                         {
-                            _1844 = tile_1.backdrop == 0;
+                            _1870 = tile_1.backdrop == 0;
                         }
                         else
                         {
-                            _1844 = _1838;
+                            _1870 = _1864;
                         }
-                        if (_1844)
+                        if (_1870)
                         {
                             clip_zero_depth = clip_depth + 1u;
                         }
@@ -1011,11 +1020,11 @@
                             Alloc param_59 = cmd_alloc;
                             CmdRef param_60 = cmd_ref;
                             uint param_61 = cmd_limit;
-                            bool _1856 = alloc_cmd(param_59, param_60, param_61);
+                            bool _1882 = alloc_cmd(param_59, param_60, param_61);
                             cmd_alloc = param_59;
                             cmd_ref = param_60;
                             cmd_limit = param_61;
-                            if (!_1856)
+                            if (!_1882)
                             {
                                 break;
                             }
@@ -1033,11 +1042,11 @@
                         Alloc param_64 = cmd_alloc;
                         CmdRef param_65 = cmd_ref;
                         uint param_66 = cmd_limit;
-                        bool _1884 = alloc_cmd(param_64, param_65, param_66);
+                        bool _1910 = alloc_cmd(param_64, param_65, param_66);
                         cmd_alloc = param_64;
                         cmd_ref = param_65;
                         cmd_limit = param_66;
-                        if (!_1884)
+                        if (!_1910)
                         {
                             break;
                         }
@@ -1047,11 +1056,11 @@
                         float param_70 = -1.0f;
                         write_fill(param_67, param_68, param_69, param_70);
                         cmd_ref = param_68;
-                        uint blend = _1222.Load(dd * 4 + 0);
-                        CmdEndClip _1907 = { blend };
+                        uint blend_1 = _1222.Load(dd_1 * 4 + 0);
+                        CmdEndClip _1933 = { blend_1 };
                         Alloc param_71 = cmd_alloc;
                         CmdRef param_72 = cmd_ref;
-                        CmdEndClip param_73 = _1907;
+                        CmdEndClip param_73 = _1933;
                         Cmd_EndClip_write(param_71, param_72, param_73);
                         cmd_ref.offset += 8u;
                         break;
@@ -1086,17 +1095,17 @@
             break;
         }
     }
-    bool _1954 = (bin_tile_x + tile_x) < _854.Load(8);
-    bool _1963;
-    if (_1954)
+    bool _1980 = (bin_tile_x + tile_x) < _854.Load(8);
+    bool _1989;
+    if (_1980)
     {
-        _1963 = (bin_tile_y + tile_y) < _854.Load(12);
+        _1989 = (bin_tile_y + tile_y) < _854.Load(12);
     }
     else
     {
-        _1963 = _1954;
+        _1989 = _1980;
     }
-    if (_1963)
+    if (_1989)
     {
         Alloc param_74 = cmd_alloc;
         CmdRef param_75 = cmd_ref;
diff --git a/piet-gpu/shader/gen/coarse.msl b/piet-gpu/shader/gen/coarse.msl
index 96f1026..29174f3 100644
--- a/piet-gpu/shader/gen/coarse.msl
+++ b/piet-gpu/shader/gen/coarse.msl
@@ -646,7 +646,7 @@
     uint element_ix;
     Alloc param_14;
     uint tile_count;
-    uint _1453;
+    uint _1455;
     float linewidth;
     CmdLinGrad cmd_lin;
     while (true)
@@ -832,16 +832,17 @@
                     el_ix = probe_1;
                 }
             }
-            uint tag_1 = _1222.scene[drawtag_start + sh_elements[el_ix]];
+            uint element_ix_1 = sh_elements[el_ix];
+            uint tag_1 = _1222.scene[drawtag_start + element_ix_1];
             if (el_ix > 0u)
             {
-                _1453 = sh_tile_count[el_ix - 1u];
+                _1455 = sh_tile_count[el_ix - 1u];
             }
             else
             {
-                _1453 = 0u;
+                _1455 = 0u;
             }
-            uint seq_ix = ix_1 - _1453;
+            uint seq_ix = ix_1 - _1455;
             uint width = sh_tile_width[el_ix];
             uint x = sh_tile_x0[el_ix] + (seq_ix % width);
             uint y = sh_tile_y0[el_ix] + (seq_ix / width);
@@ -855,32 +856,40 @@
                 Tile tile = Tile_read(param_23, param_24, v_242, v_242BufferSize);
                 bool is_clip = (tag_1 & 1u) != 0u;
                 bool is_blend = false;
-                bool _1516 = tile.tile.offset != 0u;
-                bool _1525;
-                if (!_1516)
+                if (is_clip)
                 {
-                    _1525 = (tile.backdrop == 0) == is_clip;
+                    uint drawmonoid_base_1 = drawmonoid_start + (4u * element_ix_1);
+                    uint scene_offset = v_242.memory[drawmonoid_base_1 + 2u];
+                    uint dd = drawdata_start + (scene_offset >> uint(2));
+                    uint blend = _1222.scene[dd];
+                    is_blend = blend != 3u;
+                }
+                bool _1542 = tile.tile.offset != 0u;
+                bool _1551;
+                if (!_1542)
+                {
+                    _1551 = (tile.backdrop == 0) == is_clip;
                 }
                 else
                 {
-                    _1525 = _1516;
+                    _1551 = _1542;
                 }
-                bool _1532;
-                if (!_1525)
+                bool _1558;
+                if (!_1551)
                 {
-                    _1532 = is_clip && is_blend;
+                    _1558 = is_clip && is_blend;
                 }
                 else
                 {
-                    _1532 = _1525;
+                    _1558 = _1551;
                 }
-                include_tile = _1532;
+                include_tile = _1558;
             }
             if (include_tile)
             {
                 uint el_slice = el_ix / 32u;
                 uint el_mask = 1u << (el_ix & 31u);
-                uint _1552 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&sh_bitmaps[el_slice][(y * 16u) + x], el_mask, memory_order_relaxed);
+                uint _1578 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&sh_bitmaps[el_slice][(y * 16u) + x], el_mask, memory_order_relaxed);
             }
         }
         threadgroup_barrier(mem_flags::mem_threadgroup);
@@ -902,9 +911,9 @@
                 }
             }
             uint element_ref_ix = (slice_ix * 32u) + uint(int(spvFindLSB(bitmap)));
-            uint element_ix_1 = sh_elements[element_ref_ix];
+            uint element_ix_2 = sh_elements[element_ref_ix];
             bitmap &= (bitmap - 1u);
-            uint drawtag = _1222.scene[drawtag_start + element_ix_1];
+            uint drawtag = _1222.scene[drawtag_start + element_ix_2];
             if (clip_zero_depth == 0u)
             {
                 uint param_25 = element_ref_ix;
@@ -912,10 +921,10 @@
                 Alloc param_27 = read_tile_alloc(param_25, param_26, v_242, v_242BufferSize);
                 TileRef param_28 = TileRef{ sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
                 Tile tile_1 = Tile_read(param_27, param_28, v_242, v_242BufferSize);
-                uint drawmonoid_base_1 = drawmonoid_start + (4u * element_ix_1);
-                uint scene_offset = v_242.memory[drawmonoid_base_1 + 2u];
-                uint info_offset = v_242.memory[drawmonoid_base_1 + 3u];
-                uint dd = drawdata_start + (scene_offset >> uint(2));
+                uint drawmonoid_base_2 = drawmonoid_start + (4u * element_ix_2);
+                uint scene_offset_1 = v_242.memory[drawmonoid_base_2 + 2u];
+                uint info_offset = v_242.memory[drawmonoid_base_2 + 3u];
+                uint dd_1 = drawdata_start + (scene_offset_1 >> uint(2));
                 uint di = drawinfo_start + (info_offset >> uint(2));
                 switch (drawtag)
                 {
@@ -925,11 +934,11 @@
                         Alloc param_29 = cmd_alloc;
                         CmdRef param_30 = cmd_ref;
                         uint param_31 = cmd_limit;
-                        bool _1676 = alloc_cmd(param_29, param_30, param_31, v_242, v_242BufferSize);
+                        bool _1702 = alloc_cmd(param_29, param_30, param_31, v_242, v_242BufferSize);
                         cmd_alloc = param_29;
                         cmd_ref = param_30;
                         cmd_limit = param_31;
-                        if (!_1676)
+                        if (!_1702)
                         {
                             break;
                         }
@@ -939,7 +948,7 @@
                         float param_35 = linewidth;
                         write_fill(param_32, param_33, param_34, param_35, v_242, v_242BufferSize);
                         cmd_ref = param_33;
-                        uint rgba = _1222.scene[dd];
+                        uint rgba = _1222.scene[dd_1];
                         Alloc param_36 = cmd_alloc;
                         CmdRef param_37 = cmd_ref;
                         CmdColor param_38 = CmdColor{ rgba };
@@ -952,11 +961,11 @@
                         Alloc param_39 = cmd_alloc;
                         CmdRef param_40 = cmd_ref;
                         uint param_41 = cmd_limit;
-                        bool _1717 = alloc_cmd(param_39, param_40, param_41, v_242, v_242BufferSize);
+                        bool _1743 = alloc_cmd(param_39, param_40, param_41, v_242, v_242BufferSize);
                         cmd_alloc = param_39;
                         cmd_ref = param_40;
                         cmd_limit = param_41;
-                        if (!_1717)
+                        if (!_1743)
                         {
                             break;
                         }
@@ -967,7 +976,7 @@
                         float param_45 = linewidth;
                         write_fill(param_42, param_43, param_44, param_45, v_242, v_242BufferSize);
                         cmd_ref = param_43;
-                        cmd_lin.index = _1222.scene[dd];
+                        cmd_lin.index = _1222.scene[dd_1];
                         cmd_lin.line_x = as_type<float>(v_242.memory[di + 1u]);
                         cmd_lin.line_y = as_type<float>(v_242.memory[di + 2u]);
                         cmd_lin.line_c = as_type<float>(v_242.memory[di + 3u]);
@@ -984,11 +993,11 @@
                         Alloc param_49 = cmd_alloc;
                         CmdRef param_50 = cmd_ref;
                         uint param_51 = cmd_limit;
-                        bool _1785 = alloc_cmd(param_49, param_50, param_51, v_242, v_242BufferSize);
+                        bool _1811 = alloc_cmd(param_49, param_50, param_51, v_242, v_242BufferSize);
                         cmd_alloc = param_49;
                         cmd_ref = param_50;
                         cmd_limit = param_51;
-                        if (!_1785)
+                        if (!_1811)
                         {
                             break;
                         }
@@ -998,8 +1007,8 @@
                         float param_55 = linewidth;
                         write_fill(param_52, param_53, param_54, param_55, v_242, v_242BufferSize);
                         cmd_ref = param_53;
-                        uint index = _1222.scene[dd];
-                        uint raw1 = _1222.scene[dd + 1u];
+                        uint index = _1222.scene[dd_1];
+                        uint raw1 = _1222.scene[dd_1 + 1u];
                         int2 offset_1 = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16);
                         Alloc param_56 = cmd_alloc;
                         CmdRef param_57 = cmd_ref;
@@ -1010,17 +1019,17 @@
                     }
                     case 5u:
                     {
-                        bool _1838 = tile_1.tile.offset == 0u;
-                        bool _1844;
-                        if (_1838)
+                        bool _1864 = tile_1.tile.offset == 0u;
+                        bool _1870;
+                        if (_1864)
                         {
-                            _1844 = tile_1.backdrop == 0;
+                            _1870 = tile_1.backdrop == 0;
                         }
                         else
                         {
-                            _1844 = _1838;
+                            _1870 = _1864;
                         }
-                        if (_1844)
+                        if (_1870)
                         {
                             clip_zero_depth = clip_depth + 1u;
                         }
@@ -1029,11 +1038,11 @@
                             Alloc param_59 = cmd_alloc;
                             CmdRef param_60 = cmd_ref;
                             uint param_61 = cmd_limit;
-                            bool _1856 = alloc_cmd(param_59, param_60, param_61, v_242, v_242BufferSize);
+                            bool _1882 = alloc_cmd(param_59, param_60, param_61, v_242, v_242BufferSize);
                             cmd_alloc = param_59;
                             cmd_ref = param_60;
                             cmd_limit = param_61;
-                            if (!_1856)
+                            if (!_1882)
                             {
                                 break;
                             }
@@ -1051,11 +1060,11 @@
                         Alloc param_64 = cmd_alloc;
                         CmdRef param_65 = cmd_ref;
                         uint param_66 = cmd_limit;
-                        bool _1884 = alloc_cmd(param_64, param_65, param_66, v_242, v_242BufferSize);
+                        bool _1910 = alloc_cmd(param_64, param_65, param_66, v_242, v_242BufferSize);
                         cmd_alloc = param_64;
                         cmd_ref = param_65;
                         cmd_limit = param_66;
-                        if (!_1884)
+                        if (!_1910)
                         {
                             break;
                         }
@@ -1065,10 +1074,10 @@
                         float param_70 = -1.0;
                         write_fill(param_67, param_68, param_69, param_70, v_242, v_242BufferSize);
                         cmd_ref = param_68;
-                        uint blend = _1222.scene[dd];
+                        uint blend_1 = _1222.scene[dd_1];
                         Alloc param_71 = cmd_alloc;
                         CmdRef param_72 = cmd_ref;
-                        CmdEndClip param_73 = CmdEndClip{ blend };
+                        CmdEndClip param_73 = CmdEndClip{ blend_1 };
                         Cmd_EndClip_write(param_71, param_72, param_73, v_242, v_242BufferSize);
                         cmd_ref.offset += 8u;
                         break;
@@ -1103,17 +1112,17 @@
             break;
         }
     }
-    bool _1954 = (bin_tile_x + tile_x) < _854.conf.width_in_tiles;
-    bool _1963;
-    if (_1954)
+    bool _1980 = (bin_tile_x + tile_x) < _854.conf.width_in_tiles;
+    bool _1989;
+    if (_1980)
     {
-        _1963 = (bin_tile_y + tile_y) < _854.conf.height_in_tiles;
+        _1989 = (bin_tile_y + tile_y) < _854.conf.height_in_tiles;
     }
     else
     {
-        _1963 = _1954;
+        _1989 = _1980;
     }
-    if (_1963)
+    if (_1989)
     {
         Alloc param_74 = cmd_alloc;
         CmdRef param_75 = cmd_ref;
diff --git a/piet-gpu/shader/gen/coarse.spv b/piet-gpu/shader/gen/coarse.spv
index bc27230..d246506 100644
--- a/piet-gpu/shader/gen/coarse.spv
+++ b/piet-gpu/shader/gen/coarse.spv
Binary files differ
diff --git a/piet-gpu/shader/gen/draw_leaf.dxil b/piet-gpu/shader/gen/draw_leaf.dxil
index 6ce3b8c..77396c1 100644
--- a/piet-gpu/shader/gen/draw_leaf.dxil
+++ b/piet-gpu/shader/gen/draw_leaf.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/draw_reduce.dxil b/piet-gpu/shader/gen/draw_reduce.dxil
index bfafc5f..4df0ec5 100644
--- a/piet-gpu/shader/gen/draw_reduce.dxil
+++ b/piet-gpu/shader/gen/draw_reduce.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/draw_root.dxil b/piet-gpu/shader/gen/draw_root.dxil
index 873fa29..4ea23f7 100644
--- a/piet-gpu/shader/gen/draw_root.dxil
+++ b/piet-gpu/shader/gen/draw_root.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/kernel4.dxil b/piet-gpu/shader/gen/kernel4.dxil
index bc0b8e8..c0c27c9 100644
--- a/piet-gpu/shader/gen/kernel4.dxil
+++ b/piet-gpu/shader/gen/kernel4.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/kernel4_gray.dxil b/piet-gpu/shader/gen/kernel4_gray.dxil
index 7e39c73..18c4b7e 100644
--- a/piet-gpu/shader/gen/kernel4_gray.dxil
+++ b/piet-gpu/shader/gen/kernel4_gray.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/path_coarse.dxil b/piet-gpu/shader/gen/path_coarse.dxil
index b6c9398..9fd593c 100644
--- a/piet-gpu/shader/gen/path_coarse.dxil
+++ b/piet-gpu/shader/gen/path_coarse.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/pathseg.dxil b/piet-gpu/shader/gen/pathseg.dxil
index 7ce4684..6130712 100644
--- a/piet-gpu/shader/gen/pathseg.dxil
+++ b/piet-gpu/shader/gen/pathseg.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/pathtag_reduce.dxil b/piet-gpu/shader/gen/pathtag_reduce.dxil
index ff544b8..4c2bd23 100644
--- a/piet-gpu/shader/gen/pathtag_reduce.dxil
+++ b/piet-gpu/shader/gen/pathtag_reduce.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/pathtag_root.dxil b/piet-gpu/shader/gen/pathtag_root.dxil
index 48584bd..77f12e6 100644
--- a/piet-gpu/shader/gen/pathtag_root.dxil
+++ b/piet-gpu/shader/gen/pathtag_root.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/tile_alloc.dxil b/piet-gpu/shader/gen/tile_alloc.dxil
index d4d0b49..7759910 100644
--- a/piet-gpu/shader/gen/tile_alloc.dxil
+++ b/piet-gpu/shader/gen/tile_alloc.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/tile_alloc.hlsl b/piet-gpu/shader/gen/tile_alloc.hlsl
index 9e370ed..73e0a8e 100644
--- a/piet-gpu/shader/gen/tile_alloc.hlsl
+++ b/piet-gpu/shader/gen/tile_alloc.hlsl
@@ -185,10 +185,6 @@
     Path path;
     path.bbox = uint4(uint(x0), uint(y0), uint(x1), uint(y1));
     uint tile_count = uint((x1 - x0) * (y1 - y0));
-    if (drawtag == 37u)
-    {
-        tile_count = 0u;
-    }
     sh_tile_count[th_ix] = tile_count;
     uint total_tile_count = tile_count;
     for (uint i = 0u; i < 8u; i++)
@@ -204,46 +200,46 @@
     if (th_ix == 255u)
     {
         uint param_1 = total_tile_count * 8u;
-        MallocResult _396 = malloc(param_1);
-        sh_tile_alloc = _396;
+        MallocResult _392 = malloc(param_1);
+        sh_tile_alloc = _392;
     }
     GroupMemoryBarrierWithGroupSync();
     MallocResult alloc_start = sh_tile_alloc;
-    bool _407;
+    bool _403;
     if (!alloc_start.failed)
     {
-        _407 = _70.Load(4) != 0u;
+        _403 = _70.Load(4) != 0u;
     }
     else
     {
-        _407 = alloc_start.failed;
+        _403 = alloc_start.failed;
     }
-    if (_407)
+    if (_403)
     {
         return;
     }
     if (element_ix < _181.Load(0))
     {
-        uint _420;
+        uint _416;
         if (th_ix > 0u)
         {
-            _420 = sh_tile_count[th_ix - 1u];
+            _416 = sh_tile_count[th_ix - 1u];
         }
         else
         {
-            _420 = 0u;
+            _416 = 0u;
         }
-        uint tile_subix = _420;
+        uint tile_subix = _416;
         Alloc param_2 = alloc_start.alloc;
         uint param_3 = 8u * tile_subix;
         uint param_4 = 8u * tile_count;
         Alloc tiles_alloc = slice_mem(param_2, param_3, param_4);
-        TileRef _442 = { tiles_alloc.offset };
-        path.tiles = _442;
-        Alloc _448;
-        _448.offset = _181.Load(16);
+        TileRef _438 = { tiles_alloc.offset };
+        path.tiles = _438;
+        Alloc _444;
+        _444.offset = _181.Load(16);
         Alloc param_5;
-        param_5.offset = _448.offset;
+        param_5.offset = _444.offset;
         PathRef param_6 = path_ref;
         Path param_7 = path;
         Path_write(param_5, param_6, param_7);
diff --git a/piet-gpu/shader/gen/tile_alloc.msl b/piet-gpu/shader/gen/tile_alloc.msl
index e6f486b..961be50 100644
--- a/piet-gpu/shader/gen/tile_alloc.msl
+++ b/piet-gpu/shader/gen/tile_alloc.msl
@@ -204,10 +204,6 @@
     Path path;
     path.bbox = uint4(uint(x0), uint(y0), uint(x1), uint(y1));
     uint tile_count = uint((x1 - x0) * (y1 - y0));
-    if (drawtag == 37u)
-    {
-        tile_count = 0u;
-    }
     sh_tile_count[th_ix] = tile_count;
     uint total_tile_count = tile_count;
     for (uint i = 0u; i < 8u; i++)
@@ -223,36 +219,36 @@
     if (th_ix == 255u)
     {
         uint param_1 = total_tile_count * 8u;
-        MallocResult _396 = malloc(param_1, v_70, v_70BufferSize);
-        sh_tile_alloc = _396;
+        MallocResult _392 = malloc(param_1, v_70, v_70BufferSize);
+        sh_tile_alloc = _392;
     }
     threadgroup_barrier(mem_flags::mem_threadgroup);
     MallocResult alloc_start = sh_tile_alloc;
-    bool _407;
+    bool _403;
     if (!alloc_start.failed)
     {
-        _407 = v_70.mem_error != 0u;
+        _403 = v_70.mem_error != 0u;
     }
     else
     {
-        _407 = alloc_start.failed;
+        _403 = alloc_start.failed;
     }
-    if (_407)
+    if (_403)
     {
         return;
     }
     if (element_ix < v_181.conf.n_elements)
     {
-        uint _420;
+        uint _416;
         if (th_ix > 0u)
         {
-            _420 = sh_tile_count[th_ix - 1u];
+            _416 = sh_tile_count[th_ix - 1u];
         }
         else
         {
-            _420 = 0u;
+            _416 = 0u;
         }
-        uint tile_subix = _420;
+        uint tile_subix = _416;
         Alloc param_2 = alloc_start.alloc;
         uint param_3 = 8u * tile_subix;
         uint param_4 = 8u * tile_count;
diff --git a/piet-gpu/shader/gen/tile_alloc.spv b/piet-gpu/shader/gen/tile_alloc.spv
index 3ae1073..dbc02a8 100644
--- a/piet-gpu/shader/gen/tile_alloc.spv
+++ b/piet-gpu/shader/gen/tile_alloc.spv
Binary files differ
diff --git a/piet-gpu/shader/gen/transform_leaf.dxil b/piet-gpu/shader/gen/transform_leaf.dxil
index 0c1e376..f9f31e6 100644
--- a/piet-gpu/shader/gen/transform_leaf.dxil
+++ b/piet-gpu/shader/gen/transform_leaf.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/transform_reduce.dxil b/piet-gpu/shader/gen/transform_reduce.dxil
index fc3a311..978dd98 100644
--- a/piet-gpu/shader/gen/transform_reduce.dxil
+++ b/piet-gpu/shader/gen/transform_reduce.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/transform_root.dxil b/piet-gpu/shader/gen/transform_root.dxil
index a33ff7f..5b4f059 100644
--- a/piet-gpu/shader/gen/transform_root.dxil
+++ b/piet-gpu/shader/gen/transform_root.dxil
Binary files differ
diff --git a/piet-gpu/src/blend.rs b/piet-gpu/src/blend.rs
index 6f1e791..aacf597 100644
--- a/piet-gpu/src/blend.rs
+++ b/piet-gpu/src/blend.rs
@@ -63,7 +63,10 @@
 
 impl Blend {
     pub fn new(mode: BlendMode, composition_mode: CompositionMode) -> Self {
-        Self { mode, composition_mode }
+        Self {
+            mode,
+            composition_mode,
+        }
     }
 
     pub(crate) fn pack(&self) -> u32 {
diff --git a/piet-gpu/src/encoder.rs b/piet-gpu/src/encoder.rs
index c25c260..62c59c4 100644
--- a/piet-gpu/src/encoder.rs
+++ b/piet-gpu/src/encoder.rs
@@ -134,9 +134,7 @@
     /// This should be encoded after a path.
     pub fn fill_color(&mut self, rgba_color: u32) {
         self.drawtag_stream.push(DRAWTAG_FILLCOLOR);
-        let element = FillColor {
-            rgba_color,
-        };
+        let element = FillColor { rgba_color };
         self.drawdata_stream.extend(bytemuck::bytes_of(&element));
     }
 
@@ -145,11 +143,7 @@
     /// This should be encoded after a path.
     pub fn fill_lin_gradient(&mut self, index: u32, p0: [f32; 2], p1: [f32; 2]) {
         self.drawtag_stream.push(DRAWTAG_FILLLINGRADIENT);
-        let element = FillLinGradient {
-            index,
-            p0,
-            p1,
-        };
+        let element = FillLinGradient { index, p0, p1 };
         self.drawdata_stream.extend(bytemuck::bytes_of(&element));
     }
 
@@ -334,9 +328,7 @@
     /// This should be encoded after a path.
     pub(crate) fn fill_color(&mut self, rgba_color: u32) {
         self.drawtag_stream.push(DRAWTAG_FILLCOLOR);
-        let element = FillColor {
-            rgba_color,
-        };
+        let element = FillColor { rgba_color };
         self.drawdata_stream.extend(bytemuck::bytes_of(&element));
     }
 
diff --git a/piet-gpu/src/lib.rs b/piet-gpu/src/lib.rs
index 5c5c78a..e12f824 100644
--- a/piet-gpu/src/lib.rs
+++ b/piet-gpu/src/lib.rs
@@ -195,7 +195,7 @@
         let element_stage = ElementStage::new(session, &element_code);
         let element_bindings = scene_bufs
             .iter()
-            .map(|scene_buf|
+            .map(|scene_buf| {
                 element_stage.bind(
                     session,
                     &element_code,
@@ -203,15 +203,21 @@
                     scene_buf,
                     &memory_buf_dev,
                 )
-            )
+            })
             .collect();
 
         let clip_code = ClipCode::new(session);
         let clip_binding = ClipBinding::new(session, &clip_code, &config_buf, &memory_buf_dev);
 
         let tile_alloc_code = include_shader!(session, "../shader/gen/tile_alloc");
-        let tile_pipeline = session
-            .create_compute_pipeline(tile_alloc_code, &[BindType::Buffer, BindType::BufReadOnly])?;
+        let tile_pipeline = session.create_compute_pipeline(
+            tile_alloc_code,
+            &[
+                BindType::Buffer,
+                BindType::BufReadOnly,
+                BindType::BufReadOnly,
+            ],
+        )?;
         let tile_ds = scene_bufs
             .iter()
             .map(|scene_buf| {
@@ -265,7 +271,6 @@
                 )
             })
             .collect::<Result<Vec<_>, _>>()?;
-
         let bg_image = Self::make_test_bg_image(&session);
 
         const GRADIENT_BUF_SIZE: usize =
diff --git a/piet-gpu/src/render_ctx.rs b/piet-gpu/src/render_ctx.rs
index e9a24fa..024dd2b 100644
--- a/piet-gpu/src/render_ctx.rs
+++ b/piet-gpu/src/render_ctx.rs
@@ -228,9 +228,7 @@
         if self.clip_stack.len() >= MAX_BLEND_STACK {
             panic!("Maximum clip/blend stack size {} exceeded", MAX_BLEND_STACK);
         }
-        self.clip_stack.push(ClipElement {
-            blend: None,
-        });
+        self.clip_stack.push(ClipElement { blend: None });
         if let Some(tos) = self.state_stack.last_mut() {
             tos.n_clip += 1;
         }
@@ -334,9 +332,7 @@
         if self.clip_stack.len() >= MAX_BLEND_STACK {
             panic!("Maximum clip/blend stack size {} exceeded", MAX_BLEND_STACK);
         }
-        self.clip_stack.push(ClipElement {
-            blend: Some(blend),
-        });
+        self.clip_stack.push(ClipElement { blend: Some(blend) });
         if let Some(tos) = self.state_stack.last_mut() {
             tos.n_clip += 1;
         }
diff --git a/piet-gpu/src/test_scenes.rs b/piet-gpu/src/test_scenes.rs
index 118b727..ee5839d 100644
--- a/piet-gpu/src/test_scenes.rs
+++ b/piet-gpu/src/test_scenes.rs
@@ -2,7 +2,7 @@
 
 use rand::{Rng, RngCore};
 
-use crate::{PietGpuRenderContext, Blend, BlendMode, CompositionMode};
+use crate::{Blend, BlendMode, CompositionMode, PietGpuRenderContext};
 use piet::kurbo::{Affine, BezPath, Circle, Line, Point, Rect, Shape};
 use piet::{
     Color, FixedGradient, FixedLinearGradient, GradientStop, Text, TextAttribute, TextLayoutBuilder,
@@ -13,10 +13,7 @@
 const N_CIRCLES: usize = 0;
 
 pub fn render_blend_test(rc: &mut PietGpuRenderContext, i: usize, blend: Blend) {
-    rc.fill(
-        Rect::new(400., 400., 800., 800.),
-        &Color::rgb8(0, 0, 200),
-    );
+    rc.fill(Rect::new(400., 400., 800., 800.), &Color::rgb8(0, 0, 200));
     rc.save().unwrap();
     rc.blend(Rect::new(0., 0., 1000., 1000.), blend);
     rc.transform(Affine::translate(Vec2::new(600., 600.)) * Affine::rotate(0.01 * i as f64));
diff --git a/tests/src/draw.rs b/tests/src/draw.rs
index 692f943..4372da4 100644
--- a/tests/src/draw.rs
+++ b/tests/src/draw.rs
@@ -17,7 +17,7 @@
 //! Tests for the piet-gpu draw object stage.
 
 use piet_gpu_hal::{BufWrite, BufferUsage};
-use rand::{Rng, seq::SliceRandom};
+use rand::{seq::SliceRandom, Rng};
 
 use crate::{Config, Runner, TestResult};
 
@@ -33,7 +33,13 @@
 const DRAWTAG_BEGINCLIP: u32 = 5;
 const DRAWTAG_ENDCLIP: u32 = 37;
 
-const TAGS: &[u32] = &[DRAWTAG_FILLCOLOR, DRAWTAG_FILLLINGRADIENT, DRAWTAG_FILLIMAGE, DRAWTAG_BEGINCLIP, DRAWTAG_ENDCLIP];
+const TAGS: &[u32] = &[
+    DRAWTAG_FILLCOLOR,
+    DRAWTAG_FILLLINGRADIENT,
+    DRAWTAG_FILLIMAGE,
+    DRAWTAG_BEGINCLIP,
+    DRAWTAG_ENDCLIP,
+];
 
 struct DrawTestData {
     tags: Vec<u32>,