Merge branch 'master' into blend_mem

This does the merge and also rebuilds the generated shaders.
diff --git a/Cargo.lock b/Cargo.lock
index 1f80fa3..1b9a6e3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -43,6 +43,12 @@
 checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b"
 
 [[package]]
+name = "arrayvec"
+version = "0.7.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
+
+[[package]]
 name = "ash"
 version = "0.33.3+1.2.191"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -596,7 +602,16 @@
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "16cb54cd28cb3d2e964d9444ca185676a94fd9b7cce5f02b22c717947ed8e9a2"
 dependencies = [
- "arrayvec",
+ "arrayvec 0.5.2",
+]
+
+[[package]]
+name = "kurbo"
+version = "0.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a53776d271cfb873b17c618af0298445c88afc52837f3e948fa3fafd131f449"
+dependencies = [
+ "arrayvec 0.7.2",
 ]
 
 [[package]]
@@ -755,8 +770,7 @@
 [[package]]
 name = "moscato"
 version = "0.1.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8372f6cdc8b2c431750a9c4edbc8d9c511ef1a68472aaa02500493414a407c64"
+source = "git+https://github.com/dfrg/pinot#59db153ff83420449a909dfaace18466bddbf814"
 dependencies = [
  "pinot",
 ]
@@ -954,7 +968,7 @@
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f00543608fb5ee6063f5ff1259246ae23073c1a5e413e643d0469da3d4b7b4de"
 dependencies = [
- "kurbo",
+ "kurbo 0.7.1",
  "unic-bidi",
 ]
 
@@ -1012,7 +1026,7 @@
 dependencies = [
  "bytemuck",
  "clap",
- "kurbo",
+ "kurbo 0.7.1",
  "piet-gpu",
  "piet-gpu-hal",
  "rand",
@@ -1031,8 +1045,8 @@
 version = "0.1.0"
 dependencies = [
  "bytemuck",
+ "kurbo 0.8.3",
  "moscato",
- "pinot",
  "smallvec",
 ]
 
diff --git a/doc/vision.md b/doc/vision.md
index d7cf6a3..5e69862 100644
--- a/doc/vision.md
+++ b/doc/vision.md
@@ -112,7 +112,7 @@
 
 ## Enriching the imaging model
 
-There is consensus on “the modern 2D imaging model,” roughly encompassing PDF, SVG, HTML Canvas, and Direct2D, but it is not set in stone and with considerable variation in advanced features within those systems (for example, gradient meshes are more or less unique to PDF — the feature was proposed for SVG 2 but [then removed](http://libregraphicsworld.org/blog/entry/gradient-meshes-and-hatching-to-be-removed-from-svg-2-0)).
+There is consensus on “the modern 2D imaging model,” roughly encompassing PDF, SVG, HTML Canvas, and Direct2D, but it is not set in stone and with considerable variation in advanced features within those systems (for example, gradient meshes are more or less unique to PDF — the feature was proposed for SVG 2 but [then removed](https://librearts.org/2018/05/gradient-meshes-and-hatching-to-be-removed-from-svg-2-0/)).
 
 I like this consensus 2D imaging model because I feel it is extremely well suited for UI and documents of considerable richness and complexity, and is quite designer-friendly. There is also tension pulling away from it, I think for two reasons. One is that it is not always implemented efficiently on GPU, especially with deeply nested soft clipping and other nontrivial compositing requirements. The other is that it’s possible to do things on GPU (especially using custom shaders) that are not easily possible with the standard 2D api. Shadertoy shows *many* things that are possible in shaders. One idea I’d like to explore is watercolor brush strokes (see [Computer-Generated Watercolor](https://grail.cs.washington.edu/projects/watercolor/paper_small.pdf) for inspiration). I think it would be possible to get pretty far with distance fields and procedural noise, and a simple function to go from those to paint values for paint-like compositing.
 
@@ -173,7 +173,7 @@
 [variable font technology]: https://docs.microsoft.com/en-us/typography/opentype/spec/otvaroverview
 [MSAA]: https://en.wikipedia.org/wiki/Multisample_anti-aliasing
 [tradition of libart]: https://people.gnome.org/~mathieu/libart/internals.html
-[stem darkening]: https://www.freetype.org/freetype2/docs/text-rendering-general.html
+[stem darkening]: https://freetype.org/freetype2/docs/hinting/text-rendering-general.html
 [subpixel rendering]: https://en.wikipedia.org/wiki/Subpixel_rendering
 [HDR]: https://en.wikipedia.org/wiki/High-dynamic-range_imaging
 [blurred rounded rectangle]: https://raphlinus.github.io/graphics/2020/04/21/blurred-rounded-rects.html
diff --git a/pgpu-render/src/lib.rs b/pgpu-render/src/lib.rs
index 7d4c60b..50462e7 100644
--- a/pgpu-render/src/lib.rs
+++ b/pgpu-render/src/lib.rs
@@ -215,7 +215,10 @@
 /// Computes the bounding box for the glyph after applying the specified
 /// transform.
 #[no_mangle]
-pub unsafe extern "C" fn pgpu_glyph_bbox(glyph: *const PgpuGlyph, transform: &[f32; 6]) -> PgpuRect {
+pub unsafe extern "C" fn pgpu_glyph_bbox(
+    glyph: *const PgpuGlyph,
+    transform: &[f32; 6],
+) -> PgpuRect {
     let transform = piet_scene::geometry::Affine::new(transform);
     let rect = (*glyph).bbox(Some(transform));
     PgpuRect {
diff --git a/pgpu-render/src/render.rs b/pgpu-render/src/render.rs
index 361ef42..5b5d328 100644
--- a/pgpu-render/src/render.rs
+++ b/pgpu-render/src/render.rs
@@ -16,8 +16,8 @@
 
 use piet_gpu::{EncodedSceneRef, PixelFormat, RenderConfig};
 use piet_gpu_hal::{QueryPool, Session};
-use piet_scene::glyph::pinot::{types::Tag, FontDataRef};
 use piet_scene::geometry::{Affine, Rect};
+use piet_scene::glyph::pinot::{types::Tag, FontDataRef};
 use piet_scene::glyph::{GlyphContext, GlyphProvider};
 use piet_scene::resource::ResourceContext;
 use piet_scene::scene::{Fragment, Scene};
@@ -214,7 +214,12 @@
 impl PgpuGlyph {
     pub fn bbox(&self, transform: Option<Affine>) -> Rect {
         if let Some(transform) = &transform {
-            Rect::from_points(self.fragment.points().iter().map(|p| p.transform(transform)))
+            Rect::from_points(
+                self.fragment
+                    .points()
+                    .iter()
+                    .map(|p| p.transform(transform)),
+            )
         } else {
             Rect::from_points(self.fragment.points())
         }
diff --git a/piet-gpu/bin/android.rs b/piet-gpu/bin/android.rs
index 8254cc0..968405e 100644
--- a/piet-gpu/bin/android.rs
+++ b/piet-gpu/bin/android.rs
@@ -110,7 +110,7 @@
                 .map(|_| session.create_semaphore())
                 .collect::<Result<Vec<_>, Error>>()?;
             let query_pools = (0..NUM_FRAMES)
-                .map(|_| session.create_query_pool(8))
+                .map(|_| session.create_query_pool(Renderer::QUERY_POOL_SIZE))
                 .collect::<Result<Vec<_>, Error>>()?;
             let submitted = Default::default();
             let cmd_bufs = Default::default();
diff --git a/piet-gpu/bin/cli.rs b/piet-gpu/bin/cli.rs
index abe6ae1..79914bf 100644
--- a/piet-gpu/bin/cli.rs
+++ b/piet-gpu/bin/cli.rs
@@ -232,7 +232,7 @@
         let session = Session::new(device);
 
         let mut cmd_buf = session.cmd_buf()?;
-        let query_pool = session.create_query_pool(8)?;
+        let query_pool = session.create_query_pool(Renderer::QUERY_POOL_SIZE)?;
 
         let mut ctx = PietGpuRenderContext::new();
         if let Some(input) = matches.value_of("INPUT") {
@@ -249,7 +249,8 @@
             println!("parsing time: {:?}", start.elapsed());
             test_scenes::render_svg(&mut ctx, &svg);
         } else {
-            test_scenes::render_scene(&mut ctx);
+            //test_scenes::render_scene(&mut ctx);
+            test_scenes::render_blend_grid(&mut ctx);
         }
 
         let mut renderer = Renderer::new(&session, WIDTH, HEIGHT, 1)?;
diff --git a/piet-gpu/bin/winit.rs b/piet-gpu/bin/winit.rs
index 1642026..8f84da4 100644
--- a/piet-gpu/bin/winit.rs
+++ b/piet-gpu/bin/winit.rs
@@ -70,7 +70,7 @@
             .map(|_| session.create_semaphore())
             .collect::<Result<Vec<_>, Error>>()?;
         let query_pools = (0..NUM_FRAMES)
-            .map(|_| session.create_query_pool(12))
+            .map(|_| session.create_query_pool(Renderer::QUERY_POOL_SIZE))
             .collect::<Result<Vec<_>, Error>>()?;
         let mut cmd_bufs: [Option<CmdBuf>; NUM_FRAMES] = Default::default();
         let mut submitted: [Option<SubmittedCmdBuf>; NUM_FRAMES] = Default::default();
@@ -125,7 +125,7 @@
                     }
 
                     let mut ctx = PietGpuRenderContext::new();
-                    let test_blend = false;
+                    let test_blend = true;
                     if let Some(svg) = &svg {
                         test_scenes::render_svg(&mut ctx, svg);
                     } else if test_blend {
diff --git a/piet-gpu/shader/blend.h b/piet-gpu/shader/blend.h
index c0ae6af..7366006 100644
--- a/piet-gpu/shader/blend.h
+++ b/piet-gpu/shader/blend.h
@@ -18,6 +18,7 @@
 #define Blend_Saturation 13
 #define Blend_Color 14
 #define Blend_Luminosity 15
+#define Blend_Clip 128
 
 vec3 screen(vec3 cb, vec3 cs) {
 	return cb + cs - (cb * cs);
@@ -45,7 +46,7 @@
 	return mix(
 		screen(cb, 2.0 * cs - 1.0),
 		cb * 2.0 * cs, 
-		vec3(lessThanEqual(cs, vec3(0.5)))
+		lessThanEqual(cs, vec3(0.5))
 	);
 }
 
@@ -53,12 +54,12 @@
 	vec3 d = mix(
 		sqrt(cb),
 		((16.0 * cb - vec3(12.0)) * cb + vec3(4.0)) * cb,
-		vec3(lessThanEqual(cb, vec3(0.25)))
+		lessThanEqual(cb, vec3(0.25))
 	);
 	return mix(
 		cb + (2.0 * cs - vec3(1.0)) * (d - cb),
 		cb - (vec3(1.0) - 2.0 * cs) * cb * (vec3(1.0) - cb),
-		vec3(lessThanEqual(cs, vec3(0.5)))
+		lessThanEqual(cs, vec3(0.5))
 	);
 }
 
@@ -260,6 +261,7 @@
 }
 
 #define BlendComp_default (Blend_Normal << 8 | Comp_SrcOver)
+#define BlendComp_clip (Blend_Clip << 8 | Comp_SrcOver)
 
 // This is added to alpha to prevent divide-by-zero
 #define EPSILON 1e-15
@@ -267,7 +269,8 @@
 // Apply blending and composition. Both input and output colors are
 // premultiplied RGB.
 vec4 mix_blend_compose(vec4 backdrop, vec4 src, uint mode) {
-	if (mode == BlendComp_default) {
+	if ((mode & 0x7fff) == BlendComp_default) {
+		// Both normal+src_over blend and clip case
 		return backdrop * (1.0 - src.a) + src;
 	}
 	// Un-premultiply colors for blending
@@ -276,9 +279,9 @@
 	float inv_backdrop_a = 1.0 / (backdrop.a + EPSILON);
 	vec3 cb = backdrop.rgb * inv_backdrop_a;
 	uint blend_mode = mode >> 8;
-	vec3 blended = mix_blend(cs, cb, blend_mode);
+	vec3 blended = mix_blend(cb, cs, blend_mode);
 	cs = mix(cs, blended, backdrop.a);
-	uint comp_mode = mode * 0xff;
+	uint comp_mode = mode & 0xff;
 	if (comp_mode == Comp_SrcOver) {
 		vec3 co = mix(backdrop.rgb, cs, src.a);
 		return vec4(co, src.a + backdrop.a * (1 - src.a));
diff --git a/piet-gpu/shader/coarse.comp b/piet-gpu/shader/coarse.comp
index c93d002..be891c1 100644
--- a/piet-gpu/shader/coarse.comp
+++ b/piet-gpu/shader/coarse.comp
@@ -308,7 +308,7 @@
                     uint scene_offset = memory[drawmonoid_base + 2];
                     uint dd = drawdata_start + (scene_offset >> 2);
                     uint blend = scene[dd];
-                    is_blend = (blend != BlendComp_default);
+                    is_blend = (blend != BlendComp_clip);
                 }
                 include_tile = tile.tile.offset != 0 || (tile.backdrop == 0) == is_clip
                     || is_blend;
diff --git a/piet-gpu/shader/gen/backdrop.dxil b/piet-gpu/shader/gen/backdrop.dxil
index df2be88..0fb9622 100644
--- a/piet-gpu/shader/gen/backdrop.dxil
+++ b/piet-gpu/shader/gen/backdrop.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/backdrop_lg.dxil b/piet-gpu/shader/gen/backdrop_lg.dxil
index 81f9b65..e24a6d3 100644
--- a/piet-gpu/shader/gen/backdrop_lg.dxil
+++ b/piet-gpu/shader/gen/backdrop_lg.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/bbox_clear.dxil b/piet-gpu/shader/gen/bbox_clear.dxil
index 6b3efaf..6655b7f 100644
--- a/piet-gpu/shader/gen/bbox_clear.dxil
+++ b/piet-gpu/shader/gen/bbox_clear.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/binning.dxil b/piet-gpu/shader/gen/binning.dxil
index 4a4f073..3050aa8 100644
--- a/piet-gpu/shader/gen/binning.dxil
+++ b/piet-gpu/shader/gen/binning.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/clip_leaf.dxil b/piet-gpu/shader/gen/clip_leaf.dxil
index b681a65..29a158e 100644
--- a/piet-gpu/shader/gen/clip_leaf.dxil
+++ b/piet-gpu/shader/gen/clip_leaf.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/clip_reduce.dxil b/piet-gpu/shader/gen/clip_reduce.dxil
index 0ccaac9..0dff71b 100644
--- a/piet-gpu/shader/gen/clip_reduce.dxil
+++ b/piet-gpu/shader/gen/clip_reduce.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/coarse.dxil b/piet-gpu/shader/gen/coarse.dxil
index cbebec0..3370f59 100644
--- a/piet-gpu/shader/gen/coarse.dxil
+++ b/piet-gpu/shader/gen/coarse.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/coarse.hlsl b/piet-gpu/shader/gen/coarse.hlsl
index 0519a63..3a5c1a5 100644
--- a/piet-gpu/shader/gen/coarse.hlsl
+++ b/piet-gpu/shader/gen/coarse.hlsl
@@ -922,26 +922,26 @@
                     uint scene_offset = _260.Load((drawmonoid_base_1 + 2u) * 4 + 8);
                     uint dd = drawdata_start + (scene_offset >> uint(2));
                     uint blend = _1378.Load(dd * 4 + 0);
-                    is_blend = blend != 3u;
+                    is_blend = blend != 32771u;
                 }
-                bool _1698 = tile.tile.offset != 0u;
-                bool _1707;
-                if (!_1698)
+                bool _1699 = tile.tile.offset != 0u;
+                bool _1708;
+                if (!_1699)
                 {
-                    _1707 = (tile.backdrop == 0) == is_clip;
+                    _1708 = (tile.backdrop == 0) == is_clip;
                 }
                 else
                 {
-                    _1707 = _1698;
+                    _1708 = _1699;
                 }
-                include_tile = _1707 || is_blend;
+                include_tile = _1708 || is_blend;
             }
             if (include_tile)
             {
                 uint el_slice = el_ix / 32u;
                 uint el_mask = 1u << (el_ix & 31u);
-                uint _1729;
-                InterlockedOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask, _1729);
+                uint _1730;
+                InterlockedOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask, _1730);
             }
         }
         GroupMemoryBarrierWithGroupSync();
@@ -970,9 +970,9 @@
             {
                 uint param_25 = element_ref_ix;
                 bool param_26 = mem_ok;
-                TileRef _1806 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
+                TileRef _1807 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
                 Alloc param_27 = read_tile_alloc(param_25, param_26);
-                TileRef param_28 = _1806;
+                TileRef param_28 = _1807;
                 Tile tile_1 = Tile_read(param_27, param_28);
                 uint drawmonoid_base_2 = drawmonoid_start + (4u * element_ix_2);
                 uint scene_offset_1 = _260.Load((drawmonoid_base_2 + 2u) * 4 + 8);
@@ -987,11 +987,11 @@
                         Alloc param_29 = cmd_alloc;
                         CmdRef param_30 = cmd_ref;
                         uint param_31 = cmd_limit;
-                        bool _1854 = alloc_cmd(param_29, param_30, param_31);
+                        bool _1855 = alloc_cmd(param_29, param_30, param_31);
                         cmd_alloc = param_29;
                         cmd_ref = param_30;
                         cmd_limit = param_31;
-                        if (!_1854)
+                        if (!_1855)
                         {
                             break;
                         }
@@ -1002,10 +1002,10 @@
                         write_fill(param_32, param_33, param_34, param_35);
                         cmd_ref = param_33;
                         uint rgba = _1378.Load(dd_1 * 4 + 0);
-                        CmdColor _1877 = { rgba };
+                        CmdColor _1878 = { rgba };
                         Alloc param_36 = cmd_alloc;
                         CmdRef param_37 = cmd_ref;
-                        CmdColor param_38 = _1877;
+                        CmdColor param_38 = _1878;
                         Cmd_Color_write(param_36, param_37, param_38);
                         cmd_ref.offset += 8u;
                         break;
@@ -1015,11 +1015,11 @@
                         Alloc param_39 = cmd_alloc;
                         CmdRef param_40 = cmd_ref;
                         uint param_41 = cmd_limit;
-                        bool _1895 = alloc_cmd(param_39, param_40, param_41);
+                        bool _1896 = alloc_cmd(param_39, param_40, param_41);
                         cmd_alloc = param_39;
                         cmd_ref = param_40;
                         cmd_limit = param_41;
-                        if (!_1895)
+                        if (!_1896)
                         {
                             break;
                         }
@@ -1046,11 +1046,11 @@
                         Alloc param_49 = cmd_alloc;
                         CmdRef param_50 = cmd_ref;
                         uint param_51 = cmd_limit;
-                        bool _1959 = alloc_cmd(param_49, param_50, param_51);
+                        bool _1960 = alloc_cmd(param_49, param_50, param_51);
                         cmd_alloc = param_49;
                         cmd_ref = param_50;
                         cmd_limit = param_51;
-                        if (!_1959)
+                        if (!_1960)
                         {
                             break;
                         }
@@ -1080,11 +1080,11 @@
                         Alloc param_59 = cmd_alloc;
                         CmdRef param_60 = cmd_ref;
                         uint param_61 = cmd_limit;
-                        bool _2065 = alloc_cmd(param_59, param_60, param_61);
+                        bool _2066 = alloc_cmd(param_59, param_60, param_61);
                         cmd_alloc = param_59;
                         cmd_ref = param_60;
                         cmd_limit = param_61;
-                        if (!_2065)
+                        if (!_2066)
                         {
                             break;
                         }
@@ -1097,27 +1097,27 @@
                         uint index = _1378.Load(dd_1 * 4 + 0);
                         uint raw1 = _1378.Load((dd_1 + 1u) * 4 + 0);
                         int2 offset_1 = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16);
-                        CmdImage _2104 = { index, offset_1 };
+                        CmdImage _2105 = { index, offset_1 };
                         Alloc param_66 = cmd_alloc;
                         CmdRef param_67 = cmd_ref;
-                        CmdImage param_68 = _2104;
+                        CmdImage param_68 = _2105;
                         Cmd_Image_write(param_66, param_67, param_68);
                         cmd_ref.offset += 12u;
                         break;
                     }
                     case 5u:
                     {
-                        bool _2118 = tile_1.tile.offset == 0u;
-                        bool _2124;
-                        if (_2118)
+                        bool _2119 = tile_1.tile.offset == 0u;
+                        bool _2125;
+                        if (_2119)
                         {
-                            _2124 = tile_1.backdrop == 0;
+                            _2125 = tile_1.backdrop == 0;
                         }
                         else
                         {
-                            _2124 = _2118;
+                            _2125 = _2119;
                         }
-                        if (_2124)
+                        if (_2125)
                         {
                             clip_zero_depth = clip_depth + 1u;
                         }
@@ -1126,11 +1126,11 @@
                             Alloc param_69 = cmd_alloc;
                             CmdRef param_70 = cmd_ref;
                             uint param_71 = cmd_limit;
-                            bool _2136 = alloc_cmd(param_69, param_70, param_71);
+                            bool _2137 = alloc_cmd(param_69, param_70, param_71);
                             cmd_alloc = param_69;
                             cmd_ref = param_70;
                             cmd_limit = param_71;
-                            if (!_2136)
+                            if (!_2137)
                             {
                                 break;
                             }
@@ -1150,11 +1150,11 @@
                         Alloc param_74 = cmd_alloc;
                         CmdRef param_75 = cmd_ref;
                         uint param_76 = cmd_limit;
-                        bool _2169 = alloc_cmd(param_74, param_75, param_76);
+                        bool _2170 = alloc_cmd(param_74, param_75, param_76);
                         cmd_alloc = param_74;
                         cmd_ref = param_75;
                         cmd_limit = param_76;
-                        if (!_2169)
+                        if (!_2170)
                         {
                             break;
                         }
@@ -1165,10 +1165,10 @@
                         write_fill(param_77, param_78, param_79, param_80);
                         cmd_ref = param_78;
                         uint blend_1 = _1378.Load(dd_1 * 4 + 0);
-                        CmdEndClip _2192 = { blend_1 };
+                        CmdEndClip _2193 = { blend_1 };
                         Alloc param_81 = cmd_alloc;
                         CmdRef param_82 = cmd_ref;
-                        CmdEndClip param_83 = _2192;
+                        CmdEndClip param_83 = _2193;
                         Cmd_EndClip_write(param_81, param_82, param_83);
                         cmd_ref.offset += 8u;
                         render_blend_depth--;
@@ -1204,17 +1204,17 @@
             break;
         }
     }
-    bool _2241 = (bin_tile_x + tile_x) < _1005.Load(8);
-    bool _2250;
-    if (_2241)
+    bool _2242 = (bin_tile_x + tile_x) < _1005.Load(8);
+    bool _2251;
+    if (_2242)
     {
-        _2250 = (bin_tile_y + tile_y) < _1005.Load(12);
+        _2251 = (bin_tile_y + tile_y) < _1005.Load(12);
     }
     else
     {
-        _2250 = _2241;
+        _2251 = _2242;
     }
-    if (_2250)
+    if (_2251)
     {
         Alloc param_84 = cmd_alloc;
         CmdRef param_85 = cmd_ref;
diff --git a/piet-gpu/shader/gen/coarse.msl b/piet-gpu/shader/gen/coarse.msl
index 578fa37..94b8738 100644
--- a/piet-gpu/shader/gen/coarse.msl
+++ b/piet-gpu/shader/gen/coarse.msl
@@ -945,25 +945,25 @@
                     uint scene_offset = v_260.memory[drawmonoid_base_1 + 2u];
                     uint dd = drawdata_start + (scene_offset >> uint(2));
                     uint blend = _1378.scene[dd];
-                    is_blend = blend != 3u;
+                    is_blend = blend != 32771u;
                 }
-                bool _1698 = tile.tile.offset != 0u;
-                bool _1707;
-                if (!_1698)
+                bool _1699 = tile.tile.offset != 0u;
+                bool _1708;
+                if (!_1699)
                 {
-                    _1707 = (tile.backdrop == 0) == is_clip;
+                    _1708 = (tile.backdrop == 0) == is_clip;
                 }
                 else
                 {
-                    _1707 = _1698;
+                    _1708 = _1699;
                 }
-                include_tile = _1707 || is_blend;
+                include_tile = _1708 || is_blend;
             }
             if (include_tile)
             {
                 uint el_slice = el_ix / 32u;
                 uint el_mask = 1u << (el_ix & 31u);
-                uint _1729 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&sh_bitmaps[el_slice][(y * 16u) + x], el_mask, memory_order_relaxed);
+                uint _1730 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&sh_bitmaps[el_slice][(y * 16u) + x], el_mask, memory_order_relaxed);
             }
         }
         threadgroup_barrier(mem_flags::mem_threadgroup);
@@ -1008,11 +1008,11 @@
                         Alloc param_29 = cmd_alloc;
                         CmdRef param_30 = cmd_ref;
                         uint param_31 = cmd_limit;
-                        bool _1854 = alloc_cmd(param_29, param_30, param_31, v_260, v_260BufferSize);
+                        bool _1855 = alloc_cmd(param_29, param_30, param_31, v_260, v_260BufferSize);
                         cmd_alloc = param_29;
                         cmd_ref = param_30;
                         cmd_limit = param_31;
-                        if (!_1854)
+                        if (!_1855)
                         {
                             break;
                         }
@@ -1035,11 +1035,11 @@
                         Alloc param_39 = cmd_alloc;
                         CmdRef param_40 = cmd_ref;
                         uint param_41 = cmd_limit;
-                        bool _1895 = alloc_cmd(param_39, param_40, param_41, v_260, v_260BufferSize);
+                        bool _1896 = alloc_cmd(param_39, param_40, param_41, v_260, v_260BufferSize);
                         cmd_alloc = param_39;
                         cmd_ref = param_40;
                         cmd_limit = param_41;
-                        if (!_1895)
+                        if (!_1896)
                         {
                             break;
                         }
@@ -1066,11 +1066,11 @@
                         Alloc param_49 = cmd_alloc;
                         CmdRef param_50 = cmd_ref;
                         uint param_51 = cmd_limit;
-                        bool _1959 = alloc_cmd(param_49, param_50, param_51, v_260, v_260BufferSize);
+                        bool _1960 = alloc_cmd(param_49, param_50, param_51, v_260, v_260BufferSize);
                         cmd_alloc = param_49;
                         cmd_ref = param_50;
                         cmd_limit = param_51;
-                        if (!_1959)
+                        if (!_1960)
                         {
                             break;
                         }
@@ -1100,11 +1100,11 @@
                         Alloc param_59 = cmd_alloc;
                         CmdRef param_60 = cmd_ref;
                         uint param_61 = cmd_limit;
-                        bool _2065 = alloc_cmd(param_59, param_60, param_61, v_260, v_260BufferSize);
+                        bool _2066 = alloc_cmd(param_59, param_60, param_61, v_260, v_260BufferSize);
                         cmd_alloc = param_59;
                         cmd_ref = param_60;
                         cmd_limit = param_61;
-                        if (!_2065)
+                        if (!_2066)
                         {
                             break;
                         }
@@ -1126,17 +1126,17 @@
                     }
                     case 5u:
                     {
-                        bool _2118 = tile_1.tile.offset == 0u;
-                        bool _2124;
-                        if (_2118)
+                        bool _2119 = tile_1.tile.offset == 0u;
+                        bool _2125;
+                        if (_2119)
                         {
-                            _2124 = tile_1.backdrop == 0;
+                            _2125 = tile_1.backdrop == 0;
                         }
                         else
                         {
-                            _2124 = _2118;
+                            _2125 = _2119;
                         }
-                        if (_2124)
+                        if (_2125)
                         {
                             clip_zero_depth = clip_depth + 1u;
                         }
@@ -1145,11 +1145,11 @@
                             Alloc param_69 = cmd_alloc;
                             CmdRef param_70 = cmd_ref;
                             uint param_71 = cmd_limit;
-                            bool _2136 = alloc_cmd(param_69, param_70, param_71, v_260, v_260BufferSize);
+                            bool _2137 = alloc_cmd(param_69, param_70, param_71, v_260, v_260BufferSize);
                             cmd_alloc = param_69;
                             cmd_ref = param_70;
                             cmd_limit = param_71;
-                            if (!_2136)
+                            if (!_2137)
                             {
                                 break;
                             }
@@ -1169,11 +1169,11 @@
                         Alloc param_74 = cmd_alloc;
                         CmdRef param_75 = cmd_ref;
                         uint param_76 = cmd_limit;
-                        bool _2169 = alloc_cmd(param_74, param_75, param_76, v_260, v_260BufferSize);
+                        bool _2170 = alloc_cmd(param_74, param_75, param_76, v_260, v_260BufferSize);
                         cmd_alloc = param_74;
                         cmd_ref = param_75;
                         cmd_limit = param_76;
-                        if (!_2169)
+                        if (!_2170)
                         {
                             break;
                         }
@@ -1222,17 +1222,17 @@
             break;
         }
     }
-    bool _2241 = (bin_tile_x + tile_x) < _1005.conf.width_in_tiles;
-    bool _2250;
-    if (_2241)
+    bool _2242 = (bin_tile_x + tile_x) < _1005.conf.width_in_tiles;
+    bool _2251;
+    if (_2242)
     {
-        _2250 = (bin_tile_y + tile_y) < _1005.conf.height_in_tiles;
+        _2251 = (bin_tile_y + tile_y) < _1005.conf.height_in_tiles;
     }
     else
     {
-        _2250 = _2241;
+        _2251 = _2242;
     }
-    if (_2250)
+    if (_2251)
     {
         Alloc param_84 = cmd_alloc;
         CmdRef param_85 = cmd_ref;
diff --git a/piet-gpu/shader/gen/coarse.spv b/piet-gpu/shader/gen/coarse.spv
index 718acca..bcac844 100644
--- a/piet-gpu/shader/gen/coarse.spv
+++ b/piet-gpu/shader/gen/coarse.spv
Binary files differ
diff --git a/piet-gpu/shader/gen/draw_leaf.dxil b/piet-gpu/shader/gen/draw_leaf.dxil
index 6353f19..200f169 100644
--- a/piet-gpu/shader/gen/draw_leaf.dxil
+++ b/piet-gpu/shader/gen/draw_leaf.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/draw_reduce.dxil b/piet-gpu/shader/gen/draw_reduce.dxil
index c101fc8..be69aad 100644
--- a/piet-gpu/shader/gen/draw_reduce.dxil
+++ b/piet-gpu/shader/gen/draw_reduce.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/draw_root.dxil b/piet-gpu/shader/gen/draw_root.dxil
index 873fa29..4ea23f7 100644
--- a/piet-gpu/shader/gen/draw_root.dxil
+++ b/piet-gpu/shader/gen/draw_root.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/kernel4.dxil b/piet-gpu/shader/gen/kernel4.dxil
index 5617c51..c4446c5 100644
--- a/piet-gpu/shader/gen/kernel4.dxil
+++ b/piet-gpu/shader/gen/kernel4.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/kernel4.hlsl b/piet-gpu/shader/gen/kernel4.hlsl
index 30779b7..408dcfc 100644
--- a/piet-gpu/shader/gen/kernel4.hlsl
+++ b/piet-gpu/shader/gen/kernel4.hlsl
@@ -162,7 +162,7 @@
 static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 1u);
 
 RWByteAddressBuffer _297 : register(u0, space0);
-ByteAddressBuffer _1725 : register(t1, space0);
+ByteAddressBuffer _1681 : register(t1, space0);
 RWTexture2D<unorm float4> image_atlas : register(u3, space0);
 RWTexture2D<unorm float4> gradients : register(u4, space0);
 RWTexture2D<unorm float4> image : register(u2, space0);
@@ -347,10 +347,7 @@
 
 float3 fromsRGB(float3 srgb)
 {
-    bool3 cutoff = bool3(srgb.x >= 0.040449999272823333740234375f.xxx.x, srgb.y >= 0.040449999272823333740234375f.xxx.y, srgb.z >= 0.040449999272823333740234375f.xxx.z);
-    float3 below = srgb / 12.9200000762939453125f.xxx;
-    float3 above = pow((srgb + 0.054999999701976776123046875f.xxx) / 1.05499994754791259765625f.xxx, 2.400000095367431640625f.xxx);
-    return float3(cutoff.x ? above.x : below.x, cutoff.y ? above.y : below.y, cutoff.z ? above.z : below.z);
+    return srgb;
 }
 
 float4 unpacksRGB(uint srgba)
@@ -477,10 +474,10 @@
         int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset;
         float4 fg_rgba = image_atlas[uv];
         float3 param_1 = fg_rgba.xyz;
-        float3 _1697 = fromsRGB(param_1);
-        fg_rgba.x = _1697.x;
-        fg_rgba.y = _1697.y;
-        fg_rgba.z = _1697.z;
+        float3 _1653 = fromsRGB(param_1);
+        fg_rgba.x = _1653.x;
+        fg_rgba.y = _1653.y;
+        fg_rgba.z = _1653.z;
         rgba[i] = fg_rgba;
     }
     spvReturnValue = rgba;
@@ -488,10 +485,7 @@
 
 float3 tosRGB(float3 rgb)
 {
-    bool3 cutoff = bool3(rgb.x >= 0.003130800090730190277099609375f.xxx.x, rgb.y >= 0.003130800090730190277099609375f.xxx.y, rgb.z >= 0.003130800090730190277099609375f.xxx.z);
-    float3 below = 12.9200000762939453125f.xxx * rgb;
-    float3 above = (1.05499994754791259765625f.xxx * pow(rgb, 0.416660010814666748046875f.xxx)) - 0.054999999701976776123046875f.xxx;
-    return float3(cutoff.x ? above.x : below.x, cutoff.y ? above.y : below.y, cutoff.z ? above.z : below.z);
+    return rgb;
 }
 
 uint packsRGB(inout float4 rgba)
@@ -529,7 +523,10 @@
 {
     float3 param = cb;
     float3 param_1 = (cs * 2.0f) - 1.0f.xxx;
-    return lerp(screen(param, param_1), (cb * 2.0f) * cs, float3(bool3(cs.x <= 0.5f.xxx.x, cs.y <= 0.5f.xxx.y, cs.z <= 0.5f.xxx.z)));
+    float3 _889 = screen(param, param_1);
+    float3 _893 = (cb * 2.0f) * cs;
+    bool3 _898 = bool3(cs.x <= 0.5f.xxx.x, cs.y <= 0.5f.xxx.y, cs.z <= 0.5f.xxx.z);
+    return float3(_898.x ? _893.x : _889.x, _898.y ? _893.y : _889.y, _898.z ? _893.z : _889.z);
 }
 
 float color_dodge(float cb, float cs)
@@ -572,8 +569,14 @@
 
 float3 soft_light(float3 cb, float3 cs)
 {
-    float3 d = lerp(sqrt(cb), ((((cb * 16.0f) - 12.0f.xxx) * cb) + 4.0f.xxx) * cb, float3(bool3(cb.x <= 0.25f.xxx.x, cb.y <= 0.25f.xxx.y, cb.z <= 0.25f.xxx.z)));
-    return lerp(cb + (((cs * 2.0f) - 1.0f.xxx) * (d - cb)), cb - (((1.0f.xxx - (cs * 2.0f)) * cb) * (1.0f.xxx - cb)), float3(bool3(cs.x <= 0.5f.xxx.x, cs.y <= 0.5f.xxx.y, cs.z <= 0.5f.xxx.z)));
+    float3 _904 = sqrt(cb);
+    float3 _917 = ((((cb * 16.0f) - 12.0f.xxx) * cb) + 4.0f.xxx) * cb;
+    bool3 _921 = bool3(cb.x <= 0.25f.xxx.x, cb.y <= 0.25f.xxx.y, cb.z <= 0.25f.xxx.z);
+    float3 d = float3(_921.x ? _917.x : _904.x, _921.y ? _917.y : _904.y, _921.z ? _917.z : _904.z);
+    float3 _932 = cb + (((cs * 2.0f) - 1.0f.xxx) * (d - cb));
+    float3 _942 = cb - (((1.0f.xxx - (cs * 2.0f)) * cb) * (1.0f.xxx - cb));
+    bool3 _944 = bool3(cs.x <= 0.5f.xxx.x, cs.y <= 0.5f.xxx.y, cs.z <= 0.5f.xxx.z);
+    return float3(_944.x ? _942.x : _932.x, _944.y ? _942.y : _932.y, _944.z ? _942.z : _932.z);
 }
 
 float sat(float3 c)
@@ -706,8 +709,8 @@
 {
     float3 param = c;
     float3 param_1 = c + (l - lum(param)).xxx;
-    float3 _1052 = clip_color(param_1);
-    return _1052;
+    float3 _1048 = clip_color(param_1);
+    return _1048;
 }
 
 float3 mix_blend(float3 cb, float3 cs, uint mode)
@@ -795,9 +798,9 @@
             float3 param_20 = cb;
             float3 param_21 = cs;
             float param_22 = sat(param_20);
-            float3 _1343 = set_sat(param_21, param_22);
+            float3 _1340 = set_sat(param_21, param_22);
             float3 param_23 = cb;
-            float3 param_24 = _1343;
+            float3 param_24 = _1340;
             float param_25 = lum(param_23);
             b = set_lum(param_24, param_25);
             break;
@@ -807,9 +810,9 @@
             float3 param_26 = cs;
             float3 param_27 = cb;
             float param_28 = sat(param_26);
-            float3 _1357 = set_sat(param_27, param_28);
+            float3 _1354 = set_sat(param_27, param_28);
             float3 param_29 = cb;
-            float3 param_30 = _1357;
+            float3 param_30 = _1354;
             float param_31 = lum(param_29);
             b = set_lum(param_30, param_31);
             break;
@@ -934,7 +937,7 @@
 
 float4 mix_blend_compose(float4 backdrop, float4 src, uint mode)
 {
-    if (mode == 3u)
+    if ((mode & 32767u) == 3u)
     {
         return (backdrop * (1.0f - src.w)) + src;
     }
@@ -943,12 +946,12 @@
     float inv_backdrop_a = 1.0f / (backdrop.w + 1.0000000036274937255387218471014e-15f);
     float3 cb = backdrop.xyz * inv_backdrop_a;
     uint blend_mode = mode >> uint(8);
-    float3 param = cs;
-    float3 param_1 = cb;
+    float3 param = cb;
+    float3 param_1 = cs;
     uint param_2 = blend_mode;
     float3 blended = mix_blend(param, param_1, param_2);
     cs = lerp(cs, blended, backdrop.w.xxx);
-    uint comp_mode = mode * 255u;
+    uint comp_mode = mode & 255u;
     if (comp_mode == 3u)
     {
         float3 co = lerp(backdrop.xyz, cs, src.w.xxx);
@@ -986,16 +989,16 @@
 
 void comp_main()
 {
-    uint tile_ix = (gl_WorkGroupID.y * _1725.Load(8)) + gl_WorkGroupID.x;
-    Alloc _1740;
-    _1740.offset = _1725.Load(24);
+    uint tile_ix = (gl_WorkGroupID.y * _1681.Load(8)) + gl_WorkGroupID.x;
+    Alloc _1696;
+    _1696.offset = _1681.Load(24);
     Alloc param;
-    param.offset = _1740.offset;
+    param.offset = _1696.offset;
     uint param_1 = tile_ix * 1024u;
     uint param_2 = 1024u;
     Alloc cmd_alloc = slice_mem(param, param_1, param_2);
-    CmdRef _1749 = { cmd_alloc.offset };
-    CmdRef cmd_ref = _1749;
+    CmdRef _1705 = { cmd_alloc.offset };
+    CmdRef cmd_ref = _1705;
     uint blend_offset = _297.Load((cmd_ref.offset >> uint(2)) * 4 + 8);
     cmd_ref.offset += 4u;
     uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y));
@@ -1033,8 +1036,8 @@
                 {
                     df[k] = 1000000000.0f;
                 }
-                TileSegRef _1854 = { stroke.tile_ref };
-                tile_seg_ref = _1854;
+                TileSegRef _1810 = { stroke.tile_ref };
+                tile_seg_ref = _1810;
                 do
                 {
                     uint param_7 = tile_seg_ref.offset;
@@ -1070,8 +1073,8 @@
                 {
                     area[k_3] = float(fill.backdrop);
                 }
-                TileSegRef _1974 = { fill.tile_ref };
-                tile_seg_ref = _1974;
+                TileSegRef _1930 = { fill.tile_ref };
+                tile_seg_ref = _1930;
                 do
                 {
                     uint param_15 = tile_seg_ref.offset;
@@ -1160,10 +1163,10 @@
                     int x = int(round(clamp(my_d, 0.0f, 1.0f) * 511.0f));
                     float4 fg_rgba = gradients[int2(x, int(lin.index))];
                     float3 param_29 = fg_rgba.xyz;
-                    float3 _2308 = fromsRGB(param_29);
-                    fg_rgba.x = _2308.x;
-                    fg_rgba.y = _2308.y;
-                    fg_rgba.z = _2308.z;
+                    float3 _2264 = fromsRGB(param_29);
+                    fg_rgba.x = _2264.x;
+                    fg_rgba.y = _2264.y;
+                    fg_rgba.z = _2264.z;
                     float4 fg_k_1 = fg_rgba * area[k_9];
                     rgba[k_9] = (rgba[k_9] * (1.0f - fg_k_1.w)) + fg_k_1;
                 }
@@ -1186,10 +1189,10 @@
                     int x_1 = int(round(clamp(t_2, 0.0f, 1.0f) * 511.0f));
                     float4 fg_rgba_1 = gradients[int2(x_1, int(rad.index))];
                     float3 param_33 = fg_rgba_1.xyz;
-                    float3 _2418 = fromsRGB(param_33);
-                    fg_rgba_1.x = _2418.x;
-                    fg_rgba_1.y = _2418.y;
-                    fg_rgba_1.z = _2418.z;
+                    float3 _2374 = fromsRGB(param_33);
+                    fg_rgba_1.x = _2374.x;
+                    fg_rgba_1.y = _2374.y;
+                    fg_rgba_1.z = _2374.z;
                     float4 fg_k_2 = fg_rgba_1 * area[k_10];
                     rgba[k_10] = (rgba[k_10] * (1.0f - fg_k_2.w)) + fg_k_2;
                 }
@@ -1203,9 +1206,9 @@
                 CmdImage fill_img = Cmd_Image_read(param_34, param_35);
                 uint2 param_36 = xy_uint;
                 CmdImage param_37 = fill_img;
-                float4 _2461[8];
-                fillImage(_2461, param_36, param_37);
-                float4 img[8] = _2461;
+                float4 _2417[8];
+                fillImage(_2417, param_36, param_37);
+                float4 img[8] = _2417;
                 for (uint k_11 = 0u; k_11 < 8u; k_11++)
                 {
                     float4 fg_k_3 = img[k_11] * area[k_11];
@@ -1221,8 +1224,8 @@
                     for (uint k_12 = 0u; k_12 < 8u; k_12++)
                     {
                         float4 param_38 = float4(rgba[k_12]);
-                        uint _2523 = packsRGB(param_38);
-                        blend_stack[clip_depth][k_12] = _2523;
+                        uint _2479 = packsRGB(param_38);
+                        blend_stack[clip_depth][k_12] = _2479;
                         rgba[k_12] = 0.0f.xxxx;
                     }
                 }
@@ -1232,8 +1235,8 @@
                     for (uint k_13 = 0u; k_13 < 8u; k_13++)
                     {
                         float4 param_39 = float4(rgba[k_13]);
-                        uint _2566 = packsRGB(param_39);
-                        _297.Store((base_ix + k_13) * 4 + 8, _2566);
+                        uint _2522 = packsRGB(param_39);
+                        _297.Store((base_ix + k_13) * 4 + 8, _2522);
                         rgba[k_13] = 0.0f.xxxx;
                     }
                 }
@@ -1276,8 +1279,8 @@
             {
                 Alloc param_46 = cmd_alloc;
                 CmdRef param_47 = cmd_ref;
-                CmdRef _2665 = { Cmd_Jump_read(param_46, param_47).new_ref };
-                cmd_ref = _2665;
+                CmdRef _2621 = { Cmd_Jump_read(param_46, param_47).new_ref };
+                cmd_ref = _2621;
                 cmd_alloc.offset = cmd_ref.offset;
                 break;
             }
diff --git a/piet-gpu/shader/gen/kernel4.msl b/piet-gpu/shader/gen/kernel4.msl
index 6325914..c12e307 100644
--- a/piet-gpu/shader/gen/kernel4.msl
+++ b/piet-gpu/shader/gen/kernel4.msl
@@ -393,10 +393,7 @@
 static inline __attribute__((always_inline))
 float3 fromsRGB(thread const float3& srgb)
 {
-    bool3 cutoff = srgb >= float3(0.040449999272823333740234375);
-    float3 below = srgb / float3(12.9200000762939453125);
-    float3 above = pow((srgb + float3(0.054999999701976776123046875)) / float3(1.05499994754791259765625), float3(2.400000095367431640625));
-    return select(below, above, cutoff);
+    return srgb;
 }
 
 static inline __attribute__((always_inline))
@@ -528,10 +525,10 @@
         int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset;
         float4 fg_rgba = image_atlas.read(uint2(uv));
         float3 param_1 = fg_rgba.xyz;
-        float3 _1697 = fromsRGB(param_1);
-        fg_rgba.x = _1697.x;
-        fg_rgba.y = _1697.y;
-        fg_rgba.z = _1697.z;
+        float3 _1653 = fromsRGB(param_1);
+        fg_rgba.x = _1653.x;
+        fg_rgba.y = _1653.y;
+        fg_rgba.z = _1653.z;
         rgba[i] = fg_rgba;
     }
     return rgba;
@@ -540,10 +537,7 @@
 static inline __attribute__((always_inline))
 float3 tosRGB(thread const float3& rgb)
 {
-    bool3 cutoff = rgb >= float3(0.003130800090730190277099609375);
-    float3 below = float3(12.9200000762939453125) * rgb;
-    float3 above = (float3(1.05499994754791259765625) * pow(rgb, float3(0.416660010814666748046875))) - float3(0.054999999701976776123046875);
-    return select(below, above, cutoff);
+    return rgb;
 }
 
 static inline __attribute__((always_inline))
@@ -585,7 +579,7 @@
 {
     float3 param = cb;
     float3 param_1 = (cs * 2.0) - float3(1.0);
-    return mix(screen(param, param_1), (cb * 2.0) * cs, float3(cs <= float3(0.5)));
+    return select(screen(param, param_1), (cb * 2.0) * cs, cs <= float3(0.5));
 }
 
 static inline __attribute__((always_inline))
@@ -631,8 +625,8 @@
 static inline __attribute__((always_inline))
 float3 soft_light(thread const float3& cb, thread const float3& cs)
 {
-    float3 d = mix(sqrt(cb), ((((cb * 16.0) - float3(12.0)) * cb) + float3(4.0)) * cb, float3(cb <= float3(0.25)));
-    return mix(cb + (((cs * 2.0) - float3(1.0)) * (d - cb)), cb - (((float3(1.0) - (cs * 2.0)) * cb) * (float3(1.0) - cb)), float3(cs <= float3(0.5)));
+    float3 d = select(sqrt(cb), ((((cb * 16.0) - float3(12.0)) * cb) + float3(4.0)) * cb, cb <= float3(0.25));
+    return select(cb + (((cs * 2.0) - float3(1.0)) * (d - cb)), cb - (((float3(1.0) - (cs * 2.0)) * cb) * (float3(1.0) - cb)), cs <= float3(0.5));
 }
 
 static inline __attribute__((always_inline))
@@ -771,8 +765,8 @@
 {
     float3 param = c;
     float3 param_1 = c + float3(l - lum(param));
-    float3 _1052 = clip_color(param_1);
-    return _1052;
+    float3 _1048 = clip_color(param_1);
+    return _1048;
 }
 
 static inline __attribute__((always_inline))
@@ -861,9 +855,9 @@
             float3 param_20 = cb;
             float3 param_21 = cs;
             float param_22 = sat(param_20);
-            float3 _1343 = set_sat(param_21, param_22);
+            float3 _1340 = set_sat(param_21, param_22);
             float3 param_23 = cb;
-            float3 param_24 = _1343;
+            float3 param_24 = _1340;
             float param_25 = lum(param_23);
             b = set_lum(param_24, param_25);
             break;
@@ -873,9 +867,9 @@
             float3 param_26 = cs;
             float3 param_27 = cb;
             float param_28 = sat(param_26);
-            float3 _1357 = set_sat(param_27, param_28);
+            float3 _1354 = set_sat(param_27, param_28);
             float3 param_29 = cb;
-            float3 param_30 = _1357;
+            float3 param_30 = _1354;
             float param_31 = lum(param_29);
             b = set_lum(param_30, param_31);
             break;
@@ -1002,7 +996,7 @@
 static inline __attribute__((always_inline))
 float4 mix_blend_compose(thread const float4& backdrop, thread const float4& src, thread const uint& mode)
 {
-    if (mode == 3u)
+    if ((mode & 32767u) == 3u)
     {
         return (backdrop * (1.0 - src.w)) + src;
     }
@@ -1011,12 +1005,12 @@
     float inv_backdrop_a = 1.0 / (backdrop.w + 1.0000000036274937255387218471014e-15);
     float3 cb = backdrop.xyz * inv_backdrop_a;
     uint blend_mode = mode >> uint(8);
-    float3 param = cs;
-    float3 param_1 = cb;
+    float3 param = cb;
+    float3 param_1 = cs;
     uint param_2 = blend_mode;
     float3 blended = mix_blend(param, param_1, param_2);
     cs = mix(cs, blended, float3(backdrop.w));
-    uint comp_mode = mode * 255u;
+    uint comp_mode = mode & 255u;
     if (comp_mode == 3u)
     {
         float3 co = mix(backdrop.xyz, cs, float3(src.w));
@@ -1053,11 +1047,11 @@
     return CmdJump_read(param, param_1, v_297);
 }
 
-kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1725 [[buffer(1)]], texture2d<float, access::write> image [[texture(2)]], texture2d<float> image_atlas [[texture(3)]], texture2d<float> gradients [[texture(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
+kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1681 [[buffer(1)]], texture2d<float, access::write> image [[texture(2)]], texture2d<float> image_atlas [[texture(3)]], texture2d<float> gradients [[texture(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
 {
-    uint tile_ix = (gl_WorkGroupID.y * _1725.conf.width_in_tiles) + gl_WorkGroupID.x;
+    uint tile_ix = (gl_WorkGroupID.y * _1681.conf.width_in_tiles) + gl_WorkGroupID.x;
     Alloc param;
-    param.offset = _1725.conf.ptcl_alloc.offset;
+    param.offset = _1681.conf.ptcl_alloc.offset;
     uint param_1 = tile_ix * 1024u;
     uint param_2 = 1024u;
     Alloc cmd_alloc = slice_mem(param, param_1, param_2);
@@ -1224,10 +1218,10 @@
                     int x = int(round(fast::clamp(my_d, 0.0, 1.0) * 511.0));
                     float4 fg_rgba = gradients.read(uint2(int2(x, int(lin.index))));
                     float3 param_29 = fg_rgba.xyz;
-                    float3 _2308 = fromsRGB(param_29);
-                    fg_rgba.x = _2308.x;
-                    fg_rgba.y = _2308.y;
-                    fg_rgba.z = _2308.z;
+                    float3 _2264 = fromsRGB(param_29);
+                    fg_rgba.x = _2264.x;
+                    fg_rgba.y = _2264.y;
+                    fg_rgba.z = _2264.z;
                     float4 fg_k_1 = fg_rgba * area[k_9];
                     rgba[k_9] = (rgba[k_9] * (1.0 - fg_k_1.w)) + fg_k_1;
                 }
@@ -1250,10 +1244,10 @@
                     int x_1 = int(round(fast::clamp(t_2, 0.0, 1.0) * 511.0));
                     float4 fg_rgba_1 = gradients.read(uint2(int2(x_1, int(rad.index))));
                     float3 param_33 = fg_rgba_1.xyz;
-                    float3 _2418 = fromsRGB(param_33);
-                    fg_rgba_1.x = _2418.x;
-                    fg_rgba_1.y = _2418.y;
-                    fg_rgba_1.z = _2418.z;
+                    float3 _2374 = fromsRGB(param_33);
+                    fg_rgba_1.x = _2374.x;
+                    fg_rgba_1.y = _2374.y;
+                    fg_rgba_1.z = _2374.z;
                     float4 fg_k_2 = fg_rgba_1 * area[k_10];
                     rgba[k_10] = (rgba[k_10] * (1.0 - fg_k_2.w)) + fg_k_2;
                 }
@@ -1284,8 +1278,8 @@
                     for (uint k_12 = 0u; k_12 < 8u; k_12++)
                     {
                         float4 param_38 = float4(rgba[k_12]);
-                        uint _2523 = packsRGB(param_38);
-                        blend_stack[clip_depth][k_12] = _2523;
+                        uint _2479 = packsRGB(param_38);
+                        blend_stack[clip_depth][k_12] = _2479;
                         rgba[k_12] = float4(0.0);
                     }
                 }
@@ -1295,8 +1289,8 @@
                     for (uint k_13 = 0u; k_13 < 8u; k_13++)
                     {
                         float4 param_39 = float4(rgba[k_13]);
-                        uint _2566 = packsRGB(param_39);
-                        v_297.memory[base_ix + k_13] = _2566;
+                        uint _2522 = packsRGB(param_39);
+                        v_297.memory[base_ix + k_13] = _2522;
                         rgba[k_13] = float4(0.0);
                     }
                 }
diff --git a/piet-gpu/shader/gen/kernel4.spv b/piet-gpu/shader/gen/kernel4.spv
index 978e0a2..28d4cae 100644
--- a/piet-gpu/shader/gen/kernel4.spv
+++ b/piet-gpu/shader/gen/kernel4.spv
Binary files differ
diff --git a/piet-gpu/shader/gen/kernel4_gray.dxil b/piet-gpu/shader/gen/kernel4_gray.dxil
index 37fe62c..c91d35d 100644
--- a/piet-gpu/shader/gen/kernel4_gray.dxil
+++ b/piet-gpu/shader/gen/kernel4_gray.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/kernel4_gray.hlsl b/piet-gpu/shader/gen/kernel4_gray.hlsl
index 5bd7b3b..58d3f5a 100644
--- a/piet-gpu/shader/gen/kernel4_gray.hlsl
+++ b/piet-gpu/shader/gen/kernel4_gray.hlsl
@@ -162,7 +162,7 @@
 static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 1u);
 
 RWByteAddressBuffer _297 : register(u0, space0);
-ByteAddressBuffer _1725 : register(t1, space0);
+ByteAddressBuffer _1681 : register(t1, space0);
 RWTexture2D<unorm float4> image_atlas : register(u3, space0);
 RWTexture2D<unorm float4> gradients : register(u4, space0);
 RWTexture2D<unorm float> image : register(u2, space0);
@@ -347,10 +347,7 @@
 
 float3 fromsRGB(float3 srgb)
 {
-    bool3 cutoff = bool3(srgb.x >= 0.040449999272823333740234375f.xxx.x, srgb.y >= 0.040449999272823333740234375f.xxx.y, srgb.z >= 0.040449999272823333740234375f.xxx.z);
-    float3 below = srgb / 12.9200000762939453125f.xxx;
-    float3 above = pow((srgb + 0.054999999701976776123046875f.xxx) / 1.05499994754791259765625f.xxx, 2.400000095367431640625f.xxx);
-    return float3(cutoff.x ? above.x : below.x, cutoff.y ? above.y : below.y, cutoff.z ? above.z : below.z);
+    return srgb;
 }
 
 float4 unpacksRGB(uint srgba)
@@ -477,10 +474,10 @@
         int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset;
         float4 fg_rgba = image_atlas[uv];
         float3 param_1 = fg_rgba.xyz;
-        float3 _1697 = fromsRGB(param_1);
-        fg_rgba.x = _1697.x;
-        fg_rgba.y = _1697.y;
-        fg_rgba.z = _1697.z;
+        float3 _1653 = fromsRGB(param_1);
+        fg_rgba.x = _1653.x;
+        fg_rgba.y = _1653.y;
+        fg_rgba.z = _1653.z;
         rgba[i] = fg_rgba;
     }
     spvReturnValue = rgba;
@@ -488,10 +485,7 @@
 
 float3 tosRGB(float3 rgb)
 {
-    bool3 cutoff = bool3(rgb.x >= 0.003130800090730190277099609375f.xxx.x, rgb.y >= 0.003130800090730190277099609375f.xxx.y, rgb.z >= 0.003130800090730190277099609375f.xxx.z);
-    float3 below = 12.9200000762939453125f.xxx * rgb;
-    float3 above = (1.05499994754791259765625f.xxx * pow(rgb, 0.416660010814666748046875f.xxx)) - 0.054999999701976776123046875f.xxx;
-    return float3(cutoff.x ? above.x : below.x, cutoff.y ? above.y : below.y, cutoff.z ? above.z : below.z);
+    return rgb;
 }
 
 uint packsRGB(inout float4 rgba)
@@ -529,7 +523,10 @@
 {
     float3 param = cb;
     float3 param_1 = (cs * 2.0f) - 1.0f.xxx;
-    return lerp(screen(param, param_1), (cb * 2.0f) * cs, float3(bool3(cs.x <= 0.5f.xxx.x, cs.y <= 0.5f.xxx.y, cs.z <= 0.5f.xxx.z)));
+    float3 _889 = screen(param, param_1);
+    float3 _893 = (cb * 2.0f) * cs;
+    bool3 _898 = bool3(cs.x <= 0.5f.xxx.x, cs.y <= 0.5f.xxx.y, cs.z <= 0.5f.xxx.z);
+    return float3(_898.x ? _893.x : _889.x, _898.y ? _893.y : _889.y, _898.z ? _893.z : _889.z);
 }
 
 float color_dodge(float cb, float cs)
@@ -572,8 +569,14 @@
 
 float3 soft_light(float3 cb, float3 cs)
 {
-    float3 d = lerp(sqrt(cb), ((((cb * 16.0f) - 12.0f.xxx) * cb) + 4.0f.xxx) * cb, float3(bool3(cb.x <= 0.25f.xxx.x, cb.y <= 0.25f.xxx.y, cb.z <= 0.25f.xxx.z)));
-    return lerp(cb + (((cs * 2.0f) - 1.0f.xxx) * (d - cb)), cb - (((1.0f.xxx - (cs * 2.0f)) * cb) * (1.0f.xxx - cb)), float3(bool3(cs.x <= 0.5f.xxx.x, cs.y <= 0.5f.xxx.y, cs.z <= 0.5f.xxx.z)));
+    float3 _904 = sqrt(cb);
+    float3 _917 = ((((cb * 16.0f) - 12.0f.xxx) * cb) + 4.0f.xxx) * cb;
+    bool3 _921 = bool3(cb.x <= 0.25f.xxx.x, cb.y <= 0.25f.xxx.y, cb.z <= 0.25f.xxx.z);
+    float3 d = float3(_921.x ? _917.x : _904.x, _921.y ? _917.y : _904.y, _921.z ? _917.z : _904.z);
+    float3 _932 = cb + (((cs * 2.0f) - 1.0f.xxx) * (d - cb));
+    float3 _942 = cb - (((1.0f.xxx - (cs * 2.0f)) * cb) * (1.0f.xxx - cb));
+    bool3 _944 = bool3(cs.x <= 0.5f.xxx.x, cs.y <= 0.5f.xxx.y, cs.z <= 0.5f.xxx.z);
+    return float3(_944.x ? _942.x : _932.x, _944.y ? _942.y : _932.y, _944.z ? _942.z : _932.z);
 }
 
 float sat(float3 c)
@@ -706,8 +709,8 @@
 {
     float3 param = c;
     float3 param_1 = c + (l - lum(param)).xxx;
-    float3 _1052 = clip_color(param_1);
-    return _1052;
+    float3 _1048 = clip_color(param_1);
+    return _1048;
 }
 
 float3 mix_blend(float3 cb, float3 cs, uint mode)
@@ -795,9 +798,9 @@
             float3 param_20 = cb;
             float3 param_21 = cs;
             float param_22 = sat(param_20);
-            float3 _1343 = set_sat(param_21, param_22);
+            float3 _1340 = set_sat(param_21, param_22);
             float3 param_23 = cb;
-            float3 param_24 = _1343;
+            float3 param_24 = _1340;
             float param_25 = lum(param_23);
             b = set_lum(param_24, param_25);
             break;
@@ -807,9 +810,9 @@
             float3 param_26 = cs;
             float3 param_27 = cb;
             float param_28 = sat(param_26);
-            float3 _1357 = set_sat(param_27, param_28);
+            float3 _1354 = set_sat(param_27, param_28);
             float3 param_29 = cb;
-            float3 param_30 = _1357;
+            float3 param_30 = _1354;
             float param_31 = lum(param_29);
             b = set_lum(param_30, param_31);
             break;
@@ -934,7 +937,7 @@
 
 float4 mix_blend_compose(float4 backdrop, float4 src, uint mode)
 {
-    if (mode == 3u)
+    if ((mode & 32767u) == 3u)
     {
         return (backdrop * (1.0f - src.w)) + src;
     }
@@ -943,12 +946,12 @@
     float inv_backdrop_a = 1.0f / (backdrop.w + 1.0000000036274937255387218471014e-15f);
     float3 cb = backdrop.xyz * inv_backdrop_a;
     uint blend_mode = mode >> uint(8);
-    float3 param = cs;
-    float3 param_1 = cb;
+    float3 param = cb;
+    float3 param_1 = cs;
     uint param_2 = blend_mode;
     float3 blended = mix_blend(param, param_1, param_2);
     cs = lerp(cs, blended, backdrop.w.xxx);
-    uint comp_mode = mode * 255u;
+    uint comp_mode = mode & 255u;
     if (comp_mode == 3u)
     {
         float3 co = lerp(backdrop.xyz, cs, src.w.xxx);
@@ -986,16 +989,16 @@
 
 void comp_main()
 {
-    uint tile_ix = (gl_WorkGroupID.y * _1725.Load(8)) + gl_WorkGroupID.x;
-    Alloc _1740;
-    _1740.offset = _1725.Load(24);
+    uint tile_ix = (gl_WorkGroupID.y * _1681.Load(8)) + gl_WorkGroupID.x;
+    Alloc _1696;
+    _1696.offset = _1681.Load(24);
     Alloc param;
-    param.offset = _1740.offset;
+    param.offset = _1696.offset;
     uint param_1 = tile_ix * 1024u;
     uint param_2 = 1024u;
     Alloc cmd_alloc = slice_mem(param, param_1, param_2);
-    CmdRef _1749 = { cmd_alloc.offset };
-    CmdRef cmd_ref = _1749;
+    CmdRef _1705 = { cmd_alloc.offset };
+    CmdRef cmd_ref = _1705;
     uint blend_offset = _297.Load((cmd_ref.offset >> uint(2)) * 4 + 8);
     cmd_ref.offset += 4u;
     uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y));
@@ -1033,8 +1036,8 @@
                 {
                     df[k] = 1000000000.0f;
                 }
-                TileSegRef _1854 = { stroke.tile_ref };
-                tile_seg_ref = _1854;
+                TileSegRef _1810 = { stroke.tile_ref };
+                tile_seg_ref = _1810;
                 do
                 {
                     uint param_7 = tile_seg_ref.offset;
@@ -1070,8 +1073,8 @@
                 {
                     area[k_3] = float(fill.backdrop);
                 }
-                TileSegRef _1974 = { fill.tile_ref };
-                tile_seg_ref = _1974;
+                TileSegRef _1930 = { fill.tile_ref };
+                tile_seg_ref = _1930;
                 do
                 {
                     uint param_15 = tile_seg_ref.offset;
@@ -1160,10 +1163,10 @@
                     int x = int(round(clamp(my_d, 0.0f, 1.0f) * 511.0f));
                     float4 fg_rgba = gradients[int2(x, int(lin.index))];
                     float3 param_29 = fg_rgba.xyz;
-                    float3 _2308 = fromsRGB(param_29);
-                    fg_rgba.x = _2308.x;
-                    fg_rgba.y = _2308.y;
-                    fg_rgba.z = _2308.z;
+                    float3 _2264 = fromsRGB(param_29);
+                    fg_rgba.x = _2264.x;
+                    fg_rgba.y = _2264.y;
+                    fg_rgba.z = _2264.z;
                     float4 fg_k_1 = fg_rgba * area[k_9];
                     rgba[k_9] = (rgba[k_9] * (1.0f - fg_k_1.w)) + fg_k_1;
                 }
@@ -1186,10 +1189,10 @@
                     int x_1 = int(round(clamp(t_2, 0.0f, 1.0f) * 511.0f));
                     float4 fg_rgba_1 = gradients[int2(x_1, int(rad.index))];
                     float3 param_33 = fg_rgba_1.xyz;
-                    float3 _2418 = fromsRGB(param_33);
-                    fg_rgba_1.x = _2418.x;
-                    fg_rgba_1.y = _2418.y;
-                    fg_rgba_1.z = _2418.z;
+                    float3 _2374 = fromsRGB(param_33);
+                    fg_rgba_1.x = _2374.x;
+                    fg_rgba_1.y = _2374.y;
+                    fg_rgba_1.z = _2374.z;
                     float4 fg_k_2 = fg_rgba_1 * area[k_10];
                     rgba[k_10] = (rgba[k_10] * (1.0f - fg_k_2.w)) + fg_k_2;
                 }
@@ -1203,9 +1206,9 @@
                 CmdImage fill_img = Cmd_Image_read(param_34, param_35);
                 uint2 param_36 = xy_uint;
                 CmdImage param_37 = fill_img;
-                float4 _2461[8];
-                fillImage(_2461, param_36, param_37);
-                float4 img[8] = _2461;
+                float4 _2417[8];
+                fillImage(_2417, param_36, param_37);
+                float4 img[8] = _2417;
                 for (uint k_11 = 0u; k_11 < 8u; k_11++)
                 {
                     float4 fg_k_3 = img[k_11] * area[k_11];
@@ -1221,8 +1224,8 @@
                     for (uint k_12 = 0u; k_12 < 8u; k_12++)
                     {
                         float4 param_38 = float4(rgba[k_12]);
-                        uint _2523 = packsRGB(param_38);
-                        blend_stack[clip_depth][k_12] = _2523;
+                        uint _2479 = packsRGB(param_38);
+                        blend_stack[clip_depth][k_12] = _2479;
                         rgba[k_12] = 0.0f.xxxx;
                     }
                 }
@@ -1232,8 +1235,8 @@
                     for (uint k_13 = 0u; k_13 < 8u; k_13++)
                     {
                         float4 param_39 = float4(rgba[k_13]);
-                        uint _2566 = packsRGB(param_39);
-                        _297.Store((base_ix + k_13) * 4 + 8, _2566);
+                        uint _2522 = packsRGB(param_39);
+                        _297.Store((base_ix + k_13) * 4 + 8, _2522);
                         rgba[k_13] = 0.0f.xxxx;
                     }
                 }
@@ -1276,8 +1279,8 @@
             {
                 Alloc param_46 = cmd_alloc;
                 CmdRef param_47 = cmd_ref;
-                CmdRef _2665 = { Cmd_Jump_read(param_46, param_47).new_ref };
-                cmd_ref = _2665;
+                CmdRef _2621 = { Cmd_Jump_read(param_46, param_47).new_ref };
+                cmd_ref = _2621;
                 cmd_alloc.offset = cmd_ref.offset;
                 break;
             }
diff --git a/piet-gpu/shader/gen/kernel4_gray.msl b/piet-gpu/shader/gen/kernel4_gray.msl
index 2b550b8..04f3d69 100644
--- a/piet-gpu/shader/gen/kernel4_gray.msl
+++ b/piet-gpu/shader/gen/kernel4_gray.msl
@@ -393,10 +393,7 @@
 static inline __attribute__((always_inline))
 float3 fromsRGB(thread const float3& srgb)
 {
-    bool3 cutoff = srgb >= float3(0.040449999272823333740234375);
-    float3 below = srgb / float3(12.9200000762939453125);
-    float3 above = pow((srgb + float3(0.054999999701976776123046875)) / float3(1.05499994754791259765625), float3(2.400000095367431640625));
-    return select(below, above, cutoff);
+    return srgb;
 }
 
 static inline __attribute__((always_inline))
@@ -528,10 +525,10 @@
         int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset;
         float4 fg_rgba = image_atlas.read(uint2(uv));
         float3 param_1 = fg_rgba.xyz;
-        float3 _1697 = fromsRGB(param_1);
-        fg_rgba.x = _1697.x;
-        fg_rgba.y = _1697.y;
-        fg_rgba.z = _1697.z;
+        float3 _1653 = fromsRGB(param_1);
+        fg_rgba.x = _1653.x;
+        fg_rgba.y = _1653.y;
+        fg_rgba.z = _1653.z;
         rgba[i] = fg_rgba;
     }
     return rgba;
@@ -540,10 +537,7 @@
 static inline __attribute__((always_inline))
 float3 tosRGB(thread const float3& rgb)
 {
-    bool3 cutoff = rgb >= float3(0.003130800090730190277099609375);
-    float3 below = float3(12.9200000762939453125) * rgb;
-    float3 above = (float3(1.05499994754791259765625) * pow(rgb, float3(0.416660010814666748046875))) - float3(0.054999999701976776123046875);
-    return select(below, above, cutoff);
+    return rgb;
 }
 
 static inline __attribute__((always_inline))
@@ -585,7 +579,7 @@
 {
     float3 param = cb;
     float3 param_1 = (cs * 2.0) - float3(1.0);
-    return mix(screen(param, param_1), (cb * 2.0) * cs, float3(cs <= float3(0.5)));
+    return select(screen(param, param_1), (cb * 2.0) * cs, cs <= float3(0.5));
 }
 
 static inline __attribute__((always_inline))
@@ -631,8 +625,8 @@
 static inline __attribute__((always_inline))
 float3 soft_light(thread const float3& cb, thread const float3& cs)
 {
-    float3 d = mix(sqrt(cb), ((((cb * 16.0) - float3(12.0)) * cb) + float3(4.0)) * cb, float3(cb <= float3(0.25)));
-    return mix(cb + (((cs * 2.0) - float3(1.0)) * (d - cb)), cb - (((float3(1.0) - (cs * 2.0)) * cb) * (float3(1.0) - cb)), float3(cs <= float3(0.5)));
+    float3 d = select(sqrt(cb), ((((cb * 16.0) - float3(12.0)) * cb) + float3(4.0)) * cb, cb <= float3(0.25));
+    return select(cb + (((cs * 2.0) - float3(1.0)) * (d - cb)), cb - (((float3(1.0) - (cs * 2.0)) * cb) * (float3(1.0) - cb)), cs <= float3(0.5));
 }
 
 static inline __attribute__((always_inline))
@@ -771,8 +765,8 @@
 {
     float3 param = c;
     float3 param_1 = c + float3(l - lum(param));
-    float3 _1052 = clip_color(param_1);
-    return _1052;
+    float3 _1048 = clip_color(param_1);
+    return _1048;
 }
 
 static inline __attribute__((always_inline))
@@ -861,9 +855,9 @@
             float3 param_20 = cb;
             float3 param_21 = cs;
             float param_22 = sat(param_20);
-            float3 _1343 = set_sat(param_21, param_22);
+            float3 _1340 = set_sat(param_21, param_22);
             float3 param_23 = cb;
-            float3 param_24 = _1343;
+            float3 param_24 = _1340;
             float param_25 = lum(param_23);
             b = set_lum(param_24, param_25);
             break;
@@ -873,9 +867,9 @@
             float3 param_26 = cs;
             float3 param_27 = cb;
             float param_28 = sat(param_26);
-            float3 _1357 = set_sat(param_27, param_28);
+            float3 _1354 = set_sat(param_27, param_28);
             float3 param_29 = cb;
-            float3 param_30 = _1357;
+            float3 param_30 = _1354;
             float param_31 = lum(param_29);
             b = set_lum(param_30, param_31);
             break;
@@ -1002,7 +996,7 @@
 static inline __attribute__((always_inline))
 float4 mix_blend_compose(thread const float4& backdrop, thread const float4& src, thread const uint& mode)
 {
-    if (mode == 3u)
+    if ((mode & 32767u) == 3u)
     {
         return (backdrop * (1.0 - src.w)) + src;
     }
@@ -1011,12 +1005,12 @@
     float inv_backdrop_a = 1.0 / (backdrop.w + 1.0000000036274937255387218471014e-15);
     float3 cb = backdrop.xyz * inv_backdrop_a;
     uint blend_mode = mode >> uint(8);
-    float3 param = cs;
-    float3 param_1 = cb;
+    float3 param = cb;
+    float3 param_1 = cs;
     uint param_2 = blend_mode;
     float3 blended = mix_blend(param, param_1, param_2);
     cs = mix(cs, blended, float3(backdrop.w));
-    uint comp_mode = mode * 255u;
+    uint comp_mode = mode & 255u;
     if (comp_mode == 3u)
     {
         float3 co = mix(backdrop.xyz, cs, float3(src.w));
@@ -1053,11 +1047,11 @@
     return CmdJump_read(param, param_1, v_297);
 }
 
-kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1725 [[buffer(1)]], texture2d<float, access::write> image [[texture(2)]], texture2d<float> image_atlas [[texture(3)]], texture2d<float> gradients [[texture(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
+kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1681 [[buffer(1)]], texture2d<float, access::write> image [[texture(2)]], texture2d<float> image_atlas [[texture(3)]], texture2d<float> gradients [[texture(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
 {
-    uint tile_ix = (gl_WorkGroupID.y * _1725.conf.width_in_tiles) + gl_WorkGroupID.x;
+    uint tile_ix = (gl_WorkGroupID.y * _1681.conf.width_in_tiles) + gl_WorkGroupID.x;
     Alloc param;
-    param.offset = _1725.conf.ptcl_alloc.offset;
+    param.offset = _1681.conf.ptcl_alloc.offset;
     uint param_1 = tile_ix * 1024u;
     uint param_2 = 1024u;
     Alloc cmd_alloc = slice_mem(param, param_1, param_2);
@@ -1224,10 +1218,10 @@
                     int x = int(round(fast::clamp(my_d, 0.0, 1.0) * 511.0));
                     float4 fg_rgba = gradients.read(uint2(int2(x, int(lin.index))));
                     float3 param_29 = fg_rgba.xyz;
-                    float3 _2308 = fromsRGB(param_29);
-                    fg_rgba.x = _2308.x;
-                    fg_rgba.y = _2308.y;
-                    fg_rgba.z = _2308.z;
+                    float3 _2264 = fromsRGB(param_29);
+                    fg_rgba.x = _2264.x;
+                    fg_rgba.y = _2264.y;
+                    fg_rgba.z = _2264.z;
                     float4 fg_k_1 = fg_rgba * area[k_9];
                     rgba[k_9] = (rgba[k_9] * (1.0 - fg_k_1.w)) + fg_k_1;
                 }
@@ -1250,10 +1244,10 @@
                     int x_1 = int(round(fast::clamp(t_2, 0.0, 1.0) * 511.0));
                     float4 fg_rgba_1 = gradients.read(uint2(int2(x_1, int(rad.index))));
                     float3 param_33 = fg_rgba_1.xyz;
-                    float3 _2418 = fromsRGB(param_33);
-                    fg_rgba_1.x = _2418.x;
-                    fg_rgba_1.y = _2418.y;
-                    fg_rgba_1.z = _2418.z;
+                    float3 _2374 = fromsRGB(param_33);
+                    fg_rgba_1.x = _2374.x;
+                    fg_rgba_1.y = _2374.y;
+                    fg_rgba_1.z = _2374.z;
                     float4 fg_k_2 = fg_rgba_1 * area[k_10];
                     rgba[k_10] = (rgba[k_10] * (1.0 - fg_k_2.w)) + fg_k_2;
                 }
@@ -1284,8 +1278,8 @@
                     for (uint k_12 = 0u; k_12 < 8u; k_12++)
                     {
                         float4 param_38 = float4(rgba[k_12]);
-                        uint _2523 = packsRGB(param_38);
-                        blend_stack[clip_depth][k_12] = _2523;
+                        uint _2479 = packsRGB(param_38);
+                        blend_stack[clip_depth][k_12] = _2479;
                         rgba[k_12] = float4(0.0);
                     }
                 }
@@ -1295,8 +1289,8 @@
                     for (uint k_13 = 0u; k_13 < 8u; k_13++)
                     {
                         float4 param_39 = float4(rgba[k_13]);
-                        uint _2566 = packsRGB(param_39);
-                        v_297.memory[base_ix + k_13] = _2566;
+                        uint _2522 = packsRGB(param_39);
+                        v_297.memory[base_ix + k_13] = _2522;
                         rgba[k_13] = float4(0.0);
                     }
                 }
diff --git a/piet-gpu/shader/gen/kernel4_gray.spv b/piet-gpu/shader/gen/kernel4_gray.spv
index bacd9a8..93ce73e 100644
--- a/piet-gpu/shader/gen/kernel4_gray.spv
+++ b/piet-gpu/shader/gen/kernel4_gray.spv
Binary files differ
diff --git a/piet-gpu/shader/gen/path_coarse.dxil b/piet-gpu/shader/gen/path_coarse.dxil
index b6c9398..9fd593c 100644
--- a/piet-gpu/shader/gen/path_coarse.dxil
+++ b/piet-gpu/shader/gen/path_coarse.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/pathseg.dxil b/piet-gpu/shader/gen/pathseg.dxil
index 7ce4684..6130712 100644
--- a/piet-gpu/shader/gen/pathseg.dxil
+++ b/piet-gpu/shader/gen/pathseg.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/pathtag_reduce.dxil b/piet-gpu/shader/gen/pathtag_reduce.dxil
index ff544b8..4c2bd23 100644
--- a/piet-gpu/shader/gen/pathtag_reduce.dxil
+++ b/piet-gpu/shader/gen/pathtag_reduce.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/pathtag_root.dxil b/piet-gpu/shader/gen/pathtag_root.dxil
index 48584bd..77f12e6 100644
--- a/piet-gpu/shader/gen/pathtag_root.dxil
+++ b/piet-gpu/shader/gen/pathtag_root.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/tile_alloc.dxil b/piet-gpu/shader/gen/tile_alloc.dxil
index 7b130e0..7759910 100644
--- a/piet-gpu/shader/gen/tile_alloc.dxil
+++ b/piet-gpu/shader/gen/tile_alloc.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/transform_leaf.dxil b/piet-gpu/shader/gen/transform_leaf.dxil
index 0c1e376..f9f31e6 100644
--- a/piet-gpu/shader/gen/transform_leaf.dxil
+++ b/piet-gpu/shader/gen/transform_leaf.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/transform_reduce.dxil b/piet-gpu/shader/gen/transform_reduce.dxil
index fc3a311..978dd98 100644
--- a/piet-gpu/shader/gen/transform_reduce.dxil
+++ b/piet-gpu/shader/gen/transform_reduce.dxil
Binary files differ
diff --git a/piet-gpu/shader/gen/transform_root.dxil b/piet-gpu/shader/gen/transform_root.dxil
index a33ff7f..5b4f059 100644
--- a/piet-gpu/shader/gen/transform_root.dxil
+++ b/piet-gpu/shader/gen/transform_root.dxil
Binary files differ
diff --git a/piet-gpu/shader/kernel4.comp b/piet-gpu/shader/kernel4.comp
index c9b5dd3..2058acc 100644
--- a/piet-gpu/shader/kernel4.comp
+++ b/piet-gpu/shader/kernel4.comp
@@ -9,6 +9,11 @@
 #version 450
 #extension GL_GOOGLE_include_directive : enable
 
+// We can do rendering either in sRGB colorspace (for compatibility)
+// or in a linear colorspace, with conversions to sRGB (which will give
+// higher quality antialiasing among other things).
+#define DO_SRGB_CONVERSION 0
+
 #include "mem.h"
 #include "setup.h"
 
@@ -39,18 +44,26 @@
 
 #define MAX_BLEND_STACK 128
 mediump vec3 tosRGB(mediump vec3 rgb) {
+#if DO_SRGB_CONVERSION
     bvec3 cutoff = greaterThanEqual(rgb, vec3(0.0031308));
     mediump vec3 below = vec3(12.92) * rgb;
     mediump vec3 above = vec3(1.055) * pow(rgb, vec3(0.41666)) - vec3(0.055);
     return mix(below, above, cutoff);
+#else
+    return rgb;
+#endif
 }
 
 mediump vec3 fromsRGB(mediump vec3 srgb) {
+#if DO_SRGB_CONVERSION
     // Formula from EXT_sRGB.
     bvec3 cutoff = greaterThanEqual(srgb, vec3(0.04045));
     mediump vec3 below = srgb / vec3(12.92);
     mediump vec3 above = pow((srgb + vec3(0.055)) / vec3(1.055), vec3(2.4));
     return mix(below, above, cutoff);
+#else
+    return srgb;
+#endif
 }
 
 // unpacksRGB unpacks a color in the sRGB color space to a vec4 in the linear color
diff --git a/piet-gpu/src/blend.rs b/piet-gpu/src/blend.rs
index 7edcb4e..f0ca002 100644
--- a/piet-gpu/src/blend.rs
+++ b/piet-gpu/src/blend.rs
@@ -33,6 +33,8 @@
     Saturation = 13,
     Color = 14,
     Luminosity = 15,
+    // Clip is the same as normal, but doesn't always push a blend group.
+    Clip = 128,
 }
 
 #[derive(Copy, Clone, PartialEq, Eq, Debug)]
@@ -76,7 +78,7 @@
 impl Default for Blend {
     fn default() -> Self {
         Self {
-            mode: BlendMode::Normal,
+            mode: BlendMode::Clip,
             composition_mode: CompositionMode::SrcOver,
         }
     }
diff --git a/piet-gpu/src/encoder.rs b/piet-gpu/src/encoder.rs
index a24ddbc..d0ef1eb 100644
--- a/piet-gpu/src/encoder.rs
+++ b/piet-gpu/src/encoder.rs
@@ -306,16 +306,21 @@
         self.drawdata_stream.extend(bytemuck::bytes_of(&element));
     }
 
-
     /// Encode a fill radial gradient draw object.
     ///
     /// This should be encoded after a path.
     pub fn fill_rad_gradient(&mut self, index: u32, p0: [f32; 2], p1: [f32; 2], r0: f32, r1: f32) {
         self.drawtag_stream.push(DRAWTAG_FILLRADGRADIENT);
-        let element = FillRadGradient { index, p0, p1, r0, r1 };
+        let element = FillRadGradient {
+            index,
+            p0,
+            p1,
+            r0,
+            r1,
+        };
         self.drawdata_stream.extend(bytemuck::bytes_of(&element));
     }
-    
+
     /// Start a clip.
     pub fn begin_clip(&mut self, blend: Option<Blend>) {
         self.drawtag_stream.push(DRAWTAG_BEGINCLIP);
diff --git a/piet-gpu/src/gradient.rs b/piet-gpu/src/gradient.rs
index e655908..443eaec 100644
--- a/piet-gpu/src/gradient.rs
+++ b/piet-gpu/src/gradient.rs
@@ -19,7 +19,7 @@
 use std::collections::hash_map::{Entry, HashMap};
 
 use piet::kurbo::Point;
-use piet::{Color, FixedLinearGradient, GradientStop, FixedRadialGradient};
+use piet::{Color, FixedLinearGradient, FixedRadialGradient, GradientStop};
 
 /// Radial gradient compatible with COLRv1 spec
 #[derive(Debug, Clone)]
diff --git a/piet-gpu/src/lib.rs b/piet-gpu/src/lib.rs
index 8915de4..1ebb5cf 100644
--- a/piet-gpu/src/lib.rs
+++ b/piet-gpu/src/lib.rs
@@ -13,8 +13,8 @@
 
 pub use blend::{Blend, BlendMode, CompositionMode};
 pub use encoder::EncodedSceneRef;
-pub use render_ctx::PietGpuRenderContext;
 pub use gradient::Colrv1RadialGradient;
+pub use render_ctx::PietGpuRenderContext;
 
 use piet::kurbo::Vec2;
 use piet::{ImageFormat, RenderContext};
@@ -141,6 +141,9 @@
 }
 
 impl Renderer {
+    /// The number of query pool entries needed to run the renderer.
+    pub const QUERY_POOL_SIZE: u32 = 12;
+
     pub unsafe fn new(
         session: &Session,
         width: usize,
diff --git a/piet-gpu/src/render_ctx.rs b/piet-gpu/src/render_ctx.rs
index f78e8ab..5d4ffd3 100644
--- a/piet-gpu/src/render_ctx.rs
+++ b/piet-gpu/src/render_ctx.rs
@@ -1,8 +1,11 @@
+// This should match the value in kernel4.comp for correct rendering.
+const DO_SRGB_CONVERSION: bool = false;
+
 use std::borrow::Cow;
 
 use crate::encoder::GlyphEncoder;
 use crate::stages::{Config, Transform};
-use piet::kurbo::{Affine, Insets, PathEl, Point, Rect, Shape};
+use piet::kurbo::{Affine, PathEl, Point, Rect, Shape};
 use piet::{
     Color, Error, FixedGradient, ImageFormat, InterpolationMode, IntoBrush, RenderContext,
     StrokeStyle,
@@ -12,7 +15,7 @@
 use piet_gpu_types::encoder::{Encode, Encoder};
 use piet_gpu_types::scene::Element;
 
-use crate::gradient::{LinearGradient, RadialGradient, RampCache, Colrv1RadialGradient};
+use crate::gradient::{Colrv1RadialGradient, LinearGradient, RadialGradient, RampCache};
 use crate::text::Font;
 pub use crate::text::{PietGpuText, PietGpuTextLayout, PietGpuTextLayoutBuilder};
 use crate::Blend;
@@ -464,19 +467,27 @@
 }
 
 fn to_srgb(f: f64) -> f64 {
-    if f <= 0.0031308 {
-        f * 12.92
+    if DO_SRGB_CONVERSION {
+        if f <= 0.0031308 {
+            f * 12.92
+        } else {
+            let a = 0.055;
+            (1. + a) * f64::powf(f, f64::recip(2.4)) - a
+        }
     } else {
-        let a = 0.055;
-        (1. + a) * f64::powf(f, f64::recip(2.4)) - a
+        f
     }
 }
 
 fn from_srgb(f: f64) -> f64 {
-    if f <= 0.04045 {
-        f / 12.92
+    if DO_SRGB_CONVERSION {
+        if f <= 0.04045 {
+            f / 12.92
+        } else {
+            let a = 0.055;
+            f64::powf((f + a) * f64::recip(1. + a), 2.4)
+        }
     } else {
-        let a = 0.055;
-        f64::powf((f + a) * f64::recip(1. + a), 2.4)
+        f
     }
 }
diff --git a/piet-gpu/src/stages/clip.rs b/piet-gpu/src/stages/clip.rs
index 2fd195b..b7b77eb 100644
--- a/piet-gpu/src/stages/clip.rs
+++ b/piet-gpu/src/stages/clip.rs
@@ -16,7 +16,9 @@
 
 //! The clip processing stage (includes substages).
 
-use piet_gpu_hal::{include_shader, BindType, Buffer, ComputePass, DescriptorSet, Pipeline, Session};
+use piet_gpu_hal::{
+    include_shader, BindType, Buffer, ComputePass, DescriptorSet, Pipeline, Session,
+};
 
 // Note that this isn't the code/stage/binding pattern of most of the other stages
 // in the new element processing pipeline. We want to move those temporary buffers
diff --git a/piet-gpu/src/test_scenes.rs b/piet-gpu/src/test_scenes.rs
index bfd2af2..e3aeaba 100644
--- a/piet-gpu/src/test_scenes.rs
+++ b/piet-gpu/src/test_scenes.rs
@@ -2,10 +2,10 @@
 
 use rand::{Rng, RngCore};
 
-use crate::{Blend, BlendMode, CompositionMode, PietGpuRenderContext, Colrv1RadialGradient};
+use crate::{Blend, BlendMode, Colrv1RadialGradient, CompositionMode, PietGpuRenderContext};
 use piet::kurbo::{Affine, BezPath, Circle, Line, Point, Rect, Shape};
 use piet::{
-    Color, FixedGradient, FixedRadialGradient, GradientStop, Text, TextAttribute, TextLayoutBuilder,
+    Color, GradientStop, LinearGradient, Text, TextAttribute, TextLayoutBuilder, UnitPoint,
 };
 
 use crate::{PicoSvg, RenderContext, Vec2};
@@ -200,6 +200,113 @@
     println!("flattening and encoding time: {:?}", start.elapsed());
 }
 
+pub fn render_blend_square(rc: &mut PietGpuRenderContext, blend: Blend) {
+    // Inspired by https://developer.mozilla.org/en-US/docs/Web/CSS/mix-blend-mode
+    let rect = Rect::new(0., 0., 200., 200.);
+    let stops = vec![
+        GradientStop {
+            color: Color::BLACK,
+            pos: 0.0,
+        },
+        GradientStop {
+            color: Color::WHITE,
+            pos: 1.0,
+        },
+    ];
+    let linear = LinearGradient::new(UnitPoint::LEFT, UnitPoint::RIGHT, stops);
+    rc.fill(rect, &linear);
+    const GRADIENTS: &[(f64, f64, Color)] = &[
+        (150., 0., Color::rgb8(255, 240, 64)),
+        (175., 100., Color::rgb8(255, 96, 240)),
+        (125., 200., Color::rgb8(64, 192, 255)),
+    ];
+    for (x, y, c) in GRADIENTS {
+        let stops = vec![
+            GradientStop {
+                color: c.clone(),
+                pos: 0.0,
+            },
+            GradientStop {
+                color: Color::rgba8(0, 0, 0, 0),
+                pos: 1.0,
+            },
+        ];
+        let rad = Colrv1RadialGradient {
+            center0: Point::new(*x, *y),
+            center1: Point::new(*x, *y),
+            radius0: 0.0,
+            radius1: 100.0,
+            stops,
+        };
+        let brush = rc.radial_gradient_colrv1(&rad);
+        rc.fill(Rect::new(0., 0., 200., 200.), &brush);
+    }
+    const COLORS: &[Color] = &[
+        Color::rgb8(255, 0, 0),
+        Color::rgb8(0, 255, 0),
+        Color::rgb8(0, 0, 255),
+    ];
+    let _ = rc.with_save(|rc| {
+        // Isolation (this can be removed for non-isolated version)
+        rc.blend(rect, BlendMode::Normal.into());
+        for (i, c) in COLORS.iter().enumerate() {
+            let stops = vec![
+                GradientStop {
+                    color: Color::WHITE,
+                    pos: 0.0,
+                },
+                GradientStop {
+                    color: c.clone(),
+                    pos: 1.0,
+                },
+            ];
+            // squash the ellipse
+            let a = Affine::translate((100., 100.))
+                * Affine::rotate(std::f64::consts::FRAC_PI_3 * (i * 2 + 1) as f64)
+                * Affine::scale_non_uniform(1.0, 0.357)
+                * Affine::translate((-100., -100.));
+            let linear = LinearGradient::new(UnitPoint::TOP, UnitPoint::BOTTOM, stops);
+            let _ = rc.with_save(|rc| {
+                rc.blend(rect, blend);
+                rc.transform(a);
+                rc.fill(Circle::new((100., 100.), 90.), &linear);
+                Ok(())
+            });
+        }
+        Ok(())
+    });
+}
+
+pub fn render_blend_grid(rc: &mut PietGpuRenderContext) {
+    const BLEND_MODES: &[BlendMode] = &[
+        BlendMode::Normal,
+        BlendMode::Multiply,
+        BlendMode::Darken,
+        BlendMode::Screen,
+        BlendMode::Lighten,
+        BlendMode::Overlay,
+        BlendMode::ColorDodge,
+        BlendMode::ColorBurn,
+        BlendMode::HardLight,
+        BlendMode::SoftLight,
+        BlendMode::Difference,
+        BlendMode::Exclusion,
+        BlendMode::Hue,
+        BlendMode::Saturation,
+        BlendMode::Color,
+        BlendMode::Luminosity,
+    ];
+    for (ix, &blend) in BLEND_MODES.iter().enumerate() {
+        let _ = rc.with_save(|rc| {
+            let i = ix % 4;
+            let j = ix / 4;
+            rc.transform(Affine::translate((i as f64 * 225., j as f64 * 225.)));
+            render_blend_square(rc, blend.into());
+            Ok(())
+        });
+    }
+}
+
 pub fn render_anim_frame(rc: &mut impl RenderContext, i: usize) {
     rc.fill(
         Rect::new(0.0, 0.0, 1000.0, 1000.0),
diff --git a/piet-scene/Cargo.toml b/piet-scene/Cargo.toml
index df66483..e80cbb8 100644
--- a/piet-scene/Cargo.toml
+++ b/piet-scene/Cargo.toml
@@ -7,5 +7,5 @@
 [dependencies]
 bytemuck = { version = "1.7.2", features = ["derive"] }
 smallvec = "1.8.0"
-pinot = "0.1.5"
-moscato = "0.1.2"
+moscato = { git = "https://github.com/dfrg/pinot" }
+kurbo = { version = "0.8.3", optional = true }
diff --git a/piet-scene/src/glyph/mod.rs b/piet-scene/src/glyph/mod.rs
index 3bfa36c..f6ebf14 100644
--- a/piet-scene/src/glyph/mod.rs
+++ b/piet-scene/src/glyph/mod.rs
@@ -14,7 +14,7 @@
 //
 // Also licensed under MIT license, at your choice.
 
-pub use pinot;
+pub use moscato::pinot;
 
 use crate::brush::{Brush, Color};
 use crate::geometry::Affine;
@@ -114,7 +114,9 @@
                     };
                     xform_stack.push(xform);
                 }
-                Command::PopTransform => { xform_stack.pop(); },
+                Command::PopTransform => {
+                    xform_stack.pop();
+                }
                 Command::PushClip(path_index) => {
                     let path = glyph.path(*path_index)?;
                     if let Some(xform) = xform_stack.last() {
diff --git a/piet-scene/src/lib.rs b/piet-scene/src/lib.rs
index a72ff54..8f436b2 100644
--- a/piet-scene/src/lib.rs
+++ b/piet-scene/src/lib.rs
@@ -20,3 +20,103 @@
 pub mod path;
 pub mod resource;
 pub mod scene;
+
+/// Implement conversions to and from Kurbo types when the `kurbo` feature is
+/// enabled.
+#[cfg(feature = "kurbo")]
+mod kurbo_conv {
+    use super::geometry::{Affine, Point, Rect};
+    use super::path::Element;
+
+    impl Point {
+        /// Creates a new point from the equivalent kurbo type.
+        pub fn from_kurbo(point: kurbo::Point) -> Self {
+            Self::new(point.x as f32, point.y as f32)
+        }
+    }
+
+    impl From<Point> for kurbo::Point {
+        fn from(p: Point) -> kurbo::Point {
+            Self::new(p.x as f64, p.y as f64)
+        }
+    }
+
+    impl Affine {
+        /// Creates a new affine transformation from the equivalent kurbo type.
+        pub fn from_kurbo(affine: kurbo::Affine) -> Self {
+            let c = affine.as_coeffs();
+            Self {
+                xx: c[0] as f32,
+                yx: c[1] as f32,
+                xy: c[2] as f32,
+                yy: c[3] as f32,
+                dx: c[4] as f32,
+                dy: c[5] as f32,
+            }
+        }
+    }
+
+    impl From<Affine> for kurbo::Affine {
+        fn from(a: Affine) -> Self {
+            Self::new([
+                a.xx as f64,
+                a.yx as f64,
+                a.xy as f64, // third coefficient is `xy`, mirroring `Affine::from_kurbo`
+                a.yy as f64,
+                a.dx as f64,
+                a.dy as f64,
+            ])
+        }
+    }
+
+    impl Rect {
+        /// Creates a new rectangle from the equivalent kurbo type.
+        pub fn from_kurbo(rect: kurbo::Rect) -> Self {
+            Self {
+                min: Point::new(rect.x0 as f32, rect.y0 as f32),
+                max: Point::new(rect.x1 as f32, rect.y1 as f32),
+            }
+        }
+    }
+
+    impl From<Rect> for kurbo::Rect {
+        fn from(r: Rect) -> Self {
+            Self {
+                x0: r.min.x as f64,
+                y0: r.min.y as f64,
+                x1: r.max.x as f64,
+                y1: r.max.y as f64,
+            }
+        }
+    }
+
+    impl Element {
+        /// Creates a new path element from the equivalent kurbo type.
+        pub fn from_kurbo(el: kurbo::PathEl) -> Self {
+            use kurbo::PathEl::*;
+            let from_kurbo = Point::from_kurbo; // `use` cannot import associated fns
+            match el {
+                MoveTo(p0) => Self::MoveTo(from_kurbo(p0)),
+                LineTo(p0) => Self::LineTo(from_kurbo(p0)),
+                QuadTo(p0, p1) => Self::QuadTo(from_kurbo(p0), from_kurbo(p1)),
+                CurveTo(p0, p1, p2) => {
+                    Self::CurveTo(from_kurbo(p0), from_kurbo(p1), from_kurbo(p2))
+                }
+                ClosePath => Self::Close,
+            }
+        }
+    }
+
+    impl From<Element> for kurbo::PathEl {
+        fn from(e: Element) -> Self {
+            use Element::*;
+            match e {
+                MoveTo(p0) => Self::MoveTo(p0.into()),
+                LineTo(p0) => Self::LineTo(p0.into()),
+                QuadTo(p0, p1) => Self::QuadTo(p0.into(), p1.into()),
+                CurveTo(p0, p1, p2) => Self::CurveTo(p0.into(), p1.into(), p2.into()),
+                Close => Self::ClosePath,
+            }
+        }
+    }
+}
diff --git a/piet-scene/src/scene/blend.rs b/piet-scene/src/scene/blend.rs
index 7edc6cd..d6aa080 100644
--- a/piet-scene/src/scene/blend.rs
+++ b/piet-scene/src/scene/blend.rs
@@ -34,6 +34,8 @@
     Saturation = 13,
     Color = 14,
     Luminosity = 15,
+    // Clip is the same as normal, but doesn't always push a blend group.
+    Clip = 128,
 }
 
 /// Defines the layer composition function for a blend operation.
@@ -53,8 +55,7 @@
     DestAtop = 10,
     Xor = 11,
     Plus = 12,
-    PlusDarker = 13,
-    PlusLighter = 14,
+    PlusLighter = 13,
 }
 
 /// Blend mode consisting of mixing and composition functions.
@@ -77,7 +78,7 @@
 impl Default for Blend {
     fn default() -> Self {
         Self {
-            mix: Mix::Normal,
+            mix: Mix::Clip,
             compose: Compose::SrcOver,
         }
     }
diff --git a/piet-scene/src/scene/builder.rs b/piet-scene/src/scene/builder.rs
index 85d75b2..8aa1bf5 100644
--- a/piet-scene/src/scene/builder.rs
+++ b/piet-scene/src/scene/builder.rs
@@ -23,12 +23,14 @@
 
 const MAX_BLEND_STACK: usize = 256;
 
-/// Creates a new builder for constructing a scene.
-pub fn build_scene<'a>(scene: &'a mut Scene, resources: &'a mut ResourceContext) -> Builder<'a> {
-    Builder::new(&mut scene.data, ResourceData::Scene(resources))
+/// Creates a new builder for filling a scene. Any current content in the scene
+/// will be cleared.
+pub fn build_scene<'a>(scene: &'a mut Scene, rcx: &'a mut ResourceContext) -> Builder<'a> {
+    Builder::new(&mut scene.data, ResourceData::Scene(rcx))
 }
 
-/// Creates a new builder for construction a scene fragment.
+/// Creates a new builder for filling a scene fragment. Any current content in
+/// the fragment will be cleared.
 pub fn build_fragment<'a>(fragment: &'a mut Fragment) -> Builder<'a> {
     Builder::new(
         &mut fragment.data,
diff --git a/piet-scene/src/scene/mod.rs b/piet-scene/src/scene/mod.rs
index 9f7be2f..577f81e 100644
--- a/piet-scene/src/scene/mod.rs
+++ b/piet-scene/src/scene/mod.rs
@@ -23,11 +23,12 @@
 pub use style::*;
 
 use super::brush::*;
-use super::geometry::{Affine, Point, Rect};
+use super::geometry::{Affine, Point};
 use super::path::Element;
 
 use core::ops::Range;
 
+/// Raw data streams describing an encoded scene.
 #[derive(Default)]
 pub struct SceneData {
     pub transform_stream: Vec<Affine>,
@@ -83,6 +84,7 @@
 }
 
 impl Scene {
+    /// Returns the raw encoded scene data streams.
     pub fn data(&self) -> &SceneData {
         &self.data
     }
@@ -96,6 +98,8 @@
 }
 
 impl Fragment {
+    /// Returns the underlying stream of points that define all encoded path
+    /// segments.
     pub fn points(&self) -> &[Point] {
         bytemuck::cast_slice(&self.data.pathseg_stream)
     }