Merge pull request #185 from dfrg/xform2

Encode absolute transforms in the scene and remove the GPU transform scan stage
diff --git a/pgpu-render/src/render.rs b/pgpu-render/src/render.rs
index d3ae07b..1227f19 100644
--- a/pgpu-render/src/render.rs
+++ b/pgpu-render/src/render.rs
@@ -147,9 +147,7 @@
 
 impl<'a> PgpuSceneBuilder<'a> {
     pub fn add_glyph(&mut self, glyph: &PgpuGlyph, transform: &piet_scene::geometry::Affine) {
-        self.0.push_transform(*transform);
-        self.0.append(&glyph.fragment);
-        self.0.pop_transform();
+        self.0.append(&glyph.fragment, Some(*transform));
     }
 
     pub fn finish(self) {
diff --git a/piet-gpu/shader/build.ninja b/piet-gpu/shader/build.ninja
index 09b0683..079c0e7 100644
--- a/piet-gpu/shader/build.ninja
+++ b/piet-gpu/shader/build.ninja
@@ -66,22 +66,6 @@
 
 # New element pipeline follows
 
-build gen/transform_reduce.spv: glsl transform_reduce.comp | scene.h setup.h mem.h
-build gen/transform_reduce.hlsl: hlsl gen/transform_reduce.spv
-build gen/transform_reduce.dxil: dxil gen/transform_reduce.hlsl
-build gen/transform_reduce.msl: msl gen/transform_reduce.spv
-
-build gen/transform_root.spv: glsl transform_scan.comp | setup.h
-  flags = -DROOT
-build gen/transform_root.hlsl: hlsl gen/transform_root.spv
-build gen/transform_root.dxil: dxil gen/transform_root.hlsl
-build gen/transform_root.msl: msl gen/transform_root.spv
-
-build gen/transform_leaf.spv: glsl transform_leaf.comp | scene.h tile.h setup.h mem.h
-build gen/transform_leaf.hlsl: hlsl gen/transform_leaf.spv
-build gen/transform_leaf.dxil: dxil gen/transform_leaf.hlsl
-build gen/transform_leaf.msl: msl gen/transform_leaf.spv
-
 build gen/pathtag_reduce.spv: glsl pathtag_reduce.comp | pathtag.h setup.h mem.h
 build gen/pathtag_reduce.hlsl: hlsl gen/pathtag_reduce.spv
 build gen/pathtag_reduce.dxil: dxil gen/pathtag_reduce.hlsl
@@ -98,7 +82,7 @@
 build gen/bbox_clear.dxil: dxil gen/bbox_clear.hlsl
 build gen/bbox_clear.msl: msl gen/bbox_clear.spv
 
-build gen/pathseg.spv: glsl pathseg.comp | tile.h pathseg.h pathtag.h setup.h mem.h
+build gen/pathseg.spv: glsl pathseg.comp | scene.h tile.h pathseg.h pathtag.h setup.h mem.h
 build gen/pathseg.hlsl: hlsl gen/pathseg.spv
 build gen/pathseg.dxil: dxil gen/pathseg.hlsl
 build gen/pathseg.msl: msl gen/pathseg.spv
@@ -129,6 +113,6 @@
 build gen/clip_leaf.dxil: dxil gen/clip_leaf.hlsl
 build gen/clip_leaf.msl: msl gen/clip_leaf.spv
 
-build spv: phony gen/backdrop_lg.spv gen/backdrop.spv gen/bbox_clear.spv gen/binning.spv gen/clip_leaf.spv gen/clip_reduce.spv gen/coarse.spv gen/draw_leaf.spv gen/draw_reduce.spv gen/draw_root.spv gen/kernel4.spv gen/kernel4_gray.spv gen/path_coarse.spv gen/pathseg.spv gen/pathtag_reduce.spv gen/pathtag_root.spv gen/tile_alloc.spv gen/transform_leaf.spv gen/transform_reduce.spv gen/transform_root.spv
-build dxil: phony gen/backdrop.hlsl gen/backdrop_lg.hlsl gen/bbox_clear.hlsl gen/binning.hlsl gen/clip_leaf.hlsl gen/clip_reduce.hlsl gen/coarse.hlsl gen/draw_leaf.hlsl gen/draw_reduce.hlsl gen/draw_root.hlsl gen/kernel4.hlsl gen/kernel4_gray.hlsl gen/path_coarse.hlsl gen/pathseg.hlsl gen/pathtag_reduce.hlsl gen/pathtag_root.hlsl gen/tile_alloc.hlsl gen/transform_leaf.hlsl gen/transform_reduce.hlsl gen/transform_root.hlsl
-build msl: phony gen/backdrop_lg.msl gen/backdrop.msl gen/bbox_clear.msl gen/binning.msl gen/clip_leaf.msl gen/clip_reduce.msl gen/coarse.msl gen/draw_leaf.msl gen/draw_reduce.msl gen/draw_root.msl gen/kernel4.msl gen/kernel4_gray.msl gen/path_coarse.msl gen/pathseg.msl gen/pathtag_reduce.msl gen/pathtag_root.msl gen/tile_alloc.msl gen/transform_leaf.msl gen/transform_reduce.msl gen/transform_root.msl
+build spv: phony gen/backdrop_lg.spv gen/backdrop.spv gen/bbox_clear.spv gen/binning.spv gen/clip_leaf.spv gen/clip_reduce.spv gen/coarse.spv gen/draw_leaf.spv gen/draw_reduce.spv gen/draw_root.spv gen/kernel4.spv gen/kernel4_gray.spv gen/path_coarse.spv gen/pathseg.spv gen/pathtag_reduce.spv gen/pathtag_root.spv gen/tile_alloc.spv
+build dxil: phony gen/backdrop.hlsl gen/backdrop_lg.hlsl gen/bbox_clear.hlsl gen/binning.hlsl gen/clip_leaf.hlsl gen/clip_reduce.hlsl gen/coarse.hlsl gen/draw_leaf.hlsl gen/draw_reduce.hlsl gen/draw_root.hlsl gen/kernel4.hlsl gen/kernel4_gray.hlsl gen/path_coarse.hlsl gen/pathseg.hlsl gen/pathtag_reduce.hlsl gen/pathtag_root.hlsl gen/tile_alloc.hlsl
+build msl: phony gen/backdrop_lg.msl gen/backdrop.msl gen/bbox_clear.msl gen/binning.msl gen/clip_leaf.msl gen/clip_reduce.msl gen/coarse.msl gen/draw_leaf.msl gen/draw_reduce.msl gen/draw_root.msl gen/kernel4.msl gen/kernel4_gray.msl gen/path_coarse.msl gen/pathseg.msl gen/pathtag_reduce.msl gen/pathtag_root.msl gen/tile_alloc.msl
diff --git a/piet-gpu/shader/draw_leaf.comp b/piet-gpu/shader/draw_leaf.comp
index ef369c9..434c7ea 100644
--- a/piet-gpu/shader/draw_leaf.comp
+++ b/piet-gpu/shader/draw_leaf.comp
@@ -108,10 +108,10 @@
             vec2 translate;
             if (linewidth >= 0.0 || tag_word == Drawtag_FillLinGradient || tag_word == Drawtag_FillRadGradient) {
                 uint trans_ix = memory[bbox_offset + 5];
-                uint t = (conf.trans_alloc.offset >> 2) + 6 * trans_ix;
-                mat = uintBitsToFloat(uvec4(memory[t], memory[t + 1], memory[t + 2], memory[t + 3]));
+                uint t = (conf.trans_offset >> 2) + trans_ix * 6;
+                mat = uintBitsToFloat(uvec4(scene[t], scene[t + 1], scene[t + 2], scene[t + 3]));
                 if (tag_word == Drawtag_FillLinGradient || tag_word == Drawtag_FillRadGradient) {
-                    translate = uintBitsToFloat(uvec2(memory[t + 4], memory[t + 5]));
+                    translate = uintBitsToFloat(uvec2(scene[t + 4], scene[t + 5]));
                 }
             }
             if (linewidth >= 0.0) {
diff --git a/piet-gpu/shader/pathseg.comp b/piet-gpu/shader/pathseg.comp
index ce4ab84..0efa66f 100644
--- a/piet-gpu/shader/pathseg.comp
+++ b/piet-gpu/shader/pathseg.comp
@@ -26,6 +26,7 @@
 
 #include "tile.h"
 #include "pathseg.h"
+#include "scene.h"
 
 layout(binding = 3) readonly buffer ParentBuf {
     TagMonoid[] parent;
@@ -126,7 +127,7 @@
     uint lw_ix = (conf.linewidth_offset >> 2) + tm.linewidth_ix;
     uint save_path_ix = tm.path_ix;
     uint trans_ix = tm.trans_ix;
-    TransformSegRef trans_ref = TransformSegRef(conf.trans_alloc.offset + trans_ix * TransformSeg_size);
+    TransformRef trans_ref = TransformRef(conf.trans_offset + trans_ix * Transform_size);
     PathSegRef ps_ref = PathSegRef(conf.pathseg_alloc.offset + tm.pathseg_ix * PathSeg_size);
     for (uint i = 0; i < N_SEQ; i++) {
         linewidth[i] = uintBitsToFloat(scene[lw_ix]);
@@ -162,7 +163,7 @@
                     }
                 }
             }
-            TransformSeg transform = TransformSeg_read(conf.trans_alloc, trans_ref);
+            Transform transform = Transform_read(trans_ref);
             p0 = transform.mat.xy * p0.x + transform.mat.zw * p0.y + transform.translate;
             p1 = transform.mat.xy * p1.x + transform.mat.zw * p1.y + transform.translate;
             vec4 bbox = vec4(min(p0, p1), max(p0, p1));
@@ -219,7 +220,7 @@
             local[i].flags = is_path;
             tm.path_ix += is_path;
             trans_ix += (tag_byte >> 5) & 1;
-            trans_ref.offset += ((tag_byte >> 5) & 1) * TransformSeg_size;
+            trans_ref.offset += ((tag_byte >> 5) & 1) * Transform_size;
             lw_ix += (tag_byte >> 6) & 1;
         }
     }
diff --git a/piet-gpu/shader/setup.h b/piet-gpu/shader/setup.h
index eb9f9ea..e6b6e3f 100644
--- a/piet-gpu/shader/setup.h
+++ b/piet-gpu/shader/setup.h
@@ -43,7 +43,6 @@
     Alloc ptcl_alloc;
     Alloc pathseg_alloc;
     Alloc anno_alloc;
-    Alloc trans_alloc;
     // new element pipeline stuff follows
 
     // Bounding boxes of paths, stored as int (so atomics work)
diff --git a/piet-gpu/shader/transform_leaf.comp b/piet-gpu/shader/transform_leaf.comp
deleted file mode 100644
index a5e4003..0000000
--- a/piet-gpu/shader/transform_leaf.comp
+++ /dev/null
@@ -1,86 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
-
-// A scan for a tree reduction prefix scan that outputs the final result.
-// Output is written into memory at trans_alloc.
-
-#version 450
-#extension GL_GOOGLE_include_directive : enable
-
-#include "mem.h"
-#include "setup.h"
-
-#define N_ROWS 8
-#define LG_WG_SIZE (7 + LG_WG_FACTOR)
-#define WG_SIZE (1 << LG_WG_SIZE)
-#define PARTITION_SIZE (WG_SIZE * N_ROWS)
-
-layout(local_size_x = WG_SIZE, local_size_y = 1) in;
-
-layout(binding = 1) readonly buffer ConfigBuf {
-    Config conf;
-};
-
-layout(binding = 2) readonly buffer SceneBuf {
-    uint[] scene;
-};
-
-#include "scene.h"
-#include "tile.h"
-
-#define Monoid Transform
-
-layout(set = 0, binding = 3) readonly buffer ParentBuf {
-    Monoid[] parent;
-};
-
-Monoid monoid_identity() {
-    return Monoid(vec4(1.0, 0.0, 0.0, 1.0), vec2(0.0, 0.0));
-}
-
-Monoid combine_monoid(Monoid a, Monoid b) {
-    Monoid c;
-    c.mat = a.mat.xyxy * b.mat.xxzz + a.mat.zwzw * b.mat.yyww;
-    c.translate = a.mat.xy * b.translate.x + a.mat.zw * b.translate.y + a.translate;
-    return c;
-}
-
-shared Monoid sh_scratch[WG_SIZE];
-
-void main() {
-    Monoid local[N_ROWS];
-
-    uint ix = gl_GlobalInvocationID.x * N_ROWS;
-    TransformRef ref = TransformRef(conf.trans_offset + ix * Transform_size);
-
-    Monoid agg = Transform_read(ref);
-    local[0] = agg;
-    for (uint i = 1; i < N_ROWS; i++) {
-        agg = combine_monoid(agg, Transform_read(Transform_index(ref, i)));
-        local[i] = agg;
-    }
-    sh_scratch[gl_LocalInvocationID.x] = agg;
-    for (uint i = 0; i < LG_WG_SIZE; i++) {
-        barrier();
-        if (gl_LocalInvocationID.x >= (1u << i)) {
-            Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i)];
-            agg = combine_monoid(other, agg);
-        }
-        barrier();
-        sh_scratch[gl_LocalInvocationID.x] = agg;
-    }
-
-    barrier();
-    Monoid row = monoid_identity();
-    if (gl_WorkGroupID.x > 0) {
-        row = parent[gl_WorkGroupID.x - 1];
-    }
-    if (gl_LocalInvocationID.x > 0) {
-        row = combine_monoid(row, sh_scratch[gl_LocalInvocationID.x - 1]);
-    }
-    for (uint i = 0; i < N_ROWS; i++) {
-        Monoid m = combine_monoid(row, local[i]);
-        TransformSeg transform = TransformSeg(m.mat, m.translate);
-        TransformSegRef trans_ref = TransformSegRef(conf.trans_alloc.offset + (ix + i) * TransformSeg_size);
-        TransformSeg_write(conf.trans_alloc, trans_ref, transform);
-    }
-}
diff --git a/piet-gpu/shader/transform_reduce.comp b/piet-gpu/shader/transform_reduce.comp
deleted file mode 100644
index e59d559..0000000
--- a/piet-gpu/shader/transform_reduce.comp
+++ /dev/null
@@ -1,69 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
-
-// The reduction phase for transform scan implemented as a tree reduction.
-
-#version 450
-#extension GL_GOOGLE_include_directive : enable
-
-#include "mem.h"
-#include "setup.h"
-
-#define N_ROWS 8
-#define LG_WG_SIZE (7 + LG_WG_FACTOR)
-#define WG_SIZE (1 << LG_WG_SIZE)
-#define PARTITION_SIZE (WG_SIZE * N_ROWS)
-
-layout(local_size_x = WG_SIZE, local_size_y = 1) in;
-
-layout(binding = 1) readonly buffer ConfigBuf {
-    Config conf;
-};
-
-layout(binding = 2) readonly buffer SceneBuf {
-    uint[] scene;
-};
-
-#include "scene.h"
-
-#define Monoid Transform
-
-layout(set = 0, binding = 3) buffer OutBuf {
-    Monoid[] outbuf;
-};
-
-Monoid monoid_identity() {
-    return Monoid(vec4(1.0, 0.0, 0.0, 1.0), vec2(0.0, 0.0));
-}
-
-Monoid combine_monoid(Monoid a, Monoid b) {
-    Monoid c;
-    c.mat = a.mat.xyxy * b.mat.xxzz + a.mat.zwzw * b.mat.yyww;
-    c.translate = a.mat.xy * b.translate.x + a.mat.zw * b.translate.y + a.translate;
-    return c;
-}
-
-shared Monoid sh_scratch[WG_SIZE];
-
-void main() {
-    uint ix = gl_GlobalInvocationID.x * N_ROWS;
-    TransformRef ref = TransformRef(conf.trans_offset + ix * Transform_size);
-
-    Monoid agg = Transform_read(ref);
-    for (uint i = 1; i < N_ROWS; i++) {
-        agg = combine_monoid(agg, Transform_read(Transform_index(ref, i)));
-    }
-    sh_scratch[gl_LocalInvocationID.x] = agg;
-    for (uint i = 0; i < LG_WG_SIZE; i++) {
-        barrier();
-        // We could make this predicate tighter, but would it help?
-        if (gl_LocalInvocationID.x + (1u << i) < WG_SIZE) {
-            Monoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i)];
-            agg = combine_monoid(agg, other);
-        }
-        barrier();
-        sh_scratch[gl_LocalInvocationID.x] = agg;
-    }
-    if (gl_LocalInvocationID.x == 0) {
-        outbuf[gl_WorkGroupID.x] = agg;
-    }
-}
diff --git a/piet-gpu/shader/transform_scan.comp b/piet-gpu/shader/transform_scan.comp
deleted file mode 100644
index 20b2a8a..0000000
--- a/piet-gpu/shader/transform_scan.comp
+++ /dev/null
@@ -1,91 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
-
-// A scan for a tree reduction prefix scan (either root or not, by ifdef).
-
-#version 450
-#extension GL_GOOGLE_include_directive : enable
-
-#include "setup.h"
-
-#define N_ROWS 8
-#define LG_WG_SIZE (7 + LG_WG_FACTOR)
-#define WG_SIZE (1 << LG_WG_SIZE)
-#define PARTITION_SIZE (WG_SIZE * N_ROWS)
-
-layout(local_size_x = WG_SIZE, local_size_y = 1) in;
-
-// This is copy-pasted from scene.h. It might be better for DRY
-// to include it, but that pulls in more stuff we don't need.
-struct Transform {
-    vec4 mat;
-    vec2 translate;
-};
-
-#define Monoid Transform
-
-layout(binding = 0) buffer DataBuf {
-    Monoid[] data;
-};
-
-#ifndef ROOT
-layout(binding = 1) readonly buffer ParentBuf {
-    Monoid[] parent;
-};
-#endif
-
-Monoid monoid_identity() {
-    return Monoid(vec4(1.0, 0.0, 0.0, 1.0), vec2(0.0, 0.0));
-}
-
-Monoid combine_monoid(Monoid a, Monoid b) {
-    Monoid c;
-    c.mat = a.mat.xyxy * b.mat.xxzz + a.mat.zwzw * b.mat.yyww;
-    c.translate = a.mat.xy * b.translate.x + a.mat.zw * b.translate.y + a.translate;
-    return c;
-}
-
-shared Monoid sh_scratch[WG_SIZE];
-
-void main() {
-    Monoid local[N_ROWS];
-
-    uint ix = gl_GlobalInvocationID.x * N_ROWS;
-
-    local[0] = data[ix];
-    for (uint i = 1; i < N_ROWS; i++) {
-        local[i] = combine_monoid(local[i - 1], data[ix + i]);
-    }
-    Monoid agg = local[N_ROWS - 1];
-    sh_scratch[gl_LocalInvocationID.x] = agg;
-    for (uint i = 0; i < LG_WG_SIZE; i++) {
-        barrier();
-        if (gl_LocalInvocationID.x >= (1u << i)) {
-            Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i)];
-            agg = combine_monoid(other, agg);
-        }
-        barrier();
-        sh_scratch[gl_LocalInvocationID.x] = agg;
-    }
-
-    barrier();
-    // This could be a semigroup instead of a monoid if we reworked the
-    // conditional logic, but that might impact performance.
-    Monoid row = monoid_identity();
-#ifdef ROOT
-    if (gl_LocalInvocationID.x > 0) {
-        row = sh_scratch[gl_LocalInvocationID.x - 1];
-    }
-#else
-    if (gl_WorkGroupID.x > 0) {
-        row = parent[gl_WorkGroupID.x - 1];
-    }
-    if (gl_LocalInvocationID.x > 0) {
-        row = combine_monoid(row, sh_scratch[gl_LocalInvocationID.x - 1]);
-    }
-#endif
-    for (uint i = 0; i < N_ROWS; i++) {
-        Monoid m = combine_monoid(row, local[i]);
-        // TODO: gate buffer write
-        data[ix + i] = m;
-    }
-}
diff --git a/piet-gpu/src/encoder.rs b/piet-gpu/src/encoder.rs
index bddb6f4..2d7c23a 100644
--- a/piet-gpu/src/encoder.rs
+++ b/piet-gpu/src/encoder.rs
@@ -20,9 +20,7 @@
 use bytemuck::{Pod, Zeroable};
 use piet_gpu_hal::BufWrite;
 
-use crate::stages::{
-    self, PathEncoder, Transform, DRAW_PART_SIZE, PATHSEG_PART_SIZE, TRANSFORM_PART_SIZE,
-};
+use crate::stages::{self, PathEncoder, Transform, DRAW_PART_SIZE, PATHSEG_PART_SIZE};
 
 pub struct Encoder {
     transform_stream: Vec<stages::Transform>,
@@ -72,8 +70,6 @@
         buf.fill_zero(padding(n_drawobj, DRAW_PART_SIZE as usize) * DRAWTAG_SIZE);
         buf.extend_slice(&self.drawdata_stream);
         buf.extend_slice(&self.transform_stream);
-        let n_trans = self.transform_stream.len();
-        buf.fill_zero(padding(n_trans, TRANSFORM_PART_SIZE as usize) * TRANSFORM_SIZE);
         buf.extend_slice(&self.linewidth_stream);
         buf.extend_slice(&self.tag_stream);
         let n_pathtag = self.tag_stream.len();
@@ -244,8 +240,6 @@
         buf.fill_zero(padding(n_drawobj, DRAW_PART_SIZE as usize) * DRAWTAG_SIZE);
         buf.extend_slice(&self.drawdata_stream);
         buf.extend_slice(&self.transform_stream);
-        let n_trans = self.transform_stream.len();
-        buf.fill_zero(padding(n_trans, TRANSFORM_PART_SIZE as usize) * TRANSFORM_SIZE);
         buf.extend_slice(&self.linewidth_stream);
         buf.extend_slice(&self.tag_stream);
         let n_pathtag = self.tag_stream.len();
diff --git a/piet-gpu/src/lib.rs b/piet-gpu/src/lib.rs
index bfb5f19..e0415d4 100644
--- a/piet-gpu/src/lib.rs
+++ b/piet-gpu/src/lib.rs
@@ -27,10 +27,7 @@
 };
 
 pub use pico_svg::PicoSvg;
-use stages::{
-    ClipBinding, ElementBinding, ElementCode, DRAW_PART_SIZE, PATHSEG_PART_SIZE,
-    TRANSFORM_PART_SIZE,
-};
+use stages::{ClipBinding, ElementBinding, ElementCode, DRAW_PART_SIZE, PATHSEG_PART_SIZE};
 
 use crate::stages::{ClipCode, Config, ElementStage, CLIP_PART_SIZE};
 
@@ -525,7 +522,6 @@
             &mut pass,
             &self.element_code,
             &self.element_bindings[buf_ix],
-            self.n_transform as u64,
             self.n_paths as u32,
             self.n_pathtag as u32,
             self.n_drawobj as u64,
@@ -796,7 +792,7 @@
     pub(crate) fn scene_size(&self) -> usize {
         align_up(self.n_drawobj, DRAW_PART_SIZE as usize) * DRAWTAG_SIZE
             + self.drawdata_len
-            + align_up(self.n_transform, TRANSFORM_PART_SIZE as usize) * TRANSFORM_SIZE
+            + self.n_transform * TRANSFORM_SIZE
             + self.linewidth_len
             + align_up(self.n_pathtag, PATHSEG_PART_SIZE as usize)
             + self.pathseg_len
@@ -813,8 +809,7 @@
         let drawdata_offset = drawtag_offset + n_drawobj_padded * DRAWTAG_SIZE;
         let trans_offset = drawdata_offset + self.drawdata_len;
         let n_trans = self.n_transform;
-        let n_trans_padded = align_up(n_trans, TRANSFORM_PART_SIZE as usize);
-        let linewidth_offset = trans_offset + n_trans_padded * TRANSFORM_SIZE;
+        let linewidth_offset = trans_offset + n_trans * TRANSFORM_SIZE;
         let pathtag_offset = linewidth_offset + self.linewidth_len;
         let n_pathtag = self.n_pathtag;
         let n_pathtag_padded = align_up(n_pathtag, PATHSEG_PART_SIZE as usize);
@@ -822,8 +817,6 @@
 
         // Layout of memory
         let mut alloc = 0;
-        let trans_alloc = alloc;
-        alloc += trans_alloc + n_trans_padded * TRANSFORM_SIZE;
         let pathseg_alloc = alloc;
         alloc += pathseg_alloc + self.n_pathseg as usize * PATHSEG_SIZE;
         let path_bbox_alloc = alloc;
@@ -872,7 +865,6 @@
             n_pathseg: self.n_pathseg,
             pathseg_alloc: pathseg_alloc as u32,
             anno_alloc: anno_alloc as u32,
-            trans_alloc: trans_alloc as u32,
             path_bbox_alloc: path_bbox_alloc as u32,
             drawmonoid_alloc: drawmonoid_alloc as u32,
             clip_alloc: clip_alloc as u32,
diff --git a/piet-gpu/src/render_ctx.rs b/piet-gpu/src/render_ctx.rs
index caef303..a283507 100644
--- a/piet-gpu/src/render_ctx.rs
+++ b/piet-gpu/src/render_ctx.rs
@@ -57,11 +57,7 @@
 
 #[derive(Default)]
 struct State {
-    /// The transform relative to the parent state.
-    rel_transform: Affine,
     /// The transform at the parent state.
-    ///
-    /// This invariant should hold: transform * rel_transform = cur_transform
     transform: Affine,
     n_clip: usize,
 }
@@ -219,7 +215,6 @@
 
     fn save(&mut self) -> Result<(), Error> {
         self.state_stack.push(State {
-            rel_transform: Affine::default(),
             transform: self.cur_transform,
             n_clip: 0,
         });
@@ -228,10 +223,7 @@
 
     fn restore(&mut self) -> Result<(), Error> {
         if let Some(state) = self.state_stack.pop() {
-            if state.rel_transform != Affine::default() {
-                let a_inv = state.rel_transform.inverse();
-                self.encode_transform(Transform::from_kurbo(a_inv));
-            }
+            self.encode_transform(Transform::from_kurbo(state.transform));
             self.cur_transform = state.transform;
             for _ in 0..state.n_clip {
                 self.pop_clip();
@@ -250,11 +242,8 @@
     }
 
     fn transform(&mut self, transform: Affine) {
-        self.encode_transform(Transform::from_kurbo(transform));
-        if let Some(tos) = self.state_stack.last_mut() {
-            tos.rel_transform *= transform;
-        }
         self.cur_transform *= transform;
+        self.encode_transform(Transform::from_kurbo(self.cur_transform));
     }
 
     fn make_image(
diff --git a/piet-gpu/src/stages.rs b/piet-gpu/src/stages.rs
index fd85776..e786ef5 100644
--- a/piet-gpu/src/stages.rs
+++ b/piet-gpu/src/stages.rs
@@ -27,9 +27,7 @@
 pub use draw::{DrawBinding, DrawCode, DrawMonoid, DrawStage, DRAW_PART_SIZE};
 pub use path::{PathBinding, PathCode, PathEncoder, PathStage, PATHSEG_PART_SIZE};
 use piet_gpu_hal::{Buffer, ComputePass, Session};
-pub use transform::{
-    Transform, TransformBinding, TransformCode, TransformStage, TRANSFORM_PART_SIZE,
-};
+pub use transform::Transform;
 
 /// The configuration block passed to piet-gpu shaders.
 ///
@@ -47,7 +45,6 @@
     pub ptcl_alloc: u32,
     pub pathseg_alloc: u32,
     pub anno_alloc: u32,
-    pub trans_alloc: u32,
     pub path_bbox_alloc: u32,
     pub drawmonoid_alloc: u32,
     pub clip_alloc: u32,
@@ -70,19 +67,16 @@
 // The "element" stage combines a number of stages for parts of the pipeline.
 
 pub struct ElementCode {
-    transform_code: TransformCode,
     path_code: PathCode,
     draw_code: DrawCode,
 }
 
 pub struct ElementStage {
-    transform_stage: TransformStage,
     path_stage: PathStage,
     draw_stage: DrawStage,
 }
 
 pub struct ElementBinding {
-    transform_binding: TransformBinding,
     path_binding: PathBinding,
     draw_binding: DrawBinding,
 }
@@ -90,7 +84,6 @@
 impl ElementCode {
     pub unsafe fn new(session: &Session) -> ElementCode {
         ElementCode {
-            transform_code: TransformCode::new(session),
             path_code: PathCode::new(session),
             draw_code: DrawCode::new(session),
         }
@@ -100,7 +93,6 @@
 impl ElementStage {
     pub unsafe fn new(session: &Session, code: &ElementCode) -> ElementStage {
         ElementStage {
-            transform_stage: TransformStage::new(session, &code.transform_code),
             path_stage: PathStage::new(session, &code.path_code),
             draw_stage: DrawStage::new(session, &code.draw_code),
         }
@@ -115,13 +107,6 @@
         memory_buf: &Buffer,
     ) -> ElementBinding {
         ElementBinding {
-            transform_binding: self.transform_stage.bind(
-                session,
-                &code.transform_code,
-                config_buf,
-                scene_buf,
-                memory_buf,
-            ),
             path_binding: self.path_stage.bind(
                 session,
                 &code.path_code,
@@ -144,17 +129,10 @@
         pass: &mut ComputePass,
         code: &ElementCode,
         binding: &ElementBinding,
-        n_transform: u64,
         n_paths: u32,
         n_tags: u32,
         n_drawobj: u64,
     ) {
-        self.transform_stage.record(
-            pass,
-            &code.transform_code,
-            &binding.transform_binding,
-            n_transform,
-        );
         // No memory barrier needed here; path has at least one before pathseg
         self.path_stage.record(
             pass,
@@ -171,13 +149,11 @@
 
 impl ElementBinding {
     pub unsafe fn rebind_memory(&mut self, session: &Session, memory: &Buffer) {
-        self.transform_binding.rebind_memory(session, memory);
         self.path_binding.rebind_memory(session, memory);
         self.draw_binding.rebind_memory(session, memory);
     }
 
     pub unsafe fn rebind_scene(&mut self, session: &Session, scene: &Buffer) {
-        self.transform_binding.rebind_scene(session, scene);
         self.path_binding.rebind_scene(session, scene);
         self.draw_binding.rebind_scene(session, scene);
     }
diff --git a/piet-gpu/src/stages/transform.rs b/piet-gpu/src/stages/transform.rs
index 43b68df..0e0a3f6 100644
--- a/piet-gpu/src/stages/transform.rs
+++ b/piet-gpu/src/stages/transform.rs
@@ -19,9 +19,6 @@
 use bytemuck::{Pod, Zeroable};
 
 use piet::kurbo::Affine;
-use piet_gpu_hal::{
-    include_shader, BindType, Buffer, BufferUsage, ComputePass, DescriptorSet, Pipeline, Session,
-};
 
 /// An affine transform.
 // This is equivalent to the version in piet-gpu-types, but the bytemuck
@@ -33,151 +30,6 @@
     pub translate: [f32; 2],
 }
 
-const TRANSFORM_WG: u64 = 256;
-const TRANSFORM_N_ROWS: u64 = 8;
-pub const TRANSFORM_PART_SIZE: u64 = TRANSFORM_WG * TRANSFORM_N_ROWS;
-
-pub struct TransformCode {
-    reduce_pipeline: Pipeline,
-    root_pipeline: Pipeline,
-    leaf_pipeline: Pipeline,
-}
-
-pub struct TransformStage {
-    // Right now we're limited to partition^2 (~16M) elements. This can be
-    // expanded but is tedious.
-    root_buf: Buffer,
-    root_ds: DescriptorSet,
-}
-
-pub struct TransformBinding {
-    reduce_ds: DescriptorSet,
-    leaf_ds: DescriptorSet,
-}
-
-impl TransformCode {
-    pub unsafe fn new(session: &Session) -> TransformCode {
-        let reduce_code = include_shader!(session, "../../shader/gen/transform_reduce");
-        let reduce_pipeline = session
-            .create_compute_pipeline(
-                reduce_code,
-                &[
-                    BindType::Buffer,
-                    BindType::BufReadOnly,
-                    BindType::BufReadOnly,
-                    BindType::Buffer,
-                ],
-            )
-            .unwrap();
-        let root_code = include_shader!(session, "../../shader/gen/transform_root");
-        let root_pipeline = session
-            .create_compute_pipeline(root_code, &[BindType::Buffer])
-            .unwrap();
-        let leaf_code = include_shader!(session, "../../shader/gen/transform_leaf");
-        let leaf_pipeline = session
-            .create_compute_pipeline(
-                leaf_code,
-                &[
-                    BindType::Buffer,
-                    BindType::BufReadOnly,
-                    BindType::BufReadOnly,
-                    BindType::BufReadOnly,
-                ],
-            )
-            .unwrap();
-        TransformCode {
-            reduce_pipeline,
-            root_pipeline,
-            leaf_pipeline,
-        }
-    }
-}
-
-impl TransformStage {
-    pub unsafe fn new(session: &Session, code: &TransformCode) -> TransformStage {
-        // We're limited to TRANSFORM_PART_SIZE^2
-        // Also note: size here allows padding
-        let root_buf_size = TRANSFORM_PART_SIZE * 32;
-        let root_buf = session
-            .create_buffer(root_buf_size, BufferUsage::STORAGE)
-            .unwrap();
-        let root_ds = session
-            .create_simple_descriptor_set(&code.root_pipeline, &[&root_buf])
-            .unwrap();
-        TransformStage { root_buf, root_ds }
-    }
-
-    pub unsafe fn bind(
-        &self,
-        session: &Session,
-        code: &TransformCode,
-        config_buf: &Buffer,
-        scene_buf: &Buffer,
-        memory_buf: &Buffer,
-    ) -> TransformBinding {
-        let reduce_ds = session
-            .create_simple_descriptor_set(
-                &code.reduce_pipeline,
-                &[memory_buf, config_buf, scene_buf, &self.root_buf],
-            )
-            .unwrap();
-        let leaf_ds = session
-            .create_simple_descriptor_set(
-                &code.leaf_pipeline,
-                &[memory_buf, config_buf, scene_buf, &self.root_buf],
-            )
-            .unwrap();
-        TransformBinding { reduce_ds, leaf_ds }
-    }
-
-    pub unsafe fn record(
-        &self,
-        pass: &mut ComputePass,
-        code: &TransformCode,
-        binding: &TransformBinding,
-        size: u64,
-    ) {
-        if size > TRANSFORM_PART_SIZE.pow(2) {
-            panic!("very large scan not yet implemented");
-        }
-        let n_workgroups = (size + TRANSFORM_PART_SIZE - 1) / TRANSFORM_PART_SIZE;
-        if n_workgroups > 1 {
-            pass.dispatch(
-                &code.reduce_pipeline,
-                &binding.reduce_ds,
-                (n_workgroups as u32, 1, 1),
-                (TRANSFORM_WG as u32, 1, 1),
-            );
-            pass.memory_barrier();
-            pass.dispatch(
-                &code.root_pipeline,
-                &self.root_ds,
-                (1, 1, 1),
-                (TRANSFORM_WG as u32, 1, 1),
-            );
-            pass.memory_barrier();
-        }
-        pass.dispatch(
-            &code.leaf_pipeline,
-            &binding.leaf_ds,
-            (n_workgroups as u32, 1, 1),
-            (TRANSFORM_WG as u32, 1, 1),
-        );
-    }
-}
-
-impl TransformBinding {
-    pub unsafe fn rebind_memory(&mut self, session: &Session, memory: &Buffer) {
-        session.update_buffer_descriptor(&mut self.reduce_ds, 0, memory);
-        session.update_buffer_descriptor(&mut self.leaf_ds, 0, memory);
-    }
-
-    pub unsafe fn rebind_scene(&mut self, session: &Session, scene: &Buffer) {
-        session.update_buffer_descriptor(&mut self.reduce_ds, 2, scene);
-        session.update_buffer_descriptor(&mut self.leaf_ds, 2, scene);
-    }
-}
-
 impl Transform {
     pub const IDENTITY: Transform = Transform {
         mat: [1.0, 0.0, 0.0, 1.0],
diff --git a/piet-gpu/src/text.rs b/piet-gpu/src/text.rs
index 0fb508b..da8b86e 100644
--- a/piet-gpu/src/text.rs
+++ b/piet-gpu/src/text.rs
@@ -6,8 +6,8 @@
 
 use piet::kurbo::{Point, Rect, Size};
 use piet::{
-    Error, FontFamily, HitTestPoint, HitTestPosition, LineMetric, Text, TextAttribute, TextLayout,
-    TextLayoutBuilder, TextStorage,
+    Error, FontFamily, HitTestPoint, HitTestPosition, LineMetric, RenderContext, Text,
+    TextAttribute, TextLayout, TextLayoutBuilder, TextStorage,
 };
 
 use crate::encoder::GlyphEncoder;
@@ -169,38 +169,14 @@
         // Should we use ppem from font, or let swash scale?
         const DEFAULT_UPEM: u16 = 2048;
         let scale = self.size as f32 / DEFAULT_UPEM as f32;
-        let mut inv_transform = None;
+        ctx.save().unwrap();
         // TODO: handle y offsets also
-        let mut last_x = 0.0;
         for glyph in &self.glyphs {
-            let transform = match &mut inv_transform {
-                None => {
-                    let inv_scale = scale.recip();
-                    let translate = render_ctx::to_f32_2(pos);
-                    inv_transform = Some(Transform {
-                        mat: [inv_scale, 0.0, 0.0, -inv_scale],
-                        translate: [
-                            -translate[0] * inv_scale - glyph.x,
-                            translate[1] * inv_scale,
-                        ],
-                    });
-                    let tpos = render_ctx::to_f32_2(pos);
-                    let translate = [tpos[0] + scale * glyph.x, tpos[1]];
-                    Transform {
-                        mat: [scale, 0.0, 0.0, -scale],
-                        translate,
-                    }
-                }
-                Some(inv) => {
-                    let delta_x = glyph.x - last_x;
-                    inv.translate[0] -= delta_x;
-                    Transform {
-                        mat: [1.0, 0.0, 0.0, 1.0],
-                        translate: [delta_x, 0.0],
-                    }
-                }
+            let tpos = render_ctx::to_f32_2(pos);
+            let transform = Transform {
+                mat: [scale, 0.0, 0.0, -scale],
+                translate: [tpos[0] + scale * glyph.x, tpos[1]],
             };
-            last_x = glyph.x;
             //println!("{:?}, {:?}", transform.mat, transform.translate);
             ctx.encode_transform(transform);
             let glyph = self.font.make_path(glyph.glyph_id, &mut tc);
@@ -209,9 +185,7 @@
                 ctx.fill_glyph(0xff_ff_ff_ff);
             }
         }
-        if let Some(transform) = inv_transform {
-            ctx.encode_transform(transform);
-        }
+        ctx.restore().unwrap();
     }
 }
 
diff --git a/piet-scene/src/geometry.rs b/piet-scene/src/geometry.rs
index 1ea8f33..2df7f83 100644
--- a/piet-scene/src/geometry.rs
+++ b/piet-scene/src/geometry.rs
@@ -61,7 +61,7 @@
 }
 
 /// Affine transformation matrix.
-#[derive(Copy, Clone, Debug, Pod, Zeroable)]
+#[derive(Copy, Clone, PartialEq, Debug, Pod, Zeroable)]
 #[repr(C)]
 pub struct Affine {
     pub xx: f32,
diff --git a/piet-scene/src/scene/builder.rs b/piet-scene/src/scene/builder.rs
index 8aa1bf5..5394f88 100644
--- a/piet-scene/src/scene/builder.rs
+++ b/piet-scene/src/scene/builder.rs
@@ -43,7 +43,6 @@
     scene: &'a mut SceneData,
     resources: ResourceData<'a>,
     layers: Vec<Blend>,
-    transforms: Vec<Affine>,
 }
 
 impl<'a> Builder<'a> {
@@ -59,21 +58,12 @@
             scene,
             resources,
             layers: vec![],
-            transforms: vec![],
         }
     }
 
-    /// Pushes a transform matrix onto the stack.
-    pub fn push_transform(&mut self, transform: Affine) {
-        self.transform(transform);
-        self.transforms.push(transform);
-    }
-
-    /// Pops the current transform matrix.
-    pub fn pop_transform(&mut self) {
-        if let Some(transform) = self.transforms.pop() {
-            self.transform(transform.inverse());
-        }
+    /// Sets the current transformation.
+    pub fn transform(&mut self, transform: Affine) {
+        self.encode_transform(transform);
     }
 
     /// Pushes a new layer bound by the specifed shape and composed with
@@ -117,10 +107,17 @@
         let elements = elements.into_iter();
         self.encode_path(elements, true);
         if let Some(brush_transform) = brush_transform {
-            self.transform(brush_transform);
-            self.swap_last_tags();
-            self.encode_brush(brush);
-            self.transform(brush_transform.inverse());
+            if let Some(last_transform) = self.scene.transform_stream.last().copied() {
+                self.encode_transform(brush_transform * last_transform);
+                self.swap_last_tags();
+                self.encode_brush(brush);
+                self.encode_transform(last_transform);
+            } else {
+                self.encode_transform(brush_transform);
+                self.swap_last_tags();
+                self.encode_brush(brush);
+                self.encode_transform(Affine::IDENTITY);
+            }
         } else {
             self.encode_brush(brush);
         }
@@ -143,19 +140,35 @@
         let elements = elements.into_iter();
         self.encode_path(elements, false);
         if let Some(brush_transform) = brush_transform {
-            self.transform(brush_transform);
-            self.swap_last_tags();
-            self.encode_brush(brush);
-            self.transform(brush_transform.inverse());
+            if let Some(last_transform) = self.scene.transform_stream.last().copied() {
+                self.encode_transform(brush_transform * last_transform);
+                self.swap_last_tags();
+                self.encode_brush(brush);
+                self.encode_transform(last_transform);
+            } else {
+                self.encode_transform(brush_transform);
+                self.swap_last_tags();
+                self.encode_brush(brush);
+                self.encode_transform(Affine::IDENTITY);
+            }
         } else {
             self.encode_brush(brush);
         }
     }
 
     /// Appends a fragment to the scene.
-    pub fn append(&mut self, fragment: &Fragment) {
+    pub fn append(&mut self, fragment: &Fragment, transform: Option<Affine>) {
         let drawdata_base = self.scene.drawdata_stream.len();
-        self.scene.append(&fragment.data);
+        let mut cur_transform = self.scene.transform_stream.last().copied();
+        if let Some(transform) = transform {
+            if cur_transform.is_none() {
+                cur_transform = Some(Affine::IDENTITY);
+            }
+            self.encode_transform(transform);
+        } else if cur_transform != Some(Affine::IDENTITY) {
+            self.encode_transform(Affine::IDENTITY);
+        }
+        self.scene.append(&fragment.data, &transform);
         match &mut self.resources {
             ResourceData::Scene(res) => {
                 for patch in &fragment.resources.patches {
@@ -189,6 +202,10 @@
                 ));
             }
         }
+        // Prevent fragments from affecting transform state. Should we allow this?
+        if let Some(transform) = cur_transform {
+            self.encode_transform(transform);
+        }
     }
 
     /// Completes construction and finalizes the underlying scene.
@@ -196,15 +213,6 @@
         while let Some(layer) = self.layers.pop() {
             self.end_clip(Some(layer));
         }
-        match self.resources {
-            ResourceData::Fragment(_) => {
-                // Make sure the transform state is invariant for fragments
-                while !self.transforms.is_empty() {
-                    self.pop_transform();
-                }
-            }
-            _ => {}
-        }
     }
 }
 
@@ -250,7 +258,7 @@
         self.scene.n_pathseg += n_pathseg;
     }
 
-    fn transform(&mut self, transform: Affine) {
+    fn encode_transform(&mut self, transform: Affine) {
         self.scene.tag_stream.push(0x20);
         self.scene.transform_stream.push(transform);
     }
diff --git a/piet-scene/src/scene/mod.rs b/piet-scene/src/scene/mod.rs
index 577f81e..5f0e77f 100644
--- a/piet-scene/src/scene/mod.rs
+++ b/piet-scene/src/scene/mod.rs
@@ -60,9 +60,14 @@
         }
     }
 
-    fn append(&mut self, other: &SceneData) {
-        self.transform_stream
-            .extend_from_slice(&other.transform_stream);
+    fn append(&mut self, other: &SceneData, transform: &Option<Affine>) {
+        if let Some(transform) = *transform {
+            self.transform_stream
+                .extend(other.transform_stream.iter().map(|x| *x * transform));
+        } else {
+            self.transform_stream
+                .extend_from_slice(&other.transform_stream);
+        }
         self.tag_stream.extend_from_slice(&other.tag_stream);
         self.pathseg_stream.extend_from_slice(&other.pathseg_stream);
         self.linewidth_stream
diff --git a/tests/src/main.rs b/tests/src/main.rs
index 5599f70..96504f1 100644
--- a/tests/src/main.rs
+++ b/tests/src/main.rs
@@ -29,8 +29,6 @@
 
 #[cfg(feature = "piet-gpu")]
 mod path;
-#[cfg(feature = "piet-gpu")]
-mod transform;
 
 use clap::{App, Arg};
 use piet_gpu_hal::InstanceFlags;
@@ -137,7 +135,6 @@
         }
         #[cfg(feature = "piet-gpu")]
         if config.groups.matches("piet") {
-            report(&transform::transform_test(&mut runner, &config));
             report(&path::path_test(&mut runner, &config));
             report(&draw::draw_test(&mut runner, &config));
             report(&clip::clip_test(&mut runner, &config));
diff --git a/tests/src/path.rs b/tests/src/path.rs
index 9d794e1..1a933d0 100644
--- a/tests/src/path.rs
+++ b/tests/src/path.rs
@@ -210,7 +210,6 @@
         let path_bbox_alloc = pathseg_alloc + self.n_pathseg * PATHSEG_SIZE;
         let stage_config = stages::Config {
             pathseg_alloc,
-            trans_alloc,
             path_bbox_alloc,
             n_trans,
             n_path: self.n_path,
diff --git a/tests/src/transform.rs b/tests/src/transform.rs
deleted file mode 100644
index 43bfc67..0000000
--- a/tests/src/transform.rs
+++ /dev/null
@@ -1,132 +0,0 @@
-// Copyright 2021 The piet-gpu authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Also licensed under MIT license, at your choice.
-
-//! Tests for the piet-gpu transform stage.
-
-use crate::{Config, Runner, TestResult};
-
-use kurbo::Affine;
-use piet_gpu::stages::{self, Transform, TransformCode, TransformStage};
-use piet_gpu_hal::BufferUsage;
-use rand::Rng;
-
-struct AffineTestData {
-    input_data: Vec<Transform>,
-    expected: Vec<Affine>,
-}
-
-pub unsafe fn transform_test(runner: &mut Runner, config: &Config) -> TestResult {
-    let mut result = TestResult::new("transform");
-    // TODO: implement large scan and set large to 1 << 24
-    let n_elements: u64 = config.size.choose(1 << 12, 1 << 18, 1 << 22);
-    // Validate with real transform data.
-    let data = AffineTestData::new(n_elements as usize);
-    let data_buf = runner
-        .session
-        .create_buffer_init(&data.input_data, BufferUsage::STORAGE)
-        .unwrap();
-    let memory = runner.buf_down(data_buf.size() + 8, BufferUsage::empty());
-    let stage_config = stages::Config {
-        n_trans: n_elements as u32,
-        ..Default::default()
-    };
-    let config_buf = runner
-        .session
-        .create_buffer_init(std::slice::from_ref(&stage_config), BufferUsage::STORAGE)
-        .unwrap();
-
-    let code = TransformCode::new(&runner.session);
-    let stage = TransformStage::new(&runner.session, &code);
-    let binding = stage.bind(
-        &runner.session,
-        &code,
-        &config_buf,
-        &data_buf,
-        &memory.dev_buf,
-    );
-    let mut total_elapsed = 0.0;
-    let n_iter = config.n_iter;
-    for i in 0..n_iter {
-        let mut commands = runner.commands();
-        let mut pass = commands.compute_pass(0, 1);
-        stage.record(&mut pass, &code, &binding, n_elements);
-        pass.end();
-        if i == 0 || config.verify_all {
-            commands.cmd_buf.memory_barrier();
-            commands.download(&memory);
-        }
-        total_elapsed += runner.submit(commands);
-        if i == 0 || config.verify_all {
-            let dst = memory.map_read(8..);
-            if let Some(failure) = data.verify(dst.cast_slice()) {
-                result.fail(failure);
-            }
-        }
-    }
-    result.timing(total_elapsed, n_elements * n_iter);
-    result
-}
-
-impl AffineTestData {
-    fn new(n: usize) -> AffineTestData {
-        let mut rng = rand::thread_rng();
-        let mut a = Affine::default();
-        let mut input_data = Vec::with_capacity(n);
-        let mut expected = Vec::with_capacity(n);
-        for _ in 0..n {
-            loop {
-                let b = Affine::new([
-                    rng.gen_range(-3.0, 3.0),
-                    rng.gen_range(-3.0, 3.0),
-                    rng.gen_range(-3.0, 3.0),
-                    rng.gen_range(-3.0, 3.0),
-                    rng.gen_range(-3.0, 3.0),
-                    rng.gen_range(-3.0, 3.0),
-                ]);
-                if b.determinant().abs() >= 1.0 {
-                    expected.push(b);
-                    let c = a.inverse() * b;
-                    input_data.push(Transform::from_kurbo(c));
-                    a = b;
-                    break;
-                }
-            }
-        }
-        AffineTestData {
-            input_data,
-            expected,
-        }
-    }
-
-    fn verify(&self, actual: &[Transform]) -> Option<String> {
-        for (i, (actual, expected)) in actual.iter().zip(&self.expected).enumerate() {
-            let error: f64 = actual
-                .to_kurbo()
-                .as_coeffs()
-                .iter()
-                .zip(expected.as_coeffs())
-                .map(|(actual, expected)| (actual - expected).powi(2))
-                .sum();
-            // Hopefully this is right; most of the time the error is much
-            // smaller, but occasionally we see outliers.
-            let tolerance = 1e-9 * (i + 1) as f64;
-            if error > tolerance {
-                return Some(format!("{}: {} {}", i, error, tolerance));
-            }
-        }
-        None
-    }
-}