diff --git a/Cargo.lock b/Cargo.lock
index 78b6326..207dc02 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -954,7 +954,15 @@
 version = "0.1.0"
 dependencies = [
  "bytemuck",
+ "clap",
+ "piet",
+ "piet-gpu",
+ "piet-gpu-hal",
+ "png",
+ "rand",
+ "roxmltree",
  "smallvec",
+ "winit",
 ]
 
 [[package]]
diff --git a/piet-gpu/src/encoder.rs b/piet-gpu/src/encoder.rs
index 2f4b85e..8f21485 100644
--- a/piet-gpu/src/encoder.rs
+++ b/piet-gpu/src/encoder.rs
@@ -37,6 +37,147 @@
     n_clip: u32,
 }
 
+#[derive(Copy, Clone, Debug)]
+pub struct EncodedSceneRef<'a, T: Copy + Pod> { // borrowed view of an encoded scene; `T` is the transform element type
+    pub transform_stream: &'a [T], // transform entries; `write_scene` pads them per TRANSFORM_PART_SIZE
+    pub tag_stream: &'a [u8], // path tag bytes; `write_scene` pads them per PATHSEG_PART_SIZE
+    pub pathseg_stream: &'a [u8], // path segment bytes; written last by `write_scene`
+    pub linewidth_stream: &'a [f32], // line widths; written unpadded after the transforms
+    pub drawtag_stream: &'a [u32], // draw tags; the slice length is the draw object count
+    pub drawdata_stream: &'a [u8], // draw data bytes; written right after the draw tags
+    pub n_path: u32, // number of paths
+    pub n_pathseg: u32, // number of path segments
+    pub n_clip: u32, // clip count; sizes the clip regions laid out by `stage_config`
+    pub ramp_data: &'a [u32], // gradient ramp words; uploaded to the gradient buffer when non-empty
+}
+
+impl<'a, T: Copy + Pod> EncodedSceneRef<'a, T> {
+    /// Return a config for the element processing pipeline.
+    ///
+    /// This does not include further pipeline processing. Also returns the
+    /// beginning of free memory.
+    pub fn stage_config(&self) -> (Config, usize) {
+        // Layout of scene buffer
+        let drawtag_offset = 0;
+        let n_drawobj = self.n_drawobj();
+        let n_drawobj_padded = align_up(n_drawobj, DRAW_PART_SIZE as usize);
+        let drawdata_offset = drawtag_offset + n_drawobj_padded * DRAWTAG_SIZE;
+        let trans_offset = drawdata_offset + self.drawdata_stream.len();
+        let n_trans = self.transform_stream.len();
+        let n_trans_padded = align_up(n_trans, TRANSFORM_PART_SIZE as usize);
+        let linewidth_offset = trans_offset + n_trans_padded * TRANSFORM_SIZE;
+        let pathtag_offset = linewidth_offset + self.linewidth_stream.len() * LINEWIDTH_SIZE;
+        let n_pathtag = self.tag_stream.len();
+        let n_pathtag_padded = align_up(n_pathtag, PATHSEG_PART_SIZE as usize);
+        let pathseg_offset = pathtag_offset + n_pathtag_padded;
+
+        // Layout of memory. `alloc` is a running cursor: every region starts at its
+        // current value, so it advances by the region size only (never base + size).
+        let mut alloc = 0;
+        let trans_alloc = alloc;
+        alloc += n_trans_padded * TRANSFORM_SIZE;
+        let pathseg_alloc = alloc;
+        alloc += self.n_pathseg as usize * PATHSEG_SIZE;
+        let path_bbox_alloc = alloc;
+        alloc += self.n_path as usize * PATH_BBOX_SIZE;
+        let drawmonoid_alloc = alloc;
+        alloc += n_drawobj_padded * DRAWMONOID_SIZE;
+        let anno_alloc = alloc;
+        alloc += n_drawobj * ANNOTATED_SIZE;
+        let clip_alloc = alloc;
+        let n_clip = self.n_clip as usize;
+        const CLIP_SIZE: usize = 4;
+        alloc += n_clip * CLIP_SIZE;
+        let clip_bic_alloc = alloc;
+        const CLIP_BIC_SIZE: usize = 8;
+        // This can round down, as we only reduce the prefix
+        alloc += (n_clip / CLIP_PART_SIZE as usize) * CLIP_BIC_SIZE;
+        let clip_stack_alloc = alloc;
+        const CLIP_EL_SIZE: usize = 20;
+        alloc += n_clip * CLIP_EL_SIZE;
+        let clip_bbox_alloc = alloc;
+        const CLIP_BBOX_SIZE: usize = 16;
+        alloc += align_up(n_clip, CLIP_PART_SIZE as usize) * CLIP_BBOX_SIZE;
+        let draw_bbox_alloc = alloc;
+        alloc += n_drawobj * DRAW_BBOX_SIZE;
+        let drawinfo_alloc = alloc;
+        // TODO: not optimized; it can be accumulated during encoding or summed from drawtags
+        const MAX_DRAWINFO_SIZE: usize = 44;
+        alloc += n_drawobj * MAX_DRAWINFO_SIZE;
+
+        let config = Config {
+            n_elements: n_drawobj as u32,
+            n_pathseg: self.n_pathseg,
+            pathseg_alloc: pathseg_alloc as u32,
+            anno_alloc: anno_alloc as u32,
+            trans_alloc: trans_alloc as u32,
+            path_bbox_alloc: path_bbox_alloc as u32,
+            drawmonoid_alloc: drawmonoid_alloc as u32,
+            clip_alloc: clip_alloc as u32,
+            clip_bic_alloc: clip_bic_alloc as u32,
+            clip_stack_alloc: clip_stack_alloc as u32,
+            clip_bbox_alloc: clip_bbox_alloc as u32,
+            draw_bbox_alloc: draw_bbox_alloc as u32,
+            drawinfo_alloc: drawinfo_alloc as u32,
+            n_trans: n_trans as u32,
+            n_path: self.n_path,
+            n_clip: self.n_clip,
+            trans_offset: trans_offset as u32,
+            linewidth_offset: linewidth_offset as u32,
+            pathtag_offset: pathtag_offset as u32,
+            pathseg_offset: pathseg_offset as u32,
+            drawtag_offset: drawtag_offset as u32,
+            drawdata_offset: drawdata_offset as u32,
+            ..Default::default()
+        };
+        (config, alloc)
+    }
+
+    /// Write the encoded streams into `buf` in the scene-buffer order laid out by
+    /// `stage_config`, zero-filling the tail of each padded stream.
+    pub fn write_scene(&self, buf: &mut BufWrite) {
+        buf.extend_slice(self.drawtag_stream);
+        buf.fill_zero(padding(self.n_drawobj(), DRAW_PART_SIZE as usize) * DRAWTAG_SIZE);
+        buf.extend_slice(self.drawdata_stream);
+        buf.extend_slice(self.transform_stream);
+        buf.fill_zero(padding(self.n_transform(), TRANSFORM_PART_SIZE as usize) * TRANSFORM_SIZE);
+        buf.extend_slice(self.linewidth_stream);
+        buf.extend_slice(self.tag_stream);
+        buf.fill_zero(padding(self.n_pathtag(), PATHSEG_PART_SIZE as usize));
+        buf.extend_slice(self.pathseg_stream);
+    }
+
+    /// The number of draw objects in the draw object stream.
+    pub(crate) fn n_drawobj(&self) -> usize {
+        self.drawtag_stream.len()
+    }
+
+    /// The number of paths.
+    pub(crate) fn n_path(&self) -> u32 {
+        self.n_path
+    }
+
+    /// The number of path segments.
+    pub(crate) fn n_pathseg(&self) -> u32 {
+        self.n_pathseg
+    }
+
+    /// The number of entries in the transform stream.
+    pub(crate) fn n_transform(&self) -> usize {
+        self.transform_stream.len()
+    }
+
+    /// The number of tags in the path stream.
+    pub(crate) fn n_pathtag(&self) -> usize {
+        self.tag_stream.len()
+    }
+
+    /// The clip count recorded for this scene.
+    pub(crate) fn n_clip(&self) -> u32 {
+        self.n_clip
+    }
+}
+
 /// A scene fragment encoding a glyph.
 ///
 /// This is a reduced version of the full encoder.
@@ -330,6 +471,21 @@
         self.n_path += glyph.n_path;
         self.n_pathseg += glyph.n_pathseg;
     }
+
+    pub(crate) fn scene_ref(&self) -> EncodedSceneRef<stages::Transform> {
+        EncodedSceneRef { // borrows every stream from this encoder; nothing is copied
+            transform_stream: &self.transform_stream,
+            tag_stream: &self.tag_stream,
+            pathseg_stream: &self.pathseg_stream,
+            linewidth_stream: &self.linewidth_stream,
+            drawtag_stream: &self.drawtag_stream,
+            drawdata_stream: &self.drawdata_stream,
+            n_path: self.n_path,
+            n_pathseg: self.n_pathseg,
+            n_clip: self.n_clip,
+            ramp_data: &[], // left empty here; callers assign the ramp data afterwards
+        }
+    }
 }
 
 fn align_up(x: usize, align: usize) -> usize {
diff --git a/piet-gpu/src/lib.rs b/piet-gpu/src/lib.rs
index 475d723..b3ead90 100644
--- a/piet-gpu/src/lib.rs
+++ b/piet-gpu/src/lib.rs
@@ -10,7 +10,10 @@
 
 use std::convert::TryInto;
 
+use bytemuck::Pod;
+
 pub use blend::{Blend, BlendMode, CompositionMode};
+pub use encoder::EncodedSceneRef;
 pub use render_ctx::PietGpuRenderContext;
 pub use gradient::Colrv1RadialGradient;
 
@@ -355,16 +358,27 @@
         render_ctx: &mut PietGpuRenderContext,
         buf_ix: usize,
     ) -> Result<(), Error> {
-        let (mut config, mut alloc) = render_ctx.stage_config();
-        let n_drawobj = render_ctx.n_drawobj();
+        let mut scene = render_ctx.encoded_scene();
+        let ramp_data = render_ctx.get_ramp_data();
+        scene.ramp_data = &ramp_data;
+        self.upload_scene(&scene, buf_ix)
+    }
+
+    pub fn upload_scene<T: Copy + Pod>(
+        &mut self,
+        scene: &EncodedSceneRef<T>,
+        buf_ix: usize,
+    ) -> Result<(), Error> {
+        let (mut config, mut alloc) = scene.stage_config();
+        let n_drawobj = scene.n_drawobj();
         // TODO: be more consistent in size types
-        let n_path = render_ctx.n_path() as usize;
+        let n_path = scene.n_path() as usize;
         self.n_paths = n_path;
-        self.n_transform = render_ctx.n_transform();
-        self.n_drawobj = render_ctx.n_drawobj();
-        self.n_pathseg = render_ctx.n_pathseg() as usize;
-        self.n_pathtag = render_ctx.n_pathtag();
-        self.n_clip = render_ctx.n_clip();
+        self.n_transform = scene.n_transform();
+        self.n_drawobj = scene.n_drawobj();
+        self.n_pathseg = scene.n_pathseg() as usize;
+        self.n_pathtag = scene.n_pathtag();
+        self.n_clip = scene.n_clip();
 
         // These constants depend on encoding and may need to be updated.
         // Perhaps we can plumb these from piet-gpu-derive?
@@ -388,19 +402,18 @@
             // TODO: reallocate scene buffer if size is inadequate
             {
                 let mut mapped_scene = self.scene_bufs[buf_ix].map_write(..)?;
-                render_ctx.write_scene(&mut mapped_scene);
+                scene.write_scene(&mut mapped_scene);
             }
             self.config_bufs[buf_ix].write(&[config])?;
             self.memory_buf_host[buf_ix].write(&[alloc as u32, 0 /* Overflow flag */])?;
 
             // Upload gradient data.
-            let ramp_data = render_ctx.get_ramp_data();
-            if !ramp_data.is_empty() {
+            if !scene.ramp_data.is_empty() {
                 assert!(
                     self.gradient_bufs[buf_ix].size() as usize
-                        >= std::mem::size_of_val(&*ramp_data)
+                        >= std::mem::size_of_val(&*scene.ramp_data)
                 );
-                self.gradient_bufs[buf_ix].write(&ramp_data)?;
+                self.gradient_bufs[buf_ix].write(scene.ramp_data)?;
             }
         }
         Ok(())
diff --git a/piet-gpu/src/render_ctx.rs b/piet-gpu/src/render_ctx.rs
index dca03eb..ad608ca 100644
--- a/piet-gpu/src/render_ctx.rs
+++ b/piet-gpu/src/render_ctx.rs
@@ -1,6 +1,6 @@
 use std::borrow::Cow;
 
-use crate::encoder::GlyphEncoder;
+use crate::encoder::{EncodedSceneRef, GlyphEncoder};
 use crate::stages::{Config, Transform};
 use crate::MAX_BLEND_STACK;
 use piet::kurbo::{Affine, Insets, PathEl, Point, Rect, Shape};
@@ -97,6 +97,10 @@
         self.new_encoder.stage_config()
     }
 
+    pub fn encoded_scene(&self) -> EncodedSceneRef<crate::stages::Transform> {
+        self.new_encoder.scene_ref() // delegates to the encoder; callers attach ramp data separately (see `get_ramp_data`)
+    }
+
     /// Number of draw objects.
     ///
     /// This is for the new element processing pipeline. It's not necessarily the
diff --git a/piet-scene/Cargo.toml b/piet-scene/Cargo.toml
index 8706119..fd9a6c5 100644
--- a/piet-scene/Cargo.toml
+++ b/piet-scene/Cargo.toml
@@ -7,3 +7,13 @@
 [dependencies]
 bytemuck = { version = "1.7.2", features = ["derive"] }
 smallvec = "1.8.0"
+
+# TODO: demo-only dependencies; remove these once the demo moves to another directory
+piet-gpu = { path = "../piet-gpu" }
+piet-gpu-hal = { path = "../piet-gpu-hal" }
+winit = "0.25"
+piet = "0.2.0"
+png = "0.16.2"
+rand = "0.7.3"
+roxmltree = "0.13"
+clap = "2.33"
diff --git a/piet-scene/src/brush/color.rs b/piet-scene/src/brush/color.rs
index d079210..b9cbe52 100644
--- a/piet-scene/src/brush/color.rs
+++ b/piet-scene/src/brush/color.rs
@@ -57,6 +57,6 @@
         let r = (self.r as f64 * a) as u32;
         let g = (self.g as f64 * a) as u32;
         let b = (self.b as f64 * a) as u32;
-        r | (g << 8) | (b << 16) | ((self.a as u32) << 24)
+        (r << 24) | (g << 16) | (b << 8) | self.a as u32
     }
 }
diff --git a/piet-scene/src/geometry.rs b/piet-scene/src/geometry.rs
index b71c293..fbc8765 100644
--- a/piet-scene/src/geometry.rs
+++ b/piet-scene/src/geometry.rs
@@ -14,6 +14,7 @@
 //
 // Also licensed under MIT license, at your choice.
 
+use bytemuck::{Pod, Zeroable};
 use core::borrow::Borrow;
 use core::hash::{Hash, Hasher};
 
@@ -58,7 +59,7 @@
 }
 
 /// Affine transformation matrix.
-#[derive(Copy, Clone, Debug)]
+#[derive(Copy, Clone, Debug, Pod, Zeroable)]
 #[repr(C)]
 pub struct Affine {
     pub xx: f32,
@@ -70,6 +71,15 @@
 }
 
 impl Affine {
+    pub const IDENTITY: Self = Self { // the identity transform: xx = yy = 1, every other term 0
+        xx: 1.0,
+        yx: 0.0,
+        xy: 0.0,
+        yy: 1.0,
+        dx: 0.0,
+        dy: 0.0,
+    };
+
     pub const fn new(elements: &[f32; 6]) -> Self {
         Self {
             xx: elements[0],
@@ -137,6 +147,12 @@
     }
 }
 
+impl Default for Affine { // the default is the identity transform, not the all-zero matrix
+    fn default() -> Self {
+        Self::IDENTITY
+    }
+}
+
 impl std::ops::Mul for Affine {
     type Output = Self;
     fn mul(self, other: Self) -> Self {
diff --git a/piet-scene/src/main.rs b/piet-scene/src/main.rs
index 8fd361a..ce62b43 100644
--- a/piet-scene/src/main.rs
+++ b/piet-scene/src/main.rs
@@ -1,30 +1,434 @@
-use piet_scene::geometry::*;
-use piet_scene::path::*;
-use piet_scene::scene::*;
-use piet_scene::{geometry::*, path::*, resource::ResourceContext, scene::*};
+use piet::kurbo::Point;
+use piet::{RenderContext, Text, TextAttribute, TextLayoutBuilder};
+use piet_gpu_hal::{CmdBuf, Error, ImageLayout, Instance, Session, SubmittedCmdBuf};
 
-fn main() {
+use piet_gpu::{test_scenes, EncodedSceneRef, PietGpuRenderContext, Renderer};
+
+use piet_scene::resource::ResourceContext;
+use piet_scene::scene::{build_scene, Scene};
+
+use clap::{App, Arg};
+
+use winit::{
+    event::{Event, WindowEvent},
+    event_loop::{ControlFlow, EventLoop},
+    window::WindowBuilder,
+};
+
+const NUM_FRAMES: usize = 2;
+
+const WIDTH: usize = 2048;
+const HEIGHT: usize = 1536;
+
+fn main() -> Result<(), Error> { // windowed demo comparing the old render-context path with the new piet-scene path
+    let matches = App::new("piet-gpu test")
+        .arg(Arg::with_name("INPUT").index(1))
+        .arg(Arg::with_name("flip").short("f").long("flip"))
+        .arg(
+            Arg::with_name("scale")
+                .short("s")
+                .long("scale")
+                .takes_value(true),
+        )
+        .get_matches();
+
+    let event_loop = EventLoop::new();
+    let window = WindowBuilder::new()
+        .with_inner_size(winit::dpi::LogicalSize {
+            width: (WIDTH / 2) as f64,
+            height: (HEIGHT / 2) as f64,
+        })
+        .with_resizable(false) // currently not supported
+        .build(&event_loop)?;
+
+    let (instance, surface) = Instance::new(Some(&window), Default::default())?;
+    let mut info_string = "info".to_string();
+
     let mut scene = Scene::default();
     let mut rcx = ResourceContext::new();
-    let mut sb = build_scene(&mut scene, &mut rcx);
+    unsafe { // NOTE(review): wraps the whole setup and event loop; no SAFETY rationale is stated
+        let device = instance.device(surface.as_ref())?;
+        let mut swapchain =
+            instance.swapchain(WIDTH / 2, HEIGHT / 2, &device, surface.as_ref().unwrap())?;
+        let session = Session::new(device);
 
-    sb.push_layer(Blend::default(), Rect::default().elements());
+        let mut current_frame = 0;
+        let present_semaphores = (0..NUM_FRAMES)
+            .map(|_| session.create_semaphore())
+            .collect::<Result<Vec<_>, Error>>()?;
+        let query_pools = (0..NUM_FRAMES)
+            .map(|_| session.create_query_pool(8))
+            .collect::<Result<Vec<_>, Error>>()?;
+        let mut cmd_bufs: [Option<CmdBuf>; NUM_FRAMES] = Default::default();
+        let mut submitted: [Option<SubmittedCmdBuf>; NUM_FRAMES] = Default::default();
 
-    let mut path = Path::new();
-    let mut b = PathBuilder::new(&mut path);
-    b.move_to(100., 100.);
-    b.line_to(200., 200.);
-    b.close_path();
-    b.move_to(50., 50.);
-    b.line_to(600., 150.);
-    b.move_to(4., 2.);
-    b.quad_to(8., 8., 9., 9.);
-    b.close_path();
-    println!("{:?}", path);
-    for el in path.elements() {
-        println!("{:?}", el);
+        let mut renderer = Renderer::new(&session, WIDTH, HEIGHT, NUM_FRAMES)?;
+        let mut mode = 0usize; // changed with Left/Right; its low bit picks the upload path below
+
+        event_loop.run(move |event, _, control_flow| {
+            *control_flow = ControlFlow::Poll; // `ControlFlow::Wait` if only re-render on event
+
+            match event {
+                Event::WindowEvent { event, window_id } if window_id == window.id() => {
+                    use winit::event::{ElementState, VirtualKeyCode};
+                    match event {
+                        WindowEvent::CloseRequested => {
+                            *control_flow = ControlFlow::Exit;
+                        }
+                        WindowEvent::KeyboardInput { input, .. } => {
+                            if input.state == ElementState::Pressed {
+                                match input.virtual_keycode {
+                                    Some(VirtualKeyCode::Left) => mode = mode.wrapping_sub(1),
+                                    Some(VirtualKeyCode::Right) => mode = mode.wrapping_add(1),
+                                    _ => {}
+                                }
+                            }
+                        }
+                        _ => (),
+                    }
+                }
+                Event::MainEventsCleared => {
+                    window.request_redraw();
+                }
+                Event::RedrawRequested(window_id) if window_id == window.id() => {
+                    let frame_idx = current_frame % NUM_FRAMES;
+
+                    if let Some(submitted) = submitted[frame_idx].take() {
+                        cmd_bufs[frame_idx] = submitted.wait().unwrap();
+                        let ts = session.fetch_query_pool(&query_pools[frame_idx]).unwrap();
+                        if !ts.is_empty() {
+                            info_string = format!(
+                                "{:.3}ms :: e:{:.3}ms|alloc:{:.3}ms|cp:{:.3}ms|bd:{:.3}ms|bin:{:.3}ms|cr:{:.3}ms|r:{:.3}ms",
+                                ts[6] * 1e3,
+                                ts[0] * 1e3,
+                                (ts[1] - ts[0]) * 1e3,
+                                (ts[2] - ts[1]) * 1e3,
+                                (ts[3] - ts[2]) * 1e3,
+                                (ts[4] - ts[3]) * 1e3,
+                                (ts[5] - ts[4]) * 1e3,
+                                (ts[6] - ts[5]) * 1e3,
+                            );
+                        }
+                    }
+
+                    let mut ctx = PietGpuRenderContext::new();
+                    if let Some(input) = matches.value_of("INPUT") {
+                        let mut scale = matches
+                            .value_of("scale")
+                            .map(|scale| scale.parse().unwrap()) // NOTE(review): panics on a non-numeric --scale value
+                            .unwrap_or(8.0);
+                        if matches.is_present("flip") {
+                            scale = -scale;
+                        }
+                        test_scenes::render_svg(&mut ctx, input, scale);
+                    } else {
+                        use piet_gpu::{Blend, BlendMode::*, CompositionMode::*};
+                        let blends = [
+                            Blend::new(Normal, SrcOver),
+                            Blend::new(Multiply, SrcOver),
+                            Blend::new(Screen, SrcOver),
+                            Blend::new(Overlay, SrcOver),
+                            Blend::new(Darken, SrcOver),
+                            Blend::new(Lighten, SrcOver),
+                            Blend::new(ColorDodge, SrcOver),
+                            Blend::new(ColorBurn, SrcOver),
+                            Blend::new(HardLight, SrcOver),
+                            Blend::new(SoftLight, SrcOver),
+                            Blend::new(Difference, SrcOver),
+                            Blend::new(Exclusion, SrcOver),
+                            Blend::new(Hue, SrcOver),
+                            Blend::new(Saturation, SrcOver),
+                            Blend::new(Color, SrcOver),
+                            Blend::new(Luminosity, SrcOver),
+                            Blend::new(Normal, Clear),
+                            Blend::new(Normal, Copy),
+                            Blend::new(Normal, Dest),
+                            Blend::new(Normal, SrcOver),
+                            Blend::new(Normal, DestOver),
+                            Blend::new(Normal, SrcIn),
+                            Blend::new(Normal, DestIn),
+                            Blend::new(Normal, SrcOut),
+                            Blend::new(Normal, DestOut),
+                            Blend::new(Normal, SrcAtop),
+                            Blend::new(Normal, DestAtop),
+                            Blend::new(Normal, Xor),
+                            Blend::new(Normal, Plus),
+                        ];
+                        let blend = blends[mode % blends.len()];
+                        test_scenes::render_blend_test(&mut ctx, current_frame, blend);
+                        info_string = format!("{:?}", blend);
+                    }
+                    render_info_string(&mut ctx, &info_string);
+
+                    ctx = PietGpuRenderContext::new(); // NOTE(review): discards the scene and info string rendered above
+                    test_scene1_old(&mut ctx);
+                    let mut encoded_scene_old = ctx.encoded_scene();
+                    let ramp_data = ctx.get_ramp_data();
+                    encoded_scene_old.ramp_data = &ramp_data; // NOTE(review): otherwise only used by the commented-out debug print below
+                    test_scene1(&mut scene, &mut rcx); // rebuilt on every redraw
+                    let encoded_scene = scene_to_encoded_scene(&scene, &rcx);
+                    // println!("{:?}\n============\n{:?}", encoded_scene_old, encoded_scene);
+                    // panic!();
+                    let res = if mode & 1 == 0 { // even mode: old render-context path; odd mode: piet-scene path
+                        render_info_string(&mut ctx, &info_string);
+                        renderer.upload_render_ctx(&mut ctx, frame_idx)
+                    } else {
+                        renderer.upload_scene(&encoded_scene, frame_idx)
+                    };
+                    if let Err(e) = res {
+                        println!("error in uploading: {}", e);
+                    }
+
+                    let (image_idx, acquisition_semaphore) = swapchain.next().unwrap();
+                    let swap_image = swapchain.image(image_idx);
+                    let query_pool = &query_pools[frame_idx];
+                    let mut cmd_buf = cmd_bufs[frame_idx].take().unwrap_or_else(|| session.cmd_buf().unwrap());
+                    cmd_buf.begin();
+                    renderer.record(&mut cmd_buf, &query_pool, frame_idx);
+
+                    // Image -> Swapchain
+                    cmd_buf.image_barrier(
+                        &swap_image,
+                        ImageLayout::Undefined,
+                        ImageLayout::BlitDst,
+                    );
+                    cmd_buf.blit_image(&renderer.image_dev, &swap_image);
+                    cmd_buf.image_barrier(&swap_image, ImageLayout::BlitDst, ImageLayout::Present);
+                    cmd_buf.finish();
+
+                    submitted[frame_idx] = Some(session
+                        .run_cmd_buf(
+                            cmd_buf,
+                            &[&acquisition_semaphore],
+                            &[&present_semaphores[frame_idx]],
+                        )
+                        .unwrap());
+
+                    swapchain
+                        .present(image_idx, &[&present_semaphores[frame_idx]])
+                        .unwrap();
+
+                    current_frame += 1;
+                }
+                Event::LoopDestroyed => {
+                    for cmd_buf in &mut submitted {
+                        // Wait for command list submission, otherwise dropping of renderer may
+                        // cause validation errors (and possibly crashes).
+                        if let Some(cmd_buf) = cmd_buf.take() {
+                            cmd_buf.wait().unwrap();
+                        }
+                    }
+                }
+                _ => (),
+            }
+        })
     }
-    //sb.push_layer(path.elements(), BlendMode::default());
-
-    sb.push_layer(Blend::default(), [Element::MoveTo((0., 0.).into())]);
 }
+
+fn test_scene1_old(ctx: &mut PietGpuRenderContext) { // reference scene drawn through the render context; compare with `test_scene1`
+    use piet::kurbo::{Affine, Rect, Vec2};
+    use piet::{Color, GradientStop};
+    ctx.transform(Affine::translate(Vec2::new(200., 200.)) * Affine::rotate(45f64.to_radians()));
+    let linear = ctx // NOTE(review): `linear` is never used; only `radial` is passed to `fill_transform` below
+        .gradient(piet::FixedGradient::Linear(piet::FixedLinearGradient {
+            start: Point::new(0., 0.),
+            end: Point::new(200., 100.),
+            stops: vec![
+                GradientStop {
+                    pos: 0.0,
+                    color: Color::rgb8(0, 0, 255),
+                },
+                GradientStop {
+                    pos: 0.5,
+                    color: Color::rgb8(0, 255, 0),
+                },
+                GradientStop {
+                    pos: 1.0,
+                    color: Color::rgb8(255, 0, 0),
+                },
+            ],
+        }))
+        .unwrap();
+    let radial = ctx
+        .gradient(piet::FixedGradient::Radial(piet::FixedRadialGradient {
+            center: Point::new(50., 50.),
+            origin_offset: Vec2::new(0., 0.),
+            radius: 240.,
+            stops: vec![
+                GradientStop {
+                    pos: 0.0,
+                    color: Color::rgb8(0, 0, 255),
+                },
+                GradientStop {
+                    pos: 0.5,
+                    color: Color::rgb8(0, 255, 0),
+                },
+                GradientStop {
+                    pos: 1.0,
+                    color: Color::rgb8(255, 0, 0),
+                },
+            ],
+        }))
+        .unwrap();
+    ctx.fill_transform(
+        Rect {
+            x0: 0.,
+            y0: 0.,
+            x1: 200.,
+            y1: 100.,
+        },
+        // &piet::PaintBrush::Color(piet::Color::rgb8(0, 255, 0)),
+        &radial,
+        // &piet::FixedGradient::Linear(piet::FixedLinearGradient {
+        //     start: Point::new(0., 0.),
+        //     end: Point::new(200., 100.),
+        //     stops: vec![
+        //         GradientStop {
+        //             pos: 0.0,
+        //             color: Color::rgb8(0, 0, 255)
+        //         },
+        //         GradientStop {
+        //             pos: 0.5,
+        //             color: Color::rgb8(0, 255, 0)
+        //         },
+        //         GradientStop {
+        //             pos: 1.0,
+        //             color: Color::rgb8(255, 0, 0)
+        //         },
+        //     ],
+        // }),
+        Affine::default(), // rotate(90f64.to_radians()),
+    );
+}
+
+fn test_scene1(scene: &mut Scene, rcx: &mut ResourceContext) { // piet-scene version: one gradient-filled rect fragment appended twice
+    use piet_scene::brush::*;
+    use piet_scene::geometry::{Affine, Point, Rect};
+    use piet_scene::scene::*;
+    let mut fragment = Fragment::default();
+    let mut b = build_fragment(&mut fragment);
+    let linear = Brush::LinearGradient(LinearGradient { // NOTE(review): built but never used; only `radial` is passed to `fill`
+        space: Space::Global,
+        start: Point::new(0., 0.),
+        end: Point::new(200., 100.),
+        extend: Extend::Pad,
+        stops: (&[
+            Stop {
+                offset: 0.,
+                color: Color::rgb8(0, 0, 255),
+            },
+            Stop {
+                offset: 0.5,
+                color: Color::rgb8(0, 255, 0),
+            },
+            Stop {
+                offset: 1.,
+                color: Color::rgb8(255, 0, 0),
+            },
+        ][..])
+            .into(),
+    });
+    let radial = Brush::RadialGradient(RadialGradient {
+        space: Space::Global,
+        center0: Point::new(50., 50.),
+        center1: Point::new(50., 50.),
+        radius0: 0.,
+        radius1: 240.,
+        extend: Extend::Pad,
+        stops: (&[
+            Stop {
+                offset: 0.,
+                color: Color::rgb8(0, 0, 255),
+            },
+            Stop {
+                offset: 0.5,
+                color: Color::rgb8(0, 255, 0),
+            },
+            Stop {
+                offset: 1.,
+                color: Color::rgb8(255, 0, 0),
+            },
+        ][..])
+            .into(),
+    });
+    //b.push_transform(Affine::translate(200., 200.) * Affine::rotate(45f32.to_radians()));
+    b.fill(
+        Fill::NonZero,
+        // &Brush::Solid(Color::rgba8(0, 255, 0, 255)),
+        &radial,
+        None, //Some(Affine::rotate(90f32.to_radians())),
+        Rect {
+            min: Point::new(0., 0.),
+            max: Point::new(200., 100.),
+        }
+        .elements(),
+    );
+    b.finish();
+    let mut b = build_scene(scene, rcx);
+    b.push_transform(Affine::translate(200., 200.) * Affine::rotate(45f32.to_radians()));
+    b.append(&fragment);
+    b.pop_transform();
+    b.push_transform(Affine::translate(400., 600.)); // NOTE(review): no matching pop_transform before finish()
+    b.append(&fragment);
+    b.finish();
+}
+
+fn scene_to_encoded_scene<'a>( // wraps piet-scene's encoded data in piet-gpu's upload view by borrowing it
+    scene: &'a Scene,
+    rcx: &'a ResourceContext,
+) -> EncodedSceneRef<'a, piet_scene::geometry::Affine> {
+    let d = scene.data();
+    EncodedSceneRef {
+        transform_stream: &d.transform_stream,
+        tag_stream: &d.tag_stream,
+        pathseg_stream: &d.pathseg_stream,
+        linewidth_stream: &d.linewidth_stream,
+        drawtag_stream: &d.drawtag_stream,
+        drawdata_stream: &d.drawdata_stream,
+        n_path: d.n_path,
+        n_pathseg: d.n_pathseg,
+        n_clip: d.n_clip,
+        ramp_data: rcx.ramp_data(), // gradient ramps come from the resource context, not the scene
+    }
+}
+
+fn render_info_string(rc: &mut impl RenderContext, info: &str) { // draws `info` as text at (110, 50)
+    let layout = rc
+        .text()
+        .new_text_layout(info.to_string())
+        .default_attribute(TextAttribute::FontSize(40.0))
+        .build()
+        .unwrap(); // panics if the text layout cannot be built
+    rc.draw_text(&layout, Point::new(110.0, 50.0));
+}
+
+// use piet_scene::geometry::*;
+// use piet_scene::path::*;
+// use piet_scene::scene::*;
+// use piet_scene::{geometry::*, path::*, resource::ResourceContext, scene::*};
+
+// fn main() {
+//     let mut scene = Scene::default();
+//     let mut rcx = ResourceContext::new();
+//     let mut sb = build_scene(&mut scene, &mut rcx);
+
+//     sb.push_layer(Blend::default(), Rect::default().elements());
+
+//     // let mut path = Path::new();
+//     // let mut b = PathBuilder::new(&mut path);
+//     // b.move_to(100., 100.);
+//     // b.line_to(200., 200.);
+//     // b.close_path();
+//     // b.move_to(50., 50.);
+//     // b.line_to(600., 150.);
+//     // b.move_to(4., 2.);
+//     // b.quad_to(8., 8., 9., 9.);
+//     // b.close_path();
+//     // println!("{:?}", path);
+//     // for el in path.elements() {
+//     //     println!("{:?}", el);
+//     // }
+//     //sb.push_layer(path.elements(), BlendMode::default());
+
+//     sb.push_layer(Blend::default(), [Element::MoveTo((0., 0.).into())]);
+// }
diff --git a/piet-scene/src/resource/gradient.rs b/piet-scene/src/resource/gradient.rs
index 0c509e4..bd2491c 100644
--- a/piet-scene/src/resource/gradient.rs
+++ b/piet-scene/src/resource/gradient.rs
@@ -102,7 +102,7 @@
     })
 }
 
-#[derive(Copy, Clone)]
+#[derive(Copy, Clone, Debug)]
 struct ColorF64([f64; 4]);
 
 impl ColorF64 {
@@ -117,7 +117,7 @@
 
     fn lerp(&self, other: &Self, a: f64) -> Self {
         fn l(x: f64, y: f64, a: f64) -> f64 {
-            x + (y - x) * a
+            x * (1.0 - a) + y * a
         }
         Self([
             l(self.0[0], other.0[0], a),
@@ -129,10 +129,10 @@
 
     fn to_premul_u32(&self) -> u32 {
         let a = self.0[3].min(1.0).max(0.0);
-        let r = ((self.0[0] * a).min(1.0).max(0.0) / 255.0) as u32;
-        let g = ((self.0[1] * a).min(1.0).max(0.0) / 255.0) as u32;
-        let b = ((self.0[2] * a).min(1.0).max(0.0) / 255.0) as u32;
-        let a = (a / 255.0) as u32;
+        let r = ((self.0[0] * a).min(1.0).max(0.0) * 255.0).round() as u32; // premultiply, clamp, scale to 0..=255
+        let g = ((self.0[1] * a).min(1.0).max(0.0) * 255.0).round() as u32; // round to nearest: a bare `as u32`
+        let b = ((self.0[2] * a).min(1.0).max(0.0) * 255.0).round() as u32; // truncates, so only exactly 1.0 hit 255
+        let a = (a * 255.0).round() as u32; // alpha was clamped above; it is scaled but not premultiplied
         r | (g << 8) | (b << 16) | (a << 24)
     }
 }
diff --git a/piet-scene/src/resource/mod.rs b/piet-scene/src/resource/mod.rs
index 1dfaa60..a1ea58b 100644
--- a/piet-scene/src/resource/mod.rs
+++ b/piet-scene/src/resource/mod.rs
@@ -38,6 +38,11 @@
     }
 
     pub fn destroy_brush(&mut self, brush: PersistentBrush) {}
+
+    /// Returns a borrowed view of the ramp data stored in `self.ramps`.
+    pub fn ramp_data(&self) -> &[u32] {
+        self.ramps.data()
+    }
 }
 
 /// Handle for a brush that is managed by the resource context.
diff --git a/piet-scene/src/scene/builder.rs b/piet-scene/src/scene/builder.rs
index a92267b..85d75b2 100644
--- a/piet-scene/src/scene/builder.rs
+++ b/piet-scene/src/scene/builder.rs
@@ -41,25 +41,38 @@
     scene: &'a mut SceneData,
     resources: ResourceData<'a>,
     layers: Vec<Blend>,
+    transforms: Vec<Affine>,
 }
 
 impl<'a> Builder<'a> {
     /// Creates a new builder for constructing a scene.
     fn new(scene: &'a mut SceneData, mut resources: ResourceData<'a>) -> Self {
-        scene.clear();
+        // Fragments skip the default root transform and line width that
+        // `SceneData::reset` seeds for full scenes, so the kind of resource
+        // data has to be known before the scene is reset.
+        let is_fragment = matches!(resources, ResourceData::Fragment(_));
+        scene.reset(is_fragment);
         resources.clear();
         Self {
             scene,
             resources,
             layers: vec![],
+            transforms: vec![],
         }
     }
 
     /// Pushes a transform matrix onto the stack.
-    pub fn push_transform(&mut self, transform: &Affine) {}
+    pub fn push_transform(&mut self, transform: Affine) {
+        self.transform(transform); // NOTE(review): presumably encodes the matrix; confirm
+        self.transforms.push(transform); // recorded so `pop_transform` can apply its inverse
+    }
 
     /// Pops the current transform matrix.
-    pub fn pop_transform(&mut self) {}
+    pub fn pop_transform(&mut self) {
+        if let Some(transform) = self.transforms.pop() { // empty stack: nothing to undo
+            self.transform(transform.inverse()); // NOTE(review): assumes an invertible matrix
+        }
+    }
 
-    /// Pushes a new layer bound by the specifed shape and composed with
+    /// Pushes a new layer bound by the specified shape and composed with
     /// previous layers using the specified blend mode.
@@ -181,6 +194,15 @@
         while let Some(layer) = self.layers.pop() {
             self.end_clip(Some(layer));
         }
+        match self.resources {
+            ResourceData::Fragment(_) => {
+                // Make sure the transform state is invariant for fragments
+                while !self.transforms.is_empty() {
+                    self.pop_transform();
+                }
+            }
+            _ => {}
+        }
     }
 }
 
diff --git a/piet-scene/src/scene/mod.rs b/piet-scene/src/scene/mod.rs
index df9db90..ba0b069 100644
--- a/piet-scene/src/scene/mod.rs
+++ b/piet-scene/src/scene/mod.rs
@@ -28,20 +28,20 @@
 use core::ops::Range;
 
 #[derive(Default)]
-struct SceneData {
-    transform_stream: Vec<Affine>,
-    tag_stream: Vec<u8>,
-    pathseg_stream: Vec<u8>,
-    linewidth_stream: Vec<f32>,
-    drawtag_stream: Vec<u32>,
-    drawdata_stream: Vec<u8>,
-    n_path: u32,
-    n_pathseg: u32,
-    n_clip: u32,
+pub struct SceneData { // handed out by reference through `Scene::data`
+    pub transform_stream: Vec<Affine>,
+    pub tag_stream: Vec<u8>,
+    pub pathseg_stream: Vec<u8>,
+    pub linewidth_stream: Vec<f32>,
+    pub drawtag_stream: Vec<u32>,
+    pub drawdata_stream: Vec<u8>,
+    pub n_path: u32,
+    pub n_pathseg: u32,
+    pub n_clip: u32,
 }
 
 impl SceneData {
-    fn clear(&mut self) {
+    fn reset(&mut self, is_fragment: bool) {
         self.transform_stream.clear();
         self.tag_stream.clear();
         self.pathseg_stream.clear();
@@ -51,6 +51,11 @@
         self.n_path = 0;
         self.n_pathseg = 0;
         self.n_clip = 0;
+        if !is_fragment {
+            self.transform_stream
+                .push(Affine::new(&[1.0, 0.0, 0.0, 1.0, 0.0, 0.0]));
+            self.linewidth_stream.push(-1.0);
+        }
     }
 
     fn append(&mut self, other: &SceneData) {
@@ -76,6 +81,12 @@
     data: SceneData,
 }
 
+impl Scene {
+    pub fn data(&self) -> &SceneData {
+        &self.data // read-only borrow of the scene's data; nothing is copied
+    }
+}
+
 /// Encoded definition of a scene fragment and associated resources.
 #[derive(Default)]
 pub struct Fragment {
