Partial DX12 fixes

As of this patch, the cli works in release mode, but hangs in debug. There
are some validation errors about incompatible resource states.
diff --git a/piet-gpu-hal/src/dx12.rs b/piet-gpu-hal/src/dx12.rs
index 6c8464d..337ca04 100644
--- a/piet-gpu-hal/src/dx12.rs
+++ b/piet-gpu-hal/src/dx12.rs
@@ -554,6 +554,7 @@
             Flags: d3d12::D3D12_PIPELINE_STATE_FLAG_NONE,
         };
         let pipeline_state = self.device.create_compute_pipeline_state(&desc)?;
+
         Ok(Pipeline {
             pipeline_state,
             root_signature,
@@ -725,8 +726,10 @@
         }
     }
 
-    fn add_textures(&mut self, _images: &[&Image]) {
-        todo!()
+    fn add_textures(&mut self, images: &[&Image]) {
+        for img in images {
+            self.handles.push(img.cpu_ref.as_ref().unwrap().handle());
+        }
     }
 
     unsafe fn build(
diff --git a/piet-gpu/bin/cli.rs b/piet-gpu/bin/cli.rs
index 60c9660..469a01e 100644
--- a/piet-gpu/bin/cli.rs
+++ b/piet-gpu/bin/cli.rs
@@ -4,7 +4,7 @@
 
 use clap::{App, Arg};
 
-use piet_gpu_hal::{BufferUsage, Error, Instance, Session};
+use piet_gpu_hal::{BufferUsage, Error, Instance, InstanceFlags, Session};
 
 use piet_gpu::{test_scenes, PietGpuRenderContext, Renderer};
 
@@ -226,7 +226,7 @@
                 .takes_value(true),
         )
         .get_matches();
-    let (instance, _) = Instance::new(None, Default::default())?;
+    let (instance, _) = Instance::new(None, InstanceFlags::default())?;
     unsafe {
         let device = instance.device(None)?;
         let session = Session::new(device);
@@ -256,6 +256,7 @@
         cmd_buf.begin();
         renderer.record(&mut cmd_buf, &query_pool, 0);
         cmd_buf.copy_image_to_buffer(&renderer.image_dev, &image_buf);
+        cmd_buf.finish_timestamps(&query_pool);
         cmd_buf.host_barrier();
         cmd_buf.finish();
         let start = std::time::Instant::now();
diff --git a/piet-gpu/src/lib.rs b/piet-gpu/src/lib.rs
index e1bde6a..a1584f9 100644
--- a/piet-gpu/src/lib.rs
+++ b/piet-gpu/src/lib.rs
@@ -14,8 +14,8 @@
 use piet::{ImageFormat, RenderContext};
 
 use piet_gpu_hal::{
-    BindType, Buffer, BufferUsage, CmdBuf, DescriptorSet, Error, Image, ImageLayout, Pipeline,
-    QueryPool, Session, ShaderCode, include_shader,
+    include_shader, BindType, Buffer, BufferUsage, CmdBuf, DescriptorSet, Error, Image,
+    ImageLayout, Pipeline, QueryPool, Session,
 };
 
 use pico_svg::PicoSvg;
@@ -63,8 +63,6 @@
     memory_buf_host: Vec<Buffer>,
     memory_buf_dev: Buffer,
 
-    state_buf: Buffer,
-
     // Staging buffers
     config_bufs: Vec<Buffer>,
     // Device config buf
@@ -125,7 +123,6 @@
             .map(|_| session.create_buffer(8 * 1024 * 1024, host_upload).unwrap())
             .collect::<Vec<_>>();
 
-        let state_buf = session.create_buffer(1 * 1024 * 1024, dev)?;
         let image_dev = session.create_image2d(width as u32, height as u32)?;
 
         // Note: this must be updated when the config struct size changes.
@@ -163,13 +160,13 @@
 
         let tile_alloc_code = include_shader!(session, "../shader/gen/tile_alloc");
         let tile_pipeline = session
-            .create_compute_pipeline(tile_alloc_code, &[BindType::Buffer, BindType::Buffer])?;
+            .create_compute_pipeline(tile_alloc_code, &[BindType::Buffer, BindType::BufReadOnly])?;
         let tile_ds = session
             .create_simple_descriptor_set(&tile_pipeline, &[&memory_buf_dev, &config_buf])?;
 
         let path_alloc_code = include_shader!(session, "../shader/gen/path_coarse");
         let path_pipeline = session
-            .create_compute_pipeline(path_alloc_code, &[BindType::Buffer, BindType::Buffer])?;
+            .create_compute_pipeline(path_alloc_code, &[BindType::Buffer, BindType::BufReadOnly])?;
         let path_ds = session
             .create_simple_descriptor_set(&path_pipeline, &[&memory_buf_dev, &config_buf])?;
 
@@ -180,20 +177,20 @@
             include_shader!(session, "../shader/gen/backdrop")
         };
         let backdrop_pipeline = session
-            .create_compute_pipeline(backdrop_code, &[BindType::Buffer, BindType::Buffer])?;
+            .create_compute_pipeline(backdrop_code, &[BindType::Buffer, BindType::BufReadOnly])?;
         let backdrop_ds = session
             .create_simple_descriptor_set(&backdrop_pipeline, &[&memory_buf_dev, &config_buf])?;
 
         // TODO: constants
         let bin_code = include_shader!(session, "../shader/gen/binning");
-        let bin_pipeline =
-            session.create_compute_pipeline(bin_code, &[BindType::Buffer, BindType::Buffer])?;
+        let bin_pipeline = session
+            .create_compute_pipeline(bin_code, &[BindType::Buffer, BindType::BufReadOnly])?;
         let bin_ds =
             session.create_simple_descriptor_set(&bin_pipeline, &[&memory_buf_dev, &config_buf])?;
 
         let coarse_code = include_shader!(session, "../shader/gen/coarse");
-        let coarse_pipeline =
-            session.create_compute_pipeline(coarse_code, &[BindType::Buffer, BindType::Buffer])?;
+        let coarse_pipeline = session
+            .create_compute_pipeline(coarse_code, &[BindType::Buffer, BindType::BufReadOnly])?;
         let coarse_ds = session
             .create_simple_descriptor_set(&coarse_pipeline, &[&memory_buf_dev, &config_buf])?;
 
@@ -215,7 +212,7 @@
             k4_code,
             &[
                 BindType::Buffer,
-                BindType::Buffer,
+                BindType::BufReadOnly,
                 BindType::Image,
                 BindType::ImageRead,
                 BindType::ImageRead,
@@ -234,7 +231,6 @@
             scene_bufs,
             memory_buf_host,
             memory_buf_dev,
-            state_buf,
             config_buf,
             config_bufs,
             image_dev,
@@ -324,7 +320,6 @@
     pub unsafe fn record(&self, cmd_buf: &mut CmdBuf, query_pool: &QueryPool, buf_ix: usize) {
         cmd_buf.copy_buffer(&self.config_bufs[buf_ix], &self.config_buf);
         cmd_buf.copy_buffer(&self.memory_buf_host[buf_ix], &self.memory_buf_dev);
-        cmd_buf.clear_buffer(&self.state_buf, None);
         cmd_buf.memory_barrier();
         cmd_buf.image_barrier(
             &self.image_dev,
@@ -386,7 +381,9 @@
             (256, 1, 1),
         );
         cmd_buf.write_timestamp(&query_pool, 5);
+        println!("before barrier");
         cmd_buf.memory_barrier();
+        println!("after barrier, before coarse");
         cmd_buf.dispatch(
             &self.coarse_pipeline,
             &self.coarse_ds,
@@ -397,6 +394,7 @@
             ),
             (256, 256, 1),
         );
+        println!("after coarse");
         cmd_buf.write_timestamp(&query_pool, 6);
         cmd_buf.memory_barrier();
         cmd_buf.dispatch(