WIP render pipeline
diff --git a/src/recording.rs b/src/recording.rs
index 0adb0f3..6dc48c8 100644
--- a/src/recording.rs
+++ b/src/recording.rs
@@ -50,6 +50,11 @@
 #[derive(Clone, Copy)]
 pub enum ResourceProxy {
     Buffer(BufferProxy),
+    BufferRange {
+        proxy: BufferProxy,
+        offset: u64,
+        size: u64,
+    },
     Image(ImageProxy),
 }
 
@@ -67,6 +72,7 @@
     // Alternative: provide bufs & images as separate sequences
     Dispatch(ShaderId, (u32, u32, u32), Vec<ResourceProxy>),
     DispatchIndirect(ShaderId, BufferProxy, u64, Vec<ResourceProxy>),
+    Draw(DrawParams),
     Download(BufferProxy),
     /// Commands to clear the buffer from an offset on for a length of the given size.
     /// If the size is [None], it clears until the end.
@@ -94,6 +100,16 @@
     // TODO: Uniform, Sampler, maybe others
 }
 
+pub struct DrawParams {
+    pub shader_id: ShaderId,
+    pub instance_count: u32,
+    pub vertex_count: u32,
+    pub vertex_buffer: Option<BufferProxy>,
+    pub resources: Vec<ResourceProxy>,
+    pub target: ImageProxy,
+    pub clear_color: Option<[f32; 4]>,
+}
+
 impl Recording {
     /// Appends a [`Command`] to the back of the [`Recording`].
     pub fn push(&mut self, cmd: Command) {
@@ -175,6 +191,11 @@
         self.push(Command::DispatchIndirect(shader, buf, offset, r));
     }
 
+    /// Issue a draw call
+    pub fn draw(&mut self, params: DrawParams) {
+        self.push(Command::Draw(params));
+    }
+
     /// Prepare a buffer for downloading.
     ///
     /// Currently this copies to a download buffer. The original buffer can be freed
@@ -202,6 +223,11 @@
     pub fn free_resource(&mut self, resource: ResourceProxy) {
         match resource {
             ResourceProxy::Buffer(buf) => self.free_buffer(buf),
+            ResourceProxy::BufferRange {
+                proxy,
+                offset: _,
+                size: _,
+            } => self.free_buffer(proxy),
             ResourceProxy::Image(image) => self.free_image(image),
         }
     }
@@ -228,6 +254,15 @@
             Self::Bgra8 => wgpu::TextureFormat::Bgra8Unorm,
         }
     }
+
+    #[cfg(feature = "wgpu")]
+    pub fn from_wgpu(format: wgpu::TextureFormat) -> Self {
+        match format {
+            wgpu::TextureFormat::Rgba8Unorm => Self::Rgba8,
+            wgpu::TextureFormat::Bgra8Unorm => Self::Bgra8,
+            _ => unimplemented!("conversion from {format:?} is not supported"),
+        }
+    }
 }
 
 impl ImageProxy {
diff --git a/src/shaders.rs b/src/shaders.rs
index 7b9cdb9..2700ad1 100644
--- a/src/shaders.rs
+++ b/src/shaders.rs
@@ -80,7 +80,7 @@
                 .into();
             #[cfg(not(feature = "hot_reload"))]
             let source = shaders.$name.wgsl.code;
-            engine.add_shader(
+            engine.add_compute_shader(
                 device,
                 $label,
                 source,
diff --git a/src/wgpu_engine.rs b/src/wgpu_engine.rs
index f743d12..9923439 100644
--- a/src/wgpu_engine.rs
+++ b/src/wgpu_engine.rs
@@ -10,13 +10,14 @@
 
 use wgpu::{
     BindGroup, BindGroupLayout, Buffer, BufferUsages, CommandEncoder, CommandEncoderDescriptor,
-    ComputePipeline, Device, PipelineCompilationOptions, Queue, Texture, TextureAspect,
+    ComputePipeline, Device, PipelineCompilationOptions, Queue, RenderPipeline, Texture, TextureAspect,
     TextureUsages, TextureView, TextureViewDimension,
 };
 
 use crate::recording::BindType;
 use crate::{
-    BufferProxy, Command, Error, ImageProxy, Recording, ResourceId, ResourceProxy, ShaderId,
+    BufferProxy, Command, Error, ImageProxy, Recording, ResourceId, ResourceProxy,
+    ShaderId,
 };
 
 #[cfg(not(target_arch = "wasm32"))]
@@ -38,8 +39,13 @@
     pub(crate) use_cpu: bool,
 }
 
+enum PipelineState {
+    Compute(ComputePipeline),
+    Render(RenderPipeline),
+}
+
 struct WgpuShader {
-    pipeline: ComputePipeline,
+    pipeline: PipelineState,
     bind_group_layout: BindGroupLayout,
 }
 
@@ -230,7 +236,7 @@
     ///
     /// Maybe should do template instantiation here? But shader compilation pipeline feels maybe
     /// a bit separate.
-    pub fn add_shader(
+    pub fn add_compute_shader(
         &mut self,
         device: &Device,
         label: &'static str,
@@ -266,54 +272,9 @@
             }
         }
 
-        let entries = layout
-            .iter()
-            .enumerate()
-            .map(|(i, bind_type)| match bind_type {
-                BindType::Buffer | BindType::BufReadOnly => wgpu::BindGroupLayoutEntry {
-                    binding: i as u32,
-                    visibility: wgpu::ShaderStages::COMPUTE,
-                    ty: wgpu::BindingType::Buffer {
-                        ty: wgpu::BufferBindingType::Storage {
-                            read_only: *bind_type == BindType::BufReadOnly,
-                        },
-                        has_dynamic_offset: false,
-                        min_binding_size: None,
-                    },
-                    count: None,
-                },
-                BindType::Uniform => wgpu::BindGroupLayoutEntry {
-                    binding: i as u32,
-                    visibility: wgpu::ShaderStages::COMPUTE,
-                    ty: wgpu::BindingType::Buffer {
-                        ty: wgpu::BufferBindingType::Uniform,
-                        has_dynamic_offset: false,
-                        min_binding_size: None,
-                    },
-                    count: None,
-                },
-                BindType::Image(format) | BindType::ImageRead(format) => {
-                    wgpu::BindGroupLayoutEntry {
-                        binding: i as u32,
-                        visibility: wgpu::ShaderStages::COMPUTE,
-                        ty: if *bind_type == BindType::ImageRead(*format) {
-                            wgpu::BindingType::Texture {
-                                sample_type: wgpu::TextureSampleType::Float { filterable: true },
-                                view_dimension: wgpu::TextureViewDimension::D2,
-                                multisampled: false,
-                            }
-                        } else {
-                            wgpu::BindingType::StorageTexture {
-                                access: wgpu::StorageTextureAccess::WriteOnly,
-                                format: format.to_wgpu(),
-                                view_dimension: wgpu::TextureViewDimension::D2,
-                            }
-                        },
-                        count: None,
-                    }
-                }
-            })
-            .collect::<Vec<_>>();
+        let entries = Self::create_bind_group_layout_entries(
+            layout.iter().map(|b| (*b, wgpu::ShaderStages::COMPUTE)),
+        );
         #[cfg(not(target_arch = "wasm32"))]
         if let Some(uninit) = self.shaders_to_initialise.as_mut() {
             let id = add(Shader {
@@ -337,6 +298,70 @@
         })
     }
 
+    #[allow(clippy::too_many_arguments)]
+    pub fn add_render_shader(
+        &mut self,
+        device: &Device,
+        label: &'static str,
+        module: &wgpu::ShaderModule,
+        vertex_main: &'static str,
+        fragment_main: &'static str,
+        topology: wgpu::PrimitiveTopology,
+        color_attachment: wgpu::ColorTargetState,
+        vertex_buffer: Option<wgpu::VertexBufferLayout>,
+        bind_layout: &[(BindType, wgpu::ShaderStages)],
+    ) -> ShaderId {
+        let entries = Self::create_bind_group_layout_entries(bind_layout.iter().copied());
+        let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
+            label: None,
+            entries: &entries,
+        });
+        let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
+            label: None,
+            bind_group_layouts: &[&bind_group_layout],
+            push_constant_ranges: &[],
+        });
+        let pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor {
+            label: Some(label),
+            layout: Some(&pipeline_layout),
+            vertex: wgpu::VertexState {
+                module,
+                entry_point: vertex_main,
+                buffers: vertex_buffer
+                    .as_ref()
+                    .map(core::slice::from_ref)
+                    .unwrap_or_default(),
+            },
+            fragment: Some(wgpu::FragmentState {
+                module,
+                entry_point: fragment_main,
+                targets: &[Some(color_attachment)],
+            }),
+            primitive: wgpu::PrimitiveState {
+                topology,
+                strip_index_format: None,
+                front_face: wgpu::FrontFace::Ccw,
+                cull_mode: Some(wgpu::Face::Back),
+                polygon_mode: wgpu::PolygonMode::Fill,
+                unclipped_depth: false,
+                conservative: false,
+            },
+            depth_stencil: None,
+            multisample: wgpu::MultisampleState::default(),
+            multiview: None,
+        });
+        let id = self.shaders.len();
+        self.shaders.push(Shader {
+            wgpu: Some(WgpuShader {
+                pipeline: PipelineState::Render(pipeline),
+                bind_group_layout,
+            }),
+            cpu: None,
+            label,
+        });
+        ShaderId(id)
+    }
+
     pub fn run_recording(
         &mut self,
         device: &Device,
@@ -360,8 +385,11 @@
                     transient_map
                         .bufs
                         .insert(buf_proxy.id, TransientBuf::Cpu(bytes));
-                    let usage =
-                        BufferUsages::COPY_SRC | BufferUsages::COPY_DST | BufferUsages::STORAGE;
+                    // TODO: restrict VERTEX usage to "debug_layers" feature?
+                    let usage = BufferUsages::COPY_SRC
+                        | BufferUsages::COPY_DST
+                        | BufferUsages::STORAGE
+                        | BufferUsages::VERTEX;
                     let buf = self
                         .pool
                         .get_buf(buf_proxy.size, buf_proxy.name, usage, device);
@@ -492,7 +520,10 @@
                             let query = profiler
                                 .begin_query(shader.label, &mut cpass, device)
                                 .with_parent(Some(&query));
-                            cpass.set_pipeline(&wgpu_shader.pipeline);
+                            let PipelineState::Compute(pipeline) = &wgpu_shader.pipeline else {
+                                panic!("cannot issue a dispatch with a render pipeline");
+                            };
+                            cpass.set_pipeline(pipeline);
                             cpass.set_bind_group(0, &bind_group, &[]);
                             cpass.dispatch_workgroups(wg_size.0, wg_size.1, wg_size.2);
                             #[cfg(feature = "wgpu-profiler")]
@@ -539,7 +570,10 @@
                             let query = profiler
                                 .begin_query(shader.label, &mut cpass, device)
                                 .with_parent(Some(&query));
-                            cpass.set_pipeline(&wgpu_shader.pipeline);
+                            let PipelineState::Compute(pipeline) = &wgpu_shader.pipeline else {
+                                panic!("cannot issue a dispatch with a render pipeline");
+                            };
+                            cpass.set_pipeline(pipeline);
                             cpass.set_bind_group(0, &bind_group, &[]);
                             let buf = self
                                 .bind_map
@@ -551,6 +585,60 @@
                         }
                     }
                 }
+                Command::Draw(draw_params) => {
+                    let shader = &self.shaders[draw_params.shader_id.0];
+                    let ShaderKind::Wgpu(shader) = shader.select() else {
+                        panic!("a render pass does not have a CPU equivalent");
+                    };
+                    let bind_group = transient_map.create_bind_group(
+                        &mut self.bind_map,
+                        &mut self.pool,
+                        device,
+                        queue,
+                        &mut encoder,
+                        &shader.bind_group_layout,
+                        &draw_params.resources,
+                    )?;
+                    let render_target =
+                        transient_map.materialize_external_image_for_render_pass(&draw_params.target);
+                    let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
+                        label: None,
+                        color_attachments: &[Some(wgpu::RenderPassColorAttachment {
+                            view: render_target,
+                            resolve_target: None,
+                            ops: wgpu::Operations {
+                                load: match draw_params.clear_color {
+                                    Some(c) => wgpu::LoadOp::Clear(wgpu::Color {
+                                        r: c[0] as f64,
+                                        g: c[1] as f64,
+                                        b: c[2] as f64,
+                                        a: c[3] as f64,
+                                    }),
+                                    None => wgpu::LoadOp::Load,
+                                },
+                                store: wgpu::StoreOp::Store,
+                            },
+                        })],
+                        depth_stencil_attachment: None,
+                        occlusion_query_set: None,
+                        timestamp_writes: None,
+                    });
+                    let PipelineState::Render(pipeline) = &shader.pipeline else {
+                        panic!("cannot issue a draw with a compute pipeline");
+                    };
+                    rpass.set_pipeline(pipeline);
+                    if let Some(proxy) = draw_params.vertex_buffer {
+                        // TODO: need a way to materialize a CPU initialized buffer. For now assume
+                        // buffer exists? Also, need to materialize this buffer with vertex usage
+                        let buf = self
+                            .bind_map
+                            .get_gpu_buf(proxy.id)
+                            .ok_or("vertex buffer not in map")?;
+                        rpass.set_vertex_buffer(0, buf.slice(..));
+                    }
+                    rpass.set_bind_group(0, &bind_group, &[]);
+                    rpass.draw(0..draw_params.vertex_count, 0..draw_params.instance_count);
+                }
                 Command::Download(proxy) => {
                     let src_buf = self
                         .bind_map
@@ -619,6 +707,58 @@
         self.downloads.remove(&buf.id);
     }
 
+    fn create_bind_group_layout_entries(
+        layout: impl Iterator<Item = (BindType, wgpu::ShaderStages)>,
+    ) -> Vec<wgpu::BindGroupLayoutEntry> {
+        layout
+            .enumerate()
+            .map(|(i, (bind_type, visibility))| match bind_type {
+                BindType::Buffer | BindType::BufReadOnly => wgpu::BindGroupLayoutEntry {
+                    binding: i as u32,
+                    visibility,
+                    ty: wgpu::BindingType::Buffer {
+                        ty: wgpu::BufferBindingType::Storage {
+                            read_only: bind_type == BindType::BufReadOnly,
+                        },
+                        has_dynamic_offset: false,
+                        min_binding_size: None,
+                    },
+                    count: None,
+                },
+                BindType::Uniform => wgpu::BindGroupLayoutEntry {
+                    binding: i as u32,
+                    visibility,
+                    ty: wgpu::BindingType::Buffer {
+                        ty: wgpu::BufferBindingType::Uniform,
+                        has_dynamic_offset: false,
+                        min_binding_size: None,
+                    },
+                    count: None,
+                },
+                BindType::Image(format) | BindType::ImageRead(format) => {
+                    wgpu::BindGroupLayoutEntry {
+                        binding: i as u32,
+                        visibility,
+                        ty: if bind_type == BindType::ImageRead(format) {
+                            wgpu::BindingType::Texture {
+                                sample_type: wgpu::TextureSampleType::Float { filterable: true },
+                                view_dimension: wgpu::TextureViewDimension::D2,
+                                multisampled: false,
+                            }
+                        } else {
+                            wgpu::BindingType::StorageTexture {
+                                access: wgpu::StorageTextureAccess::WriteOnly,
+                                format: format.to_wgpu(),
+                                view_dimension: wgpu::TextureViewDimension::D2,
+                            }
+                        },
+                        count: None,
+                    }
+                }
+            })
+            .collect::<Vec<_>>()
+    }
+
     fn create_compute_pipeline(
         device: &Device,
         label: &str,
@@ -650,7 +790,7 @@
             },
         });
         WgpuShader {
-            pipeline,
+            pipeline: PipelineState::Compute(pipeline),
             bind_group_layout,
         }
     }
@@ -847,6 +987,14 @@
         }
     }
 
+    fn materialize_external_image_for_render_pass(&mut self, proxy: &ImageProxy) -> &TextureView {
+        // TODO: Maybe this should support instantiating a transient texture. Right now all render
+        // passes target a `SurfaceTexture`, so supporting external textures is sufficient.
+        self.images
+            .get(&proxy.id)
+            .expect("texture not materialized")
+    }
+
     #[allow(clippy::too_many_arguments)]
     fn create_bind_group(
         &mut self,
@@ -860,17 +1008,23 @@
     ) -> Result<BindGroup, Error> {
         for proxy in bindings {
             match proxy {
-                ResourceProxy::Buffer(proxy) => {
+                ResourceProxy::Buffer(proxy)
+                | ResourceProxy::BufferRange {
+                    proxy,
+                    offset: _,
+                    size: _,
+                } => {
                     if self.bufs.contains_key(&proxy.id) {
                         continue;
                     }
                     match bind_map.buf_map.entry(proxy.id) {
                         Entry::Vacant(v) => {
-                            // TODO: only some buffers will need indirect, but does it hurt?
+                            // TODO: only some buffers will need indirect & vertex, but does it hurt?
                             let usage = BufferUsages::COPY_SRC
                                 | BufferUsages::COPY_DST
                                 | BufferUsages::STORAGE
-                                | BufferUsages::INDIRECT;
+                                | BufferUsages::INDIRECT
+                                | BufferUsages::VERTEX;
                             let buf = pool.get_buf(proxy.size, proxy.name, usage, device);
                             if bind_map.pending_clears.remove(&proxy.id) {
                                 encoder.clear_buffer(&buf, 0, None);
@@ -934,6 +1088,24 @@
                         resource: buf.as_entire_binding(),
                     })
                 }
+                ResourceProxy::BufferRange {
+                    proxy,
+                    offset,
+                    size,
+                } => {
+                    let buf = match self.bufs.get(&proxy.id) {
+                        Some(TransientBuf::Gpu(b)) => b,
+                        _ => bind_map.get_gpu_buf(proxy.id).unwrap(),
+                    };
+                    Ok(wgpu::BindGroupEntry {
+                        binding: i as u32,
+                        resource: wgpu::BindingResource::Buffer(wgpu::BufferBinding {
+                            buffer: buf,
+                            offset: *offset,
+                            size: core::num::NonZeroU64::new(*size),
+                        }),
+                    })
+                }
                 ResourceProxy::Image(proxy) => {
                     let view = self
                         .images
@@ -964,10 +1136,15 @@
         // First pass is mutable; create buffers as needed
         for resource in bindings {
             match resource {
-                ResourceProxy::Buffer(buf) => match self.bufs.get(&buf.id) {
+                ResourceProxy::Buffer(proxy)
+                | ResourceProxy::BufferRange {
+                    proxy,
+                    offset: _,
+                    size: _,
+                } => match self.bufs.get(&proxy.id) {
                     Some(TransientBuf::Cpu(_)) => (),
                     Some(TransientBuf::Gpu(_)) => panic!("buffer was already materialized on GPU"),
-                    _ => bind_map.materialize_cpu_buf(buf),
+                    _ => bind_map.materialize_cpu_buf(proxy),
                 },
                 ResourceProxy::Image(_) => todo!(),
             };
@@ -980,6 +1157,7 @@
                     Some(TransientBuf::Cpu(b)) => CpuBinding::Buffer(b),
                     _ => bind_map.get_cpu_buf(buf.id),
                 },
+                ResourceProxy::BufferRange { .. } => todo!(),
                 ResourceProxy::Image(_) => todo!(),
             })
             .collect()