Start async wiring
Make async versions of the main library entry points, and invoke those from the with_winit example.
Right now this only prints the contents of the bump buffer before running the fine dispatch, but it could later use those contents to apply conditional logic.
diff --git a/examples/with_winit/src/main.rs b/examples/with_winit/src/main.rs
index 4046538..f12f8c3 100644
--- a/examples/with_winit/src/main.rs
+++ b/examples/with_winit/src/main.rs
@@ -22,6 +22,7 @@
use clap::Parser;
use vello::{
+ block_on_wgpu,
kurbo::{Affine, Vec2},
util::RenderContext,
Renderer, Scene, SceneBuilder,
@@ -187,18 +188,25 @@
.surface
.get_current_texture()
.expect("failed to get surface texture");
- renderer
- .render_to_surface(
- &device_handle.device,
- &device_handle.queue,
- &scene,
- &surface_texture,
- width,
- height,
- )
- .expect("failed to render to surface");
- surface_texture.present();
- device_handle.device.poll(wgpu::Maintain::Wait);
+ let fut = async {
+ renderer
+ .render_to_surface_async(
+ &device_handle.device,
+ &device_handle.queue,
+ &scene,
+ &surface_texture,
+ width,
+ height,
+ )
+ .await
+ .expect("failed to render to surface");
+ surface_texture.present();
+ };
+ #[cfg(not(target_arch = "wasm32"))]
+ block_on_wgpu(&device_handle.device, fut);
+ #[cfg(target_arch = "wasm32")]
+ wasm_bindgen_futures::spawn_local(fut);
+ device_handle.device.poll(wgpu::Maintain::Poll);
}
Event::UserEvent(event) => match event {
#[cfg(not(target_arch = "wasm32"))]
diff --git a/src/engine.rs b/src/engine.rs
index d4d9c91..76d7524 100644
--- a/src/engine.rs
+++ b/src/engine.rs
@@ -21,12 +21,9 @@
sync::atomic::{AtomicU64, Ordering},
};
-use futures_intrusive::channel::shared::GenericOneshotReceiver;
-use parking_lot::RawMutex;
use wgpu::{
- util::DeviceExt, BindGroup, BindGroupLayout, Buffer, BufferAsyncError, BufferSlice,
- BufferUsages, BufferView, ComputePipeline, Device, Queue, Texture, TextureAspect,
- TextureFormat, TextureUsages, TextureView, TextureViewDimension,
+ util::DeviceExt, BindGroup, BindGroupLayout, Buffer, BufferUsages, ComputePipeline, Device,
+ Queue, Texture, TextureAspect, TextureFormat, TextureUsages, TextureView, TextureViewDimension,
};
pub type Error = Box<dyn std::error::Error>;
@@ -43,6 +40,7 @@
shaders: Vec<Shader>,
pool: ResourcePool,
bind_map: BindMap,
+ downloads: HashMap<Id, Buffer>,
}
struct Shader {
@@ -101,11 +99,6 @@
FreeImage(ImageProxy),
}
-#[derive(Default)]
-pub struct Downloads {
- buf_map: HashMap<Id, Buffer>,
-}
-
/// The type of resource that will be bound to a slot in a shader.
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum BindType {
@@ -153,6 +146,7 @@
shaders: vec![],
pool: Default::default(),
bind_map: Default::default(),
+ downloads: Default::default(),
}
}
@@ -253,8 +247,7 @@
queue: &Queue,
recording: &Recording,
external_resources: &[ExternalResource],
- ) -> Result<Downloads, Error> {
- let mut downloads = Downloads::default();
+ ) -> Result<(), Error> {
let mut free_bufs: HashSet<Id> = Default::default();
let mut free_images: HashSet<Id> = Default::default();
@@ -264,7 +257,9 @@
Command::Upload(buf_proxy, bytes) => {
let usage =
BufferUsages::COPY_SRC | BufferUsages::COPY_DST | BufferUsages::STORAGE;
- let buf = self.pool.get_buf(buf_proxy, usage, device);
+ let buf = self
+ .pool
+ .get_buf(buf_proxy.size, buf_proxy.name, usage, device);
// TODO: if buffer is newly created, might be better to make it mapped at creation
// and copy. However, we expect reuse will be most common.
queue.write_buffer(&buf, 0, bytes);
@@ -273,7 +268,9 @@
Command::UploadUniform(buf_proxy, bytes) => {
let usage = BufferUsages::UNIFORM | BufferUsages::COPY_DST;
// Same consideration as above
- let buf = self.pool.get_buf(buf_proxy, usage, device);
+ let buf = self
+ .pool
+ .get_buf(buf_proxy.size, buf_proxy.name, usage, device);
queue.write_buffer(&buf, 0, bytes);
self.bind_map.insert_buf(buf_proxy, buf);
}
@@ -351,14 +348,10 @@
.buf_map
.get(&proxy.id)
.ok_or("buffer not in map")?;
- let buf = device.create_buffer(&wgpu::BufferDescriptor {
- label: Some(proxy.name),
- size: proxy.size,
- usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
- mapped_at_creation: false,
- });
+ let usage = BufferUsages::MAP_READ | BufferUsages::COPY_DST;
+ let buf = self.pool.get_buf(proxy.size, "download", usage, device);
encoder.copy_buffer_to_buffer(&src_buf.buffer, 0, &buf, 0, proxy.size);
- downloads.buf_map.insert(proxy.id, buf);
+ self.downloads.insert(proxy.id, buf);
}
Command::Clear(proxy, offset, size) => {
let buffer = self
@@ -393,7 +386,15 @@
drop(view);
}
}
- Ok(downloads)
+ Ok(())
+ }
+
+ /// Look up the download buffer stored for `buf`.
+ ///
+ /// Returns `None` when the downloads map holds no entry keyed by the proxy's id.
+ pub fn get_download(&self, buf: BufProxy) -> Option<&Buffer> {
+ self.downloads.get(&buf.id)
+ }
+
+ /// Remove the download entry stored for `buf`, if there is one.
+ ///
+ /// The removed `Buffer` is discarded here; calling this for a proxy without an entry
+ /// does nothing.
+ pub fn free_download(&mut self, buf: BufProxy) {
+ self.downloads.remove(&buf.id);
+ }
}
@@ -441,6 +442,10 @@
));
}
+ /// Prepare a buffer for downloading.
+ ///
+ /// Currently this copies to a download buffer. The original buffer can be freed
+ /// immediately after.
pub fn download(&mut self, buf: BufProxy) {
self.push(Command::Download(buf));
}
@@ -603,7 +608,7 @@
if let Entry::Vacant(v) = self.buf_map.entry(proxy.id) {
let usage =
BufferUsages::COPY_SRC | BufferUsages::COPY_DST | BufferUsages::STORAGE;
- let buf = pool.get_buf(&proxy, usage, device);
+ let buf = pool.get_buf(proxy.size, proxy.name, usage, device);
v.insert(BindMapBuffer {
buffer: buf,
label: proxy.name,
@@ -685,7 +690,7 @@
Entry::Occupied(occupied) => Ok(&occupied.into_mut().buffer),
Entry::Vacant(vacant) => {
let usage = BufferUsages::COPY_SRC | BufferUsages::COPY_DST | BufferUsages::STORAGE;
- let buf = pool.get_buf(&proxy, usage, device);
+ let buf = pool.get_buf(proxy.size, proxy.name, usage, device);
Ok(&vacant
.insert(BindMapBuffer {
buffer: buf,
@@ -697,52 +702,22 @@
}
}
-pub struct DownloadsMapped<'a>(
- HashMap<
- Id,
- (
- BufferSlice<'a>,
- GenericOneshotReceiver<RawMutex, Result<(), BufferAsyncError>>,
- ),
- >,
-);
-
-impl Downloads {
- // Discussion: should API change so we get one buffer, rather than mapping all?
- pub fn map(&self) -> DownloadsMapped {
- let mut map = HashMap::new();
- for (id, buf) in &self.buf_map {
- let buf_slice = buf.slice(..);
- let (sender, receiver) = futures_intrusive::channel::shared::oneshot_channel();
- buf_slice.map_async(wgpu::MapMode::Read, move |v| sender.send(v).unwrap());
- map.insert(*id, (buf_slice, receiver));
- }
- DownloadsMapped(map)
- }
-}
-
-impl<'a> DownloadsMapped<'a> {
- pub async fn get_mapped(&self, proxy: BufProxy) -> Result<BufferView, Error> {
- let (slice, recv) = self.0.get(&proxy.id).ok_or("buffer not in map")?;
- if let Some(recv_result) = recv.receive().await {
- recv_result?;
- } else {
- return Err("channel was closed".into());
- }
- Ok(slice.get_mapped_range())
- }
-}
-
const SIZE_CLASS_BITS: u32 = 1;
impl ResourcePool {
/// Get a buffer from the pool or create one.
- fn get_buf(&mut self, proxy: &BufProxy, usage: BufferUsages, device: &Device) -> Buffer {
- let rounded_size = Self::size_class(proxy.size, SIZE_CLASS_BITS);
+ fn get_buf(
+ &mut self,
+ size: u64,
+ name: &'static str,
+ usage: BufferUsages,
+ device: &Device,
+ ) -> Buffer {
+ let rounded_size = Self::size_class(size, SIZE_CLASS_BITS);
let props = BufferProperties {
size: rounded_size,
usages: usage,
- name: proxy.name,
+ name: name,
};
if let Some(buf_vec) = self.bufs.get_mut(&props) {
if let Some(buf) = buf_vec.pop() {
@@ -751,7 +726,7 @@
}
device.create_buffer(&wgpu::BufferDescriptor {
#[cfg(feature = "buffer_labels")]
- label: Some(proxy.name),
+ label: Some(name),
#[cfg(not(feature = "buffer_labels"))]
label: None,
size: rounded_size,
diff --git a/src/lib.rs b/src/lib.rs
index 6dc7bb2..db2968f 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -29,9 +29,11 @@
pub mod glyph;
pub mod util;
+use render::Render;
pub use scene::{Scene, SceneBuilder, SceneFragment};
+pub use util::block_on_wgpu;
-use engine::{Engine, ExternalResource};
+use engine::{Engine, ExternalResource, Recording};
use shaders::FullShaders;
use wgpu::{Device, Queue, SurfaceTexture, TextureFormat, TextureView};
@@ -83,8 +85,7 @@
*target.as_image().unwrap(),
texture,
)];
- let _ = self
- .engine
+ self.engine
.run_recording(device, queue, &recording, &external_resources)?;
Ok(())
}
@@ -164,6 +165,105 @@
self.shaders = shaders;
Ok(())
}
+
+ /// Renders a scene to the target texture.
+ ///
+ /// The texture is assumed to be of the specified dimensions and have been created with
+ /// the [wgpu::TextureFormat::Rgba8Unorm] format and the [wgpu::TextureUsages::STORAGE_BINDING]
+ /// flag set.
+ ///
+ /// Rendering runs as two recordings: the coarse recording is run first, the downloaded
+ /// bump buffer is mapped and read back, and only then is the fine stage recorded and run
+ /// with `texture` bound as the output image.
+ ///
+ /// # Errors
+ ///
+ /// Returns an error if either recording fails to run, if mapping the bump buffer fails, or
+ /// if the channel carrying the mapping result is closed before a result arrives.
+ pub async fn render_to_texture_async(
+ &mut self,
+ device: &Device,
+ queue: &Queue,
+ scene: &Scene,
+ texture: &TextureView,
+ width: u32,
+ height: u32,
+ ) -> Result<()> {
+ let mut render = Render::new();
+ let encoding = scene.data();
+ let recording = render.render_encoding_coarse(encoding, &self.shaders, width, height);
+ // Both proxies must be fetched before `record_fine`, which takes the fine-stage state.
+ let target = render.out_image();
+ let bump_buf = render.bump_buf();
+ // The coarse recording needs no external resources.
+ self.engine.run_recording(device, queue, &recording, &[])?;
+ // Inside this block `bump_buf` is shadowed by the downloaded `Buffer`.
+ if let Some(bump_buf) = self.engine.get_download(bump_buf) {
+ let buf_slice = bump_buf.slice(..);
+ let (sender, receiver) = futures_intrusive::channel::shared::oneshot_channel();
+ // A failed send is ignored on purpose: if this future was dropped while the map was
+ // pending, the receiver is gone and nobody is left to notify, so the callback must not
+ // panic when it eventually runs.
+ buf_slice.map_async(wgpu::MapMode::Read, move |v| {
+ let _ = sender.send(v);
+ });
+ // NOTE(review): both early returns below skip `free_download`, so the entry stays in
+ // the engine's downloads map on failure.
+ if let Some(recv_result) = receiver.receive().await {
+ recv_result?;
+ } else {
+ return Err("channel was closed".into());
+ }
+ let mapped = buf_slice.get_mapped_range();
+ println!("{:?}", bytemuck::cast_slice::<_, u32>(&mapped));
+ }
+ // TODO: apply logic to determine whether we need to rerun coarse, and also
+ // allocate the blend stack as needed.
+ self.engine.free_download(bump_buf);
+ // Maybe clear to reuse allocation?
+ let mut recording = Recording::default();
+ render.record_fine(&self.shaders, &mut recording);
+ let external_resources = [ExternalResource::Image(target, texture)];
+ self.engine
+ .run_recording(device, queue, &recording, &external_resources)?;
+ Ok(())
+ }
+
+ /// Renders a scene to the given surface texture.
+ ///
+ /// The scene is first rendered into an intermediate target texture through
+ /// [`Self::render_to_texture_async`]; that texture is then drawn onto `surface` with the
+ /// blit pipeline. The intermediate target is kept in `self.target` between calls and is
+ /// recreated when `width` or `height` differ from its current size.
+ ///
+ /// # Errors
+ ///
+ /// Returns the error from `render_to_texture_async` if that step fails. In that case the
+ /// target taken from `self.target` is not stored back.
+ pub async fn render_to_surface_async(
+ &mut self,
+ device: &Device,
+ queue: &Queue,
+ scene: &Scene,
+ surface: &SurfaceTexture,
+ width: u32,
+ height: u32,
+ ) -> Result<()> {
+ // Take the cached target out of `self`; it is stored back at the end on success.
+ let mut target = self
+ .target
+ .take()
+ .unwrap_or_else(|| TargetTexture::new(device, width, height));
+ // TODO: implement clever resizing semantics here to avoid thrashing the memory allocator
+ // during resize, specifically on metal.
+ if target.width != width || target.height != height {
+ target = TargetTexture::new(device, width, height);
+ }
+ self.render_to_texture_async(device, queue, scene, &target.view, width, height)
+ .await?;
+ let mut encoder =
+ device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
+ // The surface view, bind group and render pass live only inside this block, which ends
+ // before the encoder is finished.
+ {
+ let surface_view = surface
+ .texture
+ .create_view(&wgpu::TextureViewDescriptor::default());
+ let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
+ label: None,
+ layout: &self.blit.bind_layout,
+ entries: &[wgpu::BindGroupEntry {
+ binding: 0,
+ resource: wgpu::BindingResource::TextureView(&target.view),
+ }],
+ });
+ let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
+ label: None,
+ color_attachments: &[Some(wgpu::RenderPassColorAttachment {
+ view: &surface_view,
+ resolve_target: None,
+ ops: wgpu::Operations {
+ load: wgpu::LoadOp::Clear(wgpu::Color::default()),
+ store: true,
+ },
+ })],
+ depth_stencil_attachment: None,
+ });
+ render_pass.set_pipeline(&self.blit.pipeline);
+ render_pass.set_bind_group(0, &bind_group, &[]);
+ // Six vertices, one instance — presumably two triangles covering the surface.
+ render_pass.draw(0..6, 0..1);
+ }
+ queue.submit(Some(encoder.finish()));
+ self.target = Some(target);
+ Ok(())
+ }
}
struct TargetTexture {
diff --git a/src/render.rs b/src/render.rs
index fa3d97b..710c77b 100644
--- a/src/render.rs
+++ b/src/render.rs
@@ -196,6 +196,8 @@
height: u32,
) -> (Recording, ResourceProxy) {
let mut render = Render::new();
+ // TODO: leaks the download of the bump buf; a good way to fix would be to conditionalize
+ // that download.
let mut recording = render.render_encoding_coarse(encoding, shaders, width, height);
let out_image = render.out_image();
render.record_fine(shaders, &mut recording);
@@ -524,13 +526,14 @@
info_bin_data_buf,
out_image,
});
+ recording.download(*bump_buf.as_buf().unwrap());
+ recording.free_resource(bump_buf);
recording
}
/// Run fine rasterization assuming the coarse phase succeeded.
pub fn record_fine(&mut self, shaders: &FullShaders, recording: &mut Recording) {
let fine = self.fine.take().unwrap();
- recording.free_resource(fine.bump_buf);
recording.dispatch(
shaders.fine,
(self.width_in_tiles, self.height_in_tiles, 1),
@@ -559,4 +562,8 @@
pub fn out_image(&self) -> ImageProxy {
self.fine.as_ref().unwrap().out_image
}
+
+ /// Returns the proxy of the bump buffer held in the pending fine-stage state.
+ ///
+ /// Must be called before `record_fine`, which takes that state out of `self.fine`.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `self.fine` is `None`, or if `as_buf()` does not yield a buffer proxy for the
+ /// stored resource.
+ pub fn bump_buf(&self) -> BufProxy {
+ *self.fine.as_ref().unwrap().bump_buf.as_buf().unwrap()
+ }
}
diff --git a/src/util.rs b/src/util.rs
index 0245d11..a75f804 100644
--- a/src/util.rs
+++ b/src/util.rs
@@ -16,6 +16,8 @@
//! Simple helpers for managing wgpu state and surfaces.
+use std::future::Future;
+
use super::Result;
use raw_window_handle::{HasRawDisplayHandle, HasRawWindowHandle};
@@ -132,3 +134,27 @@
pub config: SurfaceConfiguration,
pub dev_id: usize,
}
+
+/// A waker whose `wake` is a no-op.
+///
+/// `block_on_wgpu` builds its task context from this and re-polls its future in a loop, so
+/// no wake-up notification is needed.
+struct NullWake;
+
+impl std::task::Wake for NullWake {
+ fn wake(self: std::sync::Arc<Self>) {}
+}
+
+/// Block on a future, polling the device as needed.
+///
+/// This will deadlock if the future is awaiting anything other than GPU progress.
+///
+/// Returns the future's output once it resolves.
+pub fn block_on_wgpu<F: Future>(device: &Device, mut fut: F) -> F::Output {
+ // The waker does nothing: the loop below polls the future again after every device poll.
+ let waker = std::task::Waker::from(std::sync::Arc::new(NullWake));
+ let mut context = std::task::Context::from_waker(&waker);
+ // Same logic as `pin_mut!` macro from `pin_utils`.
+ // SAFETY: `fut` is owned by this function and is immediately shadowed by the pinned
+ // reference, so the original binding cannot be named or moved again before it is dropped
+ // at the end of the call.
+ let mut fut = unsafe { std::pin::Pin::new_unchecked(&mut fut) };
+ loop {
+ match fut.as_mut().poll(&mut context) {
+ std::task::Poll::Pending => {
+ // Let the device make progress, then poll the future again.
+ device.poll(wgpu::Maintain::Wait);
+ }
+ std::task::Poll::Ready(item) => break item,
+ }
+ }
+}