// Copyright 2023 the Vello Authors
// SPDX-License-Identifier: Apache-2.0 OR MIT
use std::borrow::Cow;
use std::cell::RefCell;
use std::collections::hash_map::Entry;
use std::collections::{HashMap, HashSet};
use vello_shaders::cpu::CpuBinding;
use wgpu::{
BindGroup, BindGroupLayout, Buffer, BufferUsages, CommandEncoder, CommandEncoderDescriptor,
ComputePipeline, Device, PipelineCompilationOptions, Queue, Texture, TextureAspect,
TextureUsages, TextureView, TextureViewDimension,
};
use crate::recording::BindType;
use crate::{
BufferProxy, Command, Error, ImageProxy, Recording, ResourceId, ResourceProxy, ShaderId,
};
#[cfg(not(target_arch = "wasm32"))]
struct UninitialisedShader {
wgsl: Cow<'static, str>,
label: &'static str,
entries: Vec<wgpu::BindGroupLayoutEntry>,
shader_id: ShaderId,
}
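/// An engine for executing `Recording`s against a wgpu `Device`, with optional CPU
/// fallback implementations of shaders.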
#[derive(Default)]
pub struct WgpuEngine {
shaders: Vec<Shader>,
pool: ResourcePool,
bind_map: BindMap,
downloads: HashMap<ResourceId, Buffer>,
#[cfg(not(target_arch = "wasm32"))]
shaders_to_initialise: Option<Vec<UninitialisedShader>>,
pub(crate) use_cpu: bool,
}
struct WgpuShader {
pipeline: ComputePipeline,
bind_group_layout: BindGroupLayout,
}
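/// Whether a CPU implementation of a shader is available when the engine runs in CPU mode.
///
/// `Present` carries the CPU implementation, `Missing` falls back to building the GPU
/// pipeline even in CPU mode, and `Skipped` registers a placeholder because the shader is
/// never dispatched in CPU mode.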
pub enum CpuShaderType {
Present(fn(u32, &[CpuBinding])),
Missing,
Skipped,
}
struct CpuShader {
shader: fn(u32, &[CpuBinding]),
}
enum ShaderKind<'a> {
Wgpu(&'a WgpuShader),
Cpu(&'a CpuShader),
}
struct Shader {
#[allow(dead_code)]
label: &'static str,
wgpu: Option<WgpuShader>,
cpu: Option<CpuShader>,
}
impl Shader {
fn select(&self) -> ShaderKind {
if let Some(cpu) = self.cpu.as_ref() {
ShaderKind::Cpu(cpu)
} else if let Some(wgpu) = self.wgpu.as_ref() {
ShaderKind::Wgpu(wgpu)
} else {
panic!("no available shader for {}", self.label)
}
}
}
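/// A resource supplied by the caller rather than created by the engine.
///
/// These are looked up by proxy id when a recording is run.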
pub enum ExternalResource<'a> {
#[allow(unused)]
Buffer(BufferProxy, &'a Buffer),
Image(ImageProxy, &'a TextureView),
}
/// A buffer can exist either on the GPU or on the CPU.
enum MaterializedBuffer {
Gpu(Buffer),
Cpu(RefCell<Vec<u8>>),
}
struct BindMapBuffer {
buffer: MaterializedBuffer,
#[cfg_attr(not(feature = "buffer_labels"), allow(unused))]
label: &'static str,
}
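/// Long-lived mapping from resource ids to their materialized buffers and images.
///
/// Entries persist across calls to `run_recording` until explicitly freed.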
#[derive(Default)]
struct BindMap {
buf_map: HashMap<ResourceId, BindMapBuffer>,
image_map: HashMap<ResourceId, (Texture, TextureView)>,
pending_clears: HashSet<ResourceId>,
}
#[derive(Hash, PartialEq, Eq)]
struct BufferProperties {
size: u64,
usages: BufferUsages,
#[cfg(feature = "buffer_labels")]
name: &'static str,
}
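/// A pool of idle GPU buffers, keyed by rounded size and usage, used to reduce allocation churn.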
#[derive(Default)]
struct ResourcePool {
bufs: HashMap<BufferProperties, Vec<Buffer>>,
}
/// The transient bind map contains short-lifetime resources.
///
/// In particular, it has resources scoped to a single call of
/// `run_recording()`, including external resources and also buffer
/// uploads.
#[derive(Default)]
struct TransientBindMap<'a> {
bufs: HashMap<ResourceId, TransientBuf<'a>>,
// TODO: create transient image type
images: HashMap<ResourceId, &'a TextureView>,
}
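/// A buffer with transient (single-recording) lifetime: either CPU-side bytes or a borrowed GPU buffer.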
enum TransientBuf<'a> {
Cpu(&'a [u8]),
Gpu(&'a Buffer),
}
impl WgpuEngine {
pub fn new(use_cpu: bool) -> WgpuEngine {
Self {
use_cpu,
..Default::default()
}
}
/// Enable deferred shader creation, so that shaders added after this call can be built in
/// parallel by `build_shaders_if_needed`.
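///
/// A minimal sketch of the intended flow (the `engine`, `device`, and shader registration
/// calls here are assumed to be provided by the caller):
///
/// ```ignore
/// engine.use_parallel_initialisation();
/// // ... calls to `engine.add_shader(...)` now queue shaders instead of building them ...
/// engine.build_shaders_if_needed(&device, None);
/// ```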
#[cfg(not(target_arch = "wasm32"))]
pub fn use_parallel_initialisation(&mut self) {
if self.shaders_to_initialise.is_some() {
return;
}
self.shaders_to_initialise = Some(Vec::new());
}
#[cfg(not(target_arch = "wasm32"))]
/// Initialise (in parallel) any shaders which are yet to be created
pub fn build_shaders_if_needed(
&mut self,
device: &Device,
num_threads: Option<std::num::NonZeroUsize>,
) {
use std::num::NonZeroUsize;
if let Some(mut new_shaders) = self.shaders_to_initialise.take() {
let num_threads = num_threads
.map(NonZeroUsize::get)
.unwrap_or_else(|| {
// Fall back onto a heuristic. This tries not to use all threads.
// We keep the main thread blocked and not doing much whilst this is running,
// so we broadly leave two cores unused at the point of maximum parallelism.
// (This choice is arbitrary, and could be tuned, although a 'proper' threadpool
// should probably be used instead.)
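// For example, an 8-core machine gets 6 worker threads here, while a machine reporting
// fewer than 4 cores (or one where the query fails) still gets 2.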
std::thread::available_parallelism().map_or(2, |it| it.get().max(4) - 2)
})
.min(new_shaders.len());
eprintln!("Initialising in parallel using {num_threads} threads");
let remainder = new_shaders.split_off(num_threads);
let (tx, rx) = std::sync::mpsc::channel::<(ShaderId, WgpuShader)>();
// We expect each initialisation to take much longer than acquiring a lock, so we just
// use a mutex for our work queue
let work_queue = std::sync::Mutex::new(remainder.into_iter());
let work_queue = &work_queue;
std::thread::scope(|scope| {
let tx = tx;
new_shaders
.into_iter()
.map(|it| {
let tx = tx.clone();
std::thread::Builder::new()
.name("Vello shader initialisation worker thread".into())
.spawn_scoped(scope, move || {
let shader = Self::create_compute_pipeline(
device, it.label, it.wgsl, it.entries,
);
// We know the rx can only be closed if all the tx references are dropped
tx.send((it.shader_id, shader)).unwrap();
while let Ok(mut guard) = work_queue.lock() {
if let Some(value) = guard.next() {
drop(guard);
let shader = Self::create_compute_pipeline(
device,
value.label,
value.wgsl,
value.entries,
);
tx.send((value.shader_id, shader)).unwrap();
} else {
break;
}
}
// Another thread panicked or we finished.
// If another thread panicked, we ignore that here and finish our processing
drop(tx);
})
.expect("failed to spawn thread");
})
.for_each(drop);
// Drop the initial sender, so that the receiver disconnects exactly when every worker
// thread has finished and dropped its own sender.
drop(tx);
while let Ok((id, value)) = rx.recv() {
self.shaders[id.0].wgpu = Some(value);
}
});
}
}
/// Add a shader.
///
/// This function is somewhat limited: it doesn't apply a label, only allows one bind group,
/// doesn't support push constants, and the entry point is hardcoded as "main".
///
/// Maybe we should do template instantiation here? But the shader compilation pipeline feels
/// like a somewhat separate concern.
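///
/// A minimal usage sketch (the `engine`, `device`, and `EXAMPLE_WGSL` here are assumed to be
/// provided by the caller):
///
/// ```ignore
/// let shader_id = engine.add_shader(
///     &device,
///     "example_stage",
///     Cow::Borrowed(EXAMPLE_WGSL),
///     &[BindType::Uniform, BindType::BufReadOnly, BindType::Buffer],
///     CpuShaderType::Missing,
/// );
/// ```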
pub fn add_shader(
&mut self,
device: &Device,
label: &'static str,
wgsl: Cow<'static, str>,
layout: &[BindType],
cpu_shader: CpuShaderType,
) -> ShaderId {
let mut add = |shader| {
let id = self.shaders.len();
self.shaders.push(shader);
ShaderId(id)
};
if self.use_cpu {
match cpu_shader {
CpuShaderType::Present(shader) => {
return add(Shader {
wgpu: None,
cpu: Some(CpuShader { shader }),
label,
});
}
// This shader is unused in CPU mode, so register a dummy entry with no implementation
CpuShaderType::Skipped => {
return add(Shader {
wgpu: None,
cpu: None,
label,
});
}
// Create a GPU shader as we don't have a CPU shader
CpuShaderType::Missing => {}
}
}
let entries = layout
.iter()
.enumerate()
.map(|(i, bind_type)| match bind_type {
BindType::Buffer | BindType::BufReadOnly => wgpu::BindGroupLayoutEntry {
binding: i as u32,
visibility: wgpu::ShaderStages::COMPUTE,
ty: wgpu::BindingType::Buffer {
ty: wgpu::BufferBindingType::Storage {
read_only: *bind_type == BindType::BufReadOnly,
},
has_dynamic_offset: false,
min_binding_size: None,
},
count: None,
},
BindType::Uniform => wgpu::BindGroupLayoutEntry {
binding: i as u32,
visibility: wgpu::ShaderStages::COMPUTE,
ty: wgpu::BindingType::Buffer {
ty: wgpu::BufferBindingType::Uniform,
has_dynamic_offset: false,
min_binding_size: None,
},
count: None,
},
BindType::Image(format) | BindType::ImageRead(format) => {
wgpu::BindGroupLayoutEntry {
binding: i as u32,
visibility: wgpu::ShaderStages::COMPUTE,
ty: if *bind_type == BindType::ImageRead(*format) {
wgpu::BindingType::Texture {
sample_type: wgpu::TextureSampleType::Float { filterable: true },
view_dimension: wgpu::TextureViewDimension::D2,
multisampled: false,
}
} else {
wgpu::BindingType::StorageTexture {
access: wgpu::StorageTextureAccess::WriteOnly,
format: format.to_wgpu(),
view_dimension: wgpu::TextureViewDimension::D2,
}
},
count: None,
}
}
})
.collect::<Vec<_>>();
#[cfg(not(target_arch = "wasm32"))]
if let Some(uninit) = self.shaders_to_initialise.as_mut() {
let id = add(Shader {
label,
wgpu: None,
cpu: None,
});
uninit.push(UninitialisedShader {
wgsl,
label,
entries,
shader_id: id,
});
return id;
}
let wgpu = Self::create_compute_pipeline(device, label, wgsl, entries);
add(Shader {
wgpu: Some(wgpu),
cpu: None,
label,
})
}
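/// Run a `Recording` on the given device and queue.
///
/// Creates any resources the recording needs (reusing pooled buffers where possible), uploads
/// CPU-side data, records and submits the GPU work, and runs any CPU shader fallbacks
/// synchronously. Resources in `external_resources` are used in place of engine-created ones.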
pub fn run_recording(
&mut self,
device: &Device,
queue: &Queue,
recording: &Recording,
external_resources: &[ExternalResource],
label: &'static str,
#[cfg(feature = "wgpu-profiler")] profiler: &mut wgpu_profiler::GpuProfiler,
) -> Result<(), Error> {
let mut free_bufs: HashSet<ResourceId> = Default::default();
let mut free_images: HashSet<ResourceId> = Default::default();
let mut transient_map = TransientBindMap::new(external_resources);
let mut encoder =
device.create_command_encoder(&CommandEncoderDescriptor { label: Some(label) });
#[cfg(feature = "wgpu-profiler")]
let query = profiler.begin_query(label, &mut encoder, device);
for command in &recording.commands {
match command {
Command::Upload(buf_proxy, bytes) => {
transient_map
.bufs
.insert(buf_proxy.id, TransientBuf::Cpu(bytes));
let usage =
BufferUsages::COPY_SRC | BufferUsages::COPY_DST | BufferUsages::STORAGE;
let buf = self
.pool
.get_buf(buf_proxy.size, buf_proxy.name, usage, device);
// TODO: if buffer is newly created, might be better to make it mapped at creation
// and copy. However, we expect reuse will be most common.
queue.write_buffer(&buf, 0, bytes);
self.bind_map.insert_buf(buf_proxy, buf);
}
Command::UploadUniform(buf_proxy, bytes) => {
transient_map
.bufs
.insert(buf_proxy.id, TransientBuf::Cpu(bytes));
let usage = BufferUsages::UNIFORM | BufferUsages::COPY_DST;
// Same consideration as above
let buf = self
.pool
.get_buf(buf_proxy.size, buf_proxy.name, usage, device);
queue.write_buffer(&buf, 0, bytes);
self.bind_map.insert_buf(buf_proxy, buf);
}
Command::UploadImage(image_proxy, bytes) => {
let format = image_proxy.format.to_wgpu();
let block_size = format
.block_copy_size(None)
.expect("ImageFormat must have a valid block size");
let texture = device.create_texture(&wgpu::TextureDescriptor {
label: None,
size: wgpu::Extent3d {
width: image_proxy.width,
height: image_proxy.height,
depth_or_array_layers: 1,
},
mip_level_count: 1,
sample_count: 1,
dimension: wgpu::TextureDimension::D2,
usage: TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_DST,
format,
view_formats: &[],
});
let texture_view = texture.create_view(&wgpu::TextureViewDescriptor {
label: None,
dimension: Some(TextureViewDimension::D2),
aspect: TextureAspect::All,
mip_level_count: None,
base_mip_level: 0,
base_array_layer: 0,
array_layer_count: None,
format: Some(format),
});
queue.write_texture(
wgpu::ImageCopyTexture {
texture: &texture,
mip_level: 0,
origin: wgpu::Origin3d { x: 0, y: 0, z: 0 },
aspect: TextureAspect::All,
},
bytes,
wgpu::ImageDataLayout {
offset: 0,
bytes_per_row: Some(image_proxy.width * block_size),
rows_per_image: None,
},
wgpu::Extent3d {
width: image_proxy.width,
height: image_proxy.height,
depth_or_array_layers: 1,
},
);
self.bind_map
.insert_image(image_proxy.id, texture, texture_view);
}
Command::WriteImage(proxy, [x, y, width, height], data) => {
if let Ok((texture, _)) = self.bind_map.get_or_create_image(*proxy, device) {
let format = proxy.format.to_wgpu();
let block_size = format
.block_copy_size(None)
.expect("ImageFormat must have a valid block size");
queue.write_texture(
wgpu::ImageCopyTexture {
texture,
mip_level: 0,
origin: wgpu::Origin3d { x: *x, y: *y, z: 0 },
aspect: TextureAspect::All,
},
&data[..],
wgpu::ImageDataLayout {
offset: 0,
bytes_per_row: Some(*width * block_size),
rows_per_image: None,
},
wgpu::Extent3d {
width: *width,
height: *height,
depth_or_array_layers: 1,
},
);
}
}
Command::Dispatch(shader_id, wg_size, bindings) => {
// println!("dispatching {:?} with {} bindings", wg_size, bindings.len());
let shader = &self.shaders[shader_id.0];
match shader.select() {
ShaderKind::Cpu(cpu_shader) => {
// The current strategy is to run the CPU shader synchronously. This
// works because there is currently the added constraint that data
// can only flow from CPU to GPU, not the other way around. If and
// when we implement that, we will need to defer the execution. Of
// course, we will also need to wire up more async synchronization
// mechanisms, as the CPU dispatch can't run until the preceding
// command buffer submission completes (and, in WebGPU, the async
// mapping operations on the buffers complete).
let resources =
transient_map.create_cpu_resources(&mut self.bind_map, bindings);
(cpu_shader.shader)(wg_size.0, &resources);
}
ShaderKind::Wgpu(wgpu_shader) => {
let bind_group = transient_map.create_bind_group(
&mut self.bind_map,
&mut self.pool,
device,
queue,
&mut encoder,
&wgpu_shader.bind_group_layout,
bindings,
)?;
let mut cpass = encoder.begin_compute_pass(&Default::default());
#[cfg(feature = "wgpu-profiler")]
let query = profiler
.begin_query(shader.label, &mut cpass, device)
.with_parent(Some(&query));
cpass.set_pipeline(&wgpu_shader.pipeline);
cpass.set_bind_group(0, &bind_group, &[]);
cpass.dispatch_workgroups(wg_size.0, wg_size.1, wg_size.2);
#[cfg(feature = "wgpu-profiler")]
profiler.end_query(&mut cpass, query);
}
}
}
Command::DispatchIndirect(shader_id, proxy, offset, bindings) => {
let shader = &self.shaders[shader_id.0];
match shader.select() {
ShaderKind::Cpu(cpu_shader) => {
// Same consideration as above about running the CPU shader synchronously.
let n_wg;
if let CpuBinding::BufferRW(b) = self.bind_map.get_cpu_buf(proxy.id) {
let slice = b.borrow();
let indirect: &[u32] = bytemuck::cast_slice(&slice);
n_wg = indirect[0];
} else {
panic!("indirect buffer missing from bind map");
}
let resources =
transient_map.create_cpu_resources(&mut self.bind_map, bindings);
(cpu_shader.shader)(n_wg, &resources);
}
ShaderKind::Wgpu(wgpu_shader) => {
let bind_group = transient_map.create_bind_group(
&mut self.bind_map,
&mut self.pool,
device,
queue,
&mut encoder,
&wgpu_shader.bind_group_layout,
bindings,
)?;
transient_map.materialize_gpu_buf_for_indirect(
&mut self.bind_map,
&mut self.pool,
device,
queue,
proxy,
);
let mut cpass = encoder.begin_compute_pass(&Default::default());
#[cfg(feature = "wgpu-profiler")]
let query = profiler
.begin_query(shader.label, &mut cpass, device)
.with_parent(Some(&query));
cpass.set_pipeline(&wgpu_shader.pipeline);
cpass.set_bind_group(0, &bind_group, &[]);
let buf = self
.bind_map
.get_gpu_buf(proxy.id)
.ok_or("buffer for indirect dispatch not in map")?;
cpass.dispatch_workgroups_indirect(buf, *offset);
#[cfg(feature = "wgpu-profiler")]
profiler.end_query(&mut cpass, query);
}
}
}
Command::Download(proxy) => {
let src_buf = self
.bind_map
.get_gpu_buf(proxy.id)
.ok_or("buffer not in map")?;
let usage = BufferUsages::MAP_READ | BufferUsages::COPY_DST;
let buf = self.pool.get_buf(proxy.size, "download", usage, device);
encoder.copy_buffer_to_buffer(src_buf, 0, &buf, 0, proxy.size);
self.downloads.insert(proxy.id, buf);
}
Command::Clear(proxy, offset, size) => {
if let Some(buf) = self.bind_map.get_buf(*proxy) {
match &buf.buffer {
MaterializedBuffer::Gpu(b) => encoder.clear_buffer(b, *offset, *size),
MaterializedBuffer::Cpu(b) => {
let mut slice = &mut b.borrow_mut()[*offset as usize..];
if let Some(size) = size {
slice = &mut slice[..*size as usize];
}
slice.fill(0);
}
}
} else {
self.bind_map.pending_clears.insert(proxy.id);
}
}
Command::FreeBuffer(proxy) => {
free_bufs.insert(proxy.id);
}
Command::FreeImage(proxy) => {
free_images.insert(proxy.id);
}
}
}
#[cfg(feature = "wgpu-profiler")]
profiler.end_query(&mut encoder, query);
queue.submit(Some(encoder.finish()));
for id in free_bufs {
if let Some(buf) = self.bind_map.buf_map.remove(&id) {
if let MaterializedBuffer::Gpu(gpu_buf) = buf.buffer {
let props = BufferProperties {
size: gpu_buf.size(),
usages: gpu_buf.usage(),
#[cfg(feature = "buffer_labels")]
name: buf.label,
};
self.pool.bufs.entry(props).or_default().push(gpu_buf);
}
}
}
for id in free_images {
if let Some((texture, view)) = self.bind_map.image_map.remove(&id) {
// TODO: have a pool to avoid needless re-allocation
drop(texture);
drop(view);
}
}
Ok(())
}
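/// Get the staging buffer that a `Command::Download` in a previous recording copied the
/// proxy's contents into, if any.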
pub fn get_download(&self, buf: BufferProxy) -> Option<&Buffer> {
self.downloads.get(&buf.id)
}
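/// Release the download staging buffer associated with the given proxy, if present.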
pub fn free_download(&mut self, buf: BufferProxy) {
self.downloads.remove(&buf.id);
}
fn create_compute_pipeline(
device: &Device,
label: &str,
wgsl: Cow<'_, str>,
entries: Vec<wgpu::BindGroupLayoutEntry>,
) -> WgpuShader {
let shader_module = device.create_shader_module(wgpu::ShaderModuleDescriptor {
label: Some(label),
source: wgpu::ShaderSource::Wgsl(wgsl),
});
let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
label: None,
entries: &entries,
});
let compute_pipeline_layout =
device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
label: None,
bind_group_layouts: &[&bind_group_layout],
push_constant_ranges: &[],
});
let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
label: Some(label),
layout: Some(&compute_pipeline_layout),
module: &shader_module,
entry_point: "main",
compilation_options: PipelineCompilationOptions {
zero_initialize_workgroup_memory: false,
..Default::default()
},
});
WgpuShader {
pipeline,
bind_group_layout,
}
}
}
impl BindMap {
fn insert_buf(&mut self, proxy: &BufferProxy, buffer: Buffer) {
self.buf_map.insert(
proxy.id,
BindMapBuffer {
buffer: MaterializedBuffer::Gpu(buffer),
label: proxy.name,
},
);
}
/// Get a buffer, only if it's on the GPU.
fn get_gpu_buf(&self, id: ResourceId) -> Option<&Buffer> {
self.buf_map.get(&id).and_then(|b| match &b.buffer {
MaterializedBuffer::Gpu(b) => Some(b),
_ => None,
})
}
/// Get a CPU buffer.
///
/// Panics if the buffer is not present or is on the GPU.
fn get_cpu_buf(&self, id: ResourceId) -> CpuBinding {
match &self.buf_map[&id].buffer {
MaterializedBuffer::Cpu(b) => CpuBinding::BufferRW(b),
_ => panic!("getting cpu buffer, but it's on gpu"),
}
}
fn materialize_cpu_buf(&mut self, buf: &BufferProxy) {
self.buf_map.entry(buf.id).or_insert_with(|| {
let buffer = MaterializedBuffer::Cpu(RefCell::new(vec![0; buf.size as usize]));
BindMapBuffer {
buffer,
// TODO: do we need to cfg this?
label: buf.name,
}
});
}
fn insert_image(&mut self, id: ResourceId, image: Texture, image_view: TextureView) {
self.image_map.insert(id, (image, image_view));
}
fn get_buf(&mut self, proxy: BufferProxy) -> Option<&BindMapBuffer> {
self.buf_map.get(&proxy.id)
}
fn get_or_create_image(
&mut self,
proxy: ImageProxy,
device: &Device,
) -> Result<&(Texture, TextureView), Error> {
match self.image_map.entry(proxy.id) {
Entry::Occupied(occupied) => Ok(occupied.into_mut()),
Entry::Vacant(vacant) => {
let format = proxy.format.to_wgpu();
let texture = device.create_texture(&wgpu::TextureDescriptor {
label: None,
size: wgpu::Extent3d {
width: proxy.width,
height: proxy.height,
depth_or_array_layers: 1,
},
mip_level_count: 1,
sample_count: 1,
dimension: wgpu::TextureDimension::D2,
usage: TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_DST,
format,
view_formats: &[],
});
let texture_view = texture.create_view(&wgpu::TextureViewDescriptor {
label: None,
dimension: Some(TextureViewDimension::D2),
aspect: TextureAspect::All,
mip_level_count: None,
base_mip_level: 0,
base_array_layer: 0,
array_layer_count: None,
format: Some(proxy.format.to_wgpu()),
});
Ok(vacant.insert((texture, texture_view)))
}
}
}
}
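/// Granularity of the buffer pool's size classes; see `ResourcePool::size_class`.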
const SIZE_CLASS_BITS: u32 = 1;
impl ResourcePool {
/// Get a buffer from the pool or create one.
fn get_buf(
&mut self,
size: u64,
#[allow(unused)] name: &'static str,
usage: BufferUsages,
device: &Device,
) -> Buffer {
let rounded_size = Self::size_class(size, SIZE_CLASS_BITS);
let props = BufferProperties {
size: rounded_size,
usages: usage,
#[cfg(feature = "buffer_labels")]
name,
};
if let Some(buf_vec) = self.bufs.get_mut(&props) {
if let Some(buf) = buf_vec.pop() {
return buf;
}
}
device.create_buffer(&wgpu::BufferDescriptor {
#[cfg(feature = "buffer_labels")]
label: Some(name),
#[cfg(not(feature = "buffer_labels"))]
label: None,
size: rounded_size,
usage,
mapped_at_creation: false,
})
}
/// Quantize a size up to the nearest size class.
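///
/// With `bits = 1` (the value of `SIZE_CLASS_BITS`), sizes round up to the next value with at
/// most two significant binary digits (..., 1024, 1536, 2048, 3072, ...); for example, a
/// request of 1025 bytes yields 1536. A pooled buffer is therefore at most roughly 50% larger
/// than requested.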
fn size_class(x: u64, bits: u32) -> u64 {
if x > 1 << bits {
let a = (x - 1).leading_zeros();
let b = (x - 1) | (((u64::MAX / 2) >> bits) >> a);
b + 1
} else {
1 << bits
}
}
}
impl BindMapBuffer {
// Upload a buffer from CPU to GPU if needed.
//
// Note data flow is one way only, from CPU to GPU. Once this method is
// called, the buffer is no longer materialized on CPU, and cannot be
// accessed from a CPU shader.
fn upload_if_needed(
&mut self,
proxy: &BufferProxy,
device: &Device,
queue: &Queue,
pool: &mut ResourcePool,
) {
if let MaterializedBuffer::Cpu(cpu_buf) = &self.buffer {
let usage = BufferUsages::COPY_SRC
| BufferUsages::COPY_DST
| BufferUsages::STORAGE
| BufferUsages::INDIRECT;
let buf = pool.get_buf(proxy.size, proxy.name, usage, device);
queue.write_buffer(&buf, 0, &cpu_buf.borrow());
self.buffer = MaterializedBuffer::Gpu(buf);
}
}
}
impl<'a> TransientBindMap<'a> {
/// Create a new transient bind map, seeded from external resources.
fn new(external_resources: &'a [ExternalResource]) -> Self {
let mut bufs = HashMap::default();
let mut images = HashMap::default();
for resource in external_resources {
match resource {
ExternalResource::Buffer(proxy, gpu_buf) => {
bufs.insert(proxy.id, TransientBuf::Gpu(gpu_buf));
}
ExternalResource::Image(proxy, gpu_image) => {
images.insert(proxy.id, *gpu_image);
}
}
}
TransientBindMap { bufs, images }
}
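/// Ensure the buffer used for an indirect dispatch is materialized on the GPU, uploading its
/// contents from the CPU if it currently lives there.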
fn materialize_gpu_buf_for_indirect(
&mut self,
bind_map: &mut BindMap,
pool: &mut ResourcePool,
device: &Device,
queue: &Queue,
buf: &BufferProxy,
) {
if !self.bufs.contains_key(&buf.id) {
if let Some(b) = bind_map.buf_map.get_mut(&buf.id) {
b.upload_if_needed(buf, device, queue, pool);
}
}
}
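/// Create a bind group for the given bindings, materializing any buffers and images that do
/// not exist yet and uploading CPU-resident buffers to the GPU.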
#[allow(clippy::too_many_arguments)]
fn create_bind_group(
&mut self,
bind_map: &mut BindMap,
pool: &mut ResourcePool,
device: &Device,
queue: &Queue,
encoder: &mut CommandEncoder,
layout: &BindGroupLayout,
bindings: &[ResourceProxy],
) -> Result<BindGroup, Error> {
for proxy in bindings {
match proxy {
ResourceProxy::Buffer(proxy) => {
if self.bufs.contains_key(&proxy.id) {
continue;
}
match bind_map.buf_map.entry(proxy.id) {
Entry::Vacant(v) => {
// TODO: only some buffers will need indirect, but does it hurt?
let usage = BufferUsages::COPY_SRC
| BufferUsages::COPY_DST
| BufferUsages::STORAGE
| BufferUsages::INDIRECT;
let buf = pool.get_buf(proxy.size, proxy.name, usage, device);
if bind_map.pending_clears.remove(&proxy.id) {
encoder.clear_buffer(&buf, 0, None);
}
v.insert(BindMapBuffer {
buffer: MaterializedBuffer::Gpu(buf),
label: proxy.name,
});
}
Entry::Occupied(mut o) => {
o.get_mut().upload_if_needed(proxy, device, queue, pool);
}
}
}
ResourceProxy::Image(proxy) => {
if self.images.contains_key(&proxy.id) {
continue;
}
if let Entry::Vacant(v) = bind_map.image_map.entry(proxy.id) {
let format = proxy.format.to_wgpu();
let texture = device.create_texture(&wgpu::TextureDescriptor {
label: None,
size: wgpu::Extent3d {
width: proxy.width,
height: proxy.height,
depth_or_array_layers: 1,
},
mip_level_count: 1,
sample_count: 1,
dimension: wgpu::TextureDimension::D2,
usage: TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_DST,
format,
view_formats: &[],
});
let texture_view = texture.create_view(&wgpu::TextureViewDescriptor {
label: None,
dimension: Some(TextureViewDimension::D2),
aspect: TextureAspect::All,
mip_level_count: None,
base_mip_level: 0,
base_array_layer: 0,
array_layer_count: None,
format: Some(proxy.format.to_wgpu()),
});
v.insert((texture, texture_view));
}
}
}
}
let entries = bindings
.iter()
.enumerate()
.map(|(i, proxy)| match proxy {
ResourceProxy::Buffer(proxy) => {
let buf = match self.bufs.get(&proxy.id) {
Some(TransientBuf::Gpu(b)) => b,
_ => bind_map.get_gpu_buf(proxy.id).unwrap(),
};
Ok(wgpu::BindGroupEntry {
binding: i as u32,
resource: buf.as_entire_binding(),
})
}
ResourceProxy::Image(proxy) => {
let view = self
.images
.get(&proxy.id)
.copied()
.or_else(|| bind_map.image_map.get(&proxy.id).map(|v| &v.1))
.unwrap();
Ok(wgpu::BindGroupEntry {
binding: i as u32,
resource: wgpu::BindingResource::TextureView(view),
})
}
})
.collect::<Result<Vec<_>, Error>>()?;
let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
label: None,
layout,
entries: &entries,
});
Ok(bind_group)
}
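/// Resolve the given bindings into CPU-side resources, materializing CPU buffers on demand.
///
/// Panics if a binding has already been materialized on the GPU.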
fn create_cpu_resources(
&self,
bind_map: &'a mut BindMap,
bindings: &[ResourceProxy],
) -> Vec<CpuBinding> {
// First pass is mutable; create buffers as needed
for resource in bindings {
match resource {
ResourceProxy::Buffer(buf) => match self.bufs.get(&buf.id) {
Some(TransientBuf::Cpu(_)) => (),
Some(TransientBuf::Gpu(_)) => panic!("buffer was already materialized on GPU"),
_ => bind_map.materialize_cpu_buf(buf),
},
ResourceProxy::Image(_) => todo!(),
};
}
// Second pass takes immutable references
bindings
.iter()
.map(|resource| match resource {
ResourceProxy::Buffer(buf) => match self.bufs.get(&buf.id) {
Some(TransientBuf::Cpu(b)) => CpuBinding::Buffer(b),
_ => bind_map.get_cpu_buf(buf.id),
},
ResourceProxy::Image(_) => todo!(),
})
.collect()
}
}