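//! The top-level renderer: GPU resource management and the sequence of
//! compute stages that turn an encoded scene into a rendered image.
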
mod gradient;
pub mod mmark;
mod pico_svg;
mod render_ctx;
pub mod test_scenes;
mod text;

use std::convert::TryInto;

pub use render_ctx::PietGpuRenderContext;

use piet::kurbo::Vec2;
use piet::{ImageFormat, RenderContext};

use piet_gpu_types::encoder::Encode;

use piet_gpu_hal::{
    Buffer, BufferUsage, CmdBuf, DescriptorSet, Error, Image, ImageLayout, Pipeline, QueryPool,
    Session, ShaderCode,
};

use pico_svg::PicoSvg;

const TILE_W: usize = 16;
const TILE_H: usize = 16;
// Initial allocation, in bytes, per tile for the per-tile command list (PTCL).
const PTCL_INITIAL_ALLOC: usize = 1024;
const MAX_BLEND_STACK: usize = 128;

/// Dump a byte buffer as little-endian u32 words, for debugging.
#[allow(unused)]
fn dump_scene(buf: &[u8]) {
    for (i, chunk) in buf.chunks_exact(4).enumerate() {
        println!("{:4x}: {:8x}", i * 4, u32::from_le_bytes(chunk.try_into().unwrap()));
    }
}

/// Dump the nonzero words of a u32 buffer, for debugging.
#[allow(unused)]
pub fn dump_k1_data(k1_buf: &[u32]) {
    for (i, &word) in k1_buf.iter().enumerate() {
        if word != 0 {
            println!("{:4x}: {:8x}", i * 4, word);
        }
    }
}

pub struct Renderer {
// These sizes are aligned to tile boundaries, though at some point
// we'll want to have a good strategy for dealing with odd sizes.
width: usize,
height: usize,
pub image_dev: Image, // resulting image
// The reference is held by the pipelines. We will be changing
// this to make the scene upload dynamic.
scene_bufs: Vec<Buffer>,
memory_buf_host: Vec<Buffer>,
memory_buf_dev: Buffer,
state_buf: Buffer,
// Staging buffers
config_bufs: Vec<Buffer>,
// Device config buf
config_buf: Buffer,
el_pipeline: Pipeline,
el_ds: Vec<DescriptorSet>,
tile_pipeline: Pipeline,
tile_ds: DescriptorSet,
path_pipeline: Pipeline,
path_ds: DescriptorSet,
backdrop_pipeline: Pipeline,
backdrop_ds: DescriptorSet,
bin_pipeline: Pipeline,
bin_ds: DescriptorSet,
coarse_pipeline: Pipeline,
coarse_ds: DescriptorSet,
k4_pipeline: Pipeline,
k4_ds: DescriptorSet,
n_elements: usize,
n_paths: usize,
n_pathseg: usize,
// Keep a reference to the image so that it is not destroyed.
_bg_image: Image,
gradient_bufs: Vec<Buffer>,
gradients: Image,
}

impl Renderer {
    /// Create a new renderer.
    ///
    /// `n_bufs` is the number of staging-buffer sets to create, one per
    /// frame that may be in flight.
pub unsafe fn new(
session: &Session,
width: usize,
height: usize,
n_bufs: usize,
) -> Result<Self, Error> {
// For now, round up to tile alignment
        let width = width + (width.wrapping_neg() & (TILE_W - 1));
        let height = height + (height.wrapping_neg() & (TILE_H - 1));
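        // For a power-of-two tile size T, x.wrapping_neg() & (T - 1) is the
        // distance from x up to the next multiple of T; e.g. a width of 500
        // rounds up to 500 + 12 = 512 with TILE_W = 16.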
let dev = BufferUsage::STORAGE | BufferUsage::COPY_DST;
let host_upload = BufferUsage::MAP_WRITE | BufferUsage::COPY_SRC;
        // This may be inadequate for very complex scenes (e.g. the paris-30k
        // test scene).
// TODO: separate staging buffer (if needed)
let scene_bufs = (0..n_bufs)
.map(|_| session.create_buffer(8 * 1024 * 1024, host_upload).unwrap())
.collect();
        let state_buf = session.create_buffer(1024 * 1024, dev)?;
let image_dev = session.create_image2d(width as u32, height as u32)?;
// Note: this must be updated when the config struct size changes.
const CONFIG_BUFFER_SIZE: u64 = 40;
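        // (Ten u32 words; see the `config` slice in `upload_render_ctx`.)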
let config_buf = session.create_buffer(CONFIG_BUFFER_SIZE, dev).unwrap();
// TODO: separate staging buffer (if needed)
let config_bufs = (0..n_bufs)
.map(|_| {
session
.create_buffer(CONFIG_BUFFER_SIZE, host_upload)
.unwrap()
})
.collect();
let memory_buf_host = (0..n_bufs)
.map(|_| session.create_buffer(2 * 4, host_upload).unwrap())
.collect();
let memory_buf_dev = session.create_buffer(128 * 1024 * 1024, dev)?;
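        // The kernels bump-allocate from this arena; its first two words (the
        // allocation offset and an overflow flag) are staged in
        // `memory_buf_host` each frame.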
let el_code = ShaderCode::Spv(include_bytes!("../shader/elements.spv"));
let el_pipeline = session.create_simple_compute_pipeline(el_code, 4)?;
let mut el_ds = Vec::with_capacity(n_bufs);
for scene_buf in &scene_bufs {
el_ds.push(session.create_simple_descriptor_set(
&el_pipeline,
&[&memory_buf_dev, &config_buf, scene_buf, &state_buf],
)?);
}
let tile_alloc_code = ShaderCode::Spv(include_bytes!("../shader/tile_alloc.spv"));
let tile_pipeline = session.create_simple_compute_pipeline(tile_alloc_code, 2)?;
let tile_ds = session
.create_simple_descriptor_set(&tile_pipeline, &[&memory_buf_dev, &config_buf])?;
let path_alloc_code = ShaderCode::Spv(include_bytes!("../shader/path_coarse.spv"));
let path_pipeline = session.create_simple_compute_pipeline(path_alloc_code, 2)?;
let path_ds = session
.create_simple_descriptor_set(&path_pipeline, &[&memory_buf_dev, &config_buf])?;
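        // The large backdrop variant needs workgroups of 1024 invocations;
        // fall back to the small variant on devices with a lower limit.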
let backdrop_code = if session.gpu_info().workgroup_limits.max_invocations >= 1024 {
ShaderCode::Spv(include_bytes!("../shader/backdrop_lg.spv"))
} else {
println!("using small workgroup backdrop kernel");
ShaderCode::Spv(include_bytes!("../shader/backdrop.spv"))
};
let backdrop_pipeline = session.create_simple_compute_pipeline(backdrop_code, 2)?;
let backdrop_ds = session
.create_simple_descriptor_set(&backdrop_pipeline, &[&memory_buf_dev, &config_buf])?;
// TODO: constants
let bin_code = ShaderCode::Spv(include_bytes!("../shader/binning.spv"));
let bin_pipeline = session.create_simple_compute_pipeline(bin_code, 2)?;
let bin_ds =
session.create_simple_descriptor_set(&bin_pipeline, &[&memory_buf_dev, &config_buf])?;
let coarse_code = ShaderCode::Spv(include_bytes!("../shader/coarse.spv"));
let coarse_pipeline = session.create_simple_compute_pipeline(coarse_code, 2)?;
let coarse_ds = session
.create_simple_descriptor_set(&coarse_pipeline, &[&memory_buf_dev, &config_buf])?;
let bg_image = Self::make_test_bg_image(&session);
const GRADIENT_BUF_SIZE: usize =
crate::gradient::N_GRADIENTS * crate::gradient::N_SAMPLES * 4;
let gradient_bufs = (0..n_bufs)
.map(|_| {
session
.create_buffer(GRADIENT_BUF_SIZE as u64, host_upload)
.unwrap()
})
.collect();
let gradients = Self::make_gradient_image(&session);
let k4_code = ShaderCode::Spv(include_bytes!("../shader/kernel4.spv"));
// This is a bit of a stand-in for future development. For now, we assume one
// atlas image for all images, and another image for the gradients. In the future,
// on GPUs that support it, we will probably want to go to descriptor indexing in
// order to cut down on allocation and copying for the atlas image.
let max_textures = 2;
let k4_pipeline = session
.pipeline_builder()
.add_buffers(2)
.add_images(1)
.add_textures(max_textures)
.create_compute_pipeline(&session, k4_code)?;
let k4_ds = session
.descriptor_set_builder()
.add_buffers(&[&memory_buf_dev, &config_buf])
.add_images(&[&image_dev])
.add_textures(&[&bg_image, &gradients])
.build(&session, &k4_pipeline)?;
Ok(Renderer {
width,
height,
scene_bufs,
memory_buf_host,
memory_buf_dev,
state_buf,
config_buf,
config_bufs,
image_dev,
el_pipeline,
el_ds,
tile_pipeline,
tile_ds,
path_pipeline,
path_ds,
backdrop_pipeline,
backdrop_ds,
bin_pipeline,
bin_ds,
coarse_pipeline,
coarse_ds,
k4_pipeline,
k4_ds,
n_elements: 0,
n_paths: 0,
n_pathseg: 0,
_bg_image: bg_image,
gradient_bufs,
gradients,
})
}

    /// Convert the scene in the render context to GPU resources.
///
/// At present, this requires that any command buffer submission has completed.
/// A future evolution will handle staging of the next frame's scene while the
/// rendering of the current frame is in flight.
pub fn upload_render_ctx(
&mut self,
render_ctx: &mut PietGpuRenderContext,
buf_ix: usize,
) -> Result<(), Error> {
let n_paths = render_ctx.path_count();
let n_pathseg = render_ctx.pathseg_count();
let n_trans = render_ctx.trans_count();
self.n_paths = n_paths;
self.n_pathseg = n_pathseg;
// These constants depend on encoding and may need to be updated.
// Perhaps we can plumb these from piet-gpu-derive?
const PATH_SIZE: usize = 12;
const BIN_SIZE: usize = 8;
const PATHSEG_SIZE: usize = 52;
const ANNO_SIZE: usize = 40;
const TRANS_SIZE: usize = 24;
let width_in_tiles = self.width / TILE_W;
let height_in_tiles = self.height / TILE_H;
let mut alloc = 0;
let tile_base = alloc;
alloc += ((n_paths + 3) & !3) * PATH_SIZE;
let bin_base = alloc;
alloc += ((n_paths + 255) & !255) * BIN_SIZE;
let ptcl_base = alloc;
alloc += width_in_tiles * height_in_tiles * PTCL_INITIAL_ALLOC;
let pathseg_base = alloc;
alloc += (n_pathseg * PATHSEG_SIZE + 3) & !3;
let anno_base = alloc;
alloc += (n_paths * ANNO_SIZE + 3) & !3;
let trans_base = alloc;
alloc += (n_trans * TRANS_SIZE + 3) & !3;
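        // Layout of memory_buf_dev, as byte offsets: tiles at tile_base,
        // binning data at bin_base, per-tile command lists at ptcl_base, path
        // segments at pathseg_base, annotated paths at anno_base, and
        // transforms at trans_base. Everything above `alloc` is free space
        // for the kernels to allocate from.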
let config = &[
n_paths as u32,
n_pathseg as u32,
width_in_tiles as u32,
height_in_tiles as u32,
tile_base as u32,
bin_base as u32,
ptcl_base as u32,
pathseg_base as u32,
anno_base as u32,
trans_base as u32,
];
unsafe {
let scene = render_ctx.get_scene_buf();
self.n_elements = scene.len() / piet_gpu_types::scene::Element::fixed_size();
// TODO: reallocate scene buffer if size is inadequate
assert!(self.scene_bufs[buf_ix].size() as usize >= scene.len());
self.scene_bufs[buf_ix].write(scene)?;
self.config_bufs[buf_ix].write(config)?;
self.memory_buf_host[buf_ix].write(&[alloc as u32, 0 /* Overflow flag */])?;
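            // The first word is the initial bump-allocator offset (the static
            // layout computed above); the second is the overflow flag.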
// Upload gradient data.
let ramp_data = render_ctx.get_ramp_data();
if !ramp_data.is_empty() {
assert!(
self.gradient_bufs[buf_ix].size() as usize
>= std::mem::size_of_val(&*ramp_data)
);
self.gradient_bufs[buf_ix].write(&ramp_data)?;
}
}
Ok(())
}
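
    /// Record the commands for rendering one frame into `cmd_buf`, writing a
    /// timestamp to `query_pool` after each pipeline stage.
    ///
    /// A sketch of the intended per-frame sequence; the session, query pool,
    /// and render-context plumbing here are illustrative, not part of this
    /// method's contract:
    ///
    /// ```ignore
    /// renderer.upload_render_ctx(&mut render_ctx, buf_ix)?;
    /// let mut cmd_buf = session.cmd_buf()?;
    /// unsafe {
    ///     cmd_buf.begin();
    ///     renderer.record(&mut cmd_buf, &query_pool, buf_ix);
    ///     cmd_buf.finish();
    /// }
    /// ```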
pub unsafe fn record(&self, cmd_buf: &mut CmdBuf, query_pool: &QueryPool, buf_ix: usize) {
cmd_buf.copy_buffer(&self.config_bufs[buf_ix], &self.config_buf);
cmd_buf.copy_buffer(&self.memory_buf_host[buf_ix], &self.memory_buf_dev);
cmd_buf.clear_buffer(&self.state_buf, None);
cmd_buf.memory_barrier();
cmd_buf.image_barrier(
&self.image_dev,
ImageLayout::Undefined,
ImageLayout::General,
);
// TODO: make gradient upload optional, only if it's changed
cmd_buf.image_barrier(
&self.gradients,
ImageLayout::Undefined,
ImageLayout::BlitDst,
);
cmd_buf.copy_buffer_to_image(&self.gradient_bufs[buf_ix], &self.gradients);
cmd_buf.image_barrier(&self.gradients, ImageLayout::BlitDst, ImageLayout::General);
cmd_buf.reset_query_pool(&query_pool);
cmd_buf.write_timestamp(&query_pool, 0);
cmd_buf.dispatch(
&self.el_pipeline,
&self.el_ds[buf_ix],
(((self.n_elements + 127) / 128) as u32, 1, 1),
(128, 1, 1),
);
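        // (n + 127) / 128 is ceil(n / 128): one 128-thread workgroup per 128
        // scene elements. The later dispatches size their grids the same way.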
cmd_buf.write_timestamp(&query_pool, 1);
cmd_buf.memory_barrier();
cmd_buf.dispatch(
&self.tile_pipeline,
&self.tile_ds,
(((self.n_paths + 255) / 256) as u32, 1, 1),
(256, 1, 1),
);
cmd_buf.write_timestamp(&query_pool, 2);
cmd_buf.memory_barrier();
cmd_buf.dispatch(
&self.path_pipeline,
&self.path_ds,
(((self.n_pathseg + 31) / 32) as u32, 1, 1),
(32, 1, 1),
);
cmd_buf.write_timestamp(&query_pool, 3);
cmd_buf.memory_barrier();
cmd_buf.dispatch(
&self.backdrop_pipeline,
&self.backdrop_ds,
(((self.n_paths + 255) / 256) as u32, 1, 1),
(256, 1, 1),
);
cmd_buf.write_timestamp(&query_pool, 4);
// Note: this barrier is not needed as an actual dependency between
// pipeline stages, but I am keeping it in so that timer queries are
// easier to interpret.
cmd_buf.memory_barrier();
cmd_buf.dispatch(
&self.bin_pipeline,
&self.bin_ds,
(((self.n_paths + 255) / 256) as u32, 1, 1),
(256, 1, 1),
);
cmd_buf.write_timestamp(&query_pool, 5);
cmd_buf.memory_barrier();
cmd_buf.dispatch(
&self.coarse_pipeline,
&self.coarse_ds,
(
(self.width as u32 + 255) / 256,
(self.height as u32 + 255) / 256,
1,
),
            (256, 1, 1),
);
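        // Each coarse workgroup covers one 256x256-pixel bin (16x16 tiles of
        // 16x16 pixels), hence the division of the surface size by 256.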
cmd_buf.write_timestamp(&query_pool, 6);
cmd_buf.memory_barrier();
cmd_buf.dispatch(
&self.k4_pipeline,
&self.k4_ds,
(
(self.width / TILE_W) as u32,
(self.height / TILE_H) as u32,
1,
),
(8, 4, 1),
);
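        // One workgroup per 16x16 tile; with 8 * 4 = 32 threads per group,
        // each thread presumably shades 256 / 32 = 8 pixels.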
cmd_buf.write_timestamp(&query_pool, 7);
cmd_buf.memory_barrier();
cmd_buf.image_barrier(&self.image_dev, ImageLayout::General, ImageLayout::BlitSrc);
}
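
    /// Create a device image and upload RGBA premultiplied pixel data to it.
    ///
    /// For example (a hypothetical 1x1 white image):
    ///
    /// ```ignore
    /// let pixels = [0xffu8; 4];
    /// let image = Renderer::make_image(&session, 1, 1, &pixels, ImageFormat::RgbaPremul)?;
    /// ```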
pub fn make_image(
session: &Session,
width: usize,
height: usize,
buf: &[u8],
format: ImageFormat,
) -> Result<Image, Error> {
unsafe {
if format != ImageFormat::RgbaPremul {
return Err("unsupported image format".into());
}
            let buffer = session.create_buffer_init(buf, BufferUsage::COPY_SRC)?;
let image = session.create_image2d(width.try_into()?, height.try_into()?)?;
let mut cmd_buf = session.cmd_buf()?;
cmd_buf.begin();
cmd_buf.image_barrier(&image, ImageLayout::Undefined, ImageLayout::BlitDst);
cmd_buf.copy_buffer_to_image(&buffer, &image);
cmd_buf.image_barrier(&image, ImageLayout::BlitDst, ImageLayout::General);
cmd_buf.finish();
// Make sure not to drop the buffer and image until the command buffer completes.
cmd_buf.add_resource(&buffer);
cmd_buf.add_resource(&image);
let _ = session.run_cmd_buf(cmd_buf, &[], &[]);
// We let the session reclaim the fence.
Ok(image)
}
}

    /// Make a test background image (an RGB "XOR pattern" texture).
fn make_test_bg_image(session: &Session) -> Image {
const WIDTH: usize = 256;
const HEIGHT: usize = 256;
let mut buf = vec![255u8; WIDTH * HEIGHT * 4];
for y in 0..HEIGHT {
for x in 0..WIDTH {
let r = x as u8;
let g = y as u8;
let b = r ^ g;
buf[(y * WIDTH + x) * 4] = r;
buf[(y * WIDTH + x) * 4 + 1] = g;
buf[(y * WIDTH + x) * 4 + 2] = b;
}
}
Self::make_image(session, WIDTH, HEIGHT, &buf, ImageFormat::RgbaPremul).unwrap()
}

    /// Create the gradient ramp atlas image; its contents are uploaded from
    /// the per-frame staging buffers in `record`.
    fn make_gradient_image(session: &Session) -> Image {
unsafe {
session
.create_image2d(gradient::N_SAMPLES as u32, gradient::N_GRADIENTS as u32)
.unwrap()
}
}
}