Merge branch 'master' into prefix
diff --git a/Cargo.lock b/Cargo.lock
index 1bec058..5f9b877 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -7,6 +7,12 @@
checksum = "5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2"
[[package]]
+name = "arrayvec"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cff77d8686867eceff3105329d4698d96c2391c176d5d03adc90c7389162b5b8"
+
+[[package]]
name = "ash"
version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -70,6 +76,12 @@
]
[[package]]
+name = "half"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f36b5f248235f45773d4944f555f83ea61fe07b18b561ccf99d7483d7381e54d"
+
+[[package]]
name = "inflate"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -79,6 +91,14 @@
]
[[package]]
+name = "kurbo"
+version = "0.5.11"
+source = "git+https://github.com/linebender/kurbo?rev=7bd7e66bd137e757305d170a0f9f2b4f7beeb299#7bd7e66bd137e757305d170a0f9f2b4f7beeb299"
+dependencies = [
+ "arrayvec",
+]
+
+[[package]]
name = "libc"
version = "0.2.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -95,9 +115,26 @@
]
[[package]]
+name = "once_cell"
+version = "1.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b1c601810575c99596d4afc46f78a678c80105117c379eb3650cf99b8a21ce5b"
+
+[[package]]
+name = "piet"
+version = "0.0.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "29420eccb68d6b9ad2f8dd87caf9c3bcd3bbd056bfe67871c48b6efab9316b79"
+dependencies = [
+ "kurbo",
+]
+
+[[package]]
name = "piet-gpu"
version = "0.1.0"
dependencies = [
+ "kurbo",
+ "piet",
"piet-gpu-hal",
"piet-gpu-types",
"png",
@@ -118,12 +155,14 @@
version = "0.1.0"
dependencies = [
"ash",
+ "once_cell",
]
[[package]]
name = "piet-gpu-types"
version = "0.0.0"
dependencies = [
+ "half",
"piet-gpu-derive",
]
diff --git a/Cargo.toml b/Cargo.toml
index f71f2de..efa5f88 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -6,3 +6,7 @@
"piet-gpu-hal",
"piet-gpu-types"
]
+
+# TODO: remove when the flatten feature is published
+[patch.crates-io]
+kurbo = { git = "https://github.com/linebender/kurbo", rev = "7bd7e66bd137e757305d170a0f9f2b4f7beeb299" }
diff --git a/piet-gpu-derive/src/derive.rs b/piet-gpu-derive/src/derive.rs
index bc84bfb..3b4c478 100644
--- a/piet-gpu-derive/src/derive.rs
+++ b/piet-gpu-derive/src/derive.rs
@@ -14,6 +14,16 @@
}
quote! {
mod #module_name {
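+ // `half::f16` does not expose `to_le_bytes` directly, so this local
+ // shim provides one, giving the generated encoders a uniform
+ // `to_le_bytes` across f16 and the built-in numeric types.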
+ pub trait HalfToLeBytes {
+ fn to_le_bytes(&self) -> [u8; 2];
+ }
+
+ impl HalfToLeBytes for half::f16 {
+ fn to_le_bytes(&self) -> [u8; 2] {
+ self.to_bits().to_le_bytes()
+ }
+ }
+
#ts
}
}
@@ -121,6 +131,7 @@
fn gen_derive_scalar_ty(ty: &GpuScalar) -> proc_macro2::TokenStream {
match ty {
+ GpuScalar::F16 => quote!(half::f16),
GpuScalar::F32 => quote!(f32),
GpuScalar::I8 => quote!(i8),
GpuScalar::I16 => quote!(i16),
diff --git a/piet-gpu-derive/src/glsl.rs b/piet-gpu-derive/src/glsl.rs
index 617669a..ec87640 100644
--- a/piet-gpu-derive/src/glsl.rs
+++ b/piet-gpu-derive/src/glsl.rs
@@ -14,6 +14,7 @@
for name in &module.def_names {
gen_refdef(&mut r, &name);
}
+
for name in &module.def_names {
match module.defs.get(name).unwrap() {
(size, LayoutTypeDef::Struct(fields)) => {
@@ -26,6 +27,7 @@
}
}
}
+
for name in &module.def_names {
let def = module.defs.get(name).unwrap();
match def {
@@ -43,6 +45,7 @@
}
}
}
+
r
}
@@ -98,9 +101,21 @@
}
}
writeln!(r, " {} s;", name).unwrap();
+
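+ // `preload` is true when the previous field was an f16 in the low half
+ // of a word; in that case `halvesN` was already declared by its setup
+ // code and the next f16 can read `.y` without another unpackHalf2x16.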
+ let mut preload: bool = false;
for (name, offset, ty) in fields {
- writeln!(r, " s.{} = {};", name, gen_extract(*offset, &ty.ty)).unwrap();
+ let (setup, extract) = gen_extract(*offset, &ty.ty, preload);
+ writeln!(r, "{} s.{} = {};", setup, name, extract).unwrap();
+
+ if let GpuType::Scalar(GpuScalar::F16) = &ty.ty {
+ if offset % 4 == 0 {
+ preload = true;
+ continue;
+ }
+ }
+ preload = false;
}
+
writeln!(r, " return s;").unwrap();
writeln!(r, "}}\n").unwrap();
}
@@ -136,34 +151,67 @@
}
}
-fn gen_extract(offset: usize, ty: &GpuType) -> String {
+fn gen_extract(offset: usize, ty: &GpuType, preload: bool) -> (String, String) {
match ty {
- GpuType::Scalar(scalar) => gen_extract_scalar(offset, scalar),
+ GpuType::Scalar(scalar) => {
+ let setup = match scalar {
+ GpuScalar::F16 => {
+ if preload {
+ String::new()
+ } else {
+ let ix = offset / 4;
+ format!(" vec2 halves{} = unpackHalf2x16(raw{});\n", ix, ix)
+ }
+ }
+ _ => String::new(),
+ };
+
+ (setup, gen_extract_scalar(offset, scalar))
+ }
GpuType::Vector(scalar, size) => {
- let mut r = glsl_type(ty);
- r.push_str("(");
+ let is_f16 = match scalar {
+ GpuScalar::F16 => true,
+ _ => false,
+ };
+
+ let mut setup = String::new();
+ let mut extract = glsl_type(ty);
+ extract.push_str("(");
for i in 0..*size {
if i != 0 {
- r.push_str(", ");
+ extract.push_str(", ");
}
+
+ if is_f16 && i % 2 == 0 {
+ let ix = (offset + i * scalar.size()) / 4;
+ let s = format!(" vec2 halves{} = unpackHalf2x16(raw{});\n", ix, ix);
+ setup.push_str(&s);
+ }
+
let el_offset = offset + i * scalar.size();
- r.push_str(&gen_extract_scalar(el_offset, scalar));
+ extract.push_str(&gen_extract_scalar(el_offset, scalar));
}
- r.push_str(")");
- r
+ extract.push_str(")");
+ (setup, extract)
}
- GpuType::InlineStruct(name) => format!(
- "{}_read({}Ref({}))",
- name,
- name,
- simplified_add("ref.offset", offset)
+ GpuType::InlineStruct(name) => (
+ String::new(),
+ format!(
+ "{}_read({}Ref({}))",
+ name,
+ name,
+ simplified_add("ref.offset", offset)
+ ),
),
GpuType::Ref(inner) => {
if let GpuType::InlineStruct(name) = inner.deref() {
- format!(
- "{}Ref({})",
- name,
- gen_extract_scalar(offset, &GpuScalar::U32)
+ (
+ String::new(),
+ format!(
+ "{}Ref({})",
+ name,
+ gen_extract_scalar(offset, &GpuScalar::U32)
+ ),
)
} else {
panic!("only know how to deal with Ref of struct")
@@ -174,7 +222,7 @@
fn gen_extract_scalar(offset: usize, ty: &GpuScalar) -> String {
match ty {
- GpuScalar::F32 => format!("uintBitsToFloat(raw{})", offset / 4),
+ GpuScalar::F16 | GpuScalar::F32 => extract_fbits(offset, ty.size()),
GpuScalar::U8 | GpuScalar::U16 | GpuScalar::U32 => extract_ubits(offset, ty.size()),
GpuScalar::I8 | GpuScalar::I16 | GpuScalar::I32 => extract_ibits(offset, ty.size()),
}
@@ -210,8 +258,41 @@
}
}
+fn extract_fbits(offset: usize, nbytes: usize) -> String {
+ match nbytes {
+ 4 => format!("uintBitsToFloat(raw{})", offset / 4),
+ 2 => match offset % 4 {
+ 0 => {
+ let ix = offset / 4;
+ format!("halves{}.x", ix)
+ }
+ 2 => format!("halves{}.y", offset / 4),
+ _ => panic!("unexpected packing of f16 at offset {}", offset % 4),
+ },
+ _ => {
+ panic!("unexpected extraction of float with nbytes = {}", nbytes);
+ }
+ }
+}
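+
+// Example: an f16 at byte offset 6 sits in the high half of raw1, so
+// extract_fbits(6, 2) yields "halves1.y"; the matching setup statement
+// "vec2 halves1 = unpackHalf2x16(raw1);" is emitted by gen_extract.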
+
// Writing
+fn is_f16(ty: &GpuType) -> bool {
+ match ty {
+ GpuType::Scalar(GpuScalar::F16) => true,
+ GpuType::Vector(GpuScalar::F16, _) => true,
+ _ => false,
+ }
+}
+
+fn is_f16_pair(field_ixs: &[usize], fields: &[(String, usize, LayoutType)]) -> bool {
+ if field_ixs.len() == 2 {
+ // Check only the two fields covered by this word, not all fields.
+ field_ixs.iter().all(|&ix| is_f16(&fields[ix].2.ty))
+ } else {
+ false
+ }
+}
+
fn gen_struct_write(
r: &mut String,
bufname: &str,
@@ -221,39 +302,78 @@
writeln!(r, "void {}_write({}Ref ref, {} s) {{", name, name, name).unwrap();
writeln!(r, " uint ix = ref.offset >> 2;").unwrap();
let coverage = crate::layout::struct_coverage(fields, true);
+
for (i, field_ixs) in coverage.iter().enumerate() {
let mut pieces = Vec::new();
- for field_ix in field_ixs {
- let (name, offset, ty) = &fields[*field_ix];
- match &ty.ty {
- GpuType::Scalar(scalar) => {
- let inner = format!("s.{}", name);
- pieces.push(gen_pack_bits_scalar(scalar, *offset, &inner));
- }
- GpuType::Vector(scalar, len) => {
- let size = scalar.size();
- let ix_lo = (i * 4 - offset) / size;
- let ix_hi = ((4 + i * 4 - offset) / size).min(*len);
- for ix in ix_lo..ix_hi {
- let scalar_offset = offset + ix * size;
- let inner = format!("s.{}.{}", name, &"xyzw"[ix..ix + 1]);
- pieces.push(gen_pack_bits_scalar(scalar, scalar_offset, &inner));
+
+ if is_f16_pair(field_ixs, fields) {
+ let (ix0, ix1) = (field_ixs[0], field_ixs[1]);
+ let inner0 = format!("s.{}", fields[ix0].0);
+ let inner1 = format!("s.{}", fields[ix1].0);
+ pieces.push(format!("packHalf2x16(vec2({}, {}))", &inner0, &inner1));
+ } else {
+ for field_ix in field_ixs {
+ let (name, offset, ty) = &fields[*field_ix];
+ match &ty.ty {
+ GpuType::Scalar(scalar) => {
+ let inner = format!("s.{}", name);
+ pieces.push(gen_pack_bits_scalar(scalar, *offset, &inner));
}
+ GpuType::Vector(scalar, len) => {
+ let size = scalar.size();
+ let ix_lo = (i * 4 - offset) / size;
+ let ix_hi = ((4 + i * 4 - offset) / size).min(*len);
+ match scalar {
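+ // If this word covers two adjacent f16 lanes, pack them together
+ // with packHalf2x16; a lone lane falls back to the masked
+ // single-lane path below.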
+ GpuScalar::F16 => {
+ if ix_hi - ix_lo == 2 {
+ let inner0 =
+ format!("s.{}.{}", name, &"xyzw"[ix_lo..ix_lo + 1]);
+ let inner1 =
+ format!("s.{}.{}", name, &"xyzw"[ix_lo + 1..ix_hi]);
+ pieces.push(format!(
+ "packHalf2x16(vec2({}, {}))",
+ &inner0, &inner1
+ ));
+ } else {
+ let ix = ix_lo;
+ let scalar_offset = offset + ix * size;
+ let inner = format!("s.{}.{}", name, &"xyzw"[ix..ix + 1]);
+ pieces.push(gen_pack_bits_scalar(
+ scalar,
+ scalar_offset,
+ &inner,
+ ));
+ }
+ }
+ _ => {
+ for ix in ix_lo..ix_hi {
+ let scalar_offset = offset + ix * size;
+ let inner = format!("s.{}.{}", name, &"xyzw"[ix..ix + 1]);
+ pieces.push(gen_pack_bits_scalar(
+ scalar,
+ scalar_offset,
+ &inner,
+ ));
+ }
+ }
+ }
+ }
+ GpuType::InlineStruct(structname) => {
+ writeln!(
+ r,
+ " {}_write({}Ref({}), s.{});",
+ structname,
+ structname,
+ simplified_add("ref.offset", *offset),
+ name
+ )
+ .unwrap();
+ }
+ GpuType::Ref(_) => pieces.push(format!("s.{}.offset", name)),
}
- GpuType::InlineStruct(structname) => {
- writeln!(
- r,
- " {}_write({}Ref({}), s.{});",
- structname,
- structname,
- simplified_add("ref.offset", *offset),
- name
- )
- .unwrap();
- }
- GpuType::Ref(_) => pieces.push(format!("s.{}.offset", name)),
}
}
+
if !pieces.is_empty() {
write!(r, " {}[ix + {}] = ", bufname, i).unwrap();
for (j, piece) in pieces.iter().enumerate() {
@@ -271,6 +391,7 @@
fn gen_pack_bits_scalar(ty: &GpuScalar, offset: usize, inner: &str) -> String {
let shift = (offset % 4) * 8;
let bits = match ty {
+ GpuScalar::F16 => format!("packHalf2x16(vec2({}, 0.0)) & 0xffff", inner),
GpuScalar::F32 => format!("floatBitsToUint({})", inner),
// Note: this doesn't mask small unsigned int types; the caller is
// responsible for making sure they don't overflow.
@@ -367,7 +488,7 @@
// GLSL type that can contain the scalar value.
fn glsl_scalar(s: &GpuScalar) -> &'static str {
match s {
- GpuScalar::F32 => "float",
+ GpuScalar::F16 | GpuScalar::F32 => "float",
GpuScalar::I8 | GpuScalar::I16 | GpuScalar::I32 => "int",
GpuScalar::U8 | GpuScalar::U16 | GpuScalar::U32 => "uint",
}
@@ -375,7 +496,7 @@
fn glsl_vecname(s: &GpuScalar) -> &'static str {
match s {
- GpuScalar::F32 => "vec",
+ GpuScalar::F16 | GpuScalar::F32 => "vec",
GpuScalar::I8 | GpuScalar::I16 | GpuScalar::I32 => "ivec",
GpuScalar::U8 | GpuScalar::U16 | GpuScalar::U32 => "uvec",
}
diff --git a/piet-gpu-derive/src/parse.rs b/piet-gpu-derive/src/parse.rs
index 8e51bab..9461338 100644
--- a/piet-gpu-derive/src/parse.rs
+++ b/piet-gpu-derive/src/parse.rs
@@ -12,14 +12,14 @@
/// A scalar that can be represented in a packed data structure.
#[derive(Clone, Copy, PartialEq)]
pub enum GpuScalar {
+ F16,
+ F32,
I8,
I16,
I32,
- F32,
U8,
U16,
U32,
- // TODO: Add F16
}
/// An algebraic datatype.
@@ -52,6 +52,7 @@
fn from_syn(ty: &syn::Type) -> Option<Self> {
ty_as_single_ident(ty).and_then(|ident| match ident.as_str() {
"f32" => Some(GpuScalar::F32),
+ "f16" => Some(GpuScalar::F16),
"i8" => Some(GpuScalar::I8),
"i16" => Some(GpuScalar::I16),
"i32" => Some(GpuScalar::I32),
@@ -70,7 +71,7 @@
match self {
GpuScalar::F32 | GpuScalar::I32 | GpuScalar::U32 => 4,
GpuScalar::I8 | GpuScalar::U8 => 1,
- GpuScalar::I16 | GpuScalar::U16 => 2,
+ GpuScalar::F16 | GpuScalar::I16 | GpuScalar::U16 => 2,
}
}
}
diff --git a/piet-gpu-hal/Cargo.toml b/piet-gpu-hal/Cargo.toml
index a6a373a..7019002 100644
--- a/piet-gpu-hal/Cargo.toml
+++ b/piet-gpu-hal/Cargo.toml
@@ -8,3 +8,4 @@
[dependencies]
ash = "0.30"
+once_cell = "1.3.1"
diff --git a/piet-gpu-hal/examples/collatz.rs b/piet-gpu-hal/examples/collatz.rs
index 7195891..a4777b4 100644
--- a/piet-gpu-hal/examples/collatz.rs
+++ b/piet-gpu-hal/examples/collatz.rs
@@ -17,6 +17,7 @@
let query_pool = device.create_query_pool(2).unwrap();
let mut cmd_buf = device.create_cmd_buf().unwrap();
cmd_buf.begin();
+ cmd_buf.reset_query_pool(&query_pool);
cmd_buf.write_timestamp(&query_pool, 0);
cmd_buf.dispatch(&pipeline, &descriptor_set, (256, 1, 1));
cmd_buf.write_timestamp(&query_pool, 1);
diff --git a/piet-gpu-hal/examples/prefix.rs b/piet-gpu-hal/examples/prefix.rs
index 6a38e53..2f80a20 100644
--- a/piet-gpu-hal/examples/prefix.rs
+++ b/piet-gpu-hal/examples/prefix.rs
@@ -35,6 +35,7 @@
cmd_buf.clear_buffer(&work_buffer);
cmd_buf.copy_buffer(&buffer, &buffer_dev);
cmd_buf.memory_barrier();
+ cmd_buf.reset_query_pool(&query_pool);
cmd_buf.write_timestamp(&query_pool, 0);
cmd_buf.dispatch(&pipeline, &descriptor_set, (n_tiles as u32, 1, 1));
cmd_buf.write_timestamp(&query_pool, 1);
diff --git a/piet-gpu-hal/src/lib.rs b/piet-gpu-hal/src/lib.rs
index c62678f..d215490 100644
--- a/piet-gpu-hal/src/lib.rs
+++ b/piet-gpu-hal/src/lib.rs
@@ -71,10 +71,22 @@
unsafe fn memory_barrier(&mut self);
+ /// Clear the buffer.
+ ///
+ /// This is readily supported in Vulkan, but for portability it is remarkably
+ /// tricky (unimplemented in gfx-hal right now). Possibly best to write a compute
+ /// kernel, or organize the code not to need it.
unsafe fn clear_buffer(&self, buffer: &D::Buffer);
unsafe fn copy_buffer(&self, src: &D::Buffer, dst: &D::Buffer);
+ /// Reset the query pool.
+ ///
+ /// The query pool must be reset before each use, to avoid validation errors.
+ /// This is annoying, and we could tweak the API to make it implicit, doing
+ /// the reset before the first timestamp write.
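+ ///
+ /// Expected call order (as in the collatz and prefix examples): `begin`,
+ /// `reset_query_pool`, `write_timestamp(0)`, dispatch, `write_timestamp(1)`.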
+ unsafe fn reset_query_pool(&mut self, pool: &D::QueryPool);
+
unsafe fn write_timestamp(&mut self, pool: &D::QueryPool, query: u32);
}
diff --git a/piet-gpu-hal/src/vulkan.rs b/piet-gpu-hal/src/vulkan.rs
index 8ad7a13..35cf68f 100644
--- a/piet-gpu-hal/src/vulkan.rs
+++ b/piet-gpu-hal/src/vulkan.rs
@@ -1,10 +1,13 @@
//! Vulkan implementation of HAL trait.
-use std::ffi::CString;
+use std::borrow::Cow;
+use std::ffi::{CStr, CString};
use std::sync::Arc;
+use ash::extensions::ext::DebugUtils;
use ash::version::{DeviceV1_0, EntryV1_0, InstanceV1_0};
use ash::{vk, Device, Entry, Instance};
+use once_cell::sync::Lazy;
use crate::Error;
@@ -12,8 +15,9 @@
/// Retain the dynamic lib.
#[allow(unused)]
entry: Entry,
-
instance: Instance,
+ _dbg_loader: Option<DebugUtils>,
+ _dbg_callbk: Option<vk::DebugUtilsMessengerEXT>,
}
pub struct VkDevice {
@@ -61,6 +65,55 @@
#[derive(Clone, Copy)]
pub struct MemFlags(vk::MemoryPropertyFlags);
+unsafe extern "system" fn vulkan_debug_callback(
+ message_severity: vk::DebugUtilsMessageSeverityFlagsEXT,
+ message_type: vk::DebugUtilsMessageTypeFlagsEXT,
+ p_callback_data: *const vk::DebugUtilsMessengerCallbackDataEXT,
+ _user_data: *mut std::os::raw::c_void,
+) -> vk::Bool32 {
+ let callback_data = &*p_callback_data;
+ let message_id_number = callback_data.message_id_number;
+
+ let message_id_name = if callback_data.p_message_id_name.is_null() {
+ Cow::from("")
+ } else {
+ CStr::from_ptr(callback_data.p_message_id_name).to_string_lossy()
+ };
+
+ let message = if callback_data.p_message.is_null() {
+ Cow::from("")
+ } else {
+ CStr::from_ptr(callback_data.p_message).to_string_lossy()
+ };
+
+ println!(
+ "{:?}:\n{:?} [{} ({})] : {}\n",
+ message_severity,
+ message_type,
+ message_id_name,
+ message_id_number,
+ message,
+ );
+
+ vk::FALSE
+}
+
+static LAYERS: Lazy<Vec<&'static CStr>> = Lazy::new(|| {
+ let mut layers: Vec<&'static CStr> = vec![];
+ if cfg!(debug_assertions) {
+ layers.push(CStr::from_bytes_with_nul(b"VK_LAYER_KHRONOS_validation\0").unwrap());
+ }
+ layers
+});
+
+static EXTS: Lazy<Vec<&'static CStr>> = Lazy::new(|| {
+ let mut exts: Vec<&'static CStr> = vec![];
+ if cfg!(debug_assertions) {
+ exts.push(DebugUtils::name());
+ }
+ exts
+});
+
impl VkInstance {
/// Create a new instance.
///
@@ -70,18 +123,74 @@
unsafe {
let app_name = CString::new("VkToy").unwrap();
let entry = Entry::new()?;
+
+ let exist_layers = entry.enumerate_instance_layer_properties()?;
+ let layers = LAYERS.iter().filter_map(|&lyr| {
+ exist_layers
+ .iter()
+ .find(|x| CStr::from_ptr(x.layer_name.as_ptr()) == lyr)
+ .map(|_| lyr.as_ptr())
+ .or_else(|| {
+ println!("Unable to find layer: {}, have you installed the Vulkan SDK?", lyr.to_string_lossy());
+ None
+ })
+ }).collect::<Vec<_>>();
+
+ let exist_exts = entry.enumerate_instance_extension_properties()?;
+ let exts = EXTS.iter().filter_map(|&ext| {
+ exist_exts
+ .iter()
+ .find(|x| CStr::from_ptr(x.extension_name.as_ptr()) == ext)
+ .map(|_| ext.as_ptr())
+ .or_else(|| {
+ println!("Unable to find extension: {}, have you installed the Vulkan SDK?", ext.to_string_lossy());
+ None
+ })
+ }).collect::<Vec<_>>();
+
let instance = entry.create_instance(
- &vk::InstanceCreateInfo::builder().application_info(
- &vk::ApplicationInfo::builder()
- .application_name(&app_name)
- .application_version(0)
- .engine_name(&app_name)
- .api_version(vk::make_version(1, 0, 0)),
- ),
+ &vk::InstanceCreateInfo::builder()
+ .application_info(
+ &vk::ApplicationInfo::builder()
+ .application_name(&app_name)
+ .application_version(0)
+ .engine_name(&app_name)
+ .api_version(vk::make_version(1, 0, 0)),
+ )
+ .enabled_layer_names(&layers)
+ .enabled_extension_names(&exts),
None,
)?;
- Ok(VkInstance { entry, instance })
+ let (_dbg_loader, _dbg_callbk) = if cfg!(debug_assertions) {
+ let dbg_info = vk::DebugUtilsMessengerCreateInfoEXT::builder()
+ .message_severity(
+ vk::DebugUtilsMessageSeverityFlagsEXT::ERROR
+ | vk::DebugUtilsMessageSeverityFlagsEXT::WARNING,
+ )
+ .message_type(vk::DebugUtilsMessageTypeFlagsEXT::all())
+ .pfn_user_callback(Some(vulkan_debug_callback));
+ let dbg_loader = DebugUtils::new(&entry, &instance);
+ let dbg_callbk = dbg_loader
+ .create_debug_utils_messenger(&dbg_info, None)
+ .unwrap();
+ (Some(dbg_loader), Some(dbg_callbk))
+ } else {
+ (None, None)
+ };
+
+ Ok(VkInstance {
+ entry,
+ instance,
+ _dbg_loader,
+ _dbg_callbk,
+ })
}
}
@@ -467,6 +576,16 @@
);
}
+ unsafe fn reset_query_pool(&mut self, pool: &QueryPool) {
+ let device = &self.device.device;
+ device.cmd_reset_query_pool(
+ self.cmd_buf,
+ pool.pool,
+ 0,
+ pool.n_queries,
+ );
+ }
+
unsafe fn write_timestamp(&mut self, pool: &QueryPool, query: u32) {
let device = &self.device.device;
device.cmd_write_timestamp(
diff --git a/piet-gpu-types/Cargo.toml b/piet-gpu-types/Cargo.toml
index 6de92a5..629cd62 100644
--- a/piet-gpu-types/Cargo.toml
+++ b/piet-gpu-types/Cargo.toml
@@ -9,3 +9,4 @@
[dependencies]
piet-gpu-derive = { path = "../piet-gpu-derive" }
+half = "1.5.0"
diff --git a/piet-gpu-types/src/lib.rs b/piet-gpu-types/src/lib.rs
index 44d4843..db9516f 100644
--- a/piet-gpu-types/src/lib.rs
+++ b/piet-gpu-types/src/lib.rs
@@ -1,4 +1,6 @@
pub mod encoder;
pub mod ptcl;
pub mod scene;
+pub mod segment;
+pub mod test;
pub mod tilegroup;
diff --git a/piet-gpu-types/src/main.rs b/piet-gpu-types/src/main.rs
index 7ed941f..834f1b6 100644
--- a/piet-gpu-types/src/main.rs
+++ b/piet-gpu-types/src/main.rs
@@ -6,7 +6,9 @@
match mod_name.as_str() {
"scene" => print!("{}", piet_gpu_types::scene::gen_gpu_scene()),
"tilegroup" => print!("{}", piet_gpu_types::tilegroup::gen_gpu_tilegroup()),
+ "segment" => print!("{}", piet_gpu_types::segment::gen_gpu_segment()),
"ptcl" => print!("{}", piet_gpu_types::ptcl::gen_gpu_ptcl()),
+ "test" => print!("{}", piet_gpu_types::test::gen_gpu_test()),
_ => println!("Oops, unknown module name"),
}
}
diff --git a/piet-gpu-types/src/ptcl.rs b/piet-gpu-types/src/ptcl.rs
index f5e42af..3faffb9 100644
--- a/piet-gpu-types/src/ptcl.rs
+++ b/piet-gpu-types/src/ptcl.rs
@@ -4,16 +4,19 @@
#[gpu_write]
mod ptcl {
struct CmdCircle {
- // In existing code, this is packed; we might need an annotation for this.
- bbox: [u16; 4],
+ center: [f32; 2],
+ radius: f32,
+ rgba_color: u32,
}
struct CmdLine {
start: [f32; 2],
end: [f32; 2],
}
struct CmdStroke {
- // In existing code, this is f16. Should we have support?
- halfWidth: f32,
+ n_segs: u32,
+ // Should be Ref<Segment> if we had cross-module references.
+ seg_ref: u32,
+ half_width: f32,
rgba_color: u32,
}
struct CmdFill {
@@ -32,6 +35,9 @@
struct CmdSolid {
rgba_color: u32,
}
+ struct CmdJump {
+ new_ref: u32,
+ }
enum Cmd {
End,
Circle(CmdCircle),
@@ -41,6 +47,7 @@
FillEdge(CmdFillEdge),
DrawFill(CmdDrawFill),
Solid(CmdSolid),
+ Jump(CmdJump),
Bail,
}
}
diff --git a/piet-gpu-types/src/segment.rs b/piet-gpu-types/src/segment.rs
new file mode 100644
index 0000000..ba5f3e2
--- /dev/null
+++ b/piet-gpu-types/src/segment.rs
@@ -0,0 +1,27 @@
+use piet_gpu_derive::piet_gpu;
+
+// Structures representing segments for stroke/fill items.
+
+piet_gpu! {
+ #[gpu_write]
+ mod segment {
+ struct TileHeader {
+ n: u32,
+ items: Ref<ItemHeader>,
+ }
+
+ // Note: this is only suitable for strokes; fills require backdrop.
+ struct ItemHeader {
+ n: u32,
+ segments: Ref<Segment>,
+ }
+
+ // TODO: strongly consider using f16. If so, these would be
+ // relative to the tile. We're doing f32 for now to minimize
+ // divergence from piet-metal originals.
+ struct Segment {
+ start: [f32; 2],
+ end: [f32; 2],
+ }
+ }
+}
diff --git a/piet-gpu-types/src/test.rs b/piet-gpu-types/src/test.rs
new file mode 100644
index 0000000..e92aaca
--- /dev/null
+++ b/piet-gpu-types/src/test.rs
@@ -0,0 +1,33 @@
+use piet_gpu_derive::piet_gpu;
+
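+// These structs exercise f16 packing in the generated GLSL: pairs sharing a
+// word (StructA, StructD), f16 mixed with u16 in the same word (StructB,
+// StructC), and an odd-length vector (StructE). For StructA, the generated
+// writer is expected to pack both fields into one word, roughly:
+//
+// test[ix + 0] = packHalf2x16(vec2(s.a, s.b));
+//
+// (illustrative sketch of the codegen, not verbatim output).
+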
+piet_gpu! {
+ #[rust_encode]
+ #[gpu_write]
+ mod test {
+ struct StructA {
+ a: f16,
+ b: f16,
+ }
+
+ struct StructB {
+ a: f16,
+ b: u16,
+ c: f16,
+ }
+
+ struct StructC {
+ a: f16,
+ b: u16,
+ c: u16,
+ d: f16,
+ }
+
+ struct StructD {
+ a: [f16; 2],
+ }
+
+ struct StructE {
+ a: [f16; 3],
+ }
+ }
+}
diff --git a/piet-gpu-types/src/tilegroup.rs b/piet-gpu-types/src/tilegroup.rs
index 4824178..ea295d9 100644
--- a/piet-gpu-types/src/tilegroup.rs
+++ b/piet-gpu-types/src/tilegroup.rs
@@ -1,5 +1,18 @@
use piet_gpu_derive::piet_gpu;
+// Structures representing tilegroup instances (output of kernel 1).
+// There are three outputs: the main instances, the stroke instances,
+// and the fill instances. All three are conceptually a list of
+// instances, but the encoding is slightly different. The first is
+// encoded with Instance, Jump, and End. The other two are encoded
+// as a linked list of Chunk.
+
+// The motivation for the difference is that the first requires fewer
+// registers to track state, but the second contains information that
+// is useful up front for doing dynamic allocation in kernel 2, as
+// well as increasing read parallelism; the "jump" approach really is
+// geared to sequential reading.
+
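+// An illustrative sketch of the two encodings:
+//
+// main list: Instance, Instance, ..., Jump -> (continues elsewhere), End
+// stroke/fill lists: Chunk(n, next) -> Chunk(n, next) -> ... -> Chunk(n, 0)
+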
piet_gpu! {
#[gpu_write]
mod tilegroup {
@@ -10,8 +23,16 @@
// A better type would be Point.
offset: [f32; 2],
}
+ struct Jump {
+ new_ref: Ref<TileGroup>,
+ }
+ struct Chunk {
+ chunk_n: u32,
+ next: Ref<Chunk>,
+ }
enum TileGroup {
Instance(Instance),
+ Jump(Jump),
End,
}
}
diff --git a/piet-gpu/Cargo.toml b/piet-gpu/Cargo.toml
index 4b7a7e9..b082868 100644
--- a/piet-gpu/Cargo.toml
+++ b/piet-gpu/Cargo.toml
@@ -13,5 +13,7 @@
path = "../piet-gpu-types"
[dependencies]
+kurbo = "0.5.11"
+piet = "0.0.12"
png = "0.16.2"
rand = "0.7.3"
diff --git a/piet-gpu/shader/build.ninja b/piet-gpu/shader/build.ninja
index 5befa7f..3da40c9 100644
--- a/piet-gpu/shader/build.ninja
+++ b/piet-gpu/shader/build.ninja
@@ -9,4 +9,10 @@
build image.spv: glsl image.comp | scene.h
-build kernel1.spv: glsl kernel1.comp | scene.h tilegroup.h
+build kernel1.spv: glsl kernel1.comp | scene.h tilegroup.h setup.h
+
+build kernel2s.spv: glsl kernel2s.comp | scene.h tilegroup.h segment.h setup.h
+
+build kernel3.spv: glsl kernel3.comp | scene.h tilegroup.h ptcl.h setup.h
+
+build kernel4.spv: glsl kernel4.comp | ptcl.h setup.h
diff --git a/piet-gpu/shader/image.comp b/piet-gpu/shader/image.comp
index 60739d5..6d84eb5 100644
--- a/piet-gpu/shader/image.comp
+++ b/piet-gpu/shader/image.comp
@@ -40,7 +40,7 @@
if (tag == PietItem_Circle) {
PietCircle circle = PietItem_Circle_read(item_ref);
float r = length(xy + vec2(0.5, 0.5) - circle.center.xy);
- float alpha = clamp(circle.radius - r, 0.0, 1.0);
+ float alpha = clamp(0.5 + circle.radius - r, 0.0, 1.0);
vec4 fg_rgba = unpackUnorm4x8(circle.rgba_color);
// TODO: sRGB
rgb = mix(rgb, fg_rgba.rgb, alpha * fg_rgba.a);
diff --git a/piet-gpu/shader/image.spv b/piet-gpu/shader/image.spv
index 527c9ae..097add1 100644
--- a/piet-gpu/shader/image.spv
+++ b/piet-gpu/shader/image.spv
Binary files differ
diff --git a/piet-gpu/shader/kernel1.comp b/piet-gpu/shader/kernel1.comp
index 436b8bd..ce99005 100644
--- a/piet-gpu/shader/kernel1.comp
+++ b/piet-gpu/shader/kernel1.comp
@@ -1,3 +1,15 @@
+// This is "kernel 1" in a 4-kernel pipeline. It traverses the scene graph
+// and outputs "instances" (references to item + translation) for each item
+// that intersects the tilegroup.
+//
+// This implementation is simplistic and leaves a lot of performance on the
+// table. A fancier implementation would use threadgroup shared memory or
+// subgroups (or possibly both) to parallelize the reading of the input and
+// the computation of tilegroup intersection.
+//
+// In addition, there are some features currently missing, such as support
+// for clipping.
+
#version 450
#extension GL_GOOGLE_include_directive : enable
@@ -12,16 +24,14 @@
uint[] tilegroup;
};
+layout(set = 0, binding = 2) buffer AllocBuf {
+ uint alloc;
+};
+
#include "scene.h"
#include "tilegroup.h"
-// TODO: compute this
-#define WIDTH_IN_TILEGROUPS 4
-
-#define TILEGROUP_WIDTH 512
-#define TILEGROUP_HEIGHT 16
-
-#define INITIAL_ALLOC 1024
+#include "setup.h"
#define MAX_STACK 8
@@ -35,8 +45,18 @@
StackElement stack[MAX_STACK];
uint stack_ix = 0;
uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS + gl_GlobalInvocationID.x;
- TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * INITIAL_ALLOC);
- vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILEGROUP_WIDTH, TILEGROUP_HEIGHT);
+ TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_STRIDE);
+ uint tg_limit = tg_ref.offset + TILEGROUP_INITIAL_ALLOC - 2 * TileGroup_size;
+
+ // State for stroke references.
+ TileGroupRef stroke_start = TileGroupRef(tg_ref.offset + TILEGROUP_STROKE_START);
+ ChunkRef stroke_chunk_start = ChunkRef(stroke_start.offset + 4);
+ InstanceRef stroke_ref = InstanceRef(stroke_chunk_start.offset + Chunk_size);
+ uint stroke_limit = stroke_start.offset + TILEGROUP_INITIAL_ALLOC - Instance_size;
+ uint stroke_chunk_n = 0;
+ uint stroke_n = 0;
+
+ vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILEGROUP_WIDTH_PX, TILEGROUP_HEIGHT_PX);
PietItemRef root = PietItemRef(0);
SimpleGroup group = PietItem_Group_read(root);
StackElement tos = StackElement(root, 0, group.offset.xy);
@@ -45,19 +65,42 @@
if (tos.index < group.n_items) {
Bbox bbox = Bbox_read(Bbox_index(group.bboxes, tos.index));
vec4 bb = vec4(bbox.bbox) + tos.offset.xyxy;
- bool hit = max(bb.x, xy0.x) < min(bb.z, xy0.x + float(TILEGROUP_WIDTH))
- && max(bb.y, xy0.y) < min(bb.w, xy0.y + float(TILEGROUP_HEIGHT));
+ bool hit = max(bb.x, xy0.x) < min(bb.z, xy0.x + float(TILEGROUP_WIDTH_PX))
+ && max(bb.y, xy0.y) < min(bb.w, xy0.y + float(TILEGROUP_HEIGHT_PX));
bool is_group = false;
+ uint tag;
if (hit) {
PietItemRef item_ref = PietItem_index(group.items, tos.index);
- is_group = PietItem_tag(item_ref) == PietItem_Group;
+ tag = PietItem_tag(item_ref);
+ is_group = tag == PietItem_Group;
}
if (hit && !is_group) {
PietItemRef item_ref = PietItem_index(group.items, tos.index);
Instance ins = Instance(item_ref.offset, tos.offset);
+ if (tg_ref.offset > tg_limit) {
+ // Allocation exceeded; do atomic bump alloc.
+ uint new_tg = atomicAdd(alloc, TILEGROUP_INITIAL_ALLOC);
+ Jump jump = Jump(TileGroupRef(new_tg));
+ TileGroup_Jump_write(tg_ref, jump);
+ tg_ref = TileGroupRef(new_tg);
+ tg_limit = tg_ref.offset + TILEGROUP_INITIAL_ALLOC - 2 * TileGroup_size;
+ }
TileGroup_Instance_write(tg_ref, ins);
tg_ref.offset += TileGroup_size;
- // TODO: bump allocate if allocation exceeded
+ if (tag == PietItem_Poly) {
+ if (stroke_ref.offset > stroke_limit) {
+ uint new_stroke = atomicAdd(alloc, TILEGROUP_STROKE_ALLOC);
+ Chunk_write(stroke_chunk_start, Chunk(stroke_chunk_n, ChunkRef(new_stroke)));
+ stroke_chunk_start = ChunkRef(new_stroke);
+ stroke_ref = InstanceRef(new_stroke + Chunk_size);
+ stroke_n += stroke_chunk_n;
+ stroke_chunk_n = 0;
+ stroke_limit = new_stroke + TILEGROUP_STROKE_ALLOC - Instance_size;
+ }
+ Instance_write(stroke_ref, ins);
+ stroke_chunk_n++;
+ stroke_ref.offset += Instance_size;
+ }
}
if (is_group) {
PietItemRef item_ref = PietItem_index(group.items, tos.index);
@@ -80,4 +123,10 @@
}
}
TileGroup_End_write(tg_ref);
+
+ stroke_n += stroke_chunk_n;
+ if (stroke_n > 0) {
+ Chunk_write(stroke_chunk_start, Chunk(stroke_chunk_n, ChunkRef(0)));
+ }
+ tilegroup[stroke_start.offset >> 2] = stroke_n;
}
diff --git a/piet-gpu/shader/kernel1.spv b/piet-gpu/shader/kernel1.spv
index 0e9a497..8430d74 100644
--- a/piet-gpu/shader/kernel1.spv
+++ b/piet-gpu/shader/kernel1.spv
Binary files differ
diff --git a/piet-gpu/shader/kernel2s.comp b/piet-gpu/shader/kernel2s.comp
new file mode 100644
index 0000000..3eb2d00
--- /dev/null
+++ b/piet-gpu/shader/kernel2s.comp
@@ -0,0 +1,127 @@
+// This is "kernel 2" (strokes) in a 4-kernel pipeline. It processes the stroke
+// (polyline) items in the scene and generates a list of segments for each, for
+// each tile.
+
+#version 450
+#extension GL_GOOGLE_include_directive : enable
+
+layout(local_size_x = 32) in;
+
+layout(set = 0, binding = 0) readonly buffer SceneBuf {
+ uint[] scene;
+};
+
+layout(set = 0, binding = 1) buffer TilegroupBuf {
+ uint[] tilegroup;
+};
+
+layout(set = 0, binding = 2) buffer SegmentBuf {
+ uint[] segment;
+};
+
+layout(set = 0, binding = 3) buffer AllocBuf {
+ uint alloc;
+};
+
+#include "scene.h"
+#include "tilegroup.h"
+#include "segment.h"
+
+#include "setup.h"
+
+void main() {
+ uint tile_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILES + gl_GlobalInvocationID.x;
+ uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS
+ + (gl_GlobalInvocationID.x / TILEGROUP_WIDTH_TILES);
+ vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX);
+ TileGroupRef stroke_start = TileGroupRef(tilegroup_ix * TILEGROUP_STRIDE + TILEGROUP_STROKE_START);
+ uint stroke_n = tilegroup[stroke_start.offset >> 2];
+
+ TileHeaderRef tile_header_ref = TileHeaderRef(tile_ix * TileHeader_size);
+ if (stroke_n > 0) {
+ ChunkRef chunk_ref = ChunkRef(stroke_start.offset + 4);
+ Chunk chunk = Chunk_read(chunk_ref);
+ InstanceRef stroke_ref = InstanceRef(chunk_ref.offset + Chunk_size);
+ ItemHeaderRef item_header = ItemHeaderRef(atomicAdd(alloc, stroke_n * ItemHeader_size));
+ TileHeader_write(tile_header_ref, TileHeader(stroke_n, item_header));
+ SegmentRef seg_ref = SegmentRef(0);
+ uint seg_limit = 0;
+ // Iterate through items; stroke_n holds count remaining.
+ while (true) {
+ if (chunk.chunk_n == 0) {
+ chunk_ref = chunk.next;
+ chunk = Chunk_read(chunk_ref);
+ stroke_ref = InstanceRef(chunk_ref.offset + Chunk_size);
+ }
+ Instance ins = Instance_read(stroke_ref);
+ PietStrokePolyLine poly = PietItem_Poly_read(PietItemRef(ins.item_ref));
+
+ // Process the stroke polyline item.
+ uint max_n_segs = poly.n_points - 1;
+ uint reserve = max_n_segs * Segment_size;
+ if (seg_ref.offset + reserve > seg_limit) {
+ // This is a heuristic to balance atomic bandwidth and utilization.
+ // The output always gets a contiguous allocation. We might use
+ // all, some, or none of the capacity.
+ uint capacity_bytes = stroke_n > 1 ? reserve * 2 + 128 : reserve;
+ seg_ref.offset = atomicAdd(alloc, capacity_bytes);
+ seg_limit = seg_ref.offset + capacity_bytes;
+ }
+ uint n_segs = 0;
+ vec2 start = Point_read(poly.points).xy;
+ for (uint j = 0; j < max_n_segs; j++) {
+ poly.points.offset += Point_size;
+ vec2 end = Point_read(poly.points).xy;
+
+ // Process one segment.
+
+ // This logic just tests for collision. What we probably want to do
+ // is a clipping algorithm like Liang-Barsky, and then store coords
+ // relative to the tile in f16. See also:
+ // https://tavianator.com/fast-branchless-raybounding-box-intersections/
+
+ // Also note that when we go to the fancy version, we want to compute
+ // the (horizontal projection of) the bounding box of the intersection
+ // once per tilegroup, so we can assign work to individual tiles.
+
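+ // Implicit line equation a*x + b*y + c = 0 for the infinite line
+ // through start and end; the four corner signs below tell whether
+ // the padded tile straddles that line.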
+ float a = end.y - start.y;
+ float b = start.x - end.x;
+ float c = -(a * start.x + b * start.y);
+ float half_width = 0.5 * poly.width;
+ // Tile boundaries padded by half-width.
+ float xmin = xy0.x - half_width;
+ float ymin = xy0.y - half_width;
+ float xmax = xy0.x + float(TILE_WIDTH_PX) + half_width;
+ float ymax = xy0.y + float(TILE_HEIGHT_PX) + half_width;
+ float s00 = sign(b * ymin + a * xmin + c);
+ float s01 = sign(b * ymin + a * xmax + c);
+ float s10 = sign(b * ymax + a * xmin + c);
+ float s11 = sign(b * ymax + a * xmax + c);
+ // If bounding boxes intersect and not all four corners are on the same side, hit.
+ // Also note: this is designed to be false on NAN input.
+ if (max(min(start.x, end.x), xmin) < min(max(start.x, end.x), xmax)
+ && max(min(start.y, end.y), ymin) < min(max(start.y, end.y), ymax)
+ && s00 * s01 + s00 * s10 + s00 * s11 < 3.0)
+ {
+ Segment seg = Segment(start, end);
+ Segment_write(Segment_index(seg_ref, n_segs), seg);
+ n_segs++;
+ }
+
+ start = end;
+ }
+ ItemHeader_write(item_header, ItemHeader(n_segs, seg_ref));
+ if (--stroke_n == 0) {
+ break;
+ }
+ seg_ref.offset += n_segs * Segment_size;
+
+ stroke_ref.offset += Instance_size;
+ chunk.chunk_n--;
+ item_header.offset += ItemHeader_size;
+ }
+ } else {
+ // As an optimization, we could write just the size field (0) here.
+ TileHeader_write(tile_header_ref, TileHeader(stroke_n, ItemHeaderRef(0)));
+ }
+}
diff --git a/piet-gpu/shader/kernel2s.spv b/piet-gpu/shader/kernel2s.spv
new file mode 100644
index 0000000..7c7f48f
--- /dev/null
+++ b/piet-gpu/shader/kernel2s.spv
Binary files differ
diff --git a/piet-gpu/shader/kernel3.comp b/piet-gpu/shader/kernel3.comp
new file mode 100644
index 0000000..fc4f9ea
--- /dev/null
+++ b/piet-gpu/shader/kernel3.comp
@@ -0,0 +1,107 @@
+// This is "kernel 3" in a 4-kernel pipeline. It walks the active items
+// for the tilegroup and produces a per-tile command list for each tile.
+
+#version 450
+#extension GL_GOOGLE_include_directive : enable
+
+layout(local_size_x = 32, local_size_y = 1) in;
+
+layout(set = 0, binding = 0) readonly buffer SceneBuf {
+ uint[] scene;
+};
+
+// TODO: this should have a `readonly` qualifier, but then inclusion
+// of ptcl.h would fail because of the writers.
+layout(set = 0, binding = 1) buffer TilegroupBuf {
+ uint[] tilegroup;
+};
+
+// Used readonly
+layout(set = 0, binding = 2) buffer SegmentBuf {
+ uint[] segment;
+};
+
+layout(set = 0, binding = 3) buffer PtclBuf {
+ uint[] ptcl;
+};
+
+layout(set = 0, binding = 4) buffer AllocBuf {
+ uint alloc;
+};
+
+#include "scene.h"
+#include "tilegroup.h"
+#include "segment.h"
+#include "ptcl.h"
+
+#include "setup.h"
+
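+// Bump-allocate a fresh ptcl chunk when the current one is nearly full.
+// The limit is kept 2 * Cmd_size below the end so there is always room
+// for one more command plus the trailing Jump or End.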
+void alloc_cmd(inout CmdRef cmd_ref, inout uint cmd_limit) {
+ if (cmd_ref.offset > cmd_limit) {
+ uint new_cmd = atomicAdd(alloc, PTCL_INITIAL_ALLOC);
+ CmdJump jump = CmdJump(new_cmd);
+ Cmd_Jump_write(cmd_ref, jump);
+ cmd_ref = CmdRef(new_cmd);
+ cmd_limit = new_cmd + PTCL_INITIAL_ALLOC - 2 * Cmd_size;
+ }
+}
+
+void main() {
+ uint tile_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILES + gl_GlobalInvocationID.x;
+ uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS
+ + (gl_GlobalInvocationID.x / TILEGROUP_WIDTH_TILES);
+ vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX);
+ TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_STRIDE);
+ CmdRef cmd_ref = CmdRef(tile_ix * PTCL_INITIAL_ALLOC);
+ uint cmd_limit = cmd_ref.offset + PTCL_INITIAL_ALLOC - 2 * Cmd_size;
+
+ TileHeader stroke_th = TileHeader_read(TileHeaderRef(tile_ix * TileHeader_size));
+
+ while (true) {
+ uint tg_tag = TileGroup_tag(tg_ref);
+ if (tg_tag == TileGroup_End) {
+ break;
+ }
+ if (tg_tag == TileGroup_Jump) {
+ tg_ref = TileGroup_Jump_read(tg_ref).new_ref;
+ continue;
+ }
+ // Assume tg_tag is `Instance`, though there will be more cases.
+ Instance ins = TileGroup_Instance_read(tg_ref);
+ PietItemRef item_ref = PietItemRef(ins.item_ref);
+ uint item_tag = PietItem_tag(item_ref);
+ switch (item_tag) {
+ case PietItem_Circle:
+ PietCircle circle = PietItem_Circle_read(item_ref);
+ vec2 center = ins.offset + circle.center.xy;
+ float r = circle.radius;
+ if (max(center.x - r, xy0.x) < min(center.x + r, xy0.x + float(TILE_WIDTH_PX))
+ && max(center.y - r, xy0.y) < min(center.y + r, xy0.y + float(TILE_HEIGHT_PX)))
+ {
+ CmdCircle cmd = CmdCircle(center, r, circle.rgba_color);
+ alloc_cmd(cmd_ref, cmd_limit);
+ Cmd_Circle_write(cmd_ref, cmd);
+ cmd_ref.offset += Cmd_size;
+ }
+ break;
+ case PietItem_Poly:
+ ItemHeader stroke_item = ItemHeader_read(stroke_th.items);
+ stroke_th.items.offset += ItemHeader_size;
+ if (stroke_item.n > 0) {
+ PietStrokePolyLine poly = PietItem_Poly_read(item_ref);
+ CmdStroke cmd = CmdStroke(
+ stroke_item.n,
+ stroke_item.segments.offset,
+ 0.5 * poly.width,
+ poly.rgba_color
+ );
+ alloc_cmd(cmd_ref, cmd_limit);
+ Cmd_Stroke_write(cmd_ref, cmd);
+ cmd_ref.offset += Cmd_size;
+ }
+ break;
+ }
+ tg_ref.offset += TileGroup_size;
+ }
+ Cmd_End_write(cmd_ref);
+}
diff --git a/piet-gpu/shader/kernel3.spv b/piet-gpu/shader/kernel3.spv
new file mode 100644
index 0000000..f5b83bc
--- /dev/null
+++ b/piet-gpu/shader/kernel3.spv
Binary files differ
diff --git a/piet-gpu/shader/kernel4.comp b/piet-gpu/shader/kernel4.comp
new file mode 100644
index 0000000..931f28b
--- /dev/null
+++ b/piet-gpu/shader/kernel4.comp
@@ -0,0 +1,79 @@
+// This is "kernel 4" in a 4-kernel pipeline. It renders the commands
+// in the per-tile command list to an image.
+
+// Right now, this kernel stores the image in a buffer; using a texture
+// would be a better plan, but image support is currently limited.
+
+#version 450
+#extension GL_GOOGLE_include_directive : enable
+
+layout(local_size_x = 16, local_size_y = 16) in;
+
+// As in kernel 3, this should be readonly, but ptcl.h contains writers.
+layout(set = 0, binding = 0) buffer PtclBuf {
+ uint[] ptcl;
+};
+
+// Used readonly
+layout(set = 0, binding = 1) buffer SegmentBuf {
+ uint[] segment;
+};
+
+layout(set = 0, binding = 2) buffer ImageBuf {
+ uint[] image;
+};
+
+#include "ptcl.h"
+#include "segment.h"
+
+#include "setup.h"
+
+void main() {
+ uint tile_ix = gl_WorkGroupID.y * WIDTH_IN_TILES + gl_WorkGroupID.x;
+ CmdRef cmd_ref = CmdRef(tile_ix * PTCL_INITIAL_ALLOC);
+
+ uvec2 xy_uint = gl_GlobalInvocationID.xy;
+ vec2 xy = vec2(xy_uint);
+ vec2 uv = xy * vec2(1.0 / IMAGE_WIDTH, 1.0 / IMAGE_HEIGHT);
+ vec3 rgb = uv.xyy;
+
+ while (true) {
+ uint tag = Cmd_tag(cmd_ref);
+ if (tag == Cmd_End) {
+ break;
+ }
+ switch (tag) {
+ case Cmd_Circle:
+ CmdCircle circle = Cmd_Circle_read(cmd_ref);
+ float r = length(xy + vec2(0.5, 0.5) - circle.center.xy);
+ float alpha = clamp(0.5 + circle.radius - r, 0.0, 1.0);
+ vec4 fg_rgba = unpackUnorm4x8(circle.rgba_color).wzyx;
+ // TODO: sRGB
+ rgb = mix(rgb, fg_rgba.rgb, alpha * fg_rgba.a);
+ break;
+ case Cmd_Stroke:
+ CmdStroke stroke = Cmd_Stroke_read(cmd_ref);
+ float df = 1e9;
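+ // Distance from the pixel center to the nearest segment: project
+ // the point onto each segment, clamp t to [0, 1], take the minimum.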
+ for (int i = 0; i < stroke.n_segs; i++) {
+ Segment seg = Segment_read(Segment_index(SegmentRef(stroke.seg_ref), i));
+ vec2 line_vec = seg.end - seg.start;
+ vec2 dpos = xy + vec2(0.5, 0.5) - seg.start;
+ float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0);
+ df = min(df, length(line_vec * t - dpos));
+ }
+ fg_rgba = unpackUnorm4x8(stroke.rgba_color).wzyx;
+ alpha = clamp(stroke.half_width + 0.5 - df, 0.0, 1.0);
+ rgb = mix(rgb, fg_rgba.rgb, alpha * fg_rgba.a);
+ break;
+ case Cmd_Jump:
+ cmd_ref = CmdRef(Cmd_Jump_read(cmd_ref).new_ref);
+ continue;
+ }
+ cmd_ref.offset += Cmd_size;
+ }
+
+ // TODO: sRGB
+ uvec4 s = uvec4(round(vec4(rgb, 1.0) * 255.0));
+ uint rgba_packed = s.r | (s.g << 8) | (s.b << 16) | (s.a << 24);
+ image[xy_uint.y * IMAGE_WIDTH + xy_uint.x] = rgba_packed;
+}
diff --git a/piet-gpu/shader/kernel4.spv b/piet-gpu/shader/kernel4.spv
new file mode 100644
index 0000000..b931f23
--- /dev/null
+++ b/piet-gpu/shader/kernel4.spv
Binary files differ
diff --git a/piet-gpu/shader/ptcl.h b/piet-gpu/shader/ptcl.h
new file mode 100644
index 0000000..8b62538
--- /dev/null
+++ b/piet-gpu/shader/ptcl.h
@@ -0,0 +1,368 @@
+// Code auto-generated by piet-gpu-derive
+
+struct CmdCircleRef {
+ uint offset;
+};
+
+struct CmdLineRef {
+ uint offset;
+};
+
+struct CmdStrokeRef {
+ uint offset;
+};
+
+struct CmdFillRef {
+ uint offset;
+};
+
+struct CmdFillEdgeRef {
+ uint offset;
+};
+
+struct CmdDrawFillRef {
+ uint offset;
+};
+
+struct CmdSolidRef {
+ uint offset;
+};
+
+struct CmdJumpRef {
+ uint offset;
+};
+
+struct CmdRef {
+ uint offset;
+};
+
+struct CmdCircle {
+ vec2 center;
+ float radius;
+ uint rgba_color;
+};
+
+#define CmdCircle_size 16
+
+CmdCircleRef CmdCircle_index(CmdCircleRef ref, uint index) {
+ return CmdCircleRef(ref.offset + index * CmdCircle_size);
+}
+
+struct CmdLine {
+ vec2 start;
+ vec2 end;
+};
+
+#define CmdLine_size 16
+
+CmdLineRef CmdLine_index(CmdLineRef ref, uint index) {
+ return CmdLineRef(ref.offset + index * CmdLine_size);
+}
+
+struct CmdStroke {
+ uint n_segs;
+ uint seg_ref;
+ float half_width;
+ uint rgba_color;
+};
+
+#define CmdStroke_size 16
+
+CmdStrokeRef CmdStroke_index(CmdStrokeRef ref, uint index) {
+ return CmdStrokeRef(ref.offset + index * CmdStroke_size);
+}
+
+struct CmdFill {
+ vec2 start;
+ vec2 end;
+};
+
+#define CmdFill_size 16
+
+CmdFillRef CmdFill_index(CmdFillRef ref, uint index) {
+ return CmdFillRef(ref.offset + index * CmdFill_size);
+}
+
+struct CmdFillEdge {
+ int sign;
+ float y;
+};
+
+#define CmdFillEdge_size 8
+
+CmdFillEdgeRef CmdFillEdge_index(CmdFillEdgeRef ref, uint index) {
+ return CmdFillEdgeRef(ref.offset + index * CmdFillEdge_size);
+}
+
+struct CmdDrawFill {
+ int backdrop;
+ uint rgba_color;
+};
+
+#define CmdDrawFill_size 8
+
+CmdDrawFillRef CmdDrawFill_index(CmdDrawFillRef ref, uint index) {
+ return CmdDrawFillRef(ref.offset + index * CmdDrawFill_size);
+}
+
+struct CmdSolid {
+ uint rgba_color;
+};
+
+#define CmdSolid_size 4
+
+CmdSolidRef CmdSolid_index(CmdSolidRef ref, uint index) {
+ return CmdSolidRef(ref.offset + index * CmdSolid_size);
+}
+
+struct CmdJump {
+ uint new_ref;
+};
+
+#define CmdJump_size 4
+
+CmdJumpRef CmdJump_index(CmdJumpRef ref, uint index) {
+ return CmdJumpRef(ref.offset + index * CmdJump_size);
+}
+
+#define Cmd_End 0
+#define Cmd_Circle 1
+#define Cmd_Line 2
+#define Cmd_Fill 3
+#define Cmd_Stroke 4
+#define Cmd_FillEdge 5
+#define Cmd_DrawFill 6
+#define Cmd_Solid 7
+#define Cmd_Jump 8
+#define Cmd_Bail 9
+#define Cmd_size 20
+
+CmdRef Cmd_index(CmdRef ref, uint index) {
+ return CmdRef(ref.offset + index * Cmd_size);
+}
+
+CmdCircle CmdCircle_read(CmdCircleRef ref) {
+ uint ix = ref.offset >> 2;
+ uint raw0 = ptcl[ix + 0];
+ uint raw1 = ptcl[ix + 1];
+ uint raw2 = ptcl[ix + 2];
+ uint raw3 = ptcl[ix + 3];
+ CmdCircle s;
+ s.center = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
+ s.radius = uintBitsToFloat(raw2);
+ s.rgba_color = raw3;
+ return s;
+}
+
+void CmdCircle_write(CmdCircleRef ref, CmdCircle s) {
+ uint ix = ref.offset >> 2;
+ ptcl[ix + 0] = floatBitsToUint(s.center.x);
+ ptcl[ix + 1] = floatBitsToUint(s.center.y);
+ ptcl[ix + 2] = floatBitsToUint(s.radius);
+ ptcl[ix + 3] = s.rgba_color;
+}
+
+CmdLine CmdLine_read(CmdLineRef ref) {
+ uint ix = ref.offset >> 2;
+ uint raw0 = ptcl[ix + 0];
+ uint raw1 = ptcl[ix + 1];
+ uint raw2 = ptcl[ix + 2];
+ uint raw3 = ptcl[ix + 3];
+ CmdLine s;
+ s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
+ s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
+ return s;
+}
+
+void CmdLine_write(CmdLineRef ref, CmdLine s) {
+ uint ix = ref.offset >> 2;
+ ptcl[ix + 0] = floatBitsToUint(s.start.x);
+ ptcl[ix + 1] = floatBitsToUint(s.start.y);
+ ptcl[ix + 2] = floatBitsToUint(s.end.x);
+ ptcl[ix + 3] = floatBitsToUint(s.end.y);
+}
+
+CmdStroke CmdStroke_read(CmdStrokeRef ref) {
+ uint ix = ref.offset >> 2;
+ uint raw0 = ptcl[ix + 0];
+ uint raw1 = ptcl[ix + 1];
+ uint raw2 = ptcl[ix + 2];
+ uint raw3 = ptcl[ix + 3];
+ CmdStroke s;
+ s.n_segs = raw0;
+ s.seg_ref = raw1;
+ s.half_width = uintBitsToFloat(raw2);
+ s.rgba_color = raw3;
+ return s;
+}
+
+void CmdStroke_write(CmdStrokeRef ref, CmdStroke s) {
+ uint ix = ref.offset >> 2;
+ ptcl[ix + 0] = s.n_segs;
+ ptcl[ix + 1] = s.seg_ref;
+ ptcl[ix + 2] = floatBitsToUint(s.half_width);
+ ptcl[ix + 3] = s.rgba_color;
+}
+
+CmdFill CmdFill_read(CmdFillRef ref) {
+ uint ix = ref.offset >> 2;
+ uint raw0 = ptcl[ix + 0];
+ uint raw1 = ptcl[ix + 1];
+ uint raw2 = ptcl[ix + 2];
+ uint raw3 = ptcl[ix + 3];
+ CmdFill s;
+ s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
+ s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
+ return s;
+}
+
+void CmdFill_write(CmdFillRef ref, CmdFill s) {
+ uint ix = ref.offset >> 2;
+ ptcl[ix + 0] = floatBitsToUint(s.start.x);
+ ptcl[ix + 1] = floatBitsToUint(s.start.y);
+ ptcl[ix + 2] = floatBitsToUint(s.end.x);
+ ptcl[ix + 3] = floatBitsToUint(s.end.y);
+}
+
+CmdFillEdge CmdFillEdge_read(CmdFillEdgeRef ref) {
+ uint ix = ref.offset >> 2;
+ uint raw0 = ptcl[ix + 0];
+ uint raw1 = ptcl[ix + 1];
+ CmdFillEdge s;
+ s.sign = int(raw0);
+ s.y = uintBitsToFloat(raw1);
+ return s;
+}
+
+void CmdFillEdge_write(CmdFillEdgeRef ref, CmdFillEdge s) {
+ uint ix = ref.offset >> 2;
+ ptcl[ix + 0] = uint(s.sign);
+ ptcl[ix + 1] = floatBitsToUint(s.y);
+}
+
+CmdDrawFill CmdDrawFill_read(CmdDrawFillRef ref) {
+ uint ix = ref.offset >> 2;
+ uint raw0 = ptcl[ix + 0];
+ uint raw1 = ptcl[ix + 1];
+ CmdDrawFill s;
+ s.backdrop = int(raw0);
+ s.rgba_color = raw1;
+ return s;
+}
+
+void CmdDrawFill_write(CmdDrawFillRef ref, CmdDrawFill s) {
+ uint ix = ref.offset >> 2;
+ ptcl[ix + 0] = uint(s.backdrop);
+ ptcl[ix + 1] = s.rgba_color;
+}
+
+CmdSolid CmdSolid_read(CmdSolidRef ref) {
+ uint ix = ref.offset >> 2;
+ uint raw0 = ptcl[ix + 0];
+ CmdSolid s;
+ s.rgba_color = raw0;
+ return s;
+}
+
+void CmdSolid_write(CmdSolidRef ref, CmdSolid s) {
+ uint ix = ref.offset >> 2;
+ ptcl[ix + 0] = s.rgba_color;
+}
+
+CmdJump CmdJump_read(CmdJumpRef ref) {
+ uint ix = ref.offset >> 2;
+ uint raw0 = ptcl[ix + 0];
+ CmdJump s;
+ s.new_ref = raw0;
+ return s;
+}
+
+void CmdJump_write(CmdJumpRef ref, CmdJump s) {
+ uint ix = ref.offset >> 2;
+ ptcl[ix + 0] = s.new_ref;
+}
+
+uint Cmd_tag(CmdRef ref) {
+ return ptcl[ref.offset >> 2];
+}
+
+CmdCircle Cmd_Circle_read(CmdRef ref) {
+ return CmdCircle_read(CmdCircleRef(ref.offset + 4));
+}
+
+CmdLine Cmd_Line_read(CmdRef ref) {
+ return CmdLine_read(CmdLineRef(ref.offset + 4));
+}
+
+CmdFill Cmd_Fill_read(CmdRef ref) {
+ return CmdFill_read(CmdFillRef(ref.offset + 4));
+}
+
+CmdStroke Cmd_Stroke_read(CmdRef ref) {
+ return CmdStroke_read(CmdStrokeRef(ref.offset + 4));
+}
+
+CmdFillEdge Cmd_FillEdge_read(CmdRef ref) {
+ return CmdFillEdge_read(CmdFillEdgeRef(ref.offset + 4));
+}
+
+CmdDrawFill Cmd_DrawFill_read(CmdRef ref) {
+ return CmdDrawFill_read(CmdDrawFillRef(ref.offset + 4));
+}
+
+CmdSolid Cmd_Solid_read(CmdRef ref) {
+ return CmdSolid_read(CmdSolidRef(ref.offset + 4));
+}
+
+CmdJump Cmd_Jump_read(CmdRef ref) {
+ return CmdJump_read(CmdJumpRef(ref.offset + 4));
+}
+
+void Cmd_End_write(CmdRef ref) {
+ ptcl[ref.offset >> 2] = Cmd_End;
+}
+
+void Cmd_Circle_write(CmdRef ref, CmdCircle s) {
+ ptcl[ref.offset >> 2] = Cmd_Circle;
+ CmdCircle_write(CmdCircleRef(ref.offset + 4), s);
+}
+
+void Cmd_Line_write(CmdRef ref, CmdLine s) {
+ ptcl[ref.offset >> 2] = Cmd_Line;
+ CmdLine_write(CmdLineRef(ref.offset + 4), s);
+}
+
+void Cmd_Fill_write(CmdRef ref, CmdFill s) {
+ ptcl[ref.offset >> 2] = Cmd_Fill;
+ CmdFill_write(CmdFillRef(ref.offset + 4), s);
+}
+
+void Cmd_Stroke_write(CmdRef ref, CmdStroke s) {
+ ptcl[ref.offset >> 2] = Cmd_Stroke;
+ CmdStroke_write(CmdStrokeRef(ref.offset + 4), s);
+}
+
+void Cmd_FillEdge_write(CmdRef ref, CmdFillEdge s) {
+ ptcl[ref.offset >> 2] = Cmd_FillEdge;
+ CmdFillEdge_write(CmdFillEdgeRef(ref.offset + 4), s);
+}
+
+void Cmd_DrawFill_write(CmdRef ref, CmdDrawFill s) {
+ ptcl[ref.offset >> 2] = Cmd_DrawFill;
+ CmdDrawFill_write(CmdDrawFillRef(ref.offset + 4), s);
+}
+
+void Cmd_Solid_write(CmdRef ref, CmdSolid s) {
+ ptcl[ref.offset >> 2] = Cmd_Solid;
+ CmdSolid_write(CmdSolidRef(ref.offset + 4), s);
+}
+
+void Cmd_Jump_write(CmdRef ref, CmdJump s) {
+ ptcl[ref.offset >> 2] = Cmd_Jump;
+ CmdJump_write(CmdJumpRef(ref.offset + 4), s);
+}
+
+void Cmd_Bail_write(CmdRef ref) {
+ ptcl[ref.offset >> 2] = Cmd_Bail;
+}
+
diff --git a/piet-gpu/shader/segment.h b/piet-gpu/shader/segment.h
new file mode 100644
index 0000000..517c115
--- /dev/null
+++ b/piet-gpu/shader/segment.h
@@ -0,0 +1,99 @@
+// Code auto-generated by piet-gpu-derive
+
+struct TileHeaderRef {
+ uint offset;
+};
+
+struct ItemHeaderRef {
+ uint offset;
+};
+
+struct SegmentRef {
+ uint offset;
+};
+
+struct TileHeader {
+ uint n;
+ ItemHeaderRef items;
+};
+
+#define TileHeader_size 8
+
+TileHeaderRef TileHeader_index(TileHeaderRef ref, uint index) {
+ return TileHeaderRef(ref.offset + index * TileHeader_size);
+}
+
+struct ItemHeader {
+ uint n;
+ SegmentRef segments;
+};
+
+#define ItemHeader_size 8
+
+ItemHeaderRef ItemHeader_index(ItemHeaderRef ref, uint index) {
+ return ItemHeaderRef(ref.offset + index * ItemHeader_size);
+}
+
+struct Segment {
+ vec2 start;
+ vec2 end;
+};
+
+#define Segment_size 16
+
+SegmentRef Segment_index(SegmentRef ref, uint index) {
+ return SegmentRef(ref.offset + index * Segment_size);
+}
+
+TileHeader TileHeader_read(TileHeaderRef ref) {
+ uint ix = ref.offset >> 2;
+ uint raw0 = segment[ix + 0];
+ uint raw1 = segment[ix + 1];
+ TileHeader s;
+ s.n = raw0;
+ s.items = ItemHeaderRef(raw1);
+ return s;
+}
+
+void TileHeader_write(TileHeaderRef ref, TileHeader s) {
+ uint ix = ref.offset >> 2;
+ segment[ix + 0] = s.n;
+ segment[ix + 1] = s.items.offset;
+}
+
+ItemHeader ItemHeader_read(ItemHeaderRef ref) {
+ uint ix = ref.offset >> 2;
+ uint raw0 = segment[ix + 0];
+ uint raw1 = segment[ix + 1];
+ ItemHeader s;
+ s.n = raw0;
+ s.segments = SegmentRef(raw1);
+ return s;
+}
+
+void ItemHeader_write(ItemHeaderRef ref, ItemHeader s) {
+ uint ix = ref.offset >> 2;
+ segment[ix + 0] = s.n;
+ segment[ix + 1] = s.segments.offset;
+}
+
+Segment Segment_read(SegmentRef ref) {
+ uint ix = ref.offset >> 2;
+ uint raw0 = segment[ix + 0];
+ uint raw1 = segment[ix + 1];
+ uint raw2 = segment[ix + 2];
+ uint raw3 = segment[ix + 3];
+ Segment s;
+ s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
+ s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
+ return s;
+}
+
+void Segment_write(SegmentRef ref, Segment s) {
+ uint ix = ref.offset >> 2;
+ segment[ix + 0] = floatBitsToUint(s.start.x);
+ segment[ix + 1] = floatBitsToUint(s.start.y);
+ segment[ix + 2] = floatBitsToUint(s.end.x);
+ segment[ix + 3] = floatBitsToUint(s.end.y);
+}
+
diff --git a/piet-gpu/shader/setup.h b/piet-gpu/shader/setup.h
new file mode 100644
index 0000000..a644dc0
--- /dev/null
+++ b/piet-gpu/shader/setup.h
@@ -0,0 +1,34 @@
+// Various constants for the sizes of groups and tiles.
+
+// Much of this will be made dynamic in various ways, but for now it's easiest
+// to hardcode and keep all in one place.
+
+// TODO: make the image size dynamic.
+#define IMAGE_WIDTH 2048
+#define IMAGE_HEIGHT 1536
+
+// TODO: compute this
+#define WIDTH_IN_TILEGROUPS 4
+
+#define TILEGROUP_WIDTH_PX 512
+#define TILEGROUP_HEIGHT_PX 16
+
+#define TILEGROUP_INITIAL_ALLOC 1024
+
+// Quick note on the layout of tilegroups (k1 output): the base region
+// reserves TILEGROUP_STRIDE bytes per tilegroup. At offset 0 are the
+// main instances, encoded with Instance, Jump, and End. At offset
+// TILEGROUP_STROKE_START are the stroke instances, encoded as a linked
+// list of Chunk structures.
+#define TILEGROUP_STRIDE 2048
+#define TILEGROUP_STROKE_START 1024
+#define TILEGROUP_STROKE_ALLOC 1024
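+
+// Example with the constants above: tilegroup 0 occupies bytes [0, 2048),
+// with main instances starting at 0 and stroke instances at 1024;
+// tilegroup 1 starts at byte 2048.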
+
+// TODO: compute all these
+
+#define WIDTH_IN_TILES 128
+#define TILEGROUP_WIDTH_TILES 32
+#define TILE_WIDTH_PX 16
+#define TILE_HEIGHT_PX 16
+
+#define PTCL_INITIAL_ALLOC 1024
diff --git a/piet-gpu/shader/tilegroup.h b/piet-gpu/shader/tilegroup.h
index f1d646f..213ddc3 100644
--- a/piet-gpu/shader/tilegroup.h
+++ b/piet-gpu/shader/tilegroup.h
@@ -4,6 +4,14 @@
uint offset;
};
+struct JumpRef {
+ uint offset;
+};
+
+struct ChunkRef {
+ uint offset;
+};
+
struct TileGroupRef {
uint offset;
};
@@ -19,8 +27,30 @@
return InstanceRef(ref.offset + index * Instance_size);
}
+struct Jump {
+ TileGroupRef new_ref;
+};
+
+#define Jump_size 4
+
+JumpRef Jump_index(JumpRef ref, uint index) {
+ return JumpRef(ref.offset + index * Jump_size);
+}
+
+struct Chunk {
+ uint chunk_n;
+ ChunkRef next;
+};
+
+#define Chunk_size 8
+
+ChunkRef Chunk_index(ChunkRef ref, uint index) {
+ return ChunkRef(ref.offset + index * Chunk_size);
+}
+
#define TileGroup_Instance 0
-#define TileGroup_End 1
+#define TileGroup_Jump 1
+#define TileGroup_End 2
#define TileGroup_size 16
TileGroupRef TileGroup_index(TileGroupRef ref, uint index) {
@@ -45,6 +75,35 @@
tilegroup[ix + 2] = floatBitsToUint(s.offset.y);
}
+Jump Jump_read(JumpRef ref) {
+ uint ix = ref.offset >> 2;
+ uint raw0 = tilegroup[ix + 0];
+ Jump s;
+ s.new_ref = TileGroupRef(raw0);
+ return s;
+}
+
+void Jump_write(JumpRef ref, Jump s) {
+ uint ix = ref.offset >> 2;
+ tilegroup[ix + 0] = s.new_ref.offset;
+}
+
+Chunk Chunk_read(ChunkRef ref) {
+ uint ix = ref.offset >> 2;
+ uint raw0 = tilegroup[ix + 0];
+ uint raw1 = tilegroup[ix + 1];
+ Chunk s;
+ s.chunk_n = raw0;
+ s.next = ChunkRef(raw1);
+ return s;
+}
+
+void Chunk_write(ChunkRef ref, Chunk s) {
+ uint ix = ref.offset >> 2;
+ tilegroup[ix + 0] = s.chunk_n;
+ tilegroup[ix + 1] = s.next.offset;
+}
+
uint TileGroup_tag(TileGroupRef ref) {
return tilegroup[ref.offset >> 2];
}
@@ -53,11 +112,20 @@
return Instance_read(InstanceRef(ref.offset + 4));
}
+Jump TileGroup_Jump_read(TileGroupRef ref) {
+ return Jump_read(JumpRef(ref.offset + 4));
+}
+
void TileGroup_Instance_write(TileGroupRef ref, Instance s) {
tilegroup[ref.offset >> 2] = TileGroup_Instance;
Instance_write(InstanceRef(ref.offset + 4), s);
}
+void TileGroup_Jump_write(TileGroupRef ref, Jump s) {
+ tilegroup[ref.offset >> 2] = TileGroup_Jump;
+ Jump_write(JumpRef(ref.offset + 4), s);
+}
+
void TileGroup_End_write(TileGroupRef ref) {
tilegroup[ref.offset >> 2] = TileGroup_End;
}
diff --git a/piet-gpu/src/main.rs b/piet-gpu/src/main.rs
index 72f0d3c..9f4f25f 100644
--- a/piet-gpu/src/main.rs
+++ b/piet-gpu/src/main.rs
@@ -4,11 +4,15 @@
use rand::{Rng, RngCore};
+use piet::kurbo::{BezPath, Circle, Line, Point, Vec2};
+use piet::{Color, RenderContext};
+
use piet_gpu_hal::vulkan::VkInstance;
use piet_gpu_hal::{CmdBuf, Device, MemFlags};
-use piet_gpu_types::encoder::{Encode, Encoder};
-use piet_gpu_types::scene::{Bbox, PietCircle, PietItem, Point, SimpleGroup};
+mod render_ctx;
+
+use render_ctx::PietGpuRenderContext;
const WIDTH: usize = 2048;
const HEIGHT: usize = 1536;
@@ -16,52 +20,53 @@
const TILE_W: usize = 16;
const TILE_H: usize = 16;
-const N_CIRCLES: usize = 100;
+const WIDTH_IN_TILEGROUPS: usize = 4;
+const HEIGHT_IN_TILEGROUPS: usize = 96;
+const TILEGROUP_STRIDE: usize = 2048;
-fn make_scene() -> Vec<u8> {
+const WIDTH_IN_TILES: usize = 128;
+const HEIGHT_IN_TILES: usize = 96;
+const PTCL_INITIAL_ALLOC: usize = 1024;
+
+const K2_PER_TILE_SIZE: usize = 8;
+
+const N_CIRCLES: usize = 1;
+
+fn render_scene(rc: &mut impl RenderContext) {
let mut rng = rand::thread_rng();
- let mut encoder = Encoder::new();
- let _reserve_root = encoder.alloc_chunk(PietItem::fixed_size() as u32);
-
- let mut items = Vec::new();
- let mut bboxes = Vec::new();
for _ in 0..N_CIRCLES {
- let circle = PietCircle {
- rgba_color: rng.next_u32(),
- center: Point {
- xy: [
- rng.gen_range(0.0, WIDTH as f32),
- rng.gen_range(0.0, HEIGHT as f32),
- ],
- },
- radius: rng.gen_range(0.0, 50.0),
- };
- let bbox = Bbox {
- bbox: [
- (circle.center.xy[0] - circle.radius).floor() as i16,
- (circle.center.xy[1] - circle.radius).floor() as i16,
- (circle.center.xy[0] + circle.radius).ceil() as i16,
- (circle.center.xy[1] + circle.radius).ceil() as i16,
- ],
- };
- items.push(PietItem::Circle(circle));
- bboxes.push(bbox);
+ let color = Color::from_rgba32_u32(rng.next_u32());
+ let center = Point::new(
+ rng.gen_range(0.0, WIDTH as f64),
+ rng.gen_range(0.0, HEIGHT as f64),
+ );
+ let radius = rng.gen_range(0.0, 50.0);
+ let circle = Circle::new(center, radius);
+ rc.fill(circle, &color);
}
+ rc.stroke(
+ Line::new((100.0, 100.0), (200.0, 150.0)),
+ &Color::WHITE,
+ 5.0,
+ );
+ render_cardioid(rc);
+}
- let n_items = bboxes.len() as u32;
- let bboxes = bboxes.encode(&mut encoder).transmute();
- let items = items.encode(&mut encoder).transmute();
- let offset = Point { xy: [0.0, 0.0] };
- let simple_group = SimpleGroup {
- n_items,
- bboxes,
- items,
- offset,
- };
- let root_item = PietItem::Group(simple_group);
- root_item.encode_to(&mut encoder.buf_mut()[0..PietItem::fixed_size()]);
- // We should avoid this clone.
- encoder.buf().to_owned()
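+// Draws the classic "times two" figure: chord i runs from the circle point at
+// angle i*dth to the point at angle (2*i mod n)*dth; the chords envelope a
+// cardioid.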
+fn render_cardioid(rc: &mut impl RenderContext) {
+ let n = 100;
+ let dth = std::f64::consts::PI * 2.0 / (n as f64);
+ let center = Point::new(1024.0, 768.0);
+ let r = 750.0;
+ let mut path = BezPath::new();
+ for i in 1..n {
+ let p0 = center + Vec2::from_angle(i as f64 * dth) * r;
+ let p1 = center + Vec2::from_angle(((i * 2) % n) as f64 * dth) * r;
+ rc.fill(&Circle::new(p0, 8.0), &Color::WHITE);
+ path.move_to(p0);
+ path.line_to(p1);
+ //rc.stroke(Line::new(p0, p1), &Color::BLACK, 2.0);
+ }
+ rc.stroke(&path, &Color::BLACK, 2.0);
}
#[allow(unused)]
@@ -73,6 +78,7 @@
}
}
+#[allow(unused)]
fn dump_k1_data(k1_buf: &[u32]) {
for i in 0..k1_buf.len() {
if k1_buf[i] != 0 {
@@ -87,7 +93,9 @@
let device = instance.device().unwrap();
let host = MemFlags::host_coherent();
let dev = MemFlags::device_local();
- let scene = make_scene();
+ let mut ctx = PietGpuRenderContext::new();
+ render_scene(&mut ctx);
+ let scene = ctx.get_scene_buf();
//dump_scene(&scene);
let scene_buf = device
.create_buffer(std::mem::size_of_val(&scene[..]) as u64, host)
@@ -96,7 +104,9 @@
.create_buffer(std::mem::size_of_val(&scene[..]) as u64, dev)
.unwrap();
device.write_buffer(&scene_buf, &scene).unwrap();
- let tilegroup_buf = device.create_buffer(384 * 1024, host).unwrap();
+ let tilegroup_buf = device.create_buffer(4 * 1024 * 1024, dev).unwrap();
+ let ptcl_buf = device.create_buffer(48 * 1024 * 1024, dev).unwrap();
+ let segment_buf = device.create_buffer(64 * 1024 * 1024, dev).unwrap();
let image_buf = device
.create_buffer((WIDTH * HEIGHT * 4) as u64, host)
.unwrap();
@@ -104,23 +114,74 @@
.create_buffer((WIDTH * HEIGHT * 4) as u64, dev)
.unwrap();
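+ // Each kernel gets a one-word allocation buffer, seeded on the host with
+ // the offset just past that kernel's statically laid out region (the
+ // kernel presumably bumps it atomically for dynamic allocations).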
+ let k1_alloc_buf_host = device.create_buffer(4, host).unwrap();
+ let k1_alloc_buf_dev = device.create_buffer(4, dev).unwrap();
+ let k1_alloc_start = WIDTH_IN_TILEGROUPS * HEIGHT_IN_TILEGROUPS * TILEGROUP_STRIDE;
+ device
+ .write_buffer(&k1_alloc_buf_host, &[k1_alloc_start as u32])
+ .unwrap();
let k1_code = include_bytes!("../shader/kernel1.spv");
- let k1_pipeline = device.create_simple_compute_pipeline(k1_code, 2).unwrap();
+ let k1_pipeline = device.create_simple_compute_pipeline(k1_code, 3).unwrap();
let k1_ds = device
- .create_descriptor_set(&k1_pipeline, &[&scene_dev, &tilegroup_buf])
+ .create_descriptor_set(
+ &k1_pipeline,
+ &[&scene_dev, &tilegroup_buf, &k1_alloc_buf_dev],
+ )
.unwrap();
- let code = include_bytes!("../shader/image.spv");
- let pipeline = device.create_simple_compute_pipeline(code, 2).unwrap();
- let descriptor_set = device
- .create_descriptor_set(&pipeline, &[&scene_dev, &image_dev])
+ let k2s_alloc_buf_host = device.create_buffer(4, host).unwrap();
+ let k2s_alloc_buf_dev = device.create_buffer(4, dev).unwrap();
+ let k2s_alloc_start = WIDTH_IN_TILES * HEIGHT_IN_TILES * K2_PER_TILE_SIZE;
+ device
+ .write_buffer(&k2s_alloc_buf_host, &[k2s_alloc_start as u32])
.unwrap();
- let query_pool = device.create_query_pool(3).unwrap();
+ let k2s_code = include_bytes!("../shader/kernel2s.spv");
+ let k2s_pipeline = device.create_simple_compute_pipeline(k2s_code, 4).unwrap();
+ let k2s_ds = device
+ .create_descriptor_set(
+ &k2s_pipeline,
+ &[&scene_dev, &tilegroup_buf, &segment_buf, &k2s_alloc_buf_dev],
+ )
+ .unwrap();
+
+ let k3_alloc_buf_host = device.create_buffer(4, host).unwrap();
+ let k3_alloc_buf_dev = device.create_buffer(4, dev).unwrap();
+ let k3_alloc_start = WIDTH_IN_TILES * HEIGHT_IN_TILES * PTCL_INITIAL_ALLOC;
+ device
+ .write_buffer(&k3_alloc_buf_host, &[k3_alloc_start as u32])
+ .unwrap();
+ let k3_code = include_bytes!("../shader/kernel3.spv");
+ let k3_pipeline = device.create_simple_compute_pipeline(k3_code, 5).unwrap();
+ let k3_ds = device
+ .create_descriptor_set(
+ &k3_pipeline,
+ &[
+ &scene_dev,
+ &tilegroup_buf,
+ &segment_buf,
+ &ptcl_buf,
+ &k3_alloc_buf_dev,
+ ],
+ )
+ .unwrap();
+
+ let k4_code = include_bytes!("../shader/kernel4.spv");
+ let k4_pipeline = device.create_simple_compute_pipeline(k4_code, 3).unwrap();
+ let k4_ds = device
+ .create_descriptor_set(&k4_pipeline, &[&ptcl_buf, &segment_buf, &image_dev])
+ .unwrap();
+
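+ // Five timestamps bracket the four compute dispatches below.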
+ let query_pool = device.create_query_pool(5).unwrap();
let mut cmd_buf = device.create_cmd_buf().unwrap();
cmd_buf.begin();
cmd_buf.copy_buffer(&scene_buf, &scene_dev);
+ cmd_buf.copy_buffer(&k1_alloc_buf_host, &k1_alloc_buf_dev);
+ cmd_buf.copy_buffer(&k2s_alloc_buf_host, &k2s_alloc_buf_dev);
+ cmd_buf.copy_buffer(&k3_alloc_buf_host, &k3_alloc_buf_dev);
cmd_buf.clear_buffer(&tilegroup_buf);
+ cmd_buf.clear_buffer(&ptcl_buf);
cmd_buf.memory_barrier();
+ cmd_buf.reset_query_pool(&query_pool);
cmd_buf.write_timestamp(&query_pool, 0);
cmd_buf.dispatch(
&k1_pipeline,
@@ -130,22 +191,49 @@
cmd_buf.write_timestamp(&query_pool, 1);
cmd_buf.memory_barrier();
cmd_buf.dispatch(
- &pipeline,
- &descriptor_set,
- ((WIDTH / TILE_W) as u32, (HEIGHT / TILE_H) as u32, 1),
+ &k2s_pipeline,
+ &k2s_ds,
+ ((WIDTH / 512) as u32, (HEIGHT / 16) as u32, 1),
);
cmd_buf.write_timestamp(&query_pool, 2);
cmd_buf.memory_barrier();
+ cmd_buf.dispatch(
+ &k3_pipeline,
+ &k3_ds,
+ ((WIDTH / 512) as u32, (HEIGHT / 16) as u32, 1),
+ );
+ cmd_buf.write_timestamp(&query_pool, 3);
+ cmd_buf.memory_barrier();
+ cmd_buf.dispatch(
+ &k4_pipeline,
+ &k4_ds,
+ ((WIDTH / TILE_W) as u32, (HEIGHT / TILE_H) as u32, 1),
+ );
+ cmd_buf.write_timestamp(&query_pool, 4);
+ cmd_buf.memory_barrier();
cmd_buf.copy_buffer(&image_dev, &image_buf);
cmd_buf.finish();
device.run_cmd_buf(&cmd_buf).unwrap();
let timestamps = device.reap_query_pool(query_pool).unwrap();
println!("Kernel 1 time: {:.3}ms", timestamps[0] * 1e3);
- println!("Render time: {:.3}ms", (timestamps[1] - timestamps[0]) * 1e3);
+ println!(
+ "Kernel 2 time: {:.3}ms",
+ (timestamps[1] - timestamps[0]) * 1e3
+ );
+ println!(
+ "Kernel 3 time: {:.3}ms",
+ (timestamps[2] - timestamps[1]) * 1e3
+ );
+ println!(
+ "Render time: {:.3}ms",
+ (timestamps[3] - timestamps[2]) * 1e3
+ );
+ /*
let mut k1_data: Vec<u32> = Default::default();
- device.read_buffer(&tilegroup_buf, &mut k1_data).unwrap();
+ device.read_buffer(&segment_buf, &mut k1_data).unwrap();
dump_k1_data(&k1_data);
+ */
let mut img_data: Vec<u8> = Default::default();
// Note: because png can use a `&[u8]` slice, we could avoid an extra copy
diff --git a/piet-gpu/src/render_ctx.rs b/piet-gpu/src/render_ctx.rs
new file mode 100644
index 0000000..f5b6897
--- /dev/null
+++ b/piet-gpu/src/render_ctx.rs
@@ -0,0 +1,356 @@
+use std::borrow::Cow;
+
+use piet_gpu_types::encoder::{Encode, Encoder, Ref};
+use piet_gpu_types::scene;
+use piet_gpu_types::scene::{Bbox, PietCircle, PietItem, PietStrokePolyLine, SimpleGroup};
+
+use piet::kurbo::{Affine, PathEl, Point, Rect, Shape};
+
+use piet::{
+ Color, Error, FixedGradient, Font, FontBuilder, HitTestPoint, HitTestTextPosition, ImageFormat,
+ InterpolationMode, IntoBrush, LineMetric, RenderContext, StrokeStyle, Text, TextLayout,
+ TextLayoutBuilder,
+};
+
+pub struct PietGpuImage;
+
+pub struct PietGpuFont;
+
+pub struct PietGpuFontBuilder;
+
+#[derive(Clone)]
+pub struct PietGpuTextLayout;
+
+pub struct PietGpuTextLayoutBuilder;
+
+pub struct PietGpuText;
+
+pub struct PietGpuRenderContext {
+ encoder: Encoder,
+ bboxes: Vec<Bbox>,
+ items: Vec<PietItem>,
+ // Will probably need direct access to hal Device to create images etc.
+ inner_text: PietGpuText,
+}
+
+#[derive(Clone)]
+pub enum PietGpuBrush {
+ Solid(u32),
+ Gradient,
+}
+
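+/// Accuracy target (in pixels) for flattening curves to line segments.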
+const TOLERANCE: f64 = 0.1;
+
+impl PietGpuRenderContext {
+ pub fn new() -> PietGpuRenderContext {
+ let mut encoder = Encoder::new();
+ let _reserve_root = encoder.alloc_chunk(PietItem::fixed_size() as u32);
+ let bboxes = Vec::new();
+ let items = Vec::new();
+ let inner_text = PietGpuText;
+ PietGpuRenderContext {
+ encoder,
+ bboxes,
+ items,
+ inner_text,
+ }
+ }
+
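+ /// Encodes the accumulated bboxes and items, writes the root group into
+ /// the chunk reserved in `new`, and returns the finished scene buffer.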
+ pub fn get_scene_buf(&mut self) -> &[u8] {
+ let n_items = self.bboxes.len() as u32;
+ let bboxes = self.bboxes.encode(&mut self.encoder).transmute();
+ let items = self.items.encode(&mut self.encoder).transmute();
+ let offset = scene::Point { xy: [0.0, 0.0] };
+ let simple_group = SimpleGroup {
+ n_items,
+ bboxes,
+ items,
+ offset,
+ };
+ let root_item = PietItem::Group(simple_group);
+ root_item.encode_to(&mut self.encoder.buf_mut()[0..PietItem::fixed_size()]);
+ self.encoder.buf()
+ }
+
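+ /// Appends an item, recording its bounding box rounded outward to whole
+ /// pixels.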
+ fn push_item(&mut self, item: PietItem, bbox: Rect) {
+ let scene_bbox = Bbox {
+ bbox: [
+ bbox.x0.floor() as i16,
+ bbox.y0.floor() as i16,
+ bbox.x1.ceil() as i16,
+ bbox.y1.ceil() as i16,
+ ],
+ };
+ self.items.push(item);
+ self.bboxes.push(scene_bbox);
+ }
+}
+
+impl RenderContext for PietGpuRenderContext {
+ type Brush = PietGpuBrush;
+ type Image = PietGpuImage;
+ type Text = PietGpuText;
+ type TextLayout = PietGpuTextLayout;
+
+ fn status(&mut self) -> Result<(), Error> {
+ Ok(())
+ }
+
+ fn solid_brush(&mut self, color: Color) -> Self::Brush {
+ PietGpuBrush::Solid(color.as_rgba_u32())
+ }
+
+ fn gradient(&mut self, _gradient: impl Into<FixedGradient>) -> Result<Self::Brush, Error> {
+ Ok(Self::Brush::Gradient)
+ }
+
+ fn clear(&mut self, _color: Color) {}
+
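+ // Strokes are flattened to a polyline; only solid-color brushes are
+ // encoded for now.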
+ fn stroke(&mut self, shape: impl Shape, brush: &impl IntoBrush<Self>, width: f64) {
+ let bbox = shape.bounding_box();
+ let brush = brush.make_brush(self, || bbox).into_owned();
+ let path = shape.to_bez_path(TOLERANCE);
+ let (n_points, points) = flatten_shape(&mut self.encoder, path);
+ match brush {
+ PietGpuBrush::Solid(rgba_color) => {
+ let poly_line = PietStrokePolyLine {
+ rgba_color,
+ width: width as f32,
+ n_points,
+ points,
+ };
+ self.push_item(PietItem::Poly(poly_line), bbox);
+ }
+ _ => (),
+ }
+ }
+
+ fn stroke_styled(
+ &mut self,
+ _shape: impl Shape,
+ _brush: &impl IntoBrush<Self>,
+ _width: f64,
+ _style: &StrokeStyle,
+ ) {
+ }
+
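+ // Fills currently handle only circles with solid-color brushes; other
+ // shapes and gradients are ignored.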
+ fn fill(&mut self, shape: impl Shape, brush: &impl IntoBrush<Self>) {
+ let brush = brush.make_brush(self, || shape.bounding_box()).into_owned();
+
+ match shape.as_circle() {
+ Some(circle) => match brush {
+ PietGpuBrush::Solid(rgba_color) => {
+ let piet_circle = PietCircle {
+ rgba_color,
+ center: to_scene_point(circle.center),
+ radius: circle.radius as f32,
+ };
+ let bbox = circle.bounding_box();
+ self.push_item(PietItem::Circle(piet_circle), bbox);
+ }
+ _ => {}
+ },
+ None => {}
+ }
+ }
+
+ fn fill_even_odd(&mut self, _shape: impl Shape, _brush: &impl IntoBrush<Self>) {}
+
+ fn clip(&mut self, _shape: impl Shape) {}
+
+ fn text(&mut self) -> &mut Self::Text {
+ &mut self.inner_text
+ }
+
+ fn draw_text(
+ &mut self,
+ _layout: &Self::TextLayout,
+ pos: impl Into<Point>,
+ brush: &impl IntoBrush<Self>,
+ ) {
+ let _pos = pos.into();
+
+ let brush: PietGpuBrush = brush.make_brush(self, || Rect::ZERO).into_owned();
+
+ match brush {
+ PietGpuBrush::Solid(_rgba) => {
+ // TODO: draw text
+ }
+ _ => {}
+ }
+ }
+
+ fn save(&mut self) -> Result<(), Error> {
+ Ok(())
+ }
+ fn restore(&mut self) -> Result<(), Error> {
+ Ok(())
+ }
+ fn finish(&mut self) -> Result<(), Error> {
+ Ok(())
+ }
+ fn transform(&mut self, _transform: Affine) {}
+
+ fn make_image(
+ &mut self,
+ _width: usize,
+ _height: usize,
+ _buf: &[u8],
+ _format: ImageFormat,
+ ) -> Result<Self::Image, Error> {
+ Ok(PietGpuImage)
+ }
+
+ fn draw_image(
+ &mut self,
+ _image: &Self::Image,
+ _rect: impl Into<Rect>,
+ _interp: InterpolationMode,
+ ) {
+ }
+
+ fn draw_image_area(
+ &mut self,
+ _image: &Self::Image,
+ _src_rect: impl Into<Rect>,
+ _dst_rect: impl Into<Rect>,
+ _interp: InterpolationMode,
+ ) {
+ }
+
+ fn blurred_rect(&mut self, _rect: Rect, _blur_radius: f64, _brush: &impl IntoBrush<Self>) {}
+
+ fn current_transform(&self) -> Affine {
+ Default::default()
+ }
+}
+
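+// Flattens a path to line segments and encodes the points into the scene.
+// Subpath breaks are marked with a NaN point, and ClosePath re-emits the
+// start point when the subpath is not already closed.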
+fn flatten_shape(
+ encoder: &mut Encoder,
+ path: impl Iterator<Item = PathEl>,
+) -> (u32, Ref<scene::Point>) {
+ let mut points = Vec::new();
+ let mut start_pt = None;
+ let mut last_pt = None;
+ kurbo::flatten(path, TOLERANCE, |el| {
+ match el {
+ PathEl::MoveTo(p) => {
+ let scene_pt = to_scene_point(p);
+ start_pt = Some(clone_scene_pt(&scene_pt));
+ if !points.is_empty() {
+ points.push(scene::Point {
+ xy: [std::f32::NAN, std::f32::NAN],
+ });
+ }
+ last_pt = Some(clone_scene_pt(&scene_pt));
+ points.push(scene_pt);
+ }
+ PathEl::LineTo(p) => {
+ let scene_pt = to_scene_point(p);
+ last_pt = Some(clone_scene_pt(&scene_pt));
+ points.push(scene_pt);
+ }
+ PathEl::ClosePath => {
+ if let (Some(start), Some(last)) = (start_pt.take(), last_pt.take()) {
+ if start.xy != last.xy {
+ points.push(start);
+ }
+ }
+ }
+ _ => (),
+ }
+ //println!("{:?}", el);
+ });
+ let n_points = points.len() as u32;
+ let points_ref = points.encode(encoder).transmute();
+ (n_points, points_ref)
+}
+
+impl Text for PietGpuText {
+ type Font = PietGpuFont;
+ type FontBuilder = PietGpuFontBuilder;
+ type TextLayout = PietGpuTextLayout;
+ type TextLayoutBuilder = PietGpuTextLayoutBuilder;
+
+ fn new_font_by_name(&mut self, _name: &str, _size: f64) -> Self::FontBuilder {
+ unimplemented!();
+ }
+
+ fn new_text_layout(
+ &mut self,
+ _font: &Self::Font,
+ _text: &str,
+ _width: f64,
+ ) -> Self::TextLayoutBuilder {
+ unimplemented!();
+ }
+}
+
+impl Font for PietGpuFont {}
+
+impl FontBuilder for PietGpuFontBuilder {
+ type Out = PietGpuFont;
+
+ fn build(self) -> Result<Self::Out, Error> {
+ unimplemented!();
+ }
+}
+
+impl TextLayoutBuilder for PietGpuTextLayoutBuilder {
+ type Out = PietGpuTextLayout;
+
+ fn build(self) -> Result<Self::Out, Error> {
+ unimplemented!()
+ }
+}
+
+impl TextLayout for PietGpuTextLayout {
+ fn width(&self) -> f64 {
+ 0.0
+ }
+
+ fn update_width(&mut self, _new_width: f64) -> Result<(), Error> {
+ unimplemented!()
+ }
+
+ fn line_text(&self, _line_number: usize) -> Option<&str> {
+ unimplemented!()
+ }
+
+ fn line_metric(&self, _line_number: usize) -> Option<LineMetric> {
+ unimplemented!()
+ }
+
+ fn line_count(&self) -> usize {
+ unimplemented!()
+ }
+
+ fn hit_test_point(&self, _point: Point) -> HitTestPoint {
+ unimplemented!()
+ }
+
+ fn hit_test_text_position(&self, _text_position: usize) -> Option<HitTestTextPosition> {
+ unimplemented!()
+ }
+}
+
+impl IntoBrush<PietGpuRenderContext> for PietGpuBrush {
+ fn make_brush<'b>(
+ &'b self,
+ _piet: &mut PietGpuRenderContext,
+ _bbox: impl FnOnce() -> Rect,
+ ) -> std::borrow::Cow<'b, PietGpuBrush> {
+ Cow::Borrowed(self)
+ }
+}
+
+fn to_scene_point(point: Point) -> scene::Point {
+ scene::Point {
+ xy: [point.x as f32, point.y as f32],
+ }
+}
+
+// TODO: allow #[derive(Clone)] in piet-gpu-derive.
+fn clone_scene_pt(p: &scene::Point) -> scene::Point {
+ scene::Point { xy: p.xy }
+}