| // Copyright 2025 the Vello Authors |
| // SPDX-License-Identifier: Apache-2.0 OR MIT |
| |
| mod common; |
| mod highp; |
| mod lowp; |
| |
| use crate::peniko::{BlendMode, Compose, ImageQuality, Mix}; |
| use crate::region::Region; |
| use alloc::vec; |
| use alloc::vec::Vec; |
| use core::fmt::Debug; |
| use core::iter; |
| use vello_common::coarse::{Cmd, WideTile}; |
| use vello_common::encode::{ |
| EncodedBlurredRoundedRectangle, EncodedGradient, EncodedImage, EncodedKind, EncodedPaint, |
| }; |
| use vello_common::paint::{ImageSource, Paint, PremulColor}; |
| use vello_common::tile::Tile; |
| |
| pub(crate) const COLOR_COMPONENTS: usize = 4; |
| pub(crate) const TILE_HEIGHT_COMPONENTS: usize = Tile::HEIGHT as usize * COLOR_COMPONENTS; |
| pub const SCRATCH_BUF_SIZE: usize = |
| WideTile::WIDTH as usize * Tile::HEIGHT as usize * COLOR_COMPONENTS; |
| |
| use crate::fine::common::gradient::linear::SimdLinearKind; |
| use crate::fine::common::gradient::radial::SimdRadialKind; |
| use crate::fine::common::gradient::sweep::SimdSweepKind; |
| use crate::fine::common::gradient::{GradientPainter, calculate_t_vals}; |
| use crate::fine::common::image::{FilteredImagePainter, NNImagePainter, PlainNNImagePainter}; |
| use crate::fine::common::rounded_blurred_rect::BlurredRoundedRectFiller; |
| use crate::util::{BlendModeExt, EncodedImageExt}; |
| pub use highp::F32Kernel; |
| pub use lowp::U8Kernel; |
| use vello_common::fearless_simd::{ |
| Simd, SimdBase, SimdFloat, SimdInto, f32x4, f32x8, f32x16, u8x16, u8x32, u32x4, u32x8, |
| }; |
| use vello_common::pixmap::Pixmap; |
| use vello_common::simd::Splat4thExt; |
| |
| pub type ScratchBuf<F> = [F; SCRATCH_BUF_SIZE]; |
| |
/// The scalar type a fine-rasterization kernel stores its color components in.
///
/// This file implements it for `f32` and for `u8`.
pub trait Numeric: Copy + Clone + Default + Debug + PartialEq + Send + Sync + 'static {
    /// The value representing a component with no intensity.
    const ZERO: Self;
    /// The value representing a component with full intensity.
    const ONE: Self;
}
| |
| impl Numeric for f32 { |
| const ZERO: Self = 0.0; |
| const ONE: Self = 1.0; |
| } |
| |
| impl Numeric for u8 { |
| const ZERO: Self = 0; |
| const ONE: Self = 255; |
| } |
| |
| pub trait NumericVec<S: Simd>: Copy + Clone + Send + Sync { |
| fn from_f32(simd: S, val: f32x16<S>) -> Self; |
| fn from_u8(simd: S, val: u8x16<S>) -> Self; |
| } |
| |
| impl<S: Simd> NumericVec<S> for f32x16<S> { |
| #[inline(always)] |
| fn from_f32(_: S, val: Self) -> Self { |
| val |
| } |
| |
| #[inline(always)] |
| fn from_u8(simd: S, val: u8x16<S>) -> Self { |
| let converted = u8_to_f32(val); |
| converted * Self::splat(simd, 1.0 / 255.0) |
| } |
| } |
| |
| impl<S: Simd> NumericVec<S> for u8x16<S> { |
| #[inline(always)] |
| fn from_f32(simd: S, val: f32x16<S>) -> Self { |
| let v1 = f32x16::splat(simd, 255.0); |
| let v2 = f32x16::splat(simd, 0.5); |
| let mulled = v2.madd(v1, val); |
| |
| f32_to_u8(mulled) |
| } |
| |
| #[inline(always)] |
| fn from_u8(_: S, val: Self) -> Self { |
| val |
| } |
| } |
| |
| #[inline(always)] |
| pub(crate) fn f32_to_u8<S: Simd>(val: f32x16<S>) -> u8x16<S> { |
| let simd = val.simd; |
| // Note that converting to u32 first using SIMD and then u8 |
| // is much faster than converting directly from f32 to u8. |
| let converted = simd.cvt_u32_f32x16(val); |
| |
| // TODO: Maybe we can also do this using SIMD? |
| [ |
| converted[0] as u8, |
| converted[1] as u8, |
| converted[2] as u8, |
| converted[3] as u8, |
| converted[4] as u8, |
| converted[5] as u8, |
| converted[6] as u8, |
| converted[7] as u8, |
| converted[8] as u8, |
| converted[9] as u8, |
| converted[10] as u8, |
| converted[11] as u8, |
| converted[12] as u8, |
| converted[13] as u8, |
| converted[14] as u8, |
| converted[15] as u8, |
| ] |
| .simd_into(val.simd) |
| } |
| |
| #[inline(always)] |
| pub(crate) fn u8_to_f32<S: Simd>(val: u8x16<S>) -> f32x16<S> { |
| // TODO: SIMDify |
| [ |
| val.val[0] as f32, |
| val.val[1] as f32, |
| val.val[2] as f32, |
| val.val[3] as f32, |
| val.val[4] as f32, |
| val.val[5] as f32, |
| val.val[6] as f32, |
| val.val[7] as f32, |
| val.val[8] as f32, |
| val.val[9] as f32, |
| val.val[10] as f32, |
| val.val[11] as f32, |
| val.val[12] as f32, |
| val.val[13] as f32, |
| val.val[14] as f32, |
| val.val[15] as f32, |
| ] |
| .simd_into(val.simd) |
| } |
| |
| pub trait CompositeType<N: Numeric, S: Simd>: Copy + Clone + Send + Sync { |
| const LENGTH: usize; |
| |
| fn from_slice(simd: S, slice: &[N]) -> Self; |
| fn from_color(simd: S, color: [N; 4]) -> Self; |
| } |
| |
| impl<S: Simd> CompositeType<f32, S> for f32x16<S> { |
| const LENGTH: usize = 16; |
| |
| #[inline(always)] |
| fn from_slice(simd: S, slice: &[f32]) -> Self { |
| <Self as SimdBase<_, _>>::from_slice(simd, slice) |
| } |
| |
| #[inline(always)] |
| fn from_color(simd: S, color: [f32; 4]) -> Self { |
| Self::block_splat(f32x4::from_slice(simd, &color[..])) |
| } |
| } |
| |
| impl<S: Simd> CompositeType<u8, S> for u8x32<S> { |
| const LENGTH: usize = 32; |
| |
| #[inline(always)] |
| fn from_slice(simd: S, slice: &[u8]) -> Self { |
| <Self as SimdBase<_, _>>::from_slice(simd, slice) |
| } |
| |
| #[inline(always)] |
| fn from_color(simd: S, color: [u8; 4]) -> Self { |
| u32x8::block_splat(u32x4::splat(simd, u32::from_ne_bytes(color))).reinterpret_u8() |
| } |
| } |
| |
| /// A kernel for performing fine rasterization. |
| pub trait FineKernel<S: Simd>: Send + Sync + 'static { |
| /// The basic underlying numerical type of the kernel. |
| type Numeric: Numeric; |
| /// The type that is used for blending and compositing. |
| type Composite: CompositeType<Self::Numeric, S>; |
| /// The base SIMD vector type for converting between u8 and f32. |
| type NumericVec: NumericVec<S>; |
| |
| /// Extract the color from a premultiplied color. |
| fn extract_color(color: PremulColor) -> [Self::Numeric; 4]; |
| /// Pack the blend buf into the given region. |
| fn pack(simd: S, region: &mut Region<'_>, blend_buf: &[Self::Numeric]); |
| /// Repeatedly copy the solid color into the target buffer. |
| fn copy_solid(simd: S, target: &mut [Self::Numeric], color: [Self::Numeric; 4]); |
| /// Return the painter used for painting gradients. |
| fn gradient_painter<'a>( |
| simd: S, |
| gradient: &'a EncodedGradient, |
| t_vals: &'a [f32], |
| ) -> impl Painter + 'a { |
| GradientPainter::new(simd, gradient, false, t_vals) |
| } |
| /// Return the painter used for painting gradients, with support for masking undefined locations. |
| fn gradient_painter_with_undefined<'a>( |
| simd: S, |
| gradient: &'a EncodedGradient, |
| t_vals: &'a [f32], |
| ) -> impl Painter + 'a { |
| GradientPainter::new(simd, gradient, true, t_vals) |
| } |
| /// Return the painter used for painting plain nearest-neighbor images. |
| /// |
| /// Plain nearest-neighbor images are images with the quality 'Low' and no skewing component in their |
| /// transform. |
| fn plain_nn_image_painter<'a>( |
| simd: S, |
| image: &'a EncodedImage, |
| pixmap: &'a Pixmap, |
| start_x: u16, |
| start_y: u16, |
| ) -> impl Painter + 'a { |
| PlainNNImagePainter::new(simd, image, pixmap, start_x, start_y) |
| } |
| /// Return the painter used for painting plain nearest-neighbor images. |
| /// |
| /// Same as `plain_nn`, but must also support skewing transforms. |
| fn nn_image_painter<'a>( |
| simd: S, |
| image: &'a EncodedImage, |
| pixmap: &'a Pixmap, |
| start_x: u16, |
| start_y: u16, |
| ) -> impl Painter + 'a { |
| NNImagePainter::new(simd, image, pixmap, start_x, start_y) |
| } |
| /// Return the painter used for painting image with `Medium` quality. |
| fn medium_quality_image_painter<'a>( |
| simd: S, |
| image: &'a EncodedImage, |
| pixmap: &'a Pixmap, |
| start_x: u16, |
| start_y: u16, |
| ) -> impl Painter + 'a { |
| FilteredImagePainter::new(simd, image, pixmap, start_x, start_y) |
| } |
| /// Return the painter used for painting image with `High` quality. |
| fn high_quality_image_painter<'a>( |
| simd: S, |
| image: &'a EncodedImage, |
| pixmap: &'a Pixmap, |
| start_x: u16, |
| start_y: u16, |
| ) -> impl Painter + 'a { |
| FilteredImagePainter::new(simd, image, pixmap, start_x, start_y) |
| } |
| /// Return the painter used for painting blurred rounded rectangles. |
| fn blurred_rounded_rectangle_painter<'a>( |
| simd: S, |
| rect: &'a EncodedBlurredRoundedRectangle, |
| start_x: u16, |
| start_y: u16, |
| ) -> impl Painter + 'a { |
| BlurredRoundedRectFiller::new(simd, rect, start_x, start_y) |
| } |
| /// Apply the mask to the destination buffer. |
| fn apply_mask(simd: S, dest: &mut [Self::Numeric], src: impl Iterator<Item = Self::NumericVec>); |
| /// Apply the painter to the destination buffer. |
| fn apply_painter<'a>(simd: S, dest: &mut [Self::Numeric], painter: impl Painter + 'a); |
| /// Do basic alpha compositing with a solid color. |
| fn alpha_composite_solid( |
| simd: S, |
| target: &mut [Self::Numeric], |
| src: [Self::Numeric; 4], |
| alphas: Option<&[u8]>, |
| ); |
| /// Do basic alpha compositing with the given buffer. |
| fn alpha_composite_buffer( |
| simd: S, |
| dest: &mut [Self::Numeric], |
| src: &[Self::Numeric], |
| alphas: Option<&[u8]>, |
| ); |
| /// Blend the source into the destination with the given blend mode. |
| fn blend( |
| simd: S, |
| dest: &mut [Self::Numeric], |
| src: impl Iterator<Item = Self::Composite>, |
| blend_mode: BlendMode, |
| alphas: Option<&[u8]>, |
| ); |
| } |
| |
| /// An object for performing fine rasterization |
| #[derive(Debug)] |
| pub struct Fine<S: Simd, T: FineKernel<S>> { |
| /// The coordinates of the currently covered wide tile. |
| pub(crate) wide_coords: (u16, u16), |
| /// The stack of blend buffers. |
| pub(crate) blend_buf: Vec<ScratchBuf<T::Numeric>>, |
| /// An intermediate buffer used by shaders to store their contents. |
| pub(crate) paint_buf: ScratchBuf<T::Numeric>, |
| /// An intermediate buffer used by gradients to store the t values. |
| pub(crate) f32_buf: Vec<f32>, |
| pub(crate) simd: S, |
| } |
| |
| impl<S: Simd, T: FineKernel<S>> Fine<S, T> { |
| pub fn new(simd: S) -> Self { |
| Self { |
| simd, |
| wide_coords: (0, 0), |
| blend_buf: vec![[T::Numeric::ZERO; SCRATCH_BUF_SIZE]], |
| f32_buf: vec![0.0; SCRATCH_BUF_SIZE / 4], |
| paint_buf: [T::Numeric::ZERO; SCRATCH_BUF_SIZE], |
| } |
| } |
| |
| pub fn set_coords(&mut self, x: u16, y: u16) { |
| self.wide_coords = (x, y); |
| } |
| |
| pub fn clear(&mut self, premul_color: PremulColor) { |
| let converted_color = T::extract_color(premul_color); |
| let blend_buf = self.blend_buf.last_mut().unwrap(); |
| |
| T::copy_solid(self.simd, blend_buf, converted_color); |
| } |
| |
| pub fn pack(&self, region: &mut Region<'_>) { |
| let blend_buf = self.blend_buf.last().unwrap(); |
| |
| T::pack(self.simd, region, blend_buf); |
| } |
| |
| pub(crate) fn run_cmd(&mut self, cmd: &Cmd, alphas: &[u8], paints: &[EncodedPaint]) { |
| match cmd { |
| Cmd::Fill(f) => { |
| self.fill( |
| usize::from(f.x), |
| usize::from(f.width), |
| &f.paint, |
| f.blend_mode |
| .unwrap_or(BlendMode::new(Mix::Normal, Compose::SrcOver)), |
| paints, |
| None, |
| ); |
| } |
| Cmd::AlphaFill(s) => { |
| self.fill( |
| usize::from(s.x), |
| usize::from(s.width), |
| &s.paint, |
| s.blend_mode |
| .unwrap_or(BlendMode::new(Mix::Normal, Compose::SrcOver)), |
| paints, |
| Some(&alphas[s.alpha_idx..]), |
| ); |
| } |
| Cmd::PushBuf => { |
| self.blend_buf.push([T::Numeric::ZERO; SCRATCH_BUF_SIZE]); |
| } |
| Cmd::PopBuf => { |
| self.blend_buf.pop(); |
| } |
| Cmd::ClipFill(cf) => { |
| self.clip(cf.x as usize, cf.width as usize, None); |
| } |
| Cmd::ClipStrip(cs) => { |
| self.clip( |
| cs.x as usize, |
| cs.width as usize, |
| Some(&alphas[cs.alpha_idx..]), |
| ); |
| } |
| Cmd::Blend(b) => self.blend(*b), |
| Cmd::Mask(m) => { |
| let start_x = self.wide_coords.0 * WideTile::WIDTH; |
| let start_y = self.wide_coords.1 * Tile::HEIGHT; |
| |
| let blend_buf = self.blend_buf.last_mut().unwrap(); |
| |
| let width = (blend_buf.len() / (Tile::HEIGHT as usize * COLOR_COMPONENTS)) as u16; |
| let y = start_y as u32 + u32x4::from_slice(self.simd, &[0, 1, 2, 3]); |
| |
| let iter = (start_x..(start_x + width)).map(|x| { |
| let x_in_range = x < m.width(); |
| |
| macro_rules! sample { |
| ($idx:expr) => { |
| if x_in_range && (y[$idx] as u16) < m.height() { |
| m.sample(x, y[$idx] as u16) |
| } else { |
| 0 |
| } |
| }; |
| } |
| |
| let s1 = sample!(0); |
| let s2 = sample!(1); |
| let s3 = sample!(2); |
| let s4 = sample!(3); |
| |
| let samples = u8x16::from_slice( |
| self.simd, |
| &[ |
| s1, s1, s1, s1, s2, s2, s2, s2, s3, s3, s3, s3, s4, s4, s4, s4, |
| ], |
| ); |
| T::NumericVec::from_u8(self.simd, samples) |
| }); |
| |
| T::apply_mask(self.simd, blend_buf, iter); |
| } |
| Cmd::Opacity(o) => { |
| if *o != 1.0 { |
| let blend_buf = self.blend_buf.last_mut().unwrap(); |
| |
| T::apply_mask( |
| self.simd, |
| blend_buf, |
| iter::repeat(T::NumericVec::from_f32( |
| self.simd, |
| f32x16::splat(self.simd, *o), |
| )), |
| ); |
| } |
| } |
| } |
| } |
| |
| /// Fill at a given x and with a width using the given paint. |
| // For short strip segments, benchmarks showed that not inlining leads to significantly |
| // worse performance. |
| pub fn fill( |
| &mut self, |
| x: usize, |
| width: usize, |
| fill: &Paint, |
| blend_mode: BlendMode, |
| encoded_paints: &[EncodedPaint], |
| alphas: Option<&[u8]>, |
| ) { |
| let blend_buf = &mut self.blend_buf.last_mut().unwrap()[x * TILE_HEIGHT_COMPONENTS..] |
| [..TILE_HEIGHT_COMPONENTS * width]; |
| let default_blend = blend_mode.is_default(); |
| |
| match fill { |
| Paint::Solid(color) => { |
| let color = T::extract_color(*color); |
| |
| // If color is completely opaque, we can just directly override |
| // the blend buffer. |
| if color[3] == T::Numeric::ONE && default_blend && alphas.is_none() { |
| T::copy_solid(self.simd, blend_buf, color); |
| |
| return; |
| } |
| |
| if default_blend { |
| T::alpha_composite_solid(self.simd, blend_buf, color, alphas); |
| } else { |
| T::blend( |
| self.simd, |
| blend_buf, |
| iter::repeat(T::Composite::from_color(self.simd, color)), |
| blend_mode, |
| alphas, |
| ); |
| } |
| } |
| Paint::Indexed(paint) => { |
| let color_buf = &mut self.paint_buf[x * TILE_HEIGHT_COMPONENTS..] |
| [..TILE_HEIGHT_COMPONENTS * width]; |
| |
| let encoded_paint = &encoded_paints[paint.index()]; |
| |
| let start_x = self.wide_coords.0 * WideTile::WIDTH + x as u16; |
| let start_y = self.wide_coords.1 * Tile::HEIGHT; |
| |
| // We need to have this as a macro because closures cannot take generic arguments, and |
| // we would have to repeatedly provide all arguments if we made it a function. |
| macro_rules! fill_complex_paint { |
| ($has_opacities:expr, $filler:expr) => { |
| if $has_opacities || alphas.is_some() { |
| T::apply_painter(self.simd, color_buf, $filler); |
| |
| if default_blend { |
| T::alpha_composite_buffer(self.simd, blend_buf, color_buf, alphas); |
| } else { |
| T::blend( |
| self.simd, |
| blend_buf, |
| color_buf |
| .chunks_exact(T::Composite::LENGTH) |
| .map(|s| T::Composite::from_slice(self.simd, s)), |
| blend_mode, |
| alphas, |
| ); |
| } |
| } else { |
| // Similarly to solid colors we can just override the previous values |
| // if all colors in the gradient are fully opaque. |
| T::apply_painter(self.simd, blend_buf, $filler); |
| } |
| }; |
| } |
| |
| match encoded_paint { |
| EncodedPaint::BlurredRoundedRect(b) => { |
| fill_complex_paint!( |
| true, |
| T::blurred_rounded_rectangle_painter(self.simd, b, start_x, start_y) |
| ); |
| } |
| EncodedPaint::Gradient(g) => { |
| // Note that we are calculating the t values first, store them in a separate |
| // buffer and then pass that buffer to the iterator instead of calculating |
| // the t values on the fly in the iterator. The latter would be faster, but |
| // it would probably increase code size a lot, because the functions for |
| // position calculation need to be inlined for good performance. |
| let f32_buf = &mut self.f32_buf[..width * Tile::HEIGHT as usize]; |
| |
| match &g.kind { |
| EncodedKind::Linear(l) => { |
| calculate_t_vals( |
| self.simd, |
| SimdLinearKind::new(self.simd, *l), |
| f32_buf, |
| g, |
| start_x, |
| start_y, |
| ); |
| |
| fill_complex_paint!( |
| g.has_opacities, |
| T::gradient_painter(self.simd, g, f32_buf) |
| ); |
| } |
| EncodedKind::Sweep(s) => { |
| calculate_t_vals( |
| self.simd, |
| SimdSweepKind::new(self.simd, s), |
| f32_buf, |
| g, |
| start_x, |
| start_y, |
| ); |
| |
| fill_complex_paint!( |
| g.has_opacities, |
| T::gradient_painter(self.simd, g, f32_buf) |
| ); |
| } |
| EncodedKind::Radial(r) => { |
| calculate_t_vals( |
| self.simd, |
| SimdRadialKind::new(self.simd, r), |
| f32_buf, |
| g, |
| start_x, |
| start_y, |
| ); |
| |
| if r.has_undefined() { |
| fill_complex_paint!( |
| g.has_opacities, |
| T::gradient_painter_with_undefined(self.simd, g, f32_buf) |
| ); |
| } else { |
| fill_complex_paint!( |
| g.has_opacities, |
| T::gradient_painter(self.simd, g, f32_buf) |
| ); |
| } |
| } |
| } |
| } |
| EncodedPaint::Image(i) => { |
| let ImageSource::Pixmap(pixmap) = &i.source else { |
| panic!("vello_cpu doesn't support the opaque image source."); |
| }; |
| |
| match (i.has_skew(), i.nearest_neighbor()) { |
| (_, false) => { |
| if i.quality == ImageQuality::Medium { |
| fill_complex_paint!( |
| i.has_opacities, |
| T::medium_quality_image_painter( |
| self.simd, i, pixmap, start_x, start_y |
| ) |
| ); |
| } else { |
| fill_complex_paint!( |
| i.has_opacities, |
| T::high_quality_image_painter( |
| self.simd, i, pixmap, start_x, start_y |
| ) |
| ); |
| } |
| } |
| (false, true) => { |
| fill_complex_paint!( |
| i.has_opacities, |
| T::plain_nn_image_painter( |
| self.simd, i, pixmap, start_x, start_y |
| ) |
| ); |
| } |
| (true, true) => { |
| fill_complex_paint!( |
| i.has_opacities, |
| T::nn_image_painter(self.simd, i, pixmap, start_x, start_y) |
| ); |
| } |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| fn blend(&mut self, blend_mode: BlendMode) { |
| let (source_buffer, rest) = self.blend_buf.split_last_mut().unwrap(); |
| let target_buffer = rest.last_mut().unwrap(); |
| |
| if blend_mode.is_default() { |
| T::alpha_composite_buffer(self.simd, target_buffer, source_buffer, None); |
| } else { |
| T::blend( |
| self.simd, |
| target_buffer, |
| source_buffer |
| .chunks_exact(T::Composite::LENGTH) |
| .map(|s| T::Composite::from_slice(self.simd, s)), |
| blend_mode, |
| None, |
| ); |
| } |
| } |
| |
| fn clip(&mut self, x: usize, width: usize, alphas: Option<&[u8]>) { |
| let (source_buffer, rest) = self.blend_buf.split_last_mut().unwrap(); |
| let target_buffer = rest.last_mut().unwrap(); |
| |
| let source_buffer = |
| &mut source_buffer[x * TILE_HEIGHT_COMPONENTS..][..TILE_HEIGHT_COMPONENTS * width]; |
| let target_buffer = |
| &mut target_buffer[x * TILE_HEIGHT_COMPONENTS..][..TILE_HEIGHT_COMPONENTS * width]; |
| |
| T::alpha_composite_buffer(self.simd, target_buffer, source_buffer, alphas); |
| } |
| } |
| |
/// A trait for shaders that can render their contents into a u8/f32 buffer. Note that while
/// the trait has a method for both f32 and u8, some shaders might only support one of them,
/// so care is needed when using them.
pub trait Painter {
    /// Render the shader's contents into a buffer of `u8` components.
    fn paint_u8(&mut self, buf: &mut [u8]);
    /// Render the shader's contents into a buffer of `f32` components.
    fn paint_f32(&mut self, buf: &mut [f32]);
}
| |
| /// Calculate the x/y position using the x/y advances for each pixel, assuming a tile height of 4. |
| pub trait PosExt<S: Simd> { |
| fn splat_pos(simd: S, pos: f32, x_advance: f32, y_advance: f32) -> Self; |
| } |
| |
| impl<S: Simd> PosExt<S> for f32x4<S> { |
| #[inline(always)] |
| fn splat_pos(simd: S, pos: f32, _: f32, y_advance: f32) -> Self { |
| let columns: [f32; Tile::HEIGHT as usize] = [0.0, 1.0, 2.0, 3.0]; |
| let column_mask: Self = columns.simd_into(simd); |
| |
| Self::splat(simd, pos).madd(column_mask, Self::splat(simd, y_advance)) |
| } |
| } |
| |
| impl<S: Simd> PosExt<S> for f32x8<S> { |
| #[inline(always)] |
| fn splat_pos(simd: S, pos: f32, x_advance: f32, y_advance: f32) -> Self { |
| simd.combine_f32x4( |
| f32x4::splat_pos(simd, pos, x_advance, y_advance), |
| f32x4::splat_pos(simd, pos + x_advance, x_advance, y_advance), |
| ) |
| } |
| } |
| |
| /// The results of an f32 shader, where each channel stored separately. |
| pub(crate) struct ShaderResultF32<S: Simd> { |
| pub(crate) r: f32x8<S>, |
| pub(crate) g: f32x8<S>, |
| pub(crate) b: f32x8<S>, |
| pub(crate) a: f32x8<S>, |
| } |
| |
| impl<S: Simd> ShaderResultF32<S> { |
| /// Convert the result into two f32x16 elements, interleaved as RGBA. |
| #[inline(always)] |
| pub(crate) fn get(&self) -> (f32x16<S>, f32x16<S>) { |
| let (r_1, r_2) = self.r.simd.split_f32x8(self.r); |
| let (g_1, g_2) = self.g.simd.split_f32x8(self.g); |
| let (b_1, b_2) = self.b.simd.split_f32x8(self.b); |
| let (a_1, a_2) = self.a.simd.split_f32x8(self.a); |
| |
| let first = self.r.simd.combine_f32x8( |
| self.r.simd.combine_f32x4(r_1, g_1), |
| self.r.simd.combine_f32x4(b_1, a_1), |
| ); |
| |
| let second = self.r.simd.combine_f32x8( |
| self.r.simd.combine_f32x4(r_2, g_2), |
| self.r.simd.combine_f32x4(b_2, a_2), |
| ); |
| |
| (first, second) |
| } |
| } |
| |
mod macros {
    /// The default `Painter` implementation for an iterator
    /// that returns its results as f32x16.
    ///
    /// The buffer is filled in chunks of 16 components, pulling one vector from the
    /// iterator per chunk. The generated methods panic if the iterator ends before the
    /// buffer has been filled.
    macro_rules! f32x16_painter {
        ($($type_path:tt)+) => {
            impl<S: Simd> crate::fine::Painter for $($type_path)+ {
                fn paint_u8(&mut self, buf: &mut [u8]) {
                    use vello_common::fearless_simd::*;
                    use crate::fine::NumericVec;

                    buf.chunks_exact_mut(16).for_each(|dst| {
                        let lanes = self.next().unwrap();
                        // The iterator yields f32 values, so convert them to u8 first.
                        let bytes = u8x16::<S>::from_f32(lanes.simd, lanes);
                        dst.copy_from_slice(&bytes.val);
                    });
                }

                fn paint_f32(&mut self, buf: &mut [f32]) {
                    buf.chunks_exact_mut(16).for_each(|dst| {
                        let lanes = self.next().unwrap();
                        dst.copy_from_slice(&lanes.val);
                    });
                }
            }
        };
    }

    /// The default `Painter` implementation for an iterator
    /// that returns its results as u8x16.
    ///
    /// The buffer is filled in chunks of 16 components, pulling one vector from the
    /// iterator per chunk. The generated methods panic if the iterator ends before the
    /// buffer has been filled.
    macro_rules! u8x16_painter {
        ($($type_path:tt)+) => {
            impl<S: Simd> crate::fine::Painter for $($type_path)+ {
                fn paint_u8(&mut self, buf: &mut [u8]) {
                    buf.chunks_exact_mut(16).for_each(|dst| {
                        let lanes = self.next().unwrap();
                        dst.copy_from_slice(&lanes.val);
                    });
                }

                fn paint_f32(&mut self, buf: &mut [f32]) {
                    use vello_common::fearless_simd::*;
                    use crate::fine::NumericVec;

                    buf.chunks_exact_mut(16).for_each(|dst| {
                        let lanes = self.next().unwrap();
                        // The iterator yields u8 values, so convert them to f32 first.
                        let floats = f32x16::<S>::from_u8(lanes.simd, lanes);
                        dst.copy_from_slice(&floats.val);
                    });
                }
            }
        };
    }

    pub(crate) use f32x16_painter;
    pub(crate) use u8x16_painter;
}