blob: d267fc33e4acb05291531f21c406636a576b28e0 [file]
// Copyright 2025 the Vello Authors
// SPDX-License-Identifier: Apache-2.0 OR MIT
//! A number of SIMD extension traits.
use fearless_simd::*;
/// Extension trait for broadcasting every 4th element of a vector to the group of
/// four consecutive lanes that it terminates.
///
/// Lanes `[x0, x1, x2, x3, x4, x5, x6, x7, ...]` become
/// `[x3, x3, x3, x3, x7, x7, x7, x7, ...]`. This is used for splatting the alpha value
/// of a color to all lanes of that color.
pub trait Splat4thExt<S> {
/// Splat every 4th element of the vector across the four lanes of its group.
///
/// Consumes `self` and returns a vector of the same type.
fn splat_4th(self) -> Self;
}
impl<S: Simd> Splat4thExt<S> for f32x4<S> {
    /// Broadcasts the last lane of the vector to all four lanes.
    #[inline(always)]
    fn splat_4th(self) -> Self {
        // TODO: Explore whether it's just faster to manually access the 4th element and splat it.
        //
        // Two rounds of zipping the vector with itself. Assuming `zip_high` interleaves the
        // upper halves of its operands, this goes `[a, b, c, d]` -> `[c, c, d, d]` ->
        // `[d, d, d, d]`.
        let upper_pairs = self.zip_high(self);
        upper_pairs.zip_high(upper_pairs)
    }
}
impl<S: Simd> Splat4thExt<S> for f32x8<S> {
    /// Applies the `f32x4` splat to each half of the vector independently and
    /// recombines the two results in their original order.
    #[inline(always)]
    fn splat_4th(self) -> Self {
        let simd = self.simd;
        let (lo, hi) = simd.split_f32x8(self);
        let lo = lo.splat_4th();
        let hi = hi.splat_4th();
        simd.combine_f32x4(lo, hi)
    }
}
impl<S: Simd> Splat4thExt<S> for f32x16<S> {
    /// Applies the `f32x8` splat to each half of the vector independently and
    /// recombines the two results in their original order.
    #[inline(always)]
    fn splat_4th(self) -> Self {
        let simd = self.simd;
        let (lo, hi) = simd.split_f32x16(self);
        let lo = lo.splat_4th();
        let hi = hi.splat_4th();
        simd.combine_f32x8(lo, hi)
    }
}
impl<S: Simd> Splat4thExt<S> for u8x16<S> {
    /// Broadcasts lanes 3, 7, 11 and 15 across their respective groups of four lanes.
    #[inline(always)]
    fn splat_4th(self) -> Self {
        // TODO: SIMDify
        // Read the last lane of each 4-lane group once, then lay out the result by hand.
        let g0 = self[3];
        let g1 = self[7];
        let g2 = self[11];
        let g3 = self[15];
        [
            g0, g0, g0, g0, g1, g1, g1, g1, g2, g2, g2, g2, g3, g3, g3, g3,
        ]
        .simd_into(self.simd)
    }
}
impl<S: Simd> Splat4thExt<S> for u8x32<S> {
    /// Applies the `u8x16` splat to each half of the vector independently and
    /// recombines the two results in their original order.
    #[inline(always)]
    fn splat_4th(self) -> Self {
        let simd = self.simd;
        let (lo, hi) = simd.split_u8x32(self);
        let lo = lo.splat_4th();
        let hi = hi.splat_4th();
        simd.combine_u8x16(lo, hi)
    }
}
/// Expand a 4-lane vector into a 16-lane vector by repeating each input element
/// four times in a row.
///
/// The result is built from four splatted `f32x4` vectors, one per element of `input`,
/// combined in input order (element 0 first, element 3 last).
#[inline(always)]
pub fn element_wise_splat<S: Simd>(simd: S, input: f32x4<S>) -> f32x16<S> {
    // One fully-splatted 4-lane vector per input element.
    let e0 = f32x4::splat(simd, input[0]);
    let e1 = f32x4::splat(simd, input[1]);
    let e2 = f32x4::splat(simd, input[2]);
    let e3 = f32x4::splat(simd, input[3]);

    // Pair them up into two 8-lane halves, then join into the final 16 lanes.
    let first_half = simd.combine_f32x4(e0, e1);
    let second_half = simd.combine_f32x4(e2, e3);
    simd.combine_f32x8(first_half, second_half)
}