blob: 6e4656518a9ce679c6c411f9d2e742ea7c5853e7 [file] [log] [blame]
// Copyright 2025 the Vello Authors
// SPDX-License-Identifier: Apache-2.0 OR MIT
use crate::peniko::{BlendMode, Compose, ImageQuality, Mix};
use vello_common::encode::EncodedImage;
use vello_common::fearless_simd::{Simd, SimdBase, f32x4, u8x32, u16x16, u16x32};
use vello_common::math::FloatExt;
#[allow(
    dead_code,
    reason = "this is not used because the division by 255 is now done with SIMD, but \
    we still keep it around to document its properties."
)]
pub(crate) mod scalar {
    /// Perform an approximate division by 255.
    ///
    /// There are three reasons for having this method.
    /// 1) Divisions are slower than shifting + adding, and the compiler does not seem to replace
    ///    divisions by 255 with an equivalent (this was verified by benchmarking; doing / 255 was
    ///    significantly slower).
    /// 2) Integer divisions are usually not available in SIMD, so this provides a good baseline
    ///    implementation.
    /// 3) There are two options for performing the division: One is to perform the division
    ///    in a way that completely preserves the rounding semantics of an integer division by
    ///    255. This could be achieved using the implementation `(val + 1 + (val >> 8)) >> 8`.
    ///    The second approach (used here) has slightly different rounding behavior to a
    ///    normal division by 255, but is much faster (see <https://github.com/linebender/vello/issues/904>)
    ///    and therefore preferable for the high-performance pipeline.
    ///
    /// Four properties worth mentioning:
    /// - This actually calculates the ceiling of `val / 256`.
    /// - Within the allowed range for `val`, rounding errors do not appear for values divisible
    ///   by 255, i.e. any call `div_255(val * 255)` will always yield `val`.
    /// - If there is a discrepancy, this division will always yield a value 1 higher than the
    ///   result of the exact integer division.
    /// - This holds for values of `val` up to and including `65279`. You should not call this
    ///   function with higher values: at `65280` the result is one too low, and anything above
    ///   that makes `val + 255` exceed `u16::MAX`.
    #[inline(always)]
    pub(crate) const fn div_255(val: u16) -> u16 {
        debug_assert!(
            val < 65280,
            "the properties of `div_255` do not hold for values of `65280` or greater"
        );
        // Adding 255 before shifting right by 8 bits rounds `val / 256` upwards.
        (val + 255) >> 8
    }

    #[cfg(test)]
    mod tests {
        use super::div_255;

        /// Exhaustively checks the documented properties over the whole supported range
        /// `0..=65279`.
        #[test]
        fn div_255_properties() {
            for i in 0_u16..256 * 255 {
                let expected = i / 255;
                let actual = div_255(i);

                assert!(
                    expected <= actual,
                    "In case of a discrepancy, the division should yield a value higher than the original."
                );

                let diff = expected.abs_diff(actual);
                assert!(diff <= 1, "Rounding error shouldn't be higher than 1.");

                if i % 255 == 0 {
                    assert_eq!(diff, 0, "Division should be accurate for multiples of 255.");
                }
            }
        }
    }
}
/// Extension trait for multiplying two values of the same type and rescaling the product
/// back into the input type's range.
///
/// The `u8x32` implementation in this file widens both operands to 16-bit lanes, multiplies
/// them, applies the approximate `div_255` and narrows the result again.
pub(crate) trait NormalizedMulExt {
/// Multiply `self` by `other` and return the rescaled product as the same type.
fn normalized_mul(self, other: Self) -> Self;
}
impl<S: Simd> NormalizedMulExt for u8x32<S> {
    /// Lane-wise `self * other`, computed in 16-bit lanes, passed through the approximate
    /// `div_255` and narrowed back to 8-bit lanes.
    #[inline(always)]
    fn normalized_mul(self, other: Self) -> Self {
        let simd = self.simd;
        // Widen first so the per-lane product is formed in 16 bits rather than 8.
        let lhs = S::widen_u8x32(simd, self);
        let rhs = S::widen_u8x32(other.simd, other);
        let scaled = (lhs * rhs).div_255();
        S::narrow_u16x32(simd, scaled)
    }
}
/// Extension trait providing an approximate division by 255.
///
/// The implementations in this file compute `(x + 255) >> 8` per lane, which is the same
/// formula as the scalar reference `scalar::div_255`.
pub(crate) trait Div255Ext {
/// Return `self` approximately divided by 255.
fn div_255(self) -> Self;
}
impl<S: Simd> Div255Ext for u16x32<S> {
    /// Approximate per-lane division by 255, computed as `(lane + 255) >> 8`.
    ///
    /// NOTE(review): presumably every lane has to stay below `65280`, as documented for
    /// `scalar::div_255`; confirm against the callers.
    #[inline(always)]
    fn div_255(self) -> Self {
        let bias = Self::splat(self.simd, 255);
        (self + bias).shr(8)
    }
}
impl<S: Simd> Div255Ext for u16x16<S> {
    /// Approximate per-lane division by 255, computed as `(lane + 255) >> 8`.
    ///
    /// NOTE(review): presumably every lane has to stay below `65280`, as documented for
    /// `scalar::div_255`; confirm against the callers.
    #[inline(always)]
    fn div_255(self) -> Self {
        let bias = Self::splat(self.simd, 255);
        (self + bias).shr(8)
    }
}
/// Multiply `a` and `b` lane-wise in 16-bit precision and apply the approximate `div_255`.
///
/// Unlike [`NormalizedMulExt::normalized_mul`], the result is returned in its widened
/// `u16x32` form and is not narrowed back to `u8x32`.
#[inline(always)]
pub(crate) fn normalized_mul<S: Simd>(a: u8x32<S>, b: u8x32<S>) -> u16x32<S> {
    let wide_a = a.simd.widen_u8x32(a);
    let wide_b = b.simd.widen_u8x32(b);
    let product = wide_a * wide_b;
    product.div_255()
}
/// Extension trait for querying whether a blend mode is the default one.
pub(crate) trait BlendModeExt {
/// Return `true` if this blend mode should be treated as the default blend mode.
fn is_default(&self) -> bool;
}
impl BlendModeExt for BlendMode {
    /// A blend mode counts as default when it composes with `SrcOver` and mixes with either
    /// `Normal` or `Clip`.
    ///
    /// Both mix modes are accepted because peniko uses `Clip` instead of `Normal` as its
    /// default.
    fn is_default(&self) -> bool {
        let default_mix = matches!(self.mix, Mix::Normal | Mix::Clip);
        let default_compose = self.compose == Compose::SrcOver;
        default_mix && default_compose
    }
}
/// Extension trait with convenience queries on an encoded image.
pub(crate) trait EncodedImageExt {
/// Return `true` if the image is sampled with a skew, i.e. `x_advance.y` or `y_advance.x`
/// is not nearly zero in the `EncodedImage` implementation.
fn has_skew(&self) -> bool;
/// Return `true` if the image uses `ImageQuality::Low` in the `EncodedImage` implementation.
fn nearest_neighbor(&self) -> bool;
}
impl EncodedImageExt for EncodedImage {
    /// Reports a skew as soon as one of `x_advance.y` / `y_advance.x` is not nearly zero
    /// (compared after conversion to `f32`).
    fn has_skew(&self) -> bool {
        let x_advance_flat = (self.x_advance.y as f32).is_nearly_zero();
        let y_advance_flat = (self.y_advance.x as f32).is_nearly_zero();
        !(x_advance_flat && y_advance_flat)
    }

    /// Nearest-neighbor sampling is selected exactly when the quality is `ImageQuality::Low`.
    fn nearest_neighbor(&self) -> bool {
        matches!(self.quality, ImageQuality::Low)
    }
}
/// Conversion between premultiplied and non-premultiplied values, given matching alphas.
pub(crate) trait Premultiply {
/// Scale `self` by `alphas` (the `f32x4` implementation multiplies the two).
fn premultiply(self, alphas: Self) -> Self;
/// Undo a premultiplication by `alphas` (the `f32x4` implementation divides by `alphas`
/// and selects zero wherever the alpha compares equal to zero).
fn unpremultiply(self, alphas: Self) -> Self;
}
impl<S: Simd> Premultiply for f32x4<S> {
    /// Lane-wise product of `self` and `alphas`.
    #[inline(always)]
    fn premultiply(self, alphas: Self) -> Self {
        self * alphas
    }

    /// Lane-wise quotient `self / alphas`, with zero selected for every lane whose alpha
    /// compares equal to zero instead of the result of dividing by zero.
    #[inline(always)]
    fn unpremultiply(self, alphas: Self) -> Self {
        let simd = self.simd;
        let zeros = Self::splat(alphas.simd, 0.0);
        // Mask of the lanes where the alpha is exactly zero.
        let alpha_is_zero = simd.simd_eq_f32x4(alphas, zeros);
        let quotient = self / alphas;
        simd.select_f32x4(alpha_is_zero, zeros, quotient)
    }
}