Use store_slice instead of copy_from_slice(as_slice()) when writing SIMD vectors to slices
diff --git a/sparse_strips/vello_common/src/flatten_simd.rs b/sparse_strips/vello_common/src/flatten_simd.rs index f65ad2f..e71df65 100644 --- a/sparse_strips/vello_common/src/flatten_simd.rs +++ b/sparse_strips/vello_common/src/flatten_simd.rs
@@ -456,13 +456,13 @@ let (low, high) = simd.split_f32x8(evaluated); - even_pts[i * 4..][..4].copy_from_slice(low.as_slice()); - odd_pts[i * 4..][..4].copy_from_slice(high.as_slice()); + low.store_slice(&mut even_pts[i * 4..][..4]); + high.store_slice(&mut odd_pts[i * 4..][..4]); t += t_inc; } - even_pts[n * 2..][..8].copy_from_slice(p3_128.as_slice()); + p3_128.store_slice(&mut even_pts[n * 2..][..8]); } #[inline(always)] @@ -480,7 +480,7 @@ let x1 = p_onehalf.mul_add(2.0, x); let p1 = p2.mul_add(-0.5, x1); - odd_pts[(i * 8)..][..8].copy_from_slice(p1.as_slice()); + p1.store_slice(&mut odd_pts[(i * 8)..][..8]); let d01 = p1 - p0; let d12 = p2 - p1; @@ -536,11 +536,11 @@ let uscale_a = u2 - u0; let uscale = 1.0 / uscale_a; - ctx.a0[i * 4..][..4].copy_from_slice(a0.as_slice()); - ctx.da[i * 4..][..4].copy_from_slice(da.as_slice()); - ctx.u0[i * 4..][..4].copy_from_slice(u0.as_slice()); - ctx.uscale[i * 4..][..4].copy_from_slice(uscale.as_slice()); - ctx.val[i * 4..][..4].copy_from_slice(val.as_slice()); + a0.store_slice(&mut ctx.a0[i * 4..][..4]); + da.store_slice(&mut ctx.da[i * 4..][..4]); + u0.store_slice(&mut ctx.u0[i * 4..][..4]); + uscale.store_slice(&mut ctx.uscale[i * 4..][..4]); + val.store_slice(&mut ctx.val[i * 4..][..4]); } } @@ -578,7 +578,7 @@ let u = approx_parabola_inv_integral_simd(a); let t = (u - u0) * uscale; let p = coeff_a.mul_add(t, coeff_b).mul_add(t, coeff_c); - out[j * 8..][..8].copy_from_slice(p.as_slice()); + p.store_slice(&mut out[j * 8..][..8]); a += a_inc; } }
diff --git a/sparse_strips/vello_cpu/src/fine/common/gradient/mod.rs b/sparse_strips/vello_cpu/src/fine/common/gradient/mod.rs index f5c4240..60cdbf8 100644 --- a/sparse_strips/vello_cpu/src/fine/common/gradient/mod.rs +++ b/sparse_strips/vello_cpu/src/fine/common/gradient/mod.rs
@@ -33,7 +33,7 @@ let x_pos = f32x8::splat_pos(simd, cur_pos.x as f32, x_advances.0, y_advances.0); let y_pos = f32x8::splat_pos(simd, cur_pos.y as f32, x_advances.1, y_advances.1); let pos = kind.cur_pos(x_pos, y_pos); - buf_part.copy_from_slice(pos.as_slice()); + pos.store_slice(buf_part); cur_pos += 2.0 * gradient.x_advance; } @@ -113,7 +113,7 @@ self.simd.combine_f32x4(rgbas_1[2], rgbas_1[3]), ); let rgbas_1 = u8x16::from_f32(self.simd, rgbas_1); - chunk[..16].copy_from_slice(rgbas_1.as_slice()); + rgbas_1.store_slice(&mut chunk[..16]); let rgbas_2: [f32x4<S>; 4] = core::array::from_fn(|i| { let idx = clamped_indices[i + 4] as usize; @@ -124,7 +124,7 @@ self.simd.combine_f32x4(rgbas_2[2], rgbas_2[3]), ); let rgbas_2 = u8x16::from_f32(self.simd, rgbas_2); - chunk[16..].copy_from_slice(rgbas_2.as_slice()); + rgbas_2.store_slice(&mut chunk[16..]); } }, ); @@ -146,8 +146,9 @@ let masked_1 = self.simd .select_u32x4(invalid_1, u32x4::splat(self.simd, 0), loaded_1); - chunk[..16] - .copy_from_slice(self.simd.reinterpret_u8_u32x4(masked_1).as_slice()); + self.simd + .reinterpret_u8_u32x4(masked_1) + .store_slice(&mut chunk[..16]); let loaded_2 = self .simd @@ -155,8 +156,9 @@ let masked_2 = self.simd .select_u32x4(invalid_2, u32x4::splat(self.simd, 0), loaded_2); - chunk[16..] - .copy_from_slice(self.simd.reinterpret_u8_u32x4(masked_2).as_slice()); + self.simd + .reinterpret_u8_u32x4(masked_2) + .store_slice(&mut chunk[16..]); } }, ); @@ -202,7 +204,7 @@ f32x16::splat(self.simd, 0.0), loaded_1, ); - chunk[..16].copy_from_slice(masked_1.as_slice()); + masked_1.store_slice(&mut chunk[..16]); let invalid_2 = invalid_f32_mask(self.simd, indices_2); let loaded_2 = f32x16::from_slice(self.simd, &chunk[16..]); @@ -211,7 +213,7 @@ f32x16::splat(self.simd, 0.0), loaded_2, ); - chunk[16..].copy_from_slice(masked_2.as_slice()); + masked_2.store_slice(&mut chunk[16..]); } }, );
diff --git a/sparse_strips/vello_cpu/src/fine/highp/mod.rs b/sparse_strips/vello_cpu/src/fine/highp/mod.rs index 8117456..0948001 100644 --- a/sparse_strips/vello_cpu/src/fine/highp/mod.rs +++ b/sparse_strips/vello_cpu/src/fine/highp/mod.rs
@@ -127,7 +127,7 @@ let color = f32x16::block_splat(src.simd_into(simd)); for el in dest.chunks_exact_mut(16) { - el.copy_from_slice(color.as_slice()); + color.store_slice(el); } }, ); @@ -148,7 +148,7 @@ for el in dest.chunks_exact_mut(16) { let loaded = f32x16::from_slice(simd, el); let mulled = loaded * src.next().unwrap(); - el.copy_from_slice(mulled.as_slice()); + mulled.store_slice(el); } }, ); @@ -176,14 +176,14 @@ let pixel = f32x16::from_slice(simd, chunk); let alphas = pixel.splat_4th(); let tinted = tint_v * alphas; - chunk.copy_from_slice(tinted.as_slice()); + tinted.store_slice(chunk); } } TintMode::Multiply => { for chunk in dest.chunks_exact_mut(16) { let pixel = f32x16::from_slice(simd, chunk); let tinted = pixel * tint_v; - chunk.copy_from_slice(tinted.as_slice()); + tinted.store_slice(chunk); } } }, @@ -389,7 +389,7 @@ let bg_v = f32x16::from_slice(simd, next_dest); let src_c = blend::mix(next_src, bg_v, blend_mode); let res = blend_mode.compose(simd, src_c, bg_v, None); - next_dest.copy_from_slice(res.as_slice()); + res.store_slice(next_dest); } } @@ -406,7 +406,7 @@ ) { let mut bg_c = f32x16::from_slice(s, dest); bg_c = one_minus_alpha.mul_add(bg_c, src); - dest.copy_from_slice(bg_c.as_slice()); + bg_c.store_slice(dest); } } @@ -488,7 +488,7 @@ let bg = f32x16::from_slice(simd, next_dest); let src_c = blend::mix(next_src, bg, blend_mode); let res = blend_mode.compose(simd, src_c, bg, Some(masks)); - next_dest.copy_from_slice(res.as_slice()); + res.store_slice(next_dest); } }, ); @@ -514,7 +514,7 @@ let inv_src_a_mask_a = src_a.mul_add(-mask_a, one); let res = bg_c.mul_add(inv_src_a_mask_a, src_c * mask_a); - dest.copy_from_slice(res.as_slice()); + res.store_slice(dest); } }
diff --git a/sparse_strips/vello_cpu/src/fine/lowp/gradient.rs b/sparse_strips/vello_cpu/src/fine/lowp/gradient.rs index 126d280..210818a 100644 --- a/sparse_strips/vello_cpu/src/fine/lowp/gradient.rs +++ b/sparse_strips/vello_cpu/src/fine/lowp/gradient.rs
@@ -58,7 +58,7 @@ #[inline(always)] || { for chunk in buf.chunks_exact_mut(64) { - chunk.copy_from_slice(self.next().unwrap().as_slice()); + self.next().unwrap().store_slice(chunk); } }, );
diff --git a/sparse_strips/vello_cpu/src/fine/lowp/mod.rs b/sparse_strips/vello_cpu/src/fine/lowp/mod.rs index dd357ef..c59d681 100644 --- a/sparse_strips/vello_cpu/src/fine/lowp/mod.rs +++ b/sparse_strips/vello_cpu/src/fine/lowp/mod.rs
@@ -182,7 +182,7 @@ (simd.widen_u8x16(loaded) * simd.widen_u8x16(src.next().unwrap())) .div_255(), ); - el.copy_from_slice(mulled.as_slice()); + mulled.store_slice(el); } }, ); @@ -212,14 +212,14 @@ let pixel = u8x32::from_slice(simd, chunk); let alphas = pixel.splat_4th(); let tinted = tint_v.normalized_mul(alphas); - chunk.copy_from_slice(tinted.as_slice()); + tinted.store_slice(chunk); } } TintMode::Multiply => { for chunk in dest.chunks_exact_mut(32) { let pixel = u8x32::from_slice(simd, chunk); let tinted = pixel.normalized_mul(tint_v); - chunk.copy_from_slice(tinted.as_slice()); + tinted.store_slice(chunk); } } }, @@ -375,7 +375,7 @@ mix(next_src, bg_v, blend_mode) }; let res = blend_mode.compose(simd, src_v, bg_v, None); - next_dest.copy_from_slice(res.as_slice()); + res.store_slice(next_dest); } }, ); @@ -399,7 +399,7 @@ let res_1 = alpha_composite_inner(s, bg_1, src_c, one_minus_alpha); let res_2 = alpha_composite_inner(s, bg_2, src_c, one_minus_alpha); let combined = s.combine_u8x32(res_1, res_2); - next_dest.copy_from_slice(combined.as_slice()); + combined.store_slice(next_dest); } }, ); @@ -420,7 +420,7 @@ let one_minus_alpha = 255 - next_src.splat_4th(); let bg_v = u8x32::from_slice(simd, next_dest); let res = alpha_composite_inner(simd, bg_v, next_src, one_minus_alpha); - next_dest.copy_from_slice(res.as_slice()); + res.store_slice(next_dest); } }, ); @@ -479,7 +479,7 @@ let masks = extract_masks(simd, &next_mask); let res = blend_mode.compose(simd, src_c, bg_v, Some(masks)); - next_bg.copy_from_slice(res.as_slice()); + res.store_slice(next_bg); } }, ); @@ -559,7 +559,7 @@ let p2 = s.widen_u8x32(src_c) * s.widen_u8x32(mask_v); let res = s.narrow_u16x32((p1 + p2).div_255()); - dest.copy_from_slice(res.as_slice()); + res.store_slice(dest); }, ); } @@ -679,10 +679,13 @@ let casted: &[u32; 16] = cast_slice::<u8, u32>(col).try_into().unwrap(); let loaded = simd.load_interleaved_128_u32x16(casted).to_bytes(); - 
dest_slices[0][dest_idx..][..16].copy_from_slice(&loaded.as_slice()[..16]); - dest_slices[1][dest_idx..][..16].copy_from_slice(&loaded.as_slice()[16..32]); - dest_slices[2][dest_idx..][..16].copy_from_slice(&loaded.as_slice()[32..48]); - dest_slices[3][dest_idx..][..16].copy_from_slice(&loaded.as_slice()[48..64]); + let (loaded_lo, loaded_hi) = simd.split_u8x64(loaded); + let (loaded_1, loaded_2) = simd.split_u8x32(loaded_lo); + let (loaded_3, loaded_4) = simd.split_u8x32(loaded_hi); + loaded_1.store_slice(&mut dest_slices[0][dest_idx..][..16]); + loaded_2.store_slice(&mut dest_slices[1][dest_idx..][..16]); + loaded_3.store_slice(&mut dest_slices[2][dest_idx..][..16]); + loaded_4.store_slice(&mut dest_slices[3][dest_idx..][..16]); } }
diff --git a/sparse_strips/vello_cpu/src/fine/mod.rs b/sparse_strips/vello_cpu/src/fine/mod.rs index 649b5ea..f020d7c 100644 --- a/sparse_strips/vello_cpu/src/fine/mod.rs +++ b/sparse_strips/vello_cpu/src/fine/mod.rs
@@ -1070,7 +1070,7 @@ for chunk in buf.chunks_exact_mut(16) { let next = self.next().unwrap(); let converted = u8x16::<S>::from_f32(next.simd, next); - chunk.copy_from_slice(converted.as_slice()); + converted.store_slice(chunk); } }) } @@ -1079,7 +1079,7 @@ self.simd.vectorize(#[inline(always)] || { for chunk in buf.chunks_exact_mut(16) { let next = self.next().unwrap(); - chunk.copy_from_slice(next.as_slice()); + next.store_slice(chunk); } }) } @@ -1098,7 +1098,7 @@ self.simd.vectorize(#[inline(always)] || { for chunk in buf.chunks_exact_mut(16) { let next = self.next().unwrap(); - chunk.copy_from_slice(next.as_slice()); + next.store_slice(chunk); } }) } @@ -1111,7 +1111,7 @@ for chunk in buf.chunks_exact_mut(16) { let next = self.next().unwrap(); let converted = f32x16::<S>::from_u8(next.simd, next); - chunk.copy_from_slice(converted.as_slice()); + converted.store_slice(chunk); } }) }