vello_hybrid: Conditionally split up rectangles into (up to) 5 smaller ones
diff --git a/sparse_strips/vello_hybrid/src/schedule.rs b/sparse_strips/vello_hybrid/src/schedule.rs
index 6875b96..92e0d2a 100644
--- a/sparse_strips/vello_hybrid/src/schedule.rs
+++ b/sparse_strips/vello_hybrid/src/schedule.rs
@@ -210,6 +210,9 @@
 /// Bit 31 of [`GpuStrip::paint_and_rect_flag`] signals that the strip
 /// represents a full rectangle.
 const RECT_STRIP_FLAG: u32 = 1 << 31;
+/// The threshold of the rectangle size after which a rectangle should be split up
+/// into multiple smaller ones.
+const LARGE_RECT_SPLIT_THRESHOLD: u16 = 32;
 
 // The sentinel tile index representing the surface.
 const SENTINEL_SLOT_IDX: usize = usize::MAX;
@@ -772,9 +775,7 @@
                     );
                 }
                 FastStripCommand::Rect(r) => {
-                    let strip = pack_rectangle_into_gpu(r, encoded_paints, paint_idxs);
-
-                    draw.0.push(strip);
+                    pack_rectangle_into_gpu(r, encoded_paints, paint_idxs, &mut draw.0);
                 }
             }
         }
@@ -1802,7 +1803,44 @@
     rect: &FastPathRect,
     encoded_paints: &[EncodedPaint],
     paint_idxs: &[u32],
-) -> GpuStrip {
+    out: &mut Vec<GpuStrip>,
+) {
+    let split = split_rect(rect);
+    for part in [
+        Some(split.main),
+        split.top,
+        split.bottom,
+        split.left,
+        split.right,
+    ]
+    .into_iter()
+    .flatten()
+    {
+        let (payload, paint_packed) =
+            Scheduler::process_paint(&rect.paint, encoded_paints, (part.x, part.y), paint_idxs);
+        out.push(make_gpu_rect(part, payload, paint_packed));
+    }
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+struct RectPart {
+    x: u16,
+    y: u16,
+    width: u16,
+    height: u16,
+    frac: u32,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+struct SplitRect {
+    main: RectPart,
+    top: Option<RectPart>,
+    bottom: Option<RectPart>,
+    left: Option<RectPart>,
+    right: Option<RectPart>,
+}
+
+fn split_rect(rect: &FastPathRect) -> SplitRect {
     let sx0 = rect.x0.floor();
     let sy0 = rect.y0.floor();
     let sx1 = rect.x1.ceil();
@@ -1814,19 +1852,100 @@
     let width = (sx1 - sx0) as u16;
     let height = (sy1 - sy0) as u16;
 
-    let (payload, paint_packed) =
-        Scheduler::process_paint(&rect.paint, encoded_paints, (x, y), paint_idxs);
+    // Note that `top_frac` and `left_fract` store the actual coverage, while
+    // `right_frac` and `bottom_fract` store one minus the coverage. This is on purpose
+    // and handled that way in the shader.
+    let left_frac = rect.x0 - sx0;
+    let top_frac = rect.y0 - sy0;
+    let right_frac = sx1 - rect.x1;
+    let bottom_frac = sy1 - rect.y1;
 
-    // Determine the fractional offsets for anti-aliasing and quantize so it
-    // fits into u8.
-    let frac = pack_unorm4x8([rect.x0 - sx0, rect.y0 - sy0, sx1 - rect.x1, sy1 - rect.y1]);
+    // There's a balance to strike between reducing work in the fragment shader by splitting
+    // out the inner part of the rectangle without anti-aliasing, and additional overhead
+    // that arises from rendering 5 rectangles instead of just one. While the exact threshold
+    // will obviously depend on the device, some experiments on a low-tier tablet showed that
+    // `LARGE_RECT_SPLIT_THRESHOLD` seems to be a a reasonable value.
+    if rect.x1 - rect.x0 < f32::from(LARGE_RECT_SPLIT_THRESHOLD)
+        || rect.y1 - rect.y0 < f32::from(LARGE_RECT_SPLIT_THRESHOLD)
+    {
+        return SplitRect {
+            main: RectPart {
+                x,
+                y,
+                width,
+                height,
+                frac: pack_unorm4x8([left_frac, top_frac, right_frac, bottom_frac]),
+            },
+            top: None,
+            bottom: None,
+            left: None,
+            right: None,
+        };
+    }
 
+    let has_left_aa = left_frac > 0.0;
+    let has_top_aa = top_frac > 0.0;
+    let has_right_aa = right_frac > 0.0;
+    let has_bottom_aa = bottom_frac > 0.0;
+    let has_top_strip = has_top_aa || has_left_aa || has_right_aa;
+    let has_bottom_strip = has_bottom_aa || has_left_aa || has_right_aa;
+    let left_inset = u16::from(has_left_aa);
+    let right_inset = u16::from(has_right_aa);
+    let top_inset = u16::from(has_top_strip);
+    let bottom_inset = u16::from(has_bottom_strip);
+    let inner_x = x + left_inset;
+    let inner_y = y + top_inset;
+    // Can't underflow because rectangles have at least `LARGE_RECT_SPLIT_THRESHOLD` in each
+    // direction, which is larger than 2.
+    let inner_width = width - left_inset - right_inset;
+    let inner_height = height - top_inset - bottom_inset;
+
+    SplitRect {
+        main: RectPart {
+            x: inner_x,
+            y: inner_y,
+            width: inner_width,
+            height: inner_height,
+            frac: 0,
+        },
+        top: has_top_strip.then_some(RectPart {
+            x,
+            y,
+            width,
+            height: 1,
+            frac: pack_unorm4x8([left_frac, top_frac, right_frac, 0.0]),
+        }),
+        bottom: has_bottom_strip.then_some(RectPart {
+            x,
+            y: y + height - 1,
+            width,
+            height: 1,
+            frac: pack_unorm4x8([left_frac, 0.0, right_frac, bottom_frac]),
+        }),
+        left: has_left_aa.then_some(RectPart {
+            x,
+            y: inner_y,
+            width: 1,
+            height: inner_height,
+            frac: pack_unorm4x8([left_frac, 0.0, 0.0, 0.0]),
+        }),
+        right: has_right_aa.then_some(RectPart {
+            x: x + width - 1,
+            y: inner_y,
+            width: 1,
+            height: inner_height,
+            frac: pack_unorm4x8([0.0, 0.0, right_frac, 0.0]),
+        }),
+    }
+}
+
+fn make_gpu_rect(part: RectPart, payload: u32, paint_packed: u32) -> GpuStrip {
     GpuStrip {
-        x,
-        y,
-        width,
-        dense_width_or_rect_height: height,
-        col_idx_or_rect_frac: frac,
+        x: part.x,
+        y: part.y,
+        width: part.width,
+        dense_width_or_rect_height: part.height,
+        col_idx_or_rect_frac: part.frac,
         payload,
         paint_and_rect_flag: paint_packed | RECT_STRIP_FLAG,
     }
@@ -1839,3 +1958,274 @@
         | (u32::from(q(v[2])) << 16)
         | (u32::from(q(v[3])) << 24)
 }
+
+#[cfg(test)]
+mod tests {
+    use super::{
+        RECT_STRIP_FLAG, RectPart, SplitRect, pack_rectangle_into_gpu, pack_unorm4x8, split_rect,
+    };
+    use crate::scene::FastPathRect;
+    use alloc::vec;
+    use alloc::vec::Vec;
+    use vello_common::encode::EncodedImage;
+    use vello_common::kurbo::{Affine, Vec2};
+    use vello_common::paint::{Color, ImageId, ImageSource, IndexedPaint, Paint};
+    use vello_common::peniko::ImageSampler;
+
+    fn solid_rect(x0: f32, y0: f32, x1: f32, y1: f32) -> FastPathRect {
+        FastPathRect {
+            x0,
+            y0,
+            x1,
+            y1,
+            paint: Paint::from(Color::from_rgba8(255, 0, 0, 255)),
+        }
+    }
+
+    fn part(x: u16, y: u16, width: u16, height: u16, frac: [f32; 4]) -> RectPart {
+        RectPart {
+            x,
+            y,
+            width,
+            height,
+            frac: pack_unorm4x8(frac),
+        }
+    }
+
+    #[test]
+    fn splitter_keeps_small_rect_whole() {
+        let rect = solid_rect(10.25, 20.5, 25.75, 35.25);
+        let split = split_rect(&rect);
+
+        assert_eq!(
+            split,
+            SplitRect {
+                main: part(10, 20, 16, 16, [0.25, 0.5, 0.25, 0.75]),
+                top: None,
+                bottom: None,
+                left: None,
+                right: None,
+            }
+        );
+    }
+
+    #[test]
+    fn splitter_keeps_subpixel_rect_inside_one_pixel() {
+        let rect = solid_rect(10.125, 20.25, 10.875, 20.75);
+        let split = split_rect(&rect);
+
+        assert_eq!(
+            split,
+            SplitRect {
+                main: part(10, 20, 1, 1, [0.125, 0.25, 0.125, 0.25]),
+                top: None,
+                bottom: None,
+                left: None,
+                right: None,
+            }
+        );
+    }
+
+    #[test]
+    fn splitter_keeps_subpixel_rect_spanning_two_pixels_in_width() {
+        let rect = solid_rect(10.75, 20.125, 11.25, 20.875);
+        let split = split_rect(&rect);
+
+        assert_eq!(
+            split,
+            SplitRect {
+                main: part(10, 20, 2, 1, [0.75, 0.125, 0.75, 0.125]),
+                top: None,
+                bottom: None,
+                left: None,
+                right: None,
+            }
+        );
+    }
+
+    #[test]
+    fn splitter_keeps_subpixel_rect_spanning_two_pixels_in_height() {
+        let rect = solid_rect(10.125, 20.75, 10.875, 21.25);
+        let split = split_rect(&rect);
+
+        assert_eq!(
+            split,
+            SplitRect {
+                main: part(10, 20, 1, 2, [0.125, 0.75, 0.125, 0.75]),
+                top: None,
+                bottom: None,
+                left: None,
+                right: None,
+            }
+        );
+    }
+
+    #[test]
+    fn splitter_keeps_multi_pixel_width_rect_within_one_pixel_height() {
+        let rect = solid_rect(10.25, 20.125, 14.75, 20.875);
+        let split = split_rect(&rect);
+
+        assert_eq!(
+            split,
+            SplitRect {
+                main: part(10, 20, 5, 1, [0.25, 0.125, 0.25, 0.125]),
+                top: None,
+                bottom: None,
+                left: None,
+                right: None,
+            }
+        );
+    }
+
+    #[test]
+    fn splitter_keeps_multi_pixel_height_rect_within_one_pixel_width() {
+        let rect = solid_rect(10.125, 20.25, 10.875, 24.75);
+        let split = split_rect(&rect);
+
+        assert_eq!(
+            split,
+            SplitRect {
+                main: part(10, 20, 1, 5, [0.125, 0.25, 0.125, 0.25]),
+                top: None,
+                bottom: None,
+                left: None,
+                right: None,
+            }
+        );
+    }
+
+    #[test]
+    fn splitter_splits_large_rect_into_five_parts() {
+        let rect = solid_rect(10.25, 20.5, 42.75, 52.75);
+        let split = split_rect(&rect);
+
+        assert_eq!(
+            split,
+            SplitRect {
+                main: part(11, 21, 31, 31, [0.0, 0.0, 0.0, 0.0]),
+                top: Some(part(10, 20, 33, 1, [0.25, 0.5, 0.25, 0.0])),
+                bottom: Some(part(10, 52, 33, 1, [0.25, 0.0, 0.25, 0.25])),
+                left: Some(part(10, 21, 1, 31, [0.25, 0.0, 0.0, 0.0])),
+                right: Some(part(42, 21, 1, 31, [0.0, 0.0, 0.25, 0.0])),
+            }
+        );
+    }
+
+    #[test]
+    fn splitter_omits_unneeded_edge_parts() {
+        let rect = solid_rect(10.0, 20.5, 42.0, 53.0);
+        let split = split_rect(&rect);
+
+        assert_eq!(
+            split,
+            SplitRect {
+                main: part(10, 21, 32, 32, [0.0, 0.0, 0.0, 0.0]),
+                top: Some(part(10, 20, 32, 1, [0.0, 0.5, 0.0, 0.0])),
+                bottom: None,
+                left: None,
+                right: None,
+            }
+        );
+    }
+
+    #[test]
+    fn splitter_handles_large_rect_with_only_vertical_aa() {
+        let rect = solid_rect(5.0, 2.25, 37.0, 34.75);
+        let split = split_rect(&rect);
+
+        assert_eq!(
+            split,
+            SplitRect {
+                main: part(5, 3, 32, 31, [0.0, 0.0, 0.0, 0.0]),
+                top: Some(part(5, 2, 32, 1, [0.0, 0.25, 0.0, 0.0])),
+                bottom: Some(part(5, 34, 32, 1, [0.0, 0.0, 0.0, 0.25])),
+                left: None,
+                right: None,
+            }
+        );
+    }
+
+    #[test]
+    fn splitter_keeps_large_aligned_rect_as_single_main_rect() {
+        let rect = solid_rect(10.0, 20.0, 42.0, 60.0);
+        let split = split_rect(&rect);
+
+        assert_eq!(
+            split,
+            SplitRect {
+                main: part(10, 20, 32, 40, [0.0, 0.0, 0.0, 0.0]),
+                top: None,
+                bottom: None,
+                left: None,
+                right: None,
+            }
+        );
+    }
+
+    #[test]
+    fn gpu_upload_emits_main_and_present_optional_parts() {
+        let rect = solid_rect(10.0, 20.5, 42.0, 53.0);
+        let mut out = Vec::new();
+
+        pack_rectangle_into_gpu(&rect, &[], &[], &mut out);
+
+        assert_eq!(out.len(), 2);
+        assert_eq!(
+            (
+                out[0].x,
+                out[0].y,
+                out[0].width,
+                out[0].dense_width_or_rect_height
+            ),
+            (10, 21, 32, 32)
+        );
+        assert_eq!(out[0].col_idx_or_rect_frac, 0);
+        assert_eq!(
+            (
+                out[1].x,
+                out[1].y,
+                out[1].width,
+                out[1].dense_width_or_rect_height
+            ),
+            (10, 20, 32, 1)
+        );
+        assert_eq!(
+            out[1].col_idx_or_rect_frac,
+            pack_unorm4x8([0.0, 0.5, 0.0, 0.0])
+        );
+        assert!(
+            out.iter()
+                .all(|strip| strip.paint_and_rect_flag & RECT_STRIP_FLAG != 0)
+        );
+    }
+
+    #[test]
+    fn gpu_upload_updates_payload_for_each_split_part() {
+        let rect = FastPathRect {
+            x0: 10.25,
+            y0: 20.5,
+            x1: 42.75,
+            y1: 52.75,
+            paint: Paint::Indexed(IndexedPaint::new(0)),
+        };
+        let encoded_paints = vec![vello_common::encode::EncodedPaint::Image(EncodedImage {
+            source: ImageSource::opaque_id(ImageId::new(1)),
+            sampler: ImageSampler::new(),
+            may_have_opacities: false,
+            transform: Affine::IDENTITY,
+            x_advance: Vec2::new(1.0, 0.0),
+            y_advance: Vec2::new(0.0, 1.0),
+            tint: None,
+        })];
+        let mut out = Vec::new();
+
+        pack_rectangle_into_gpu(&rect, &encoded_paints, &[7], &mut out);
+
+        assert_eq!(out.len(), 5);
+        assert_eq!(out[0].payload, (21_u32 << 16) | 11_u32);
+        assert_eq!(out[1].payload, (20_u32 << 16) | 10_u32);
+        assert_eq!(out[2].payload, (52_u32 << 16) | 10_u32);
+        assert_eq!(out[3].payload, (21_u32 << 16) | 10_u32);
+        assert_eq!(out[4].payload, (21_u32 << 16) | 42_u32);
+    }
+}
diff --git a/sparse_strips/vello_sparse_tests/snapshots/image_with_anti_aliasing.png b/sparse_strips/vello_sparse_tests/snapshots/image_with_anti_aliasing.png
new file mode 100644
index 0000000..879f5a8
--- /dev/null
+++ b/sparse_strips/vello_sparse_tests/snapshots/image_with_anti_aliasing.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4905b9ee1d77211434ca0840a993481d679061e137e6028fbd084fb773a355cc
+size 189
diff --git a/sparse_strips/vello_sparse_tests/tests/image.rs b/sparse_strips/vello_sparse_tests/tests/image.rs
index 4c95b6b..a7eca32 100644
--- a/sparse_strips/vello_sparse_tests/tests/image.rs
+++ b/sparse_strips/vello_sparse_tests/tests/image.rs
@@ -334,6 +334,23 @@
     image_format(ctx, image_source);
 }
 
+#[vello_test]
+fn image_with_anti_aliasing(ctx: &mut impl Renderer) {
+    let rect = Rect::new(10.5, 10.5, 90.5, 90.5);
+    let image = Image {
+        image: rgb_img_10x10(ctx),
+        sampler: ImageSampler {
+            x_extend: Extend::Repeat,
+            y_extend: Extend::Repeat,
+            quality: ImageQuality::Low,
+            alpha: 1.0,
+        },
+    };
+
+    ctx.set_paint(image);
+    ctx.fill_rect(&rect);
+}
+
 fn quality(
     ctx: &mut impl Renderer,
     transform: Affine,