Add support for transparent padding around images in the atlas
diff --git a/sparse_strips/vello_common/src/image_cache.rs b/sparse_strips/vello_common/src/image_cache.rs
index 18ba975..b9211ce 100644
--- a/sparse_strips/vello_common/src/image_cache.rs
+++ b/sparse_strips/vello_common/src/image_cache.rs
@@ -20,8 +20,11 @@
     pub height: u16,
     /// The Id of the atlas containing this image.
     pub atlas_id: AtlasId,
-    /// The offset of the image within its atlas.
+    /// The offset of the image's top-left pixel within its atlas. Padding is not included: the
+    /// padded allocation begins `padding` pixels before this offset on both axes.
     pub offset: [u16; 2],
+    /// The number of transparent padding pixels on each side of the image in the atlas.
+    pub padding: u16,
     /// The atlas allocation ID for deallocation.
     atlas_alloc_id: AllocId,
 }
@@ -64,13 +67,22 @@
         self.slots.get(id.as_u32() as usize)?.as_ref()
     }
 
-    /// Allocate an image in the cache.
+    /// Allocate an image in the cache, with optional transparent padding.
     #[expect(
         clippy::cast_possible_truncation,
         reason = "u16 is enough for the offset and width/height"
     )]
-    pub fn allocate(&mut self, width: u32, height: u32) -> Result<ImageId, AtlasError> {
-        let atlas_alloc = self.atlas_manager.try_allocate(width, height)?;
+    pub fn allocate(
+        &mut self,
+        width: u32,
+        height: u32,
+        padding: u16,
+    ) -> Result<ImageId, AtlasError> {
+        let padded_width = width + u32::from(padding) * 2;
+        let padded_height = height + u32::from(padding) * 2;
+        let atlas_alloc = self
+            .atlas_manager
+            .try_allocate(padded_width, padded_height)?;
 
         let slot_idx = self.free_idxs.pop().unwrap_or_else(|| {
             // No free slots, append to vector
@@ -86,9 +98,10 @@
             height: height as u16,
             atlas_id: atlas_alloc.atlas_id,
             offset: [
-                atlas_alloc.allocation.rectangle.min.x as u16,
-                atlas_alloc.allocation.rectangle.min.y as u16,
+                atlas_alloc.allocation.rectangle.min.x as u16 + padding,
+                atlas_alloc.allocation.rectangle.min.y as u16 + padding,
             ],
+            padding,
             atlas_alloc_id: atlas_alloc.allocation.id,
         };
         self.slots[slot_idx] = Some(image_resource);
@@ -101,12 +114,15 @@
         let index = id.as_u32() as usize;
         if let Some(image_resource) = self.slots.get_mut(index).and_then(Option::take) {
             // Deallocate from the appropriate atlas
+            let padded_width = image_resource.width as u32 + u32::from(image_resource.padding) * 2;
+            let padded_height =
+                image_resource.height as u32 + u32::from(image_resource.padding) * 2;
             self.atlas_manager
                 .deallocate(
                     image_resource.atlas_id,
                     image_resource.atlas_alloc_id,
-                    image_resource.width as u32,
-                    image_resource.height as u32,
+                    padded_width,
+                    padded_height,
                 )
                 .unwrap();
             self.free_idxs.push(index);
@@ -140,7 +156,7 @@
             ..Default::default()
         });
 
-        let id = cache.allocate(100, 100).unwrap();
+        let id = cache.allocate(100, 100, 0).unwrap();
 
         assert_eq!(id.as_u32(), 0);
         let resource = cache.get(id).unwrap();
@@ -151,14 +167,32 @@
     }
 
     #[test]
+    fn test_insert_single_image_with_padding() {
+        const PADDING: u16 = 4;
+        let config = AtlasConfig {
+            atlas_size: (ATLAS_SIZE, ATLAS_SIZE),
+            ..Default::default()
+        };
+        let mut cache = ImageCache::new_with_config(config);
+
+        let image_id = cache.allocate(100, 100, PADDING).unwrap();
+        assert_eq!(image_id.as_u32(), 0);
+        // The stored size excludes the padding; only the offset moves inward by it.
+        let image = cache.get(image_id).unwrap();
+        assert_eq!((image.width, image.height), (100, 100));
+        assert_eq!(image.padding, PADDING);
+        assert_eq!(image.offset, [PADDING, PADDING]);
+    }
+
+    #[test]
     fn test_insert_multiple_images() {
         let mut cache = ImageCache::new_with_config(AtlasConfig {
             atlas_size: (ATLAS_SIZE, ATLAS_SIZE),
             ..Default::default()
         });
 
-        let id1 = cache.allocate(50, 50).unwrap();
-        let id2 = cache.allocate(75, 75).unwrap();
+        let id1 = cache.allocate(50, 50, 0).unwrap();
+        let id2 = cache.allocate(75, 75, 0).unwrap();
 
         assert_eq!(id1.as_u32(), 0);
         assert_eq!(id2.as_u32(), 1);
@@ -191,7 +225,7 @@
             ..Default::default()
         });
 
-        let id = cache.allocate(100, 100).unwrap();
+        let id = cache.allocate(100, 100, 0).unwrap();
         assert!(cache.get(id).is_some());
 
         cache.deallocate(id);
@@ -218,9 +252,9 @@
         });
 
         // Register three images
-        let id1 = cache.allocate(50, 50).unwrap();
-        let id2 = cache.allocate(60, 60).unwrap();
-        let id3 = cache.allocate(70, 70).unwrap();
+        let id1 = cache.allocate(50, 50, 0).unwrap();
+        let id2 = cache.allocate(60, 60, 0).unwrap();
+        let id3 = cache.allocate(70, 70, 0).unwrap();
 
         assert_eq!(id1.as_u32(), 0);
         assert_eq!(id2.as_u32(), 1);
@@ -231,7 +265,7 @@
         assert!(cache.get(id2).is_none());
 
         // Register a new image - should reuse slot 1
-        let id4 = cache.allocate(80, 80).unwrap();
+        let id4 = cache.allocate(80, 80, 0).unwrap();
         // Reused slot 1
         assert_eq!(id4.as_u32(), 1);
 
@@ -251,7 +285,7 @@
 
         // Register several images
         let ids: Vec<_> = (0..5)
-            .map(|i| cache.allocate(100 + i * 10, 100 + i * 10).unwrap())
+            .map(|i| cache.allocate(100 + i * 10, 100 + i * 10, 0).unwrap())
             .collect();
 
         // Unregister some in the middle
@@ -259,8 +293,8 @@
         cache.deallocate(ids[3]);
 
         // Register new images - should reuse the freed slots
-        let new_id1 = cache.allocate(200, 200).unwrap();
-        let new_id2 = cache.allocate(300, 300).unwrap();
+        let new_id1 = cache.allocate(200, 200, 0).unwrap();
+        let new_id2 = cache.allocate(300, 300, 0).unwrap();
 
         // Should have reused slots 3 and 1 (in reverse order due to stack behavior)
         assert_eq!(new_id1.as_u32(), 3);
diff --git a/sparse_strips/vello_hybrid/src/render/common.rs b/sparse_strips/vello_hybrid/src/render/common.rs
index 71b3b36..feed515 100644
--- a/sparse_strips/vello_hybrid/src/render/common.rs
+++ b/sparse_strips/vello_hybrid/src/render/common.rs
@@ -130,8 +130,8 @@
     pub tint: u32,
     /// [`TintMode`](vello_common::paint::TintMode) discriminant. Only meaningful when `tint != 0`.
     pub tint_mode: u32,
-    /// Padding for 16-byte alignment.
-    pub _padding: u32,
+    /// Number of transparent padding pixels on each side of the image in the atlas.
+    pub image_padding: u32,
 }
 
 /// GPU encoded linear gradient data.
diff --git a/sparse_strips/vello_hybrid/src/render/webgl.rs b/sparse_strips/vello_hybrid/src/render/webgl.rs
index e221a90..cbba6f7 100644
--- a/sparse_strips/vello_hybrid/src/render/webgl.rs
+++ b/sparse_strips/vello_hybrid/src/render/webgl.rs
@@ -371,9 +371,19 @@
         &mut self,
         writer: &T,
     ) -> vello_common::paint::ImageId {
+        // TODO: If we want to use native bilinear sampling for uploaded images,
+        // we can pass a padding of 1 instead of 0 here.
+        self.upload_image_with(writer, 0)
+    }
+
+    pub(crate) fn upload_image_with<T: WebGlAtlasWriter>(
+        &mut self,
+        writer: &T,
+        padding: u16,
+    ) -> vello_common::paint::ImageId {
         let width = writer.width();
         let height = writer.height();
-        let image_id = self.image_cache.allocate(width, height).unwrap();
+        let image_id = self.image_cache.allocate(width, height, padding).unwrap();
         self.write_to_atlas(image_id, writer, None);
         image_id
     }
@@ -418,14 +428,15 @@
     /// Destroy an image from the cache and clear the allocated slot in the atlas.
     pub fn destroy_image(&mut self, image_id: vello_common::paint::ImageId) {
         if let Some(image_resource) = self.image_cache.deallocate(image_id) {
+            let padding = image_resource.padding as u32;
             self.clear_atlas_region(
                 image_resource.atlas_id,
                 [
-                    image_resource.offset[0] as u32,
-                    image_resource.offset[1] as u32,
+                    image_resource.offset[0] as u32 - padding,
+                    image_resource.offset[1] as u32 - padding,
                 ],
-                image_resource.width as u32,
-                image_resource.height as u32,
+                image_resource.width as u32 + padding * 2,
+                image_resource.height as u32 + padding * 2,
             );
         }
     }
@@ -547,7 +558,7 @@
             transform,
             tint,
             tint_mode,
-            _padding: 0,
+            image_padding: image_resource.padding as u32,
         })
     }
 
diff --git a/sparse_strips/vello_hybrid/src/render/wgpu.rs b/sparse_strips/vello_hybrid/src/render/wgpu.rs
index 04f911b..3588f14 100644
--- a/sparse_strips/vello_hybrid/src/render/wgpu.rs
+++ b/sparse_strips/vello_hybrid/src/render/wgpu.rs
@@ -296,9 +296,22 @@
         encoder: &mut CommandEncoder,
         writer: &T,
     ) -> vello_common::paint::ImageId {
+        // TODO: If we want to use native bilinear sampling for uploaded images,
+        // we can pass a padding of 1 instead of 0 here.
+        self.upload_image_with(device, queue, encoder, writer, 0)
+    }
+
+    pub(crate) fn upload_image_with<T: AtlasWriter>(
+        &mut self,
+        device: &Device,
+        queue: &Queue,
+        encoder: &mut CommandEncoder,
+        writer: &T,
+        padding: u16,
+    ) -> vello_common::paint::ImageId {
         let width = writer.width();
         let height = writer.height();
-        let image_id = self.image_cache.allocate(width, height).unwrap();
+        let image_id = self.image_cache.allocate(width, height, padding).unwrap();
         self.write_to_atlas(device, queue, encoder, image_id, writer, None);
         image_id
     }
@@ -359,17 +372,19 @@
         image_id: vello_common::paint::ImageId,
     ) {
         if let Some(image_resource) = self.image_cache.deallocate(image_id) {
+            let padding = image_resource.padding as u32;
+
             self.clear_atlas_region(
                 device,
                 queue,
                 encoder,
                 image_resource.atlas_id,
                 [
-                    image_resource.offset[0] as u32,
-                    image_resource.offset[1] as u32,
+                    image_resource.offset[0] as u32 - padding,
+                    image_resource.offset[1] as u32 - padding,
                 ],
-                image_resource.width as u32,
-                image_resource.height as u32,
+                image_resource.width as u32 + padding * 2,
+                image_resource.height as u32 + padding * 2,
             );
         }
     }
@@ -505,7 +520,7 @@
             transform,
             tint,
             tint_mode,
-            _padding: 0,
+            image_padding: image_resource.padding as u32,
         })
     }
 
diff --git a/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl b/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl
index a54638d..7843ab1 100644
--- a/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl
+++ b/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl
@@ -341,6 +341,9 @@
                 extend_mode(local_xy.x + offset, encoded_image.extend_modes.x, image_size.x),
                 extend_mode(local_xy.y + offset, encoded_image.extend_modes.y, image_size.y)
             );
+
+            // TODO: Add a fast path that uses GPU-native bilinear sampling for images that are
+            // sampled bilinearly and want transparent pixels.
             
             var sample_color: vec4<f32>;
             if encoded_image.quality == IMAGE_QUALITY_HIGH {
@@ -352,6 +355,7 @@
                     image_offset,
                     image_size,
                     encoded_image.extend_modes,
+                    encoded_image.image_padding,
                 );
             } else if encoded_image.quality == IMAGE_QUALITY_MEDIUM {
                 let final_xy = image_offset + extended_xy - vec2(0.5);
@@ -362,6 +366,7 @@
                     image_offset,
                     image_size,
                     encoded_image.extend_modes,
+                    encoded_image.image_padding,
                 );
             } else {
                 let final_xy = image_offset + extended_xy;
@@ -800,6 +805,8 @@
     tint: vec4<f32>,
     /// Tint mode: TINT_MODE_ALPHA_MASK (`0`) or TINT_MODE_MULTIPLY (`1`).
     tint_mode: u32,
+    /// Number of transparent padding pixels on each side of the image in the atlas.
+    image_padding: f32,
 }
 
 // Convert a flat texel index to 2D texture coordinates for the encoded paints texture.
@@ -834,6 +841,7 @@
     let packed_tint = texel2.y;
     let tint = select(vec4<f32>(1.0), unpack4x8unorm(packed_tint), packed_tint != 0u);
     let tint_mode = select(TINT_MODE_MULTIPLY, texel2.z, packed_tint != 0u);
+    let image_padding = f32(texel2.w);
 
     return EncodedImage(
         quality, 
@@ -844,7 +852,8 @@
         transform,
         translate,
         tint,
-        tint_mode
+        tint_mode,
+        image_padding
     );
 }
 
@@ -901,6 +910,7 @@
     image_offset: vec2<f32>,
     image_size: vec2<f32>,
     extend_modes: vec2<u32>,
+    image_padding: f32,
 ) -> vec4<f32> {
     let atlas_max = image_offset + image_size - vec2(1.0);
     let atlas_uv_clamped = clamp(coords, image_offset, atlas_max);
@@ -926,6 +936,7 @@
     image_offset: vec2<f32>,
     image_size: vec2<f32>,
     extend_modes: vec2<u32>,
+    image_padding: f32,
 ) -> vec4<f32> {
      let atlas_max = image_offset + image_size - vec2(1.0);
      let frac_coords = fract(coords + 0.5);