Add a `native_format` test option that runs the hybrid test against a non-RGBA8 (BGRA8) target
diff --git a/sparse_strips/vello_dev_macros/src/test.rs b/sparse_strips/vello_dev_macros/src/test.rs
index 3f30fdd..76078ab 100644
--- a/sparse_strips/vello_dev_macros/src/test.rs
+++ b/sparse_strips/vello_dev_macros/src/test.rs
@@ -41,6 +41,9 @@
     no_ref: bool,
     /// A reason for ignoring a test.
     ignore_reason: Option<String>,
+    /// Whether to enable the extra hybrid test variant (`<name>_hybrid_native_format`), which
+    /// renders into a non-RGBA8 target texture. The variant is ignored when this is not set.
+    native_format: bool,
 }
 
 impl Default for Arguments {
@@ -57,6 +60,7 @@
             no_ref: false,
             diff_pixels: 0,
             ignore_reason: None,
+            native_format: false,
         }
     }
 }
@@ -112,6 +116,10 @@
         input_fn_name.span(),
     );
     let hybrid_fn_name = Ident::new(&format!("{input_fn_name}_hybrid"), input_fn_name.span());
+    let hybrid_native_format_fn_name = Ident::new(
+        &format!("{input_fn_name}_hybrid_native_format"),
+        input_fn_name.span(),
+    );
     let webgl_fn_name = Ident::new(
         &format!("{input_fn_name}_hybrid_webgl"),
         input_fn_name.span(),
@@ -134,6 +142,7 @@
     let f32_fn_name_wasm_str = f32_fn_name_wasm.to_string();
     let multithreaded_fn_name_str = multithreaded_fn_name.to_string();
     let hybrid_fn_name_str = hybrid_fn_name.to_string();
+    let hybrid_native_format_fn_name_str = hybrid_native_format_fn_name.to_string();
     let webgl_fn_name_str = webgl_fn_name.to_string();
 
     let Arguments {
@@ -148,6 +157,7 @@
         ignore_reason,
         no_ref,
         diff_pixels,
+        native_format,
     } = parse_args(&attrs);
 
     // Wasm doesn't have access to the filesystem. For wasm, inline the snapshot bytes into the
@@ -210,6 +220,11 @@
     } else {
         empty_snippet.clone()
     };
+    let ignore_hybrid_native_format = if skip_hybrid || !native_format {
+        ignore_snippet.clone()
+    } else {
+        empty_snippet.clone()
+    };
 
     let cpu_snippet = |fn_name: Ident,
                        fn_name_str: String,
@@ -251,7 +266,7 @@
                 };
                 use vello_cpu::{RenderContext, RenderMode};
 
-                let mut ctx = get_ctx::<RenderContext>(#width, #height, #transparent, #num_threads, #level, #render_mode);
+                let mut ctx = get_ctx::<RenderContext>(#width, #height, #transparent, #num_threads, #level, #render_mode, false);
                 #input_fn_name(&mut ctx);
                 ctx.flush();
                 if !#no_ref {
@@ -437,7 +452,7 @@
             use crate::renderer::HybridRenderer;
             use vello_cpu::RenderMode;
 
-            let mut ctx = get_ctx::<HybridRenderer>(#width, #height, #transparent, 0, "fallback", RenderMode::OptimizeSpeed);
+            let mut ctx = get_ctx::<HybridRenderer>(#width, #height, #transparent, 0, "fallback", RenderMode::OptimizeSpeed, false);
             #input_fn_name(&mut ctx);
             ctx.flush();
             if !#no_ref {
@@ -445,6 +460,24 @@
             }
         }
 
+        #ignore_hybrid_native_format
+        #[cfg(not(target_arch = "wasm32"))]
+        #[test]
+        fn #hybrid_native_format_fn_name() {
+            use crate::util::{
+                check_ref, get_ctx
+            };
+            use crate::renderer::HybridRenderer;
+            use vello_cpu::RenderMode;
+
+            let mut ctx = get_ctx::<HybridRenderer>(#width, #height, #transparent, 0, "fallback", RenderMode::OptimizeSpeed, true);
+            #input_fn_name(&mut ctx);
+            ctx.flush();
+            if !#no_ref {
+                check_ref(&ctx, #input_fn_name_str, #hybrid_native_format_fn_name_str, #hybrid_tolerance, #diff_pixels, false, #reference_image_name);
+            }
+        }
+
         #ignore_hybrid_webgl
         #[cfg(all(target_arch = "wasm32", feature = "webgl"))]
         #[wasm_bindgen_test::wasm_bindgen_test]
@@ -455,7 +488,7 @@
             use crate::renderer::HybridRenderer;
             use vello_cpu::RenderMode;
 
-            let mut ctx = get_ctx::<HybridRenderer>(#width, #height, #transparent, 0, "fallback", RenderMode::OptimizeSpeed);
+            let mut ctx = get_ctx::<HybridRenderer>(#width, #height, #transparent, 0, "fallback", RenderMode::OptimizeSpeed, false);
             #input_fn_name(&mut ctx);
             ctx.flush();
             if !#no_ref {
@@ -501,6 +534,7 @@
                     "skip_multithreaded" => args.skip_multithreaded = true,
                     "skip_hybrid" => args.skip_hybrid = true,
                     "no_ref" => args.no_ref = true,
+                    "native_format" => args.native_format = true,
                     "ignore" => {
                         args.skip_cpu = true;
                         args.skip_multithreaded = true;
diff --git a/sparse_strips/vello_sparse_tests/tests/basic.rs b/sparse_strips/vello_sparse_tests/tests/basic.rs
index 93ee031..48f8220 100644
--- a/sparse_strips/vello_sparse_tests/tests/basic.rs
+++ b/sparse_strips/vello_sparse_tests/tests/basic.rs
@@ -72,7 +72,7 @@
     ctx.stroke_path(&path);
 }
 
-#[vello_test]
+#[vello_test(native_format)]
 fn filled_circle(ctx: &mut impl Renderer) {
     let circle = Circle::new((50.0, 50.0), 45.0);
     ctx.set_paint(LIME);
diff --git a/sparse_strips/vello_sparse_tests/tests/renderer.rs b/sparse_strips/vello_sparse_tests/tests/renderer.rs
index 05fe0e7..5283753 100644
--- a/sparse_strips/vello_sparse_tests/tests/renderer.rs
+++ b/sparse_strips/vello_sparse_tests/tests/renderer.rs
@@ -26,6 +26,7 @@
         num_threads: u16,
         level: Level,
         render_mode: RenderMode,
+        native_format: bool,
     ) -> Self;
     fn fill_path(&mut self, path: &BezPath);
     fn stroke_path(&mut self, path: &BezPath);
@@ -79,6 +80,7 @@
         num_threads: u16,
         level: Level,
         render_mode: RenderMode,
+        _native_format: bool,
     ) -> Self {
         let settings = RenderSettings {
             level,
@@ -241,13 +243,21 @@
     texture: wgpu::Texture,
     texture_view: wgpu::TextureView,
     renderer: RefCell<vello_hybrid::Renderer>,
+    format: wgpu::TextureFormat,
 }
 
 #[cfg(not(all(target_arch = "wasm32", feature = "webgl")))]
 impl Renderer for HybridRenderer {
     type GlyphRenderer = Scene;
 
-    fn new(width: u16, height: u16, num_threads: u16, level: Level, _: RenderMode) -> Self {
+    fn new(
+        width: u16,
+        height: u16,
+        num_threads: u16,
+        level: Level,
+        _: RenderMode,
+        native_format: bool,
+    ) -> Self {
         if num_threads != 0 {
             panic!("hybrid renderer doesn't support multi-threading");
         }
@@ -256,6 +266,14 @@
             panic!("hybrid renderer doesn't support SIMD");
         }
 
+        // `native_format` selects BGRA8 to stand in for a surface whose native format is not
+        // RGBA8, so that tests can catch issues specific to such render targets.
+        let format = if native_format {
+            wgpu::TextureFormat::Bgra8Unorm
+        } else {
+            wgpu::TextureFormat::Rgba8Unorm
+        };
+
         let scene = Scene::new(width, height);
         // Initialize wgpu device and queue for GPU rendering
         let instance = wgpu::Instance::default();
@@ -283,7 +301,7 @@
             mip_level_count: 1,
             sample_count: 1,
             dimension: wgpu::TextureDimension::D2,
-            format: wgpu::TextureFormat::Rgba8Unorm,
+            format,
             usage: wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::COPY_SRC,
             view_formats: &[],
         });
@@ -307,6 +325,7 @@
             texture,
             texture_view,
             renderer: RefCell::new(renderer),
+            format,
         }
     }
 
@@ -525,6 +544,13 @@
             buf.copy_from_slice(&row[0..width as usize * 4]);
         }
         texture_copy_buffer.unmap();
+
+        // Swizzle from BGRA to RGBA.
+        if self.format == wgpu::TextureFormat::Bgra8Unorm {
+            for pixel in pixmap.data_as_u8_slice_mut().chunks_exact_mut(4) {
+                pixel.swap(0, 2);
+            }
+        }
     }
 
     fn width(&self) -> u16 {
@@ -598,7 +624,14 @@
 impl Renderer for HybridRenderer {
     type GlyphRenderer = Scene;
 
-    fn new(width: u16, height: u16, num_threads: u16, level: Level, _: RenderMode) -> Self {
+    fn new(
+        width: u16,
+        height: u16,
+        num_threads: u16,
+        level: Level,
+        _: RenderMode,
+        _native_format: bool,
+    ) -> Self {
         use wasm_bindgen::JsCast;
         use web_sys::HtmlCanvasElement;
 
diff --git a/sparse_strips/vello_sparse_tests/tests/util.rs b/sparse_strips/vello_sparse_tests/tests/util.rs
index 0e84c2c..3f4a984 100644
--- a/sparse_strips/vello_sparse_tests/tests/util.rs
+++ b/sparse_strips/vello_sparse_tests/tests/util.rs
@@ -104,6 +104,7 @@
     num_threads: u16,
     level: &str,
     render_mode: RenderMode,
+    native_format: bool,
 ) -> T {
     let level = match level {
         #[cfg(target_arch = "aarch64")]
@@ -142,7 +143,14 @@
         _ => panic!("unknown level: {level}"),
     };
 
-    let mut ctx = T::new(width, height, num_threads, level, render_mode);
+    let mut ctx = T::new(
+        width,
+        height,
+        num_threads,
+        level,
+        render_mode,
+        native_format,
+    );
 
     if !transparent {
         let path = Rect::new(0.0, 0.0, width as f64, height as f64).to_path(0.1);