diff --git a/script/bench-go-png/main.go b/script/bench-go-png/main.go
new file mode 100644
index 0000000..5c5c036
--- /dev/null
+++ b/script/bench-go-png/main.go
@@ -0,0 +1,157 @@
+// Copyright 2021 The Wuffs Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build ignore
+
+package main
+
+// This program exercises the Go standard library's PNG decoder.
+//
+// Wuffs' C code doesn't depend on Go per se, but this program gives some
+// performance data for specific Go PNG implementations. The equivalent Wuffs
+// benchmarks (on the same test images) are run via:
+//
+// wuffs bench std/png
+
+import (
+	"bytes"
+	"fmt"
+	"image"
+	"image/draw"
+	"image/png"
+	"io/ioutil"
+	"os"
+	"runtime"
+	"strings"
+	"time"
+)
+
+const (
+	iterscale = 20 // Multiplies each test case's itersUnscaled to get its iteration count.
+	reps      = 5  // Number of reported reps per test case, after one warm up rep.
+)
+
+type testCase = struct {
+	benchname     string // Benchmark name, printed after a "Benchmark" prefix.
+	src           []byte // Encoded PNG data that is passed to decode.
+	itersUnscaled uint32 // Iteration count, before multiplying by iterscale.
+}
+
+var testCases = []testCase{{
+	benchname:     "go_png_decode_image_19k_8bpp",
+	src:           mustLoad("test/data/bricks-gray.no-ancillary.png"),
+	itersUnscaled: 50,
+}, {
+	benchname:     "go_png_decode_image_40k_24bpp",
+	src:           mustLoad("test/data/hat.png"),
+	itersUnscaled: 50,
+}, {
+	benchname:     "go_png_decode_image_77k_8bpp", // main1 matches the "_77k_8bpp" suffix to enable the RGBA conversion.
+	src:           mustLoad("test/data/bricks-dither.png"),
+	itersUnscaled: 50,
+}, {
+	benchname:     "go_png_decode_image_552k_32bpp_verify_checksum",
+	src:           mustLoad("test/data/hibiscus.primitive.png"),
+	itersUnscaled: 4,
+}, {
+	benchname:     "go_png_decode_image_4002k_24bpp",
+	src:           mustLoad("test/data/harvesters.png"),
+	itersUnscaled: 1,
+}}
+
+func mustLoad(filename string) []byte {
+	src, err := ioutil.ReadFile("../../" + filename) // filename is relative to two directories above the working directory.
+	if err != nil {
+		panic(err.Error()) // The package-level testCases initializer calls this, so there is no error to return.
+	}
+	return src
+}
+
+func main() {
+	if err := main1(); err != nil { // main1 does the work; main only reports its error and sets the exit code.
+		os.Stderr.WriteString(err.Error() + "\n")
+		os.Exit(1)
+	}
+}
+
+func main1() error {
+	fmt.Printf("# Go %s\n", runtime.Version())
+	fmt.Printf("#\n")
+	fmt.Printf("# The output format, including the \"Benchmark\" prefixes, is compatible with the\n")
+	fmt.Printf("# https://godoc.org/golang.org/x/perf/cmd/benchstat tool. To install it, first\n")
+	fmt.Printf("# install Go, then run \"go get golang.org/x/perf/cmd/benchstat\".\n")
+
+	for i := -1; i < reps; i++ { // The i == -1 rep is a warm up: timed but not printed.
+		for _, tc := range testCases {
+			runtime.GC() // Collect garbage before the timed section starts.
+
+			start := time.Now()
+
+			iters := uint64(tc.itersUnscaled) * iterscale
+			bgra := strings.HasSuffix(tc.benchname, "_77k_8bpp") // Only this case also converts to RGBA.
+			numBytes, err := decode(tc.src, bgra)
+			if err != nil {
+				return err
+			}
+			for j := uint64(1); j < iters; j++ {
+				decode(tc.src, bgra) // Error ignored: the first decode of this src was checked above.
+			}
+
+			elapsedNanos := time.Since(start)
+
+			kbPerS := numBytes * uint64(iters) * 1000000 / uint64(elapsedNanos) // Bytes per nanosecond times 1e6, i.e. kB/s.
+
+			if i < 0 {
+				continue // Warm up rep.
+			}
+
+			fmt.Printf("Benchmark%-30s %8d %12d ns/op %8d.%03d MB/s\n",
+				tc.benchname, iters, uint64(elapsedNanos)/iters, kbPerS/1000, kbPerS%1000)
+		}
+	}
+
+	return nil
+}
+
+// decode decodes the PNG image in src, then converts it to RGBA if bgra is
+// set, and returns the size in bytes of the resulting pixel data. It returns
+// an error if decoding fails or if the resulting image type is unexpected.
+func decode(src []byte, bgra bool) (numBytes uint64, retErr error) {
+	img, err := png.Decode(bytes.NewReader(src))
+	if err != nil {
+		return 0, err
+	}
+
+	bounds := img.Bounds()
+	numPixels := uint64(bounds.Dx()) * uint64(bounds.Dy())
+
+	// Go converts to RGBA (as that's what Go's image/draw standard library is
+	// optimized for); Wuffs converts to BGRA. The difference isn't important,
+	// as we just want to measure how long it takes.
+	if bgra {
+		rgba := image.NewRGBA(bounds)
+		draw.Draw(rgba, bounds, img, bounds.Min, draw.Src)
+		img = rgba
+	}
+
+	// Gray images are 1 byte per pixel. NRGBA and RGBA images are 4.
+	switch img.(type) {
+	case *image.Gray:
+		return numPixels, nil
+	case *image.NRGBA, *image.RGBA:
+		return numPixels * 4, nil
+	}
+	// Any other image type is reported as an error.
+	return 0, fmt.Errorf("unexpected image type %T", img)
+}
diff --git a/script/bench-rust-png/Cargo.toml b/script/bench-rust-png/Cargo.toml
new file mode 100644
index 0000000..3664f3e
--- /dev/null
+++ b/script/bench-rust-png/Cargo.toml
@@ -0,0 +1,8 @@
+[package]
+name    = "bench_rust_png"
+version = "0.0.1"
+authors = ["Nigel Tao <nigeltao@golang.org>"]
+
+[dependencies]
+png     = "*"
+rustc_version_runtime = "*"
diff --git a/script/bench-rust-png/src/main.rs b/script/bench-rust-png/src/main.rs
new file mode 100644
index 0000000..5bd7348
--- /dev/null
+++ b/script/bench-rust-png/src/main.rs
@@ -0,0 +1,152 @@
+// Copyright 2021 The Wuffs Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// ----------------
+
+// This program exercises the Rust PNG decoder at
+// https://github.com/image-rs/image-png
+// which is the top result for https://crates.io/search?q=png&sort=downloads
+//
+// Wuffs' C code doesn't depend on Rust per se, but this program gives some
+// performance data for specific Rust PNG implementations. The equivalent Wuffs
+// benchmarks (on the same test images) are run via:
+//
+// wuffs bench std/png
+//
+// To run this program, do "cargo run --release" from the parent directory (the
+// directory containing the Cargo.toml file).
+
+extern crate png;
+extern crate rustc_version_runtime;
+
+use std::time::Instant;
+
+const ITERSCALE: u64 = 50; // Multiplies each benchmark's iters_unscaled.
+const REPS: u64 = 5; // Number of reported reps, after one warm up rep.
+
+fn main() {
+    let version = rustc_version_runtime::version();
+    print!(
+        "# Rust {}.{}.{}\n",
+        version.major, version.minor, version.patch,
+    );
+    print!("#\n");
+    print!("# The output format, including the \"Benchmark\" prefixes, is compatible with the\n");
+    print!("# https://godoc.org/golang.org/x/perf/cmd/benchstat tool. To install it, first\n");
+    print!("# install Go, then run \"go get golang.org/x/perf/cmd/benchstat\".\n");
+
+    let mut dst = vec![0u8; 64 * 1024 * 1024]; // 64 MiB, reused by every bench call.
+
+    // The various magic constants below are copied from test/c/std/png.c.
+    for i in 0..(1 + REPS) { // The extra rep (i == 0) is the warm up.
+        bench(
+            "19k_8bpp",
+            &mut dst[..],
+            include_bytes!("../../../test/data/bricks-gray.no-ancillary.png"),
+            i == 0,        // warm_up
+            160 * 120 * 1, // want_num_bytes = 19_200
+            50,            // iters_unscaled
+        );
+
+        bench(
+            "40k_24bpp",
+            &mut dst[..],
+            include_bytes!("../../../test/data/hat.png"),
+            i == 0,       // warm_up
+            90 * 112 * 4, // want_num_bytes = 40_320
+            30,           // iters_unscaled
+        );
+
+        bench(
+            "77k_8bpp",
+            &mut dst[..],
+            include_bytes!("../../../test/data/bricks-dither.png"),
+            i == 0,        // warm_up
+            160 * 120 * 4, // want_num_bytes = 76_800
+            30,            // iters_unscaled
+        );
+
+        bench(
+            "552k_32bpp_verify_checksum",
+            &mut dst[..],
+            include_bytes!("../../../test/data/hibiscus.primitive.png"),
+            i == 0,        // warm_up
+            312 * 442 * 4, // want_num_bytes = 551_616
+            4,             // iters_unscaled
+        );
+
+        bench(
+            "4002k_24bpp",
+            &mut dst[..],
+            include_bytes!("../../../test/data/harvesters.png"),
+            i == 0,         // warm_up
+            1165 * 859 * 4, // want_num_bytes = 4_002_940
+            1,              // iters_unscaled
+        );
+    }
+}
+
+fn bench(
+    name: &str,          // Benchmark name.
+    dst: &mut [u8],      // Destination buffer.
+    src: &[u8],          // Source data.
+    warm_up: bool,       // Whether this is a warm up rep.
+    want_num_bytes: u64, // Expected num_bytes per iteration.
+    iters_unscaled: u64, // Base number of iterations.
+) {
+    let iters = iters_unscaled * ITERSCALE;
+    let mut total_num_bytes = 0u64;
+
+    let start = Instant::now();
+    for _ in 0..iters {
+        let n = decode(&mut dst[..], src);
+        if n != want_num_bytes {
+            panic!("num_bytes: got {}, want {}", n, want_num_bytes);
+        }
+        total_num_bytes += n;
+    }
+    let elapsed = start.elapsed();
+
+    let elapsed_nanos = (elapsed.as_secs() * 1_000_000_000) + (elapsed.subsec_nanos() as u64);
+    let kb_per_s: u64 = total_num_bytes * 1_000_000 / elapsed_nanos; // Bytes per ns times 1e6, i.e. kB/s.
+
+    if warm_up {
+        return; // Warm up reps are timed but not printed.
+    }
+
+    print!(
+        "Benchmarkrust_png_decode_image_{:16}   {:8}   {:12} ns/op   {:3}.{:03} MB/s\n",
+        name,
+        iters,
+        elapsed_nanos / iters,
+        kb_per_s / 1_000,
+        kb_per_s % 1_000
+    );
+}
+
+// decode decodes the PNG in src into dst and returns the number of bytes
+// processed (scaled up for RGB images, as explained below).
+fn decode(dst: &mut [u8], src: &[u8]) -> u64 {
+    let (info, mut reader) = png::Decoder::new(src).read_info().unwrap();
+    reader.next_frame(dst).unwrap();
+    let buffer_size = info.buffer_size() as u64;
+    match info.color_type {
+        // If the PNG image is RGB (not RGBA) then Rust's png crate will decode
+        // to 3 bytes per pixel. Wuffs' std/png benchmarks decode to 4 bytes
+        // per pixel (and in BGRA order, not RGB or RGBA). We'll hand-wave the
+        // difference away and say that we decoded 33% more pixels than we did.
+        png::ColorType::RGB => (buffer_size / 3) * 4,
+        _ => buffer_size,
+    }
+}
diff --git a/test/c/mimiclib/png.c b/test/c/mimiclib/png.c
index 32acd63..2b3396b 100644
--- a/test/c/mimiclib/png.c
+++ b/test/c/mimiclib/png.c
@@ -14,10 +14,18 @@
 
 // ----------------
 
-// Uncomment this line to test and bench libspng instead of libpng.
+// Uncomment one of these #define lines to test and bench alternative mimic
+// libraries (libspng, lodepng or stb_image) instead of libpng.
+//
+// These are collectively referred to as
+// WUFFS_MIMICLIB_USE_XXX_INSTEAD_OF_LIBPNG.
+//
 // #define WUFFS_MIMICLIB_USE_LIBSPNG_INSTEAD_OF_LIBPNG 1
+// #define WUFFS_MIMICLIB_USE_LODEPNG_INSTEAD_OF_LIBPNG 1
+// #define WUFFS_MIMICLIB_USE_STB_IMAGE_INSTEAD_OF_LIBPNG 1
 
-#ifdef WUFFS_MIMICLIB_USE_LIBSPNG_INSTEAD_OF_LIBPNG
+// -------------------------------- WUFFS_MIMICLIB_USE_XXX_INSTEAD_OF_LIBPNG
+#if defined(WUFFS_MIMICLIB_USE_LIBSPNG_INSTEAD_OF_LIBPNG)
 
 // We #include a foo.c file, not a foo.h file, as libspng is a "single file C
 // library".
@@ -26,6 +34,9 @@
 // We deliberately do not define the
 // WUFFS_MIMICLIB_PNG_DOES_NOT_SUPPORT_QUIRK_IGNORE_CHECKSUM macro.
 
+// We deliberately do not define the
+// WUFFS_MIMICLIB_PNG_DOES_NOT_VERIFY_CHECKSUM macro.
+
 // libspng (version 0.6.2, released November 2020) calculates but does not
 // verify the CRC-32 checksum on the final IDAT chunk. It also does not verify
 // the Adler-32 checksum. After calling spng_decode_image, it ends in
@@ -68,6 +79,7 @@
   }
 
   int fmt = 0;
+  bool swap_bgra_rgba = false;
   switch (pixfmt.repr) {
     case WUFFS_BASE__PIXEL_FORMAT__Y:
       fmt = SPNG_FMT_G8;
@@ -76,6 +88,7 @@
       // libspng doesn't do BGRA8. RGBA8 is the closest approximation. We'll
       // fix it up later.
       fmt = SPNG_FMT_RGBA8;
+      swap_bgra_rgba = true;
       break;
     default:
       ret = "mimic_png_decode: unsupported pixfmt";
@@ -103,7 +116,7 @@
   }
 
   // Fix up BGRA8 vs RGBA8.
-  if (fmt == SPNG_FMT_RGBA8) {
+  if (swap_bgra_rgba) {
     for (; n >= 4; n -= 4) {
       uint8_t swap = dst_ptr[0];
       dst_ptr[0] = dst_ptr[2];
@@ -117,12 +130,206 @@
   return ret;
 }
 
-#else  // WUFFS_MIMICLIB_USE_LIBSPNG_INSTEAD_OF_LIBPNG
+// -------------------------------- WUFFS_MIMICLIB_USE_XXX_INSTEAD_OF_LIBPNG
+#elif defined(WUFFS_MIMICLIB_USE_LODEPNG_INSTEAD_OF_LIBPNG)
+
+// We #include a foo.cpp file, not a foo.h file, as lodepng is a "single file
+// C++ library".
+#include "/path/to/your/copy/of/github.com/lvandeve/lodepng/lodepng.cpp"
+
+#define WUFFS_MIMICLIB_PNG_DOES_NOT_SUPPORT_QUIRK_IGNORE_CHECKSUM 1
+
+// We deliberately do not define the
+// WUFFS_MIMICLIB_PNG_DOES_NOT_VERIFY_CHECKSUM macro.
+
+// We deliberately do not define the
+// WUFFS_MIMICLIB_PNG_DOES_NOT_VERIFY_FINAL_IDAT_CHECKSUMS macro.
+
+const char*  //
+mimic_png_decode(uint64_t* n_bytes_out,
+                 wuffs_base__io_buffer* dst,
+                 uint32_t wuffs_initialize_flags,
+                 wuffs_base__pixel_format pixfmt,
+                 uint32_t* quirks_ptr,
+                 size_t quirks_len,
+                 wuffs_base__io_buffer* src) {
+  wuffs_base__io_buffer dst_fallback =
+      wuffs_base__slice_u8__writer(g_mimiclib_scratch_slice_u8);
+  if (!dst) {  // No dst was given: write to the scratch buffer instead.
+    dst = &dst_fallback;
+  }
+
+  uint64_t n = 0;  // Bytes per pixel, then (after scaling) bytes per image.
+  LodePNGColorType color_type = LCT_GREY;
+  unsigned int bitdepth = 8;
+  bool swap_bgra_rgba = false;
+  switch (pixfmt.repr) {
+    case WUFFS_BASE__PIXEL_FORMAT__Y:
+      n = 1;
+      color_type = LCT_GREY;
+      break;
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
+      n = 4;
+      // lodepng doesn't do BGRA8. RGBA8 is the closest approximation. We'll
+      // fix it up later.
+      color_type = LCT_RGBA;
+      swap_bgra_rgba = true;
+      break;
+    default:
+      return "mimic_png_decode: unsupported pixfmt";
+  }
+
+  const char* ret = NULL;
+  unsigned char* output = 0;
+  unsigned int width = 0;
+  unsigned int height = 0;
+  unsigned int err =
+      lodepng_decode_memory(&output, &width, &height,                    //
+                            wuffs_base__io_buffer__reader_pointer(src),  //
+                            wuffs_base__io_buffer__reader_length(src),   //
+                            color_type, bitdepth);
+  if (err) {
+    ret = lodepng_error_text(err);
+    goto cleanup0;  // Still free output, in case lodepng allocated it.
+  }
+
+  if ((width > 0xFFFF) || (height > 0xFFFF)) {
+    ret = "mimic_png_decode: image is too large";
+    goto cleanup0;
+  }
+  n *= ((uint64_t)width) * ((uint64_t)height);
+  if (n > wuffs_base__io_buffer__writer_length(dst)) {
+    ret = "mimic_png_decode: image is too large";
+    goto cleanup0;
+  }
+
+  // Copy from the mimic library's output buffer to Wuffs' dst buffer.
+  uint8_t* dst_ptr = wuffs_base__io_buffer__writer_pointer(dst);
+  memcpy(dst_ptr, output, n);
+  dst->meta.wi += n;
+  if (n_bytes_out) {
+    *n_bytes_out += n;
+  }
+
+  // Fix up BGRA8 vs RGBA8.
+  if (swap_bgra_rgba) {
+    for (; n >= 4; n -= 4) {
+      uint8_t swap = dst_ptr[0];
+      dst_ptr[0] = dst_ptr[2];
+      dst_ptr[2] = swap;
+      dst_ptr += 4;
+    }
+  }
+
+cleanup0:;
+  free(output);
+  return ret;
+}
+
+// -------------------------------- WUFFS_MIMICLIB_USE_XXX_INSTEAD_OF_LIBPNG
+#elif defined(WUFFS_MIMICLIB_USE_STB_IMAGE_INSTEAD_OF_LIBPNG)
+
+// We #define STB_IMAGE_IMPLEMENTATION before we #include the foo.h file, as
+// stb_image is a "single file C library".
+#define STB_IMAGE_IMPLEMENTATION
+#include "/path/to/your/copy/of/github.com/nothings/stb/stb_image.h"
+
+// We deliberately do not define the
+// WUFFS_MIMICLIB_PNG_DOES_NOT_SUPPORT_QUIRK_IGNORE_CHECKSUM macro. The
+// stb_image library always ignores checksums.
+
+#define WUFFS_MIMICLIB_PNG_DOES_NOT_VERIFY_CHECKSUM 1
+
+// We deliberately do not define the
+// WUFFS_MIMICLIB_PNG_DOES_NOT_VERIFY_FINAL_IDAT_CHECKSUMS macro.
+
+const char*  //
+mimic_png_decode(uint64_t* n_bytes_out,
+                 wuffs_base__io_buffer* dst,
+                 uint32_t wuffs_initialize_flags,
+                 wuffs_base__pixel_format pixfmt,
+                 uint32_t* quirks_ptr,
+                 size_t quirks_len,
+                 wuffs_base__io_buffer* src) {
+  wuffs_base__io_buffer dst_fallback =
+      wuffs_base__slice_u8__writer(g_mimiclib_scratch_slice_u8);
+  if (!dst) {  // No dst was given: write to the scratch buffer instead.
+    dst = &dst_fallback;
+  }
+
+  uint64_t n = 0;  // Bytes per pixel, then (after scaling) bytes per image.
+  bool swap_bgra_rgba = false;
+  switch (pixfmt.repr) {
+    case WUFFS_BASE__PIXEL_FORMAT__Y:
+      n = 1;
+      break;
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
+      n = 4;
+      // stb_image doesn't do BGRA8. RGBA8 is the closest approximation. We'll
+      // fix it up later.
+      swap_bgra_rgba = true;
+      break;
+    default:
+      return "mimic_png_decode: unsupported pixfmt";
+  }
+
+  int width = 0;
+  int height = 0;
+  int channels_in_file = 0;
+  unsigned char* output =
+      stbi_load_from_memory(wuffs_base__io_buffer__reader_pointer(src),  //
+                            wuffs_base__io_buffer__reader_length(src),   //
+                            &width, &height, &channels_in_file, n);  // n is 1 or 4.
+  if (!output) {
+    return "mimic_png_decode: could not load image";
+  }
+
+  const char* ret = NULL;
+
+  if ((width > 0xFFFF) || (height > 0xFFFF)) {
+    ret = "mimic_png_decode: image is too large";
+    goto cleanup0;
+  }
+  n *= ((uint64_t)width) * ((uint64_t)height);
+  if (n > wuffs_base__io_buffer__writer_length(dst)) {
+    ret = "mimic_png_decode: image is too large";
+    goto cleanup0;
+  }
+
+  // Copy from the mimic library's output buffer to Wuffs' dst buffer.
+  uint8_t* dst_ptr = wuffs_base__io_buffer__writer_pointer(dst);
+  memcpy(dst_ptr, output, n);
+  dst->meta.wi += n;
+  if (n_bytes_out) {
+    *n_bytes_out += n;
+  }
+
+  // Fix up BGRA8 vs RGBA8.
+  if (swap_bgra_rgba) {
+    for (; n >= 4; n -= 4) {
+      uint8_t swap = dst_ptr[0];
+      dst_ptr[0] = dst_ptr[2];
+      dst_ptr[2] = swap;
+      dst_ptr += 4;
+    }
+  }
+
+cleanup0:;
+  stbi_image_free(output);
+  return ret;
+}
+
+// -------------------------------- WUFFS_MIMICLIB_USE_XXX_INSTEAD_OF_LIBPNG
+#else
+
 #include "png.h"
 
 #define WUFFS_MIMICLIB_PNG_DOES_NOT_SUPPORT_QUIRK_IGNORE_CHECKSUM 1
 
 // We deliberately do not define the
+// WUFFS_MIMICLIB_PNG_DOES_NOT_VERIFY_CHECKSUM macro.
+
+// We deliberately do not define the
 // WUFFS_MIMICLIB_PNG_DOES_NOT_VERIFY_FINAL_IDAT_CHECKSUMS macro.
 
 const char*  //
@@ -201,4 +408,6 @@
   png_image_free(&pi);
   return ret;
 }
-#endif  // WUFFS_MIMICLIB_USE_LIBSPNG_INSTEAD_OF_LIBPNG
+
+#endif
+// -------------------------------- WUFFS_MIMICLIB_USE_XXX_INSTEAD_OF_LIBPNG
diff --git a/test/c/std/png.c b/test/c/std/png.c
index 6adda6d..b4e94a9 100644
--- a/test/c/std/png.c
+++ b/test/c/std/png.c
@@ -815,7 +815,9 @@
 #ifdef WUFFS_MIMIC
 
     test_mimic_png_decode_bad_crc32_checksum_ancillary,
+#ifndef WUFFS_MIMICLIB_PNG_DOES_NOT_VERIFY_CHECKSUM
     test_mimic_png_decode_bad_crc32_checksum_critical,
+#endif
     test_mimic_png_decode_image_19k_8bpp,
     test_mimic_png_decode_image_40k_24bpp,
     test_mimic_png_decode_image_77k_8bpp,
@@ -852,7 +854,9 @@
 #ifndef WUFFS_MIMICLIB_PNG_DOES_NOT_SUPPORT_QUIRK_IGNORE_CHECKSUM
     bench_mimic_png_decode_image_552k_32bpp_ignore_checksum,
 #endif
+#ifndef WUFFS_MIMICLIB_PNG_DOES_NOT_VERIFY_CHECKSUM
     bench_mimic_png_decode_image_552k_32bpp_verify_checksum,
+#endif
     bench_mimic_png_decode_image_4002k_24bpp,
 
 #endif  // WUFFS_MIMIC
