Add more png benchmarks for alternative mimic libs

On a mid-range x86_64 laptop:

"libpng" (Debian's /usr/lib/x86_64-linux-gnu/libpng16.so) numbers are
used as a 1.00x baseline for the other libraries.

(†): libpng's "simplified API" doesn't provide a way to ignore the
checksum. We copy the "verify_checksum" numbers for a 1.00x baseline.
Some of the other libraries don't have the option to ignore or verify
the checksum, always doing only one or the other.

----

libpng_decode_19k_8bpp                            58.0MB/s ± 0%  1.00x
libpng_decode_40k_24bpp                           73.1MB/s ± 0%  1.00x
libpng_decode_77k_8bpp                             177MB/s ± 0%  1.00x
libpng_decode_552k_32bpp_ignore_checksum           146MB/s ± 0%  (†)
libpng_decode_552k_32bpp_verify_checksum           146MB/s ± 0%  1.00x
libpng_decode_4002k_24bpp                          104MB/s ± 0%  1.00x

----

wuffs_decode_19k_8bpp/clang9                       131MB/s ± 0%  2.26x
wuffs_decode_40k_24bpp/clang9                      153MB/s ± 0%  2.09x
wuffs_decode_77k_8bpp/clang9                       472MB/s ± 0%  2.67x
wuffs_decode_552k_32bpp_ignore_checksum/clang9     370MB/s ± 0%  2.53x
wuffs_decode_552k_32bpp_verify_checksum/clang9     357MB/s ± 0%  2.45x
wuffs_decode_4002k_24bpp/clang9                    156MB/s ± 0%  1.50x

wuffs_decode_19k_8bpp/gcc10                        136MB/s ± 1%  2.34x
wuffs_decode_40k_24bpp/gcc10                       162MB/s ± 0%  2.22x
wuffs_decode_77k_8bpp/gcc10                        486MB/s ± 0%  2.75x
wuffs_decode_552k_32bpp_ignore_checksum/gcc10      388MB/s ± 0%  2.66x
wuffs_decode_552k_32bpp_verify_checksum/gcc10      373MB/s ± 0%  2.55x
wuffs_decode_4002k_24bpp/gcc10                     164MB/s ± 0%  1.58x

----

libspng_decode_19k_8bpp/clang9                    59.3MB/s ± 0%  1.02x
libspng_decode_40k_24bpp/clang9                   78.4MB/s ± 0%  1.07x
libspng_decode_77k_8bpp/clang9                     189MB/s ± 0%  1.07x
libspng_decode_552k_32bpp_ignore_checksum/clang9   236MB/s ± 0%  1.62x
libspng_decode_552k_32bpp_verify_checksum/clang9   203MB/s ± 0%  1.39x
libspng_decode_4002k_24bpp/clang9                  110MB/s ± 0%  1.06x

libspng_decode_19k_8bpp/gcc10                     59.6MB/s ± 0%  1.03x
libspng_decode_40k_24bpp/gcc10                    77.5MB/s ± 0%  1.06x
libspng_decode_77k_8bpp/gcc10                      189MB/s ± 0%  1.07x
libspng_decode_552k_32bpp_ignore_checksum/gcc10    223MB/s ± 0%  1.53x
libspng_decode_552k_32bpp_verify_checksum/gcc10    194MB/s ± 0%  1.33x
libspng_decode_4002k_24bpp/gcc10                   109MB/s ± 0%  1.05x

----

lodepng_decode_19k_8bpp/clang9                    65.1MB/s ± 0%  1.12x
lodepng_decode_40k_24bpp/clang9                   72.1MB/s ± 0%  0.99x
lodepng_decode_77k_8bpp/clang9                     222MB/s ± 0%  1.25x
lodepng_decode_552k_32bpp_ignore_checksum/clang9               skipped
lodepng_decode_552k_32bpp_verify_checksum/clang9   162MB/s ± 0%  1.11x
lodepng_decode_4002k_24bpp/clang9                 70.5MB/s ± 0%  0.68x

lodepng_decode_19k_8bpp/gcc10                     61.1MB/s ± 0%  1.05x
lodepng_decode_40k_24bpp/gcc10                    62.5MB/s ± 1%  0.85x
lodepng_decode_77k_8bpp/gcc10                      176MB/s ± 0%  0.99x
lodepng_decode_552k_32bpp_ignore_checksum/gcc10                skipped
lodepng_decode_552k_32bpp_verify_checksum/gcc10    139MB/s ± 0%  0.95x
lodepng_decode_4002k_24bpp/gcc10                  62.3MB/s ± 0%  0.60x

----

stbimage_decode_19k_8bpp/clang9                   75.1MB/s ± 1%  1.29x
stbimage_decode_40k_24bpp/clang9                  84.6MB/s ± 0%  1.16x
stbimage_decode_77k_8bpp/clang9                    234MB/s ± 0%  1.32x
stbimage_decode_552k_32bpp_ignore_checksum/clang9  162MB/s ± 0%  1.11x
stbimage_decode_552k_32bpp_verify_checksum/clang9              skipped
stbimage_decode_4002k_24bpp/clang9                80.7MB/s ± 0%  0.78x

stbimage_decode_19k_8bpp/gcc10                    73.3MB/s ± 0%  1.26x
stbimage_decode_40k_24bpp/gcc10                   81.8MB/s ± 0%  1.12x
stbimage_decode_77k_8bpp/gcc10                     214MB/s ± 0%  1.21x
stbimage_decode_552k_32bpp_ignore_checksum/gcc10   145MB/s ± 0%  0.99x
stbimage_decode_552k_32bpp_verify_checksum/gcc10               skipped
stbimage_decode_4002k_24bpp/gcc10                 79.7MB/s ± 0%  0.77x

----

go_decode_19k_8bpp/go1.16                         39.3MB/s ± 1%  0.68x
go_decode_40k_24bpp/go1.16                        46.5MB/s ± 1%  0.64x
go_decode_77k_8bpp/go1.16                         78.3MB/s ± 0%  0.44x
go_decode_552k_32bpp_ignore_checksum/go1.16                    skipped
go_decode_552k_32bpp_verify_checksum/go1.16        120MB/s ± 1%  0.82x
go_decode_4002k_24bpp/go1.16                      50.7MB/s ± 0%  0.49x

----

rust_decode_19k_8bpp/rust1.48                     88.3MB/s ± 0%  1.52x
rust_decode_40k_24bpp/rust1.48                     133MB/s ± 0%  1.82x
rust_decode_77k_8bpp/rust1.48                      180MB/s ± 0%  1.02x
rust_decode_552k_32bpp_ignore_checksum/rust1.48                skipped
rust_decode_552k_32bpp_verify_checksum/rust1.48    146MB/s ± 0%  1.00x
rust_decode_4002k_24bpp/rust1.48                   134MB/s ± 0%  1.29x
diff --git a/script/bench-go-png/main.go b/script/bench-go-png/main.go
new file mode 100644
index 0000000..5c5c036
--- /dev/null
+++ b/script/bench-go-png/main.go
@@ -0,0 +1,157 @@
+// Copyright 2021 The Wuffs Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build ignore
+
+package main
+
+// This program exercises the Go standard library's PNG decoder.
+//
+// Wuffs' C code doesn't depend on Go per se, but this program gives some
+// performance data for specific Go PNG implementations. The equivalent Wuffs
+// benchmarks (on the same test images) are run via:
+//
+// wuffs bench std/png
+
+import (
+	"bytes"
+	"fmt"
+	"image"
+	"image/draw"
+	"image/png"
+	"io/ioutil"
+	"os"
+	"runtime"
+	"strings"
+	"time"
+)
+
+const (
+	iterscale = 20
+	reps      = 5
+)
+
+type testCase = struct {
+	benchname     string
+	src           []byte
+	itersUnscaled uint32
+}
+
+var testCases = []testCase{{
+	benchname:     "go_png_decode_image_19k_8bpp",
+	src:           mustLoad("test/data/bricks-gray.no-ancillary.png"),
+	itersUnscaled: 50,
+}, {
+	benchname:     "go_png_decode_image_40k_24bpp",
+	src:           mustLoad("test/data/hat.png"),
+	itersUnscaled: 50,
+}, {
+	benchname:     "go_png_decode_image_77k_8bpp",
+	src:           mustLoad("test/data/bricks-dither.png"),
+	itersUnscaled: 50,
+}, {
+	benchname:     "go_png_decode_image_552k_32bpp_verify_checksum",
+	src:           mustLoad("test/data/hibiscus.primitive.png"),
+	itersUnscaled: 4,
+}, {
+	benchname:     "go_png_decode_image_4002k_24bpp",
+	src:           mustLoad("test/data/harvesters.png"),
+	itersUnscaled: 1,
+}}
+
+func mustLoad(filename string) []byte { // Returns the file's contents, panicking on any read error.
+	src, err := ioutil.ReadFile("../../" + filename) // Relative to the working directory; presumably script/bench-go-png (TODO confirm).
+	if err != nil {
+		panic(err.Error())
+	}
+	return src
+}
+
+func main() {
+	if err := main1(); err != nil {
+		os.Stderr.WriteString(err.Error() + "\n")
+		os.Exit(1)
+	}
+}
+
+func main1() error { // Runs every test case (1 + reps) times, printing benchstat-style lines.
+	fmt.Printf("# Go %s\n", runtime.Version())
+	fmt.Printf("#\n")
+	fmt.Printf("# The output format, including the \"Benchmark\" prefixes, is compatible with the\n")
+	fmt.Printf("# https://godoc.org/golang.org/x/perf/cmd/benchstat tool. To install it, first\n")
+	fmt.Printf("# install Go, then run \"go get golang.org/x/perf/cmd/benchstat\".\n")
+
+	for i := -1; i < reps; i++ { // The i == -1 pass is a warm up rep: it runs but prints nothing.
+		for _, tc := range testCases {
+			runtime.GC()
+
+			start := time.Now()
+
+			iters := uint64(tc.itersUnscaled) * iterscale
+			bgra := strings.HasSuffix(tc.benchname, "_77k_8bpp") // Only this case takes decode's extra draw.Draw conversion.
+			numBytes, err := decode(tc.src, bgra)
+			if err != nil {
+				return err
+			}
+			for j := uint64(1); j < iters; j++ { // j starts at 1 as the call above was iteration 0.
+				decode(tc.src, bgra) // Results are discarded; only the first call's error is checked.
+			}
+
+			elapsedNanos := time.Since(start)
+
+			kbPerS := numBytes * uint64(iters) * 1000000 / uint64(elapsedNanos) // Bytes per nanosecond, times 1e6, is kilobytes per second.
+
+			if i < 0 {
+				continue // Warm up rep.
+			}
+
+			fmt.Printf("Benchmark%-30s %8d %12d ns/op %8d.%03d MB/s\n",
+				tc.benchname, iters, uint64(elapsedNanos)/iters, kbPerS/1000, kbPerS%1000)
+		}
+	}
+
+	return nil
+}
+
+func decode(src []byte, bgra bool) (numBytes uint64, retErr error) { // Decodes src, returning the decoded size in bytes.
+	m, err := png.Decode(bytes.NewReader(src))
+	if err != nil {
+		return 0, err
+	}
+
+	b := m.Bounds()
+	n := uint64(b.Dx()) * uint64(b.Dy()) // Pixel count; scaled to a byte count by the switch below.
+
+	// Go converts to RGBA (as that's what Go's image/draw standard library is
+	// optimized for); Wuffs converts to BGRA. The difference isn't important,
+	// as we just want to measure how long it takes.
+	if bgra {
+		dst := image.NewRGBA(b)
+		draw.Draw(dst, b, m, b.Min, draw.Src)
+		m = dst
+	}
+
+	switch m.(type) {
+	case *image.Gray:
+		n *= 1 // 1 byte per pixel.
+	case *image.NRGBA:
+		n *= 4 // 4 bytes per pixel.
+	case *image.RGBA:
+		n *= 4 // 4 bytes per pixel.
+	default:
+		return 0, fmt.Errorf("unexpected image type %T", m)
+	}
+
+	return n, nil
+}
diff --git a/script/bench-rust-png/Cargo.toml b/script/bench-rust-png/Cargo.toml
new file mode 100644
index 0000000..3664f3e
--- /dev/null
+++ b/script/bench-rust-png/Cargo.toml
@@ -0,0 +1,8 @@
+[package]
+name    = "bench_rust_png"
+version = "0.0.1"
+authors = ["Nigel Tao <nigeltao@golang.org>"]
+
+[dependencies]
+png     = "*"
+rustc_version_runtime = "*"
diff --git a/script/bench-rust-png/src/main.rs b/script/bench-rust-png/src/main.rs
new file mode 100644
index 0000000..5bd7348
--- /dev/null
+++ b/script/bench-rust-png/src/main.rs
@@ -0,0 +1,152 @@
+// Copyright 2021 The Wuffs Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// ----------------
+
+// This program exercises the Rust PNG decoder at
+// https://github.com/image-rs/image-png
+// which is the top result for https://crates.io/search?q=png&sort=downloads
+//
+// Wuffs' C code doesn't depend on Rust per se, but this program gives some
+// performance data for specific Rust PNG implementations. The equivalent Wuffs
+// benchmarks (on the same test images) are run via:
+//
+// wuffs bench std/png
+//
+// To run this program, do "cargo run --release" from the parent directory (the
+// directory containing the Cargo.toml file).
+
+extern crate png;
+extern crate rustc_version_runtime;
+
+use std::time::Instant;
+
+const ITERSCALE: u64 = 50;
+const REPS: u64 = 5;
+
+fn main() {
+    let version = rustc_version_runtime::version();
+    print!(
+        "# Rust {}.{}.{}\n",
+        version.major, version.minor, version.patch,
+    );
+    print!("#\n");
+    print!("# The output format, including the \"Benchmark\" prefixes, is compatible with the\n");
+    print!("# https://godoc.org/golang.org/x/perf/cmd/benchstat tool. To install it, first\n");
+    print!("# install Go, then run \"go get golang.org/x/perf/cmd/benchstat\".\n");
+
+    let mut dst = vec![0u8; 64 * 1024 * 1024];
+
+    // The various magic constants below are copied from test/c/std/png.c
+    for i in 0..(1 + REPS) {
+        bench(
+            "19k_8bpp",
+            &mut dst[..],
+            include_bytes!("../../../test/data/bricks-gray.no-ancillary.png"),
+            i == 0,        // warm_up
+            160 * 120 * 1, // want_num_bytes = 19_200
+            50,            // iters_unscaled
+        );
+
+        bench(
+            "40k_24bpp",
+            &mut dst[..],
+            include_bytes!("../../../test/data/hat.png"),
+            i == 0,       // warm_up
+            90 * 112 * 4, // want_num_bytes = 40_320
+            30,           // iters_unscaled
+        );
+
+        bench(
+            "77k_8bpp",
+            &mut dst[..],
+            include_bytes!("../../../test/data/bricks-dither.png"),
+            i == 0,        // warm_up
+            160 * 120 * 4, // want_num_bytes = 76_800
+            30,            // iters_unscaled
+        );
+
+        bench(
+            "552k_32bpp_verify_checksum",
+            &mut dst[..],
+            include_bytes!("../../../test/data/hibiscus.primitive.png"),
+            i == 0,        // warm_up
+            312 * 442 * 4, // want_num_bytes = 551_616
+            4,             // iters_unscaled
+        );
+
+        bench(
+            "4002k_24bpp",
+            &mut dst[..],
+            include_bytes!("../../../test/data/harvesters.png"),
+            i == 0,         // warm_up
+            1165 * 859 * 4, // want_num_bytes = 4_002_940
+            1,              // iters_unscaled
+        );
+    }
+}
+
+fn bench(
+    name: &str,          // Benchmark name.
+    dst: &mut [u8],      // Destination buffer.
+    src: &[u8],          // Source data.
+    warm_up: bool,       // Whether this is a warm up rep.
+    want_num_bytes: u64, // Expected num_bytes per iteration.
+    iters_unscaled: u64, // Base number of iterations.
+) {
+    let iters = iters_unscaled * ITERSCALE;
+    let mut total_num_bytes = 0u64;
+
+    let start = Instant::now();
+    for _ in 0..iters {
+        let n = decode(&mut dst[..], src);
+        if n != want_num_bytes {
+            panic!("num_bytes: got {}, want {}", n, want_num_bytes);
+        }
+        total_num_bytes += n;
+    }
+    let elapsed = start.elapsed();
+
+    let elapsed_nanos = (elapsed.as_secs() * 1_000_000_000) + (elapsed.subsec_nanos() as u64);
+    let kb_per_s: u64 = total_num_bytes * 1_000_000 / elapsed_nanos; // Bytes per nanosecond, times 1e6, is kilobytes per second.
+
+    if warm_up {
+        return; // A warm up rep does the work but prints nothing.
+    }
+
+    print!(
+        "Benchmarkrust_png_decode_image_{:16}   {:8}   {:12} ns/op   {:3}.{:03} MB/s\n",
+        name,
+        iters,
+        elapsed_nanos / iters,
+        kb_per_s / 1_000,
+        kb_per_s % 1_000
+    );
+}
+
+// decode returns the number of bytes decoded, counting RGB as 4 bytes per pixel.
+fn decode(dst: &mut [u8], src: &[u8]) -> u64 {
+    let decoder = png::Decoder::new(src);
+    let (info, mut reader) = decoder.read_info().unwrap();
+    let num_bytes = info.buffer_size() as u64;
+    reader.next_frame(dst).unwrap();
+    if info.color_type == png::ColorType::RGB {
+        // If the PNG image is RGB (not RGBA) then Rust's png crate will decode
+        // to 3 bytes per pixel. Wuffs' std/png benchmarks decode to 4 bytes
+        // per pixel (and in BGRA order, not RGB or RGBA). We'll hand-wave the
+        // difference away and say that we decoded 33% more bytes than we did.
+        return (num_bytes / 3) * 4;
+    }
+    num_bytes
+}
diff --git a/test/c/mimiclib/png.c b/test/c/mimiclib/png.c
index 32acd63..2b3396b 100644
--- a/test/c/mimiclib/png.c
+++ b/test/c/mimiclib/png.c
@@ -14,10 +14,18 @@
 
 // ----------------
 
-// Uncomment this line to test and bench libspng instead of libpng.
+// Uncomment one of these #define lines to test and bench alternative mimic
+// libraries (libspng, lodepng or stb_image) instead of libpng.
+//
+// These are collectively referred to as
+// WUFFS_MIMICLIB_USE_XXX_INSTEAD_OF_LIBPNG.
+//
 // #define WUFFS_MIMICLIB_USE_LIBSPNG_INSTEAD_OF_LIBPNG 1
+// #define WUFFS_MIMICLIB_USE_LODEPNG_INSTEAD_OF_LIBPNG 1
+// #define WUFFS_MIMICLIB_USE_STB_IMAGE_INSTEAD_OF_LIBPNG 1
 
-#ifdef WUFFS_MIMICLIB_USE_LIBSPNG_INSTEAD_OF_LIBPNG
+// -------------------------------- WUFFS_MIMICLIB_USE_XXX_INSTEAD_OF_LIBPNG
+#if defined(WUFFS_MIMICLIB_USE_LIBSPNG_INSTEAD_OF_LIBPNG)
 
 // We #include a foo.c file, not a foo.h file, as libspng is a "single file C
 // library".
@@ -26,6 +34,9 @@
 // We deliberately do not define the
 // WUFFS_MIMICLIB_PNG_DOES_NOT_SUPPORT_QUIRK_IGNORE_CHECKSUM macro.
 
+// We deliberately do not define the
+// WUFFS_MIMICLIB_PNG_DOES_NOT_VERIFY_CHECKSUM macro.
+
 // libspng (version 0.6.2, released November 2020) calculates but does not
 // verify the CRC-32 checksum on the final IDAT chunk. It also does not verify
 // the Adler-32 checksum. After calling spng_decode_image, it ends in
@@ -68,6 +79,7 @@
   }
 
   int fmt = 0;
+  bool swap_bgra_rgba = false;
   switch (pixfmt.repr) {
     case WUFFS_BASE__PIXEL_FORMAT__Y:
       fmt = SPNG_FMT_G8;
@@ -76,6 +88,7 @@
       // libspng doesn't do BGRA8. RGBA8 is the closest approximation. We'll
       // fix it up later.
       fmt = SPNG_FMT_RGBA8;
+      swap_bgra_rgba = true;
       break;
     default:
       ret = "mimic_png_decode: unsupported pixfmt";
@@ -103,7 +116,7 @@
   }
 
   // Fix up BGRA8 vs RGBA8.
-  if (fmt == SPNG_FMT_RGBA8) {
+  if (swap_bgra_rgba) {
     for (; n >= 4; n -= 4) {
       uint8_t swap = dst_ptr[0];
       dst_ptr[0] = dst_ptr[2];
@@ -117,12 +130,206 @@
   return ret;
 }
 
-#else  // WUFFS_MIMICLIB_USE_LIBSPNG_INSTEAD_OF_LIBPNG
+// -------------------------------- WUFFS_MIMICLIB_USE_XXX_INSTEAD_OF_LIBPNG
+#elif defined(WUFFS_MIMICLIB_USE_LODEPNG_INSTEAD_OF_LIBPNG)
+
+// We #include a foo.cpp file, not a foo.h file, as lodepng is a "single file
+// C++ library".
+#include "/path/to/your/copy/of/github.com/lvandeve/lodepng/lodepng.cpp"
+
+#define WUFFS_MIMICLIB_PNG_DOES_NOT_SUPPORT_QUIRK_IGNORE_CHECKSUM 1
+
+// We deliberately do not define the
+// WUFFS_MIMICLIB_PNG_DOES_NOT_VERIFY_CHECKSUM macro.
+
+// We deliberately do not define the
+// WUFFS_MIMICLIB_PNG_DOES_NOT_VERIFY_FINAL_IDAT_CHECKSUMS macro.
+
+const char*  // Returns NULL on success or an error message on failure.
+mimic_png_decode(uint64_t* n_bytes_out,
+                 wuffs_base__io_buffer* dst,
+                 uint32_t wuffs_initialize_flags,  // Unused by this function.
+                 wuffs_base__pixel_format pixfmt,
+                 uint32_t* quirks_ptr,  // Unused by this function.
+                 size_t quirks_len,     // Unused by this function.
+                 wuffs_base__io_buffer* src) {
+  wuffs_base__io_buffer dst_fallback =
+      wuffs_base__slice_u8__writer(g_mimiclib_scratch_slice_u8);
+  if (!dst) {  // A NULL dst means decode into the scratch buffer instead.
+    dst = &dst_fallback;
+  }
+
+  uint64_t n = 0;  // Bytes per pixel; becomes the total byte count below.
+  LodePNGColorType color_type = 0;
+  unsigned int bitdepth = 8;
+  bool swap_bgra_rgba = false;
+  switch (pixfmt.repr) {
+    case WUFFS_BASE__PIXEL_FORMAT__Y:
+      n = 1;
+      color_type = LCT_GREY;
+      break;
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
+      n = 4;
+      // lodepng doesn't do BGRA8. RGBA8 is the closest approximation. We'll
+      // fix it up later.
+      color_type = LCT_RGBA;
+      swap_bgra_rgba = true;
+      break;
+    default:
+      return "mimic_png_decode: unsupported pixfmt";
+  }
+
+  unsigned char* output = 0;
+  unsigned int width = 0;
+  unsigned int height = 0;
+  unsigned int err =
+      lodepng_decode_memory(&output, &width, &height,                    //
+                            wuffs_base__io_buffer__reader_pointer(src),  //
+                            wuffs_base__io_buffer__reader_length(src),   //
+                            color_type, bitdepth);
+  if (err) {
+    return lodepng_error_text(err);
+  }
+
+  const char* ret = NULL;
+
+  if ((width > 0xFFFF) || (height > 0xFFFF)) {  // Keeps the product below from overflowing.
+    ret = "mimic_png_decode: image is too large";
+    goto cleanup0;
+  }
+  n *= ((uint64_t)width) * ((uint64_t)height);
+  if (n > wuffs_base__io_buffer__writer_length(dst)) {
+    ret = "mimic_png_decode: image is too large";
+    goto cleanup0;
+  }
+
+  // Copy from the mimic library's output buffer to Wuffs' dst buffer.
+  uint8_t* dst_ptr = wuffs_base__io_buffer__writer_pointer(dst);
+  memcpy(dst_ptr, output, n);
+  dst->meta.wi += n;
+  if (n_bytes_out) {
+    *n_bytes_out += n;  // Accumulates onto the caller's running total.
+  }
+
+  // Fix up BGRA8 vs RGBA8.
+  if (swap_bgra_rgba) {
+    for (; n >= 4; n -= 4) {  // Swap each pixel's first and third bytes.
+      uint8_t swap = dst_ptr[0];
+      dst_ptr[0] = dst_ptr[2];
+      dst_ptr[2] = swap;
+      dst_ptr += 4;
+    }
+  }
+
+cleanup0:;
+  free(output);
+  return ret;
+}
+
+// -------------------------------- WUFFS_MIMICLIB_USE_XXX_INSTEAD_OF_LIBPNG
+#elif defined(WUFFS_MIMICLIB_USE_STB_IMAGE_INSTEAD_OF_LIBPNG)
+
+// We #define STB_IMAGE_IMPLEMENTATION before the #include, as stb_image is a
+// "single file C library" whose foo.h file also holds the implementation.
+#define STB_IMAGE_IMPLEMENTATION
+#include "/path/to/your/copy/of/github.com/nothings/stb/stb_image.h"
+
+// We deliberately do not define the
+// WUFFS_MIMICLIB_PNG_DOES_NOT_SUPPORT_QUIRK_IGNORE_CHECKSUM macro. The
+// stb_image library always ignores checksums.
+
+#define WUFFS_MIMICLIB_PNG_DOES_NOT_VERIFY_CHECKSUM 1
+
+// We deliberately do not define the
+// WUFFS_MIMICLIB_PNG_DOES_NOT_VERIFY_FINAL_IDAT_CHECKSUMS macro.
+
+const char*  // Returns NULL on success or an error message on failure.
+mimic_png_decode(uint64_t* n_bytes_out,
+                 wuffs_base__io_buffer* dst,
+                 uint32_t wuffs_initialize_flags,  // Unused by this function.
+                 wuffs_base__pixel_format pixfmt,
+                 uint32_t* quirks_ptr,  // Unused by this function.
+                 size_t quirks_len,     // Unused by this function.
+                 wuffs_base__io_buffer* src) {
+  wuffs_base__io_buffer dst_fallback =
+      wuffs_base__slice_u8__writer(g_mimiclib_scratch_slice_u8);
+  if (!dst) {  // A NULL dst means decode into the scratch buffer instead.
+    dst = &dst_fallback;
+  }
+
+  uint64_t n = 0;  // Bytes per pixel; becomes the total byte count below.
+  bool swap_bgra_rgba = false;
+  switch (pixfmt.repr) {
+    case WUFFS_BASE__PIXEL_FORMAT__Y:
+      n = 1;
+      break;
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
+      n = 4;
+      // stb_image doesn't do BGRA8. RGBA8 is the closest approximation. We'll
+      // fix it up later.
+      swap_bgra_rgba = true;
+      break;
+    default:
+      return "mimic_png_decode: unsupported pixfmt";
+  }
+
+  int width = 0;
+  int height = 0;
+  int channels_in_file = 0;
+  unsigned char* output =
+      stbi_load_from_memory(wuffs_base__io_buffer__reader_pointer(src),  //
+                            wuffs_base__io_buffer__reader_length(src),   //
+                            &width, &height, &channels_in_file, n);  // n (1 or 4) is the requested channel count.
+  if (!output) {
+    return "mimic_png_decode: could not load image";
+  }
+
+  const char* ret = NULL;
+
+  if ((width > 0xFFFF) || (height > 0xFFFF)) {  // Keeps the product below from overflowing.
+    ret = "mimic_png_decode: image is too large";
+    goto cleanup0;
+  }
+  n *= ((uint64_t)width) * ((uint64_t)height);
+  if (n > wuffs_base__io_buffer__writer_length(dst)) {
+    ret = "mimic_png_decode: image is too large";
+    goto cleanup0;
+  }
+
+  // Copy from the mimic library's output buffer to Wuffs' dst buffer.
+  uint8_t* dst_ptr = wuffs_base__io_buffer__writer_pointer(dst);
+  memcpy(dst_ptr, output, n);
+  dst->meta.wi += n;
+  if (n_bytes_out) {
+    *n_bytes_out += n;  // Accumulates onto the caller's running total.
+  }
+
+  // Fix up BGRA8 vs RGBA8.
+  if (swap_bgra_rgba) {
+    for (; n >= 4; n -= 4) {  // Swap each pixel's first and third bytes.
+      uint8_t swap = dst_ptr[0];
+      dst_ptr[0] = dst_ptr[2];
+      dst_ptr[2] = swap;
+      dst_ptr += 4;
+    }
+  }
+
+cleanup0:;
+  stbi_image_free(output);
+  return ret;
+}
+
+// -------------------------------- WUFFS_MIMICLIB_USE_XXX_INSTEAD_OF_LIBPNG
+#else
+
 #include "png.h"
 
 #define WUFFS_MIMICLIB_PNG_DOES_NOT_SUPPORT_QUIRK_IGNORE_CHECKSUM 1
 
 // We deliberately do not define the
+// WUFFS_MIMICLIB_PNG_DOES_NOT_VERIFY_CHECKSUM macro.
+
+// We deliberately do not define the
 // WUFFS_MIMICLIB_PNG_DOES_NOT_VERIFY_FINAL_IDAT_CHECKSUMS macro.
 
 const char*  //
@@ -201,4 +408,6 @@
   png_image_free(&pi);
   return ret;
 }
-#endif  // WUFFS_MIMICLIB_USE_LIBSPNG_INSTEAD_OF_LIBPNG
+
+#endif
+// -------------------------------- WUFFS_MIMICLIB_USE_XXX_INSTEAD_OF_LIBPNG
diff --git a/test/c/std/png.c b/test/c/std/png.c
index 6adda6d..b4e94a9 100644
--- a/test/c/std/png.c
+++ b/test/c/std/png.c
@@ -815,7 +815,9 @@
 #ifdef WUFFS_MIMIC
 
     test_mimic_png_decode_bad_crc32_checksum_ancillary,
+#ifndef WUFFS_MIMICLIB_PNG_DOES_NOT_VERIFY_CHECKSUM
     test_mimic_png_decode_bad_crc32_checksum_critical,
+#endif
     test_mimic_png_decode_image_19k_8bpp,
     test_mimic_png_decode_image_40k_24bpp,
     test_mimic_png_decode_image_77k_8bpp,
@@ -852,7 +854,9 @@
 #ifndef WUFFS_MIMICLIB_PNG_DOES_NOT_SUPPORT_QUIRK_IGNORE_CHECKSUM
     bench_mimic_png_decode_image_552k_32bpp_ignore_checksum,
 #endif
+#ifndef WUFFS_MIMICLIB_PNG_DOES_NOT_VERIFY_CHECKSUM
     bench_mimic_png_decode_image_552k_32bpp_verify_checksum,
+#endif
     bench_mimic_png_decode_image_4002k_24bpp,
 
 #endif  // WUFFS_MIMIC