Add swizzler support for dst_pixfmt BGRA_NONPREMUL_4X16LE

Binary size, before:
41496 gen/lib/c/clang-9-dynamic/wuffs-base-pixconv.lo
43104 gen/lib/c/clang-9-static/wuffs-base-pixconv.o
72344 gen/lib/c/gcc-dynamic/wuffs-base-pixconv.lo
72328 gen/lib/c/gcc-static/wuffs-base-pixconv.o

After:
48632 gen/lib/c/clang-9-dynamic/wuffs-base-pixconv.lo
50784 gen/lib/c/clang-9-static/wuffs-base-pixconv.o
82936 gen/lib/c/gcc-dynamic/wuffs-base-pixconv.lo
82920 gen/lib/c/gcc-static/wuffs-base-pixconv.o
diff --git a/internal/cgen/auxiliary/image.cc b/internal/cgen/auxiliary/image.cc
index 3a53cf1..125349d 100644
--- a/internal/cgen/auxiliary/image.cc
+++ b/internal/cgen/auxiliary/image.cc
@@ -296,6 +296,7 @@
       case WUFFS_BASE__PIXEL_FORMAT__BGR_565:
       case WUFFS_BASE__PIXEL_FORMAT__BGR:
       case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
+      case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:
       case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
       case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:
       case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:
diff --git a/internal/cgen/auxiliary/image.hh b/internal/cgen/auxiliary/image.hh
index aac1e28..c46d2be 100644
--- a/internal/cgen/auxiliary/image.hh
+++ b/internal/cgen/auxiliary/image.hh
@@ -85,6 +85,7 @@
   //  - WUFFS_BASE__PIXEL_FORMAT__BGR_565
   //  - WUFFS_BASE__PIXEL_FORMAT__BGR
   //  - WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL
+  //  - WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE
   //  - WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL
   //  - WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL
   //  - WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL
diff --git a/internal/cgen/base/pixconv-submodule.c b/internal/cgen/base/pixconv-submodule.c
index c060cb7..04358e9 100644
--- a/internal/cgen/base/pixconv-submodule.c
+++ b/internal/cgen/base/pixconv-submodule.c
@@ -761,12 +761,12 @@
 
   size_t n = len;
   while (n--) {
-    uint8_t b0 = s[0];
-    uint8_t b1 = s[1];
-    uint8_t b2 = s[2];
-    d[0] = b2;
-    d[1] = b1;
-    d[2] = b0;
+    uint8_t s0 = s[0];
+    uint8_t s1 = s[1];
+    uint8_t s2 = s[2];
+    d[0] = s2;
+    d[1] = s1;
+    d[2] = s0;
     s += 3;
     d += 3;
   }
@@ -804,14 +804,14 @@
   }
 
   while (n--) {
-    uint8_t b0 = s[0];
-    uint8_t b1 = s[1];
-    uint8_t b2 = s[2];
-    uint8_t b3 = s[3];
-    d[0] = b2;
-    d[1] = b1;
-    d[2] = b0;
-    d[3] = b3;
+    uint8_t s0 = s[0];
+    uint8_t s1 = s[1];
+    uint8_t s2 = s[2];
+    uint8_t s3 = s[3];
+    d[0] = s2;
+    d[1] = s1;
+    d[2] = s0;
+    d[3] = s3;
     s += 4;
     d += 4;
   }
@@ -832,14 +832,14 @@
 
   size_t n = len;
   while (n--) {
-    uint8_t b0 = s[0];
-    uint8_t b1 = s[1];
-    uint8_t b2 = s[2];
-    uint8_t b3 = s[3];
-    d[0] = b2;
-    d[1] = b1;
-    d[2] = b0;
-    d[3] = b3;
+    uint8_t s0 = s[0];
+    uint8_t s1 = s[1];
+    uint8_t s2 = s[2];
+    uint8_t s3 = s[3];
+    d[0] = s2;
+    d[1] = s1;
+    d[2] = s0;
+    d[3] = s3;
     s += 4;
     d += 4;
   }
@@ -894,6 +894,22 @@
   return len;
 }
 
+static uint64_t  //
+wuffs_base__pixel_swizzler__copy_8_8(uint8_t* dst_ptr,
+                                     size_t dst_len,
+                                     uint8_t* dst_palette_ptr,
+                                     size_t dst_palette_len,
+                                     const uint8_t* src_ptr,
+                                     size_t src_len) {
+  size_t dst_len8 = dst_len / 8;
+  size_t src_len8 = src_len / 8;
+  size_t len = (dst_len8 < src_len8) ? dst_len8 : src_len8;
+  if (len > 0) {
+    memmove(dst_ptr, src_ptr, len * 8);
+  }
+  return len;
+}
+
 // --------
 
 static uint64_t  //
@@ -1831,6 +1847,204 @@
 // --------
 
 static uint64_t  //
+wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__bgra_nonpremul__src(
+    uint8_t* dst_ptr,
+    size_t dst_len,
+    uint8_t* dst_palette_ptr,
+    size_t dst_palette_len,
+    const uint8_t* src_ptr,
+    size_t src_len) {
+  size_t dst_len8 = dst_len / 8;
+  size_t src_len4 = src_len / 4;
+  size_t len = (dst_len8 < src_len4) ? dst_len8 : src_len4;
+  uint8_t* d = dst_ptr;
+  const uint8_t* s = src_ptr;
+
+  size_t n = len;
+  while (n >= 1) {
+    uint8_t s0 = s[0];
+    uint8_t s1 = s[1];
+    uint8_t s2 = s[2];
+    uint8_t s3 = s[3];
+    d[0] = s0;
+    d[1] = s0;
+    d[2] = s1;
+    d[3] = s1;
+    d[4] = s2;
+    d[5] = s2;
+    d[6] = s3;
+    d[7] = s3;
+
+    s += 1 * 4;
+    d += 1 * 8;
+    n -= 1;
+  }
+  return len;
+}
+
+static uint64_t  //
+wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__bgra_nonpremul__src_over(
+    uint8_t* dst_ptr,
+    size_t dst_len,
+    uint8_t* dst_palette_ptr,
+    size_t dst_palette_len,
+    const uint8_t* src_ptr,
+    size_t src_len) {
+  size_t dst_len8 = dst_len / 8;
+  size_t src_len4 = src_len / 4;
+  size_t len = (dst_len8 < src_len4) ? dst_len8 : src_len4;
+  uint8_t* d = dst_ptr;
+  const uint8_t* s = src_ptr;
+
+  size_t n = len;
+  while (n >= 1) {
+    uint64_t d0 = wuffs_base__peek_u64le__no_bounds_check(d + (0 * 8));
+    uint64_t s0 = wuffs_base__color_u32__as__color_u64(
+        wuffs_base__peek_u32le__no_bounds_check(s + (0 * 4)));
+    wuffs_base__poke_u64le__no_bounds_check(
+        d + (0 * 8),
+        wuffs_base__composite_nonpremul_nonpremul_u64_axxx(d0, s0));
+
+    s += 1 * 4;
+    d += 1 * 8;
+    n -= 1;
+  }
+  return len;
+}
+
+static uint64_t  //
+wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__bgra_nonpremul_4x16le__src_over(
+    uint8_t* dst_ptr,
+    size_t dst_len,
+    uint8_t* dst_palette_ptr,
+    size_t dst_palette_len,
+    const uint8_t* src_ptr,
+    size_t src_len) {
+  size_t dst_len8 = dst_len / 8;
+  size_t src_len8 = src_len / 8;
+  size_t len = (dst_len8 < src_len8) ? dst_len8 : src_len8;
+  uint8_t* d = dst_ptr;
+  const uint8_t* s = src_ptr;
+
+  size_t n = len;
+  while (n >= 1) {
+    uint64_t d0 = wuffs_base__peek_u64le__no_bounds_check(d + (0 * 8));
+    uint64_t s0 = wuffs_base__peek_u64le__no_bounds_check(s + (0 * 8));
+    wuffs_base__poke_u64le__no_bounds_check(
+        d + (0 * 8),
+        wuffs_base__composite_nonpremul_nonpremul_u64_axxx(d0, s0));
+
+    s += 1 * 8;
+    d += 1 * 8;
+    n -= 1;
+  }
+  return len;
+}
+
+static uint64_t  //
+wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__index_bgra_nonpremul__src_over(
+    uint8_t* dst_ptr,
+    size_t dst_len,
+    uint8_t* dst_palette_ptr,
+    size_t dst_palette_len,
+    const uint8_t* src_ptr,
+    size_t src_len) {
+  if (dst_palette_len != 1024) {
+    return 0;
+  }
+  size_t dst_len8 = dst_len / 8;
+  size_t len = (dst_len8 < src_len) ? dst_len8 : src_len;
+  uint8_t* d = dst_ptr;
+  const uint8_t* s = src_ptr;
+  size_t n = len;
+
+  while (n >= 1) {
+    uint64_t d0 = wuffs_base__peek_u64le__no_bounds_check(d + (0 * 8));
+    uint64_t s0 = wuffs_base__color_u32__as__color_u64(
+        wuffs_base__peek_u32le__no_bounds_check(dst_palette_ptr +
+                                                ((size_t)s[0] * 4)));
+    wuffs_base__poke_u64le__no_bounds_check(
+        d + (0 * 8),
+        wuffs_base__composite_nonpremul_nonpremul_u64_axxx(d0, s0));
+
+    s += 1 * 1;
+    d += 1 * 8;
+    n -= 1;
+  }
+
+  return len;
+}
+
+static uint64_t  //
+wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__rgba_nonpremul__src(
+    uint8_t* dst_ptr,
+    size_t dst_len,
+    uint8_t* dst_palette_ptr,
+    size_t dst_palette_len,
+    const uint8_t* src_ptr,
+    size_t src_len) {
+  size_t dst_len8 = dst_len / 8;
+  size_t src_len4 = src_len / 4;
+  size_t len = (dst_len8 < src_len4) ? dst_len8 : src_len4;
+  uint8_t* d = dst_ptr;
+  const uint8_t* s = src_ptr;
+
+  size_t n = len;
+  while (n >= 1) {
+    uint8_t s0 = s[0];
+    uint8_t s1 = s[1];
+    uint8_t s2 = s[2];
+    uint8_t s3 = s[3];
+    d[0] = s2;
+    d[1] = s2;
+    d[2] = s1;
+    d[3] = s1;
+    d[4] = s0;
+    d[5] = s0;
+    d[6] = s3;
+    d[7] = s3;
+
+    s += 1 * 4;
+    d += 1 * 8;
+    n -= 1;
+  }
+  return len;
+}
+
+static uint64_t  //
+wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__rgba_nonpremul__src_over(
+    uint8_t* dst_ptr,
+    size_t dst_len,
+    uint8_t* dst_palette_ptr,
+    size_t dst_palette_len,
+    const uint8_t* src_ptr,
+    size_t src_len) {
+  size_t dst_len8 = dst_len / 8;
+  size_t src_len4 = src_len / 4;
+  size_t len = (dst_len8 < src_len4) ? dst_len8 : src_len4;
+  uint8_t* d = dst_ptr;
+  const uint8_t* s = src_ptr;
+
+  size_t n = len;
+  while (n >= 1) {
+    uint64_t d0 = wuffs_base__peek_u64le__no_bounds_check(d + (0 * 8));
+    uint64_t s0 =
+        wuffs_base__color_u32__as__color_u64(wuffs_base__swap_u32_argb_abgr(
+            wuffs_base__peek_u32le__no_bounds_check(s + (0 * 4))));
+    wuffs_base__poke_u64le__no_bounds_check(
+        d + (0 * 8),
+        wuffs_base__composite_nonpremul_nonpremul_u64_axxx(d0, s0));
+
+    s += 1 * 4;
+    d += 1 * 8;
+    n -= 1;
+  }
+  return len;
+}
+
+// --------
+
+static uint64_t  //
 wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src(
     uint8_t* dst_ptr,
     size_t dst_len,
@@ -2305,6 +2519,113 @@
 // --------
 
 static uint64_t  //
+wuffs_base__pixel_swizzler__bgrw_4x16le__bgr(uint8_t* dst_ptr,
+                                             size_t dst_len,
+                                             uint8_t* dst_palette_ptr,
+                                             size_t dst_palette_len,
+                                             const uint8_t* src_ptr,
+                                             size_t src_len) {
+  size_t dst_len8 = dst_len / 8;
+  size_t src_len3 = src_len / 3;
+  size_t len = (dst_len8 < src_len3) ? dst_len8 : src_len3;
+  uint8_t* d = dst_ptr;
+  const uint8_t* s = src_ptr;
+  size_t n = len;
+
+  while (n >= 1) {
+    uint8_t s0 = s[0];
+    uint8_t s1 = s[1];
+    uint8_t s2 = s[2];
+    d[0] = s0;
+    d[1] = s0;
+    d[2] = s1;
+    d[3] = s1;
+    d[4] = s2;
+    d[5] = s2;
+    d[6] = 0xFF;
+    d[7] = 0xFF;
+
+    s += 1 * 3;
+    d += 1 * 8;
+    n -= 1;
+  }
+
+  return len;
+}
+
+static uint64_t  //
+wuffs_base__pixel_swizzler__bgrw_4x16le__bgrx(uint8_t* dst_ptr,
+                                              size_t dst_len,
+                                              uint8_t* dst_palette_ptr,
+                                              size_t dst_palette_len,
+                                              const uint8_t* src_ptr,
+                                              size_t src_len) {
+  size_t dst_len8 = dst_len / 8;
+  size_t src_len4 = src_len / 4;
+  size_t len = (dst_len8 < src_len4) ? dst_len8 : src_len4;
+  uint8_t* d = dst_ptr;
+  const uint8_t* s = src_ptr;
+  size_t n = len;
+
+  while (n >= 1) {
+    uint8_t s0 = s[0];
+    uint8_t s1 = s[1];
+    uint8_t s2 = s[2];
+    d[0] = s0;
+    d[1] = s0;
+    d[2] = s1;
+    d[3] = s1;
+    d[4] = s2;
+    d[5] = s2;
+    d[6] = 0xFF;
+    d[7] = 0xFF;
+
+    s += 1 * 4;
+    d += 1 * 8;
+    n -= 1;
+  }
+
+  return len;
+}
+
+static uint64_t  //
+wuffs_base__pixel_swizzler__bgrw_4x16le__rgb(uint8_t* dst_ptr,
+                                             size_t dst_len,
+                                             uint8_t* dst_palette_ptr,
+                                             size_t dst_palette_len,
+                                             const uint8_t* src_ptr,
+                                             size_t src_len) {
+  size_t dst_len8 = dst_len / 8;
+  size_t src_len3 = src_len / 3;
+  size_t len = (dst_len8 < src_len3) ? dst_len8 : src_len3;
+  uint8_t* d = dst_ptr;
+  const uint8_t* s = src_ptr;
+  size_t n = len;
+
+  while (n >= 1) {
+    uint8_t s0 = s[0];
+    uint8_t s1 = s[1];
+    uint8_t s2 = s[2];
+    d[0] = s2;
+    d[1] = s2;
+    d[2] = s1;
+    d[3] = s1;
+    d[4] = s0;
+    d[5] = s0;
+    d[6] = 0xFF;
+    d[7] = 0xFF;
+
+    s += 1 * 3;
+    d += 1 * 8;
+    n -= 1;
+  }
+
+  return len;
+}
+
+// --------
+
+static uint64_t  //
 wuffs_base__pixel_swizzler__rgba_nonpremul__bgra_nonpremul_4x16le__src(
     uint8_t* dst_ptr,
     size_t dst_len,
@@ -2822,6 +3143,124 @@
 // --------
 
 static uint64_t  //
+wuffs_base__pixel_swizzler__xxxxxxxx__index__src(uint8_t* dst_ptr,
+                                                 size_t dst_len,
+                                                 uint8_t* dst_palette_ptr,
+                                                 size_t dst_palette_len,
+                                                 const uint8_t* src_ptr,
+                                                 size_t src_len) {
+  if (dst_palette_len != 1024) {
+    return 0;
+  }
+  size_t dst_len8 = dst_len / 8;
+  size_t len = (dst_len8 < src_len) ? dst_len8 : src_len;
+  uint8_t* d = dst_ptr;
+  const uint8_t* s = src_ptr;
+  size_t n = len;
+
+  while (n >= 1) {
+    wuffs_base__poke_u64le__no_bounds_check(
+        d + (0 * 8), wuffs_base__color_u32__as__color_u64(
+                         wuffs_base__peek_u32le__no_bounds_check(
+                             dst_palette_ptr + ((size_t)s[0] * 4))));
+
+    s += 1 * 1;
+    d += 1 * 8;
+    n -= 1;
+  }
+
+  return len;
+}
+
+static uint64_t  //
+wuffs_base__pixel_swizzler__xxxxxxxx__index_binary_alpha__src_over(
+    uint8_t* dst_ptr,
+    size_t dst_len,
+    uint8_t* dst_palette_ptr,
+    size_t dst_palette_len,
+    const uint8_t* src_ptr,
+    size_t src_len) {
+  if (dst_palette_len != 1024) {
+    return 0;
+  }
+  size_t dst_len8 = dst_len / 8;
+  size_t len = (dst_len8 < src_len) ? dst_len8 : src_len;
+  uint8_t* d = dst_ptr;
+  const uint8_t* s = src_ptr;
+  size_t n = len;
+
+  while (n >= 1) {
+    uint32_t s0 = wuffs_base__peek_u32le__no_bounds_check(dst_palette_ptr +
+                                                          ((size_t)s[0] * 4));
+    if (s0) {
+      wuffs_base__poke_u64le__no_bounds_check(
+          d + (0 * 8), wuffs_base__color_u32__as__color_u64(s0));
+    }
+
+    s += 1 * 1;
+    d += 1 * 8;
+    n -= 1;
+  }
+
+  return len;
+}
+
+static uint64_t  //
+wuffs_base__pixel_swizzler__xxxxxxxx__y(uint8_t* dst_ptr,
+                                        size_t dst_len,
+                                        uint8_t* dst_palette_ptr,
+                                        size_t dst_palette_len,
+                                        const uint8_t* src_ptr,
+                                        size_t src_len) {
+  size_t dst_len8 = dst_len / 8;
+  size_t len = (dst_len8 < src_len) ? dst_len8 : src_len;
+  uint8_t* d = dst_ptr;
+  const uint8_t* s = src_ptr;
+  size_t n = len;
+
+  while (n >= 1) {
+    wuffs_base__poke_u64le__no_bounds_check(
+        d + (0 * 8), 0xFFFF000000000000 | (0x010101010101 * (uint64_t)s[0]));
+
+    s += 1 * 1;
+    d += 1 * 8;
+    n -= 1;
+  }
+
+  return len;
+}
+
+static uint64_t  //
+wuffs_base__pixel_swizzler__xxxxxxxx__y_16be(uint8_t* dst_ptr,
+                                             size_t dst_len,
+                                             uint8_t* dst_palette_ptr,
+                                             size_t dst_palette_len,
+                                             const uint8_t* src_ptr,
+                                             size_t src_len) {
+  size_t dst_len8 = dst_len / 8;
+  size_t src_len2 = src_len / 2;
+  size_t len = (dst_len8 < src_len2) ? dst_len8 : src_len2;
+  uint8_t* d = dst_ptr;
+  const uint8_t* s = src_ptr;
+  size_t n = len;
+
+  while (n >= 1) {
+    uint64_t s0 =
+        ((uint64_t)(wuffs_base__peek_u16be__no_bounds_check(s + (0 * 2))));
+    wuffs_base__poke_u64le__no_bounds_check(
+        d + (0 * 8), 0xFFFF000000000000 | (0x000100010001 * s0));
+
+    s += 1 * 2;
+    d += 1 * 8;
+    n -= 1;
+  }
+
+  return len;
+}
+
+// --------
+
+static uint64_t  //
 wuffs_base__pixel_swizzler__y__y_16be(uint8_t* dst_ptr,
                                       size_t dst_len,
                                       uint8_t* dst_palette_ptr,
@@ -2911,6 +3350,12 @@
       }
 #endif
       return wuffs_base__pixel_swizzler__xxxx__y;
+
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL_4X16LE:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL_4X16LE:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL_4X16LE:
+      return wuffs_base__pixel_swizzler__xxxxxxxx__y;
   }
   return NULL;
 }
@@ -2942,6 +3387,12 @@
     case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:
     case WUFFS_BASE__PIXEL_FORMAT__RGBX:
       return wuffs_base__pixel_swizzler__xxxx__y_16be;
+
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL_4X16LE:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL_4X16LE:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL_4X16LE:
+      return wuffs_base__pixel_swizzler__xxxxxxxx__y_16be;
   }
   return NULL;
 }
@@ -3014,6 +3465,19 @@
       }
       return NULL;
 
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:
+      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=
+          1024) {
+        return NULL;
+      }
+      switch (blend) {
+        case WUFFS_BASE__PIXEL_BLEND__SRC:
+          return wuffs_base__pixel_swizzler__xxxxxxxx__index__src;
+        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
+          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__index_bgra_nonpremul__src_over;
+      }
+      return NULL;
+
     case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
       switch (blend) {
         case WUFFS_BASE__PIXEL_BLEND__SRC:
@@ -3139,6 +3603,20 @@
       }
       return NULL;
 
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL_4X16LE:
+      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=
+          1024) {
+        return NULL;
+      }
+      switch (blend) {
+        case WUFFS_BASE__PIXEL_BLEND__SRC:
+          return wuffs_base__pixel_swizzler__xxxxxxxx__index__src;
+        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
+          return wuffs_base__pixel_swizzler__xxxxxxxx__index_binary_alpha__src_over;
+      }
+      return NULL;
+
     case WUFFS_BASE__PIXEL_FORMAT__RGB:
       if (wuffs_base__pixel_swizzler__swap_rgbx_bgrx(
               dst_palette.ptr, dst_palette.len, NULL, 0, src_palette.ptr,
@@ -3191,6 +3669,10 @@
     case WUFFS_BASE__PIXEL_FORMAT__BGRX:
       return wuffs_base__pixel_swizzler__bgrw__bgr;
 
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL_4X16LE:
+      return wuffs_base__pixel_swizzler__bgrw_4x16le__bgr;
+
     case WUFFS_BASE__PIXEL_FORMAT__RGB:
       return wuffs_base__pixel_swizzler__swap_rgb_bgr;
 
@@ -3243,6 +3725,15 @@
       }
       return NULL;
 
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:
+      switch (blend) {
+        case WUFFS_BASE__PIXEL_BLEND__SRC:
+          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__bgra_nonpremul__src;
+        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
+          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__bgra_nonpremul__src_over;
+      }
+      return NULL;
+
     case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
       switch (blend) {
         case WUFFS_BASE__PIXEL_BLEND__SRC:
@@ -3327,6 +3818,15 @@
       }
       return NULL;
 
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:
+      switch (blend) {
+        case WUFFS_BASE__PIXEL_BLEND__SRC:
+          return wuffs_base__pixel_swizzler__copy_8_8;
+        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
+          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__bgra_nonpremul_4x16le__src_over;
+      }
+      return NULL;
+
     case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
       switch (blend) {
         case WUFFS_BASE__PIXEL_BLEND__SRC:
@@ -3389,6 +3889,9 @@
     case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:
       return wuffs_base__pixel_swizzler__bgrw__bgrx;
 
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:
+      return wuffs_base__pixel_swizzler__bgrw_4x16le__bgrx;
+
     case WUFFS_BASE__PIXEL_FORMAT__BGRX:
       return wuffs_base__pixel_swizzler__copy_4_4;
 
@@ -3429,6 +3932,9 @@
 #endif
       return wuffs_base__pixel_swizzler__bgrw__rgb;
 
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:
+      return wuffs_base__pixel_swizzler__bgrw_4x16le__rgb;
+
     case WUFFS_BASE__PIXEL_FORMAT__RGB:
       return wuffs_base__pixel_swizzler__copy_3_3;
 
@@ -3481,6 +3987,15 @@
       }
       return NULL;
 
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:
+      switch (blend) {
+        case WUFFS_BASE__PIXEL_BLEND__SRC:
+          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__rgba_nonpremul__src;
+        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
+          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__rgba_nonpremul__src_over;
+      }
+      return NULL;
+
     case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
       switch (blend) {
         case WUFFS_BASE__PIXEL_BLEND__SRC:
diff --git a/internal/cgen/data/data.go b/internal/cgen/data/data.go
index 8f1daf8..7cb0dda 100644
--- a/internal/cgen/data/data.go
+++ b/internal/cgen/data/data.go
@@ -583,12 +583,13 @@
 	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__squash_align4_bgr_565_8888(uint8_t* dst_ptr,\n                                                       size_t dst_len,\n                                                       const uint8_t* src_ptr,\n                                                       size_t src_len,\n                                                       bool nonpremul) {\n  size_t len = (dst_len < src_len ? dst_len : src_len) / 4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n\n  size_t n = len;\n  while (n--) {\n    uint32_t argb = wuffs_base__peek_u32le__no_bounds_check(s);\n    if (nonpremul) {\n      argb =\n          wuffs_base__color_u32_argb_nonpremul__as__color_u32_argb_premul(argb);\n    }\n    uint32_t b5 = 0x1F & (argb >> (8 - 5));\n    uint32_t g6 = 0x3F & (argb >> (16 - 6));\n    uint32_t r5 = 0x1F & (argb >> (24 - 5));\n    uint32_t alpha = argb & 0xFF000000;\n    wuffs_base__poke_u32le__no_bounds_check(\n        d, alpha | (r5 << 11) | (g6 << 5) | (b5 << 0));\n    s += 4;\n   " +
 	" d += 4;\n  }\n  return len;\n}\n\n" +
 	"" +
-	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__swap_rgb_bgr(uint8_t* dst_ptr,\n                                         size_t dst_len,\n                                         uint8_t* dst_palette_ptr,\n                                         size_t dst_palette_len,\n                                         const uint8_t* src_ptr,\n                                         size_t src_len) {\n  size_t len = (dst_len < src_len ? dst_len : src_len) / 3;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n\n  size_t n = len;\n  while (n--) {\n    uint8_t b0 = s[0];\n    uint8_t b1 = s[1];\n    uint8_t b2 = s[2];\n    d[0] = b2;\n    d[1] = b1;\n    d[2] = b0;\n    s += 3;\n    d += 3;\n  }\n  return len;\n}\n\n#if defined(WUFFS_BASE__CPU_ARCH__X86_64)\nWUFFS_BASE__MAYBE_ATTRIBUTE_TARGET(\"sse4.2\")\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__swap_rgbx_bgrx__sse42(uint8_t* dst_ptr,\n                                                  size_t dst_len,\n                                                  uint8_t* dst_palette" +
-	"_ptr,\n                                                  size_t dst_palette_len,\n                                                  const uint8_t* src_ptr,\n                                                  size_t src_len) {\n  size_t len = (dst_len < src_len ? dst_len : src_len) / 4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  __m128i shuffle = _mm_set_epi8(+0x0F, +0x0C, +0x0D, +0x0E,  //\n                                 +0x0B, +0x08, +0x09, +0x0A,  //\n                                 +0x07, +0x04, +0x05, +0x06,  //\n                                 +0x03, +0x00, +0x01, +0x02);\n\n  while (n >= 4) {\n    __m128i x;\n    x = _mm_lddqu_si128((const __m128i*)(const void*)s);\n    x = _mm_shuffle_epi8(x, shuffle);\n    _mm_storeu_si128((__m128i*)(void*)d, x);\n\n    s += 4 * 4;\n    d += 4 * 4;\n    n -= 4;\n  }\n\n  while (n--) {\n    uint8_t b0 = s[0];\n    uint8_t b1 = s[1];\n    uint8_t b2 = s[2];\n    uint8_t b3 = s[3];\n    d[0] = b2;\n    d[1] = b1;\n    d[2] = b0;\n    d[3] = b3;\n    s += 4;\n    d +" +
-	"= 4;\n  }\n  return len;\n}\n#endif  // defined(WUFFS_BASE__CPU_ARCH__X86_64)\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__swap_rgbx_bgrx(uint8_t* dst_ptr,\n                                           size_t dst_len,\n                                           uint8_t* dst_palette_ptr,\n                                           size_t dst_palette_len,\n                                           const uint8_t* src_ptr,\n                                           size_t src_len) {\n  size_t len = (dst_len < src_len ? dst_len : src_len) / 4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n\n  size_t n = len;\n  while (n--) {\n    uint8_t b0 = s[0];\n    uint8_t b1 = s[1];\n    uint8_t b2 = s[2];\n    uint8_t b3 = s[3];\n    d[0] = b2;\n    d[1] = b1;\n    d[2] = b0;\n    d[3] = b3;\n    s += 4;\n    d += 4;\n  }\n  return len;\n}\n\n" +
+	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__swap_rgb_bgr(uint8_t* dst_ptr,\n                                         size_t dst_len,\n                                         uint8_t* dst_palette_ptr,\n                                         size_t dst_palette_len,\n                                         const uint8_t* src_ptr,\n                                         size_t src_len) {\n  size_t len = (dst_len < src_len ? dst_len : src_len) / 3;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n\n  size_t n = len;\n  while (n--) {\n    uint8_t s0 = s[0];\n    uint8_t s1 = s[1];\n    uint8_t s2 = s[2];\n    d[0] = s2;\n    d[1] = s1;\n    d[2] = s0;\n    s += 3;\n    d += 3;\n  }\n  return len;\n}\n\n#if defined(WUFFS_BASE__CPU_ARCH__X86_64)\nWUFFS_BASE__MAYBE_ATTRIBUTE_TARGET(\"sse4.2\")\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__swap_rgbx_bgrx__sse42(uint8_t* dst_ptr,\n                                                  size_t dst_len,\n                                                  uint8_t* dst_palette" +
+	"_ptr,\n                                                  size_t dst_palette_len,\n                                                  const uint8_t* src_ptr,\n                                                  size_t src_len) {\n  size_t len = (dst_len < src_len ? dst_len : src_len) / 4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  __m128i shuffle = _mm_set_epi8(+0x0F, +0x0C, +0x0D, +0x0E,  //\n                                 +0x0B, +0x08, +0x09, +0x0A,  //\n                                 +0x07, +0x04, +0x05, +0x06,  //\n                                 +0x03, +0x00, +0x01, +0x02);\n\n  while (n >= 4) {\n    __m128i x;\n    x = _mm_lddqu_si128((const __m128i*)(const void*)s);\n    x = _mm_shuffle_epi8(x, shuffle);\n    _mm_storeu_si128((__m128i*)(void*)d, x);\n\n    s += 4 * 4;\n    d += 4 * 4;\n    n -= 4;\n  }\n\n  while (n--) {\n    uint8_t s0 = s[0];\n    uint8_t s1 = s[1];\n    uint8_t s2 = s[2];\n    uint8_t s3 = s[3];\n    d[0] = s2;\n    d[1] = s1;\n    d[2] = s0;\n    d[3] = s3;\n    s += 4;\n    d +" +
+	"= 4;\n  }\n  return len;\n}\n#endif  // defined(WUFFS_BASE__CPU_ARCH__X86_64)\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__swap_rgbx_bgrx(uint8_t* dst_ptr,\n                                           size_t dst_len,\n                                           uint8_t* dst_palette_ptr,\n                                           size_t dst_palette_len,\n                                           const uint8_t* src_ptr,\n                                           size_t src_len) {\n  size_t len = (dst_len < src_len ? dst_len : src_len) / 4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n\n  size_t n = len;\n  while (n--) {\n    uint8_t s0 = s[0];\n    uint8_t s1 = s[1];\n    uint8_t s2 = s[2];\n    uint8_t s3 = s[3];\n    d[0] = s2;\n    d[1] = s1;\n    d[2] = s0;\n    d[3] = s3;\n    s += 4;\n    d += 4;\n  }\n  return len;\n}\n\n" +
 	"" +
 	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__copy_1_1(uint8_t* dst_ptr,\n                                     size_t dst_len,\n                                     uint8_t* dst_palette_ptr,\n                                     size_t dst_palette_len,\n                                     const uint8_t* src_ptr,\n                                     size_t src_len) {\n  size_t len = (dst_len < src_len) ? dst_len : src_len;\n  if (len > 0) {\n    memmove(dst_ptr, src_ptr, len);\n  }\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__copy_3_3(uint8_t* dst_ptr,\n                                     size_t dst_len,\n                                     uint8_t* dst_palette_ptr,\n                                     size_t dst_palette_len,\n                                     const uint8_t* src_ptr,\n                                     size_t src_len) {\n  size_t dst_len3 = dst_len / 3;\n  size_t src_len3 = src_len / 3;\n  size_t len = (dst_len3 < src_len3) ? dst_len3 : src_len3;\n  if (len > 0) {\n  " +
-	"  memmove(dst_ptr, src_ptr, len * 3);\n  }\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__copy_4_4(uint8_t* dst_ptr,\n                                     size_t dst_len,\n                                     uint8_t* dst_palette_ptr,\n                                     size_t dst_palette_len,\n                                     const uint8_t* src_ptr,\n                                     size_t src_len) {\n  size_t dst_len4 = dst_len / 4;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len4 < src_len4) ? dst_len4 : src_len4;\n  if (len > 0) {\n    memmove(dst_ptr, src_ptr, len * 4);\n  }\n  return len;\n}\n\n" +
+	"  memmove(dst_ptr, src_ptr, len * 3);\n  }\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__copy_4_4(uint8_t* dst_ptr,\n                                     size_t dst_len,\n                                     uint8_t* dst_palette_ptr,\n                                     size_t dst_palette_len,\n                                     const uint8_t* src_ptr,\n                                     size_t src_len) {\n  size_t dst_len4 = dst_len / 4;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len4 < src_len4) ? dst_len4 : src_len4;\n  if (len > 0) {\n    memmove(dst_ptr, src_ptr, len * 4);\n  }\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__copy_8_8(uint8_t* dst_ptr,\n                                     size_t dst_len,\n                                     uint8_t* dst_palette_ptr,\n                                     size_t dst_palette_len,\n                                     const uint8_t* src_ptr,\n                                     size_t src_len) {\n  size_t dst_len8 = dst_l" +
+	"en / 8;\n  size_t src_len8 = src_len / 8;\n  size_t len = (dst_len8 < src_len8) ? dst_len8 : src_len8;\n  if (len > 0) {\n    memmove(dst_ptr, src_ptr, len * 8);\n  }\n  return len;\n}\n\n" +
 	"" +
 	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr_565__bgr(uint8_t* dst_ptr,\n                                         size_t dst_len,\n                                         uint8_t* dst_palette_ptr,\n                                         size_t dst_palette_len,\n                                         const uint8_t* src_ptr,\n                                         size_t src_len) {\n  size_t dst_len2 = dst_len / 2;\n  size_t src_len3 = src_len / 3;\n  size_t len = (dst_len2 < src_len3) ? dst_len2 : src_len3;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint32_t b5 = s[0] >> 3;\n    uint32_t g6 = s[1] >> 2;\n    uint32_t r5 = s[2] >> 3;\n    uint32_t rgb_565 = (r5 << 11) | (g6 << 5) | (b5 << 0);\n    wuffs_base__poke_u16le__no_bounds_check(d + (0 * 2), (uint16_t)rgb_565);\n\n    s += 1 * 3;\n    d += 1 * 2;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr_565__bgrx(uint8_t* dst_ptr,\n           " +
 	"                               size_t dst_len,\n                                          uint8_t* dst_palette_ptr,\n                                          size_t dst_palette_len,\n                                          const uint8_t* src_ptr,\n                                          size_t src_len) {\n  size_t dst_len2 = dst_len / 2;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len2 < src_len4) ? dst_len2 : src_len4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint32_t b5 = s[0] >> 3;\n    uint32_t g6 = s[1] >> 2;\n    uint32_t r5 = s[2] >> 3;\n    uint32_t rgb_565 = (r5 << 11) | (g6 << 5) | (b5 << 0);\n    wuffs_base__poke_u16le__no_bounds_check(d + (0 * 2), (uint16_t)rgb_565);\n\n    s += 1 * 4;\n    d += 1 * 2;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul__src(\n    uint8_t* dst_ptr,\n    size_t dst_len,\n    uint8_t* dst_palette_ptr,\n    size_t dst_palette_len,\n    const u" +
@@ -621,6 +622,12 @@
 	"alette_ptr +\n                                                          ((size_t)s[0] * 4));\n    wuffs_base__poke_u32le__no_bounds_check(\n        d + (0 * 4),\n        wuffs_base__composite_nonpremul_nonpremul_u32_axxx(d0, s0));\n\n    s += 1 * 1;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgra_nonpremul__rgba_nonpremul__src_over(\n    uint8_t* dst_ptr,\n    size_t dst_len,\n    uint8_t* dst_palette_ptr,\n    size_t dst_palette_len,\n    const uint8_t* src_ptr,\n    size_t src_len) {\n  size_t dst_len4 = dst_len / 4;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len4 < src_len4) ? dst_len4 : src_len4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    uint32_t d0 = wuffs_base__peek_u32le__no_bounds_check(d + (0 * 4));\n    uint32_t s0 = wuffs_base__swap_u32_argb_abgr(\n        wuffs_base__peek_u32le__no_bounds_check(s + (0 * 4)));\n    wuffs_base__poke_u32le__no_bounds_check(\n        d + (0 * 4),\n        wuffs_base__c" +
 	"omposite_nonpremul_nonpremul_u32_axxx(d0, s0));\n\n    s += 1 * 4;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\n" +
 	"" +
+	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__bgra_nonpremul__src(\n    uint8_t* dst_ptr,\n    size_t dst_len,\n    uint8_t* dst_palette_ptr,\n    size_t dst_palette_len,\n    const uint8_t* src_ptr,\n    size_t src_len) {\n  size_t dst_len8 = dst_len / 8;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len8 < src_len4) ? dst_len8 : src_len4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n\n  size_t n = len;\n  while (n >= 1) {\n    uint8_t s0 = s[0];\n    uint8_t s1 = s[1];\n    uint8_t s2 = s[2];\n    uint8_t s3 = s[3];\n    d[0] = s0;\n    d[1] = s0;\n    d[2] = s1;\n    d[3] = s1;\n    d[4] = s2;\n    d[5] = s2;\n    d[6] = s3;\n    d[7] = s3;\n\n    s += 1 * 4;\n    d += 1 * 8;\n    n -= 1;\n  }\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__bgra_nonpremul__src_over(\n    uint8_t* dst_ptr,\n    size_t dst_len,\n    uint8_t* dst_palette_ptr,\n    size_t dst_palette_len,\n    const uint8_t* src_ptr,\n    size_t src_len) {\n  size_t dst_len8 = dst_le" +
+	"n / 8;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len8 < src_len4) ? dst_len8 : src_len4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n\n  size_t n = len;\n  while (n >= 1) {\n    uint64_t d0 = wuffs_base__peek_u64le__no_bounds_check(d + (0 * 8));\n    uint64_t s0 = wuffs_base__color_u32__as__color_u64(\n        wuffs_base__peek_u32le__no_bounds_check(s + (0 * 4)));\n    wuffs_base__poke_u64le__no_bounds_check(\n        d + (0 * 8),\n        wuffs_base__composite_nonpremul_nonpremul_u64_axxx(d0, s0));\n\n    s += 1 * 4;\n    d += 1 * 8;\n    n -= 1;\n  }\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__bgra_nonpremul_4x16le__src_over(\n    uint8_t* dst_ptr,\n    size_t dst_len,\n    uint8_t* dst_palette_ptr,\n    size_t dst_palette_len,\n    const uint8_t* src_ptr,\n    size_t src_len) {\n  size_t dst_len8 = dst_len / 8;\n  size_t src_len8 = src_len / 8;\n  size_t len = (dst_len8 < src_len8) ? dst_len8 : src_len8;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n\n  siz" +
+	"e_t n = len;\n  while (n >= 1) {\n    uint64_t d0 = wuffs_base__peek_u64le__no_bounds_check(d + (0 * 8));\n    uint64_t s0 = wuffs_base__peek_u64le__no_bounds_check(s + (0 * 8));\n    wuffs_base__poke_u64le__no_bounds_check(\n        d + (0 * 8),\n        wuffs_base__composite_nonpremul_nonpremul_u64_axxx(d0, s0));\n\n    s += 1 * 8;\n    d += 1 * 8;\n    n -= 1;\n  }\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__index_bgra_nonpremul__src_over(\n    uint8_t* dst_ptr,\n    size_t dst_len,\n    uint8_t* dst_palette_ptr,\n    size_t dst_palette_len,\n    const uint8_t* src_ptr,\n    size_t src_len) {\n  if (dst_palette_len != 1024) {\n    return 0;\n  }\n  size_t dst_len8 = dst_len / 8;\n  size_t len = (dst_len8 < src_len) ? dst_len8 : src_len;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    uint64_t d0 = wuffs_base__peek_u64le__no_bounds_check(d + (0 * 8));\n    uint64_t s0 = wuffs_base__color_u32__as__color_u64(\n        wuffs_base__peek_u32le__" +
+	"no_bounds_check(dst_palette_ptr +\n                                                ((size_t)s[0] * 4)));\n    wuffs_base__poke_u64le__no_bounds_check(\n        d + (0 * 8),\n        wuffs_base__composite_nonpremul_nonpremul_u64_axxx(d0, s0));\n\n    s += 1 * 1;\n    d += 1 * 8;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__rgba_nonpremul__src(\n    uint8_t* dst_ptr,\n    size_t dst_len,\n    uint8_t* dst_palette_ptr,\n    size_t dst_palette_len,\n    const uint8_t* src_ptr,\n    size_t src_len) {\n  size_t dst_len8 = dst_len / 8;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len8 < src_len4) ? dst_len8 : src_len4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n\n  size_t n = len;\n  while (n >= 1) {\n    uint8_t s0 = s[0];\n    uint8_t s1 = s[1];\n    uint8_t s2 = s[2];\n    uint8_t s3 = s[3];\n    d[0] = s2;\n    d[1] = s2;\n    d[2] = s1;\n    d[3] = s1;\n    d[4] = s0;\n    d[5] = s0;\n    d[6] = s3;\n    d[7] = s3;\n\n    s += 1 * 4;\n    d += 1 * 8;\n    n -= 1;\n " +
+	" }\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__rgba_nonpremul__src_over(\n    uint8_t* dst_ptr,\n    size_t dst_len,\n    uint8_t* dst_palette_ptr,\n    size_t dst_palette_len,\n    const uint8_t* src_ptr,\n    size_t src_len) {\n  size_t dst_len8 = dst_len / 8;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len8 < src_len4) ? dst_len8 : src_len4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n\n  size_t n = len;\n  while (n >= 1) {\n    uint64_t d0 = wuffs_base__peek_u64le__no_bounds_check(d + (0 * 8));\n    uint64_t s0 =\n        wuffs_base__color_u32__as__color_u64(wuffs_base__swap_u32_argb_abgr(\n            wuffs_base__peek_u32le__no_bounds_check(s + (0 * 4))));\n    wuffs_base__poke_u64le__no_bounds_check(\n        d + (0 * 8),\n        wuffs_base__composite_nonpremul_nonpremul_u64_axxx(d0, s0));\n\n    s += 1 * 4;\n    d += 1 * 8;\n    n -= 1;\n  }\n  return len;\n}\n\n" +
+	"" +
 	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src(\n    uint8_t* dst_ptr,\n    size_t dst_len,\n    uint8_t* dst_palette_ptr,\n    size_t dst_palette_len,\n    const uint8_t* src_ptr,\n    size_t src_len) {\n  size_t dst_len4 = dst_len / 4;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len4 < src_len4) ? dst_len4 : src_len4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint32_t s0 = wuffs_base__peek_u32le__no_bounds_check(s + (0 * 4));\n    wuffs_base__poke_u32le__no_bounds_check(\n        d + (0 * 4),\n        wuffs_base__color_u32_argb_nonpremul__as__color_u32_argb_premul(s0));\n\n    s += 1 * 4;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul_4x16le__src(\n    uint8_t* dst_ptr,\n    size_t dst_len,\n    uint8_t* dst_palette_ptr,\n    size_t dst_palette_len,\n    const uint8_t* src_ptr,\n    size_t src_len) {\n  size_t dst_len4 =" +
 	" dst_len / 4;\n  size_t src_len8 = src_len / 8;\n  size_t len = (dst_len4 < src_len8) ? dst_len4 : src_len8;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint64_t s0 = wuffs_base__peek_u64le__no_bounds_check(s + (0 * 8));\n    wuffs_base__poke_u32le__no_bounds_check(\n        d + (0 * 4),\n        wuffs_base__color_u64_argb_nonpremul__as__color_u32_argb_premul(s0));\n\n    s += 1 * 8;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src_over(\n    uint8_t* dst_ptr,\n    size_t dst_len,\n    uint8_t* dst_palette_ptr,\n    size_t dst_palette_len,\n    const uint8_t* src_ptr,\n    size_t src_len) {\n  size_t dst_len4 = dst_len / 4;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len4 < src_len4) ? dst_len4 : src_len4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint32_t d0 = wuffs_base__peek_u32le__no_bo" +
 	"unds_check(d + (0 * 4));\n    uint32_t s0 = wuffs_base__peek_u32le__no_bounds_check(s + (0 * 4));\n    wuffs_base__poke_u32le__no_bounds_check(\n        d + (0 * 4), wuffs_base__composite_premul_nonpremul_u32_axxx(d0, s0));\n\n    s += 1 * 4;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul_4x16le__src_over(\n    uint8_t* dst_ptr,\n    size_t dst_len,\n    uint8_t* dst_palette_ptr,\n    size_t dst_palette_len,\n    const uint8_t* src_ptr,\n    size_t src_len) {\n  size_t dst_len4 = dst_len / 4;\n  size_t src_len8 = src_len / 8;\n  size_t len = (dst_len4 < src_len8) ? dst_len4 : src_len8;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint64_t d0 = wuffs_base__color_u32__as__color_u64(\n        wuffs_base__peek_u32le__no_bounds_check(d + (0 * 4)));\n    uint64_t s0 = wuffs_base__peek_u64le__no_bounds_check(s + (0 * 8));\n    wuffs_base__poke_u32le__no_bounds_check(\n        d + (0 * 4" +
@@ -636,6 +643,10 @@
 	"x);\n\n    s += 4 * 3;\n    d += 4 * 4;\n    n -= 4;\n  }\n\n  while (n >= 1) {\n    uint8_t b0 = s[0];\n    uint8_t b1 = s[1];\n    uint8_t b2 = s[2];\n    d[0] = b2;\n    d[1] = b1;\n    d[2] = b0;\n    d[3] = 0xFF;\n\n    s += 1 * 3;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n#endif  // defined(WUFFS_BASE__CPU_ARCH__X86_64)\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgrw__rgb(uint8_t* dst_ptr,\n                                      size_t dst_len,\n                                      uint8_t* dst_palette_ptr,\n                                      size_t dst_palette_len,\n                                      const uint8_t* src_ptr,\n                                      size_t src_len) {\n  size_t dst_len4 = dst_len / 4;\n  size_t src_len3 = src_len / 3;\n  size_t len = (dst_len4 < src_len3) ? dst_len4 : src_len3;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    uint8_t b0 = s[0];\n    uint8_t b1 = s[1];\n    uint8_t b2 = s[2];\n    d[0] = b2;\n    d[1] = b1;\n    d[2] = b0" +
 	";\n    d[3] = 0xFF;\n\n    s += 1 * 3;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgrw__rgbx(uint8_t* dst_ptr,\n                                       size_t dst_len,\n                                       uint8_t* dst_palette_ptr,\n                                       size_t dst_palette_len,\n                                       const uint8_t* src_ptr,\n                                       size_t src_len) {\n  size_t dst_len4 = dst_len / 4;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len4 < src_len4) ? dst_len4 : src_len4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint8_t b0 = s[0];\n    uint8_t b1 = s[1];\n    uint8_t b2 = s[2];\n    d[0] = b2;\n    d[1] = b1;\n    d[2] = b0;\n    d[3] = 0xFF;\n\n    s += 1 * 4;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\n" +
 	"" +
+	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgrw_4x16le__bgr(uint8_t* dst_ptr,\n                                             size_t dst_len,\n                                             uint8_t* dst_palette_ptr,\n                                             size_t dst_palette_len,\n                                             const uint8_t* src_ptr,\n                                             size_t src_len) {\n  size_t dst_len8 = dst_len / 8;\n  size_t src_len3 = src_len / 3;\n  size_t len = (dst_len8 < src_len3) ? dst_len8 : src_len3;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    uint8_t s0 = s[0];\n    uint8_t s1 = s[1];\n    uint8_t s2 = s[2];\n    d[0] = s0;\n    d[1] = s0;\n    d[2] = s1;\n    d[3] = s1;\n    d[4] = s2;\n    d[5] = s2;\n    d[6] = 0xFF;\n    d[7] = 0xFF;\n\n    s += 1 * 3;\n    d += 1 * 8;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgrw_4x16le__bgrx(uint8_t* dst_ptr,\n                                 " +
+	"             size_t dst_len,\n                                              uint8_t* dst_palette_ptr,\n                                              size_t dst_palette_len,\n                                              const uint8_t* src_ptr,\n                                              size_t src_len) {\n  size_t dst_len8 = dst_len / 8;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len8 < src_len4) ? dst_len8 : src_len4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    uint8_t s0 = s[0];\n    uint8_t s1 = s[1];\n    uint8_t s2 = s[2];\n    d[0] = s0;\n    d[1] = s0;\n    d[2] = s1;\n    d[3] = s1;\n    d[4] = s2;\n    d[5] = s2;\n    d[6] = 0xFF;\n    d[7] = 0xFF;\n\n    s += 1 * 4;\n    d += 1 * 8;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgrw_4x16le__rgb(uint8_t* dst_ptr,\n                                             size_t dst_len,\n                                             uint8_t* dst_palette_ptr,\n                          " +
+	"                   size_t dst_palette_len,\n                                             const uint8_t* src_ptr,\n                                             size_t src_len) {\n  size_t dst_len8 = dst_len / 8;\n  size_t src_len3 = src_len / 3;\n  size_t len = (dst_len8 < src_len3) ? dst_len8 : src_len3;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    uint8_t s0 = s[0];\n    uint8_t s1 = s[1];\n    uint8_t s2 = s[2];\n    d[0] = s2;\n    d[1] = s2;\n    d[2] = s1;\n    d[3] = s1;\n    d[4] = s0;\n    d[5] = s0;\n    d[6] = 0xFF;\n    d[7] = 0xFF;\n\n    s += 1 * 3;\n    d += 1 * 8;\n    n -= 1;\n  }\n\n  return len;\n}\n\n" +
+	"" +
 	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__rgba_nonpremul__bgra_nonpremul_4x16le__src(\n    uint8_t* dst_ptr,\n    size_t dst_len,\n    uint8_t* dst_palette_ptr,\n    size_t dst_palette_len,\n    const uint8_t* src_ptr,\n    size_t src_len) {\n  size_t dst_len4 = dst_len / 4;\n  size_t src_len8 = src_len / 8;\n  size_t len = (dst_len4 < src_len8) ? dst_len4 : src_len8;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n\n  size_t n = len;\n  while (n >= 1) {\n    wuffs_base__poke_u32le__no_bounds_check(\n        d + (0 * 4), wuffs_base__color_u64__as__color_u32__swap_u32_argb_abgr(\n                         wuffs_base__peek_u64le__no_bounds_check(s + (0 * 8))));\n\n    s += 1 * 8;\n    d += 1 * 4;\n    n -= 1;\n  }\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__rgba_nonpremul__bgra_nonpremul_4x16le__src_over(\n    uint8_t* dst_ptr,\n    size_t dst_len,\n    uint8_t* dst_palette_ptr,\n    size_t dst_palette_len,\n    const uint8_t* src_ptr,\n    size_t src_len) {\n  size_t dst_len4 = dst_len / 4;\n" +
 	"  size_t src_len8 = src_len / 8;\n  size_t len = (dst_len4 < src_len8) ? dst_len4 : src_len8;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    uint64_t d0 = wuffs_base__color_u32__as__color_u64(\n        wuffs_base__peek_u32le__no_bounds_check(d + (0 * 4)));\n    uint64_t s0 = wuffs_base__swap_u64_argb_abgr(\n        wuffs_base__peek_u64le__no_bounds_check(s + (0 * 8)));\n    wuffs_base__poke_u32le__no_bounds_check(\n        d + (0 * 4),\n        wuffs_base__color_u64__as__color_u32(\n            wuffs_base__composite_nonpremul_nonpremul_u64_axxx(d0, s0)));\n\n    s += 1 * 8;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\n" +
 	"" +
@@ -656,34 +667,42 @@
 	"{\n    wuffs_base__poke_u32le__no_bounds_check(\n        d + (0 * 4), 0xFF000000 | (0x010101 * (uint32_t)s[0]));\n\n    s += 1 * 1;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n#endif  // defined(WUFFS_BASE__CPU_ARCH__X86_64)\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__xxxx__y(uint8_t* dst_ptr,\n                                    size_t dst_len,\n                                    uint8_t* dst_palette_ptr,\n                                    size_t dst_palette_len,\n                                    const uint8_t* src_ptr,\n                                    size_t src_len) {\n  size_t dst_len4 = dst_len / 4;\n  size_t len = (dst_len4 < src_len) ? dst_len4 : src_len;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    wuffs_base__poke_u32le__no_bounds_check(\n        d + (0 * 4), 0xFF000000 | (0x010101 * (uint32_t)s[0]));\n\n    s += 1 * 1;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__xxxx__y_16be(uint8_t* dst_pt" +
 	"r,\n                                         size_t dst_len,\n                                         uint8_t* dst_palette_ptr,\n                                         size_t dst_palette_len,\n                                         const uint8_t* src_ptr,\n                                         size_t src_len) {\n  size_t dst_len4 = dst_len / 4;\n  size_t src_len2 = src_len / 2;\n  size_t len = (dst_len4 < src_len2) ? dst_len4 : src_len2;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    wuffs_base__poke_u32le__no_bounds_check(\n        d + (0 * 4), 0xFF000000 | (0x010101 * (uint32_t)s[0]));\n\n    s += 1 * 2;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\n" +
 	"" +
+	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__xxxxxxxx__index__src(uint8_t* dst_ptr,\n                                                 size_t dst_len,\n                                                 uint8_t* dst_palette_ptr,\n                                                 size_t dst_palette_len,\n                                                 const uint8_t* src_ptr,\n                                                 size_t src_len) {\n  if (dst_palette_len != 1024) {\n    return 0;\n  }\n  size_t dst_len8 = dst_len / 8;\n  size_t len = (dst_len8 < src_len) ? dst_len8 : src_len;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    wuffs_base__poke_u64le__no_bounds_check(\n        d + (0 * 8), wuffs_base__color_u32__as__color_u64(\n                         wuffs_base__peek_u32le__no_bounds_check(\n                             dst_palette_ptr + ((size_t)s[0] * 4))));\n\n    s += 1 * 1;\n    d += 1 * 8;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base" +
+	"__pixel_swizzler__xxxxxxxx__index_binary_alpha__src_over(\n    uint8_t* dst_ptr,\n    size_t dst_len,\n    uint8_t* dst_palette_ptr,\n    size_t dst_palette_len,\n    const uint8_t* src_ptr,\n    size_t src_len) {\n  if (dst_palette_len != 1024) {\n    return 0;\n  }\n  size_t dst_len8 = dst_len / 8;\n  size_t len = (dst_len8 < src_len) ? dst_len8 : src_len;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    uint32_t s0 = wuffs_base__peek_u32le__no_bounds_check(dst_palette_ptr +\n                                                          ((size_t)s[0] * 4));\n    if (s0) {\n      wuffs_base__poke_u64le__no_bounds_check(\n          d + (0 * 8), wuffs_base__color_u32__as__color_u64(s0));\n    }\n\n    s += 1 * 1;\n    d += 1 * 8;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__xxxxxxxx__y(uint8_t* dst_ptr,\n                                        size_t dst_len,\n                                        uint8_t* dst_palette_ptr,\n                           " +
+	"             size_t dst_palette_len,\n                                        const uint8_t* src_ptr,\n                                        size_t src_len) {\n  size_t dst_len8 = dst_len / 8;\n  size_t len = (dst_len8 < src_len) ? dst_len8 : src_len;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    wuffs_base__poke_u64le__no_bounds_check(\n        d + (0 * 8), 0xFFFF000000000000 | (0x010101010101 * (uint64_t)s[0]));\n\n    s += 1 * 1;\n    d += 1 * 8;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__xxxxxxxx__y_16be(uint8_t* dst_ptr,\n                                             size_t dst_len,\n                                             uint8_t* dst_palette_ptr,\n                                             size_t dst_palette_len,\n                                             const uint8_t* src_ptr,\n                                             size_t src_len) {\n  size_t dst_len8 = dst_len / 8;\n  size_t src_len2 = src_len / 2;\n  size_t l" +
+	"en = (dst_len8 < src_len2) ? dst_len8 : src_len2;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    uint64_t s0 =\n        ((uint64_t)(wuffs_base__peek_u16be__no_bounds_check(s + (0 * 2))));\n    wuffs_base__poke_u64le__no_bounds_check(\n        d + (0 * 8), 0xFFFF000000000000 | (0x000100010001 * s0));\n\n    s += 1 * 2;\n    d += 1 * 8;\n    n -= 1;\n  }\n\n  return len;\n}\n\n" +
+	"" +
 	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__y__y_16be(uint8_t* dst_ptr,\n                                      size_t dst_len,\n                                      uint8_t* dst_palette_ptr,\n                                      size_t dst_palette_len,\n                                      const uint8_t* src_ptr,\n                                      size_t src_len) {\n  size_t src_len2 = src_len / 2;\n  size_t len = (dst_len < src_len2) ? dst_len : src_len2;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    d[0] = s[0];\n\n    s += 1 * 2;\n    d += 1 * 1;\n    n -= 1;\n  }\n\n  return len;\n}\n\n" +
 	"" +
 	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__transparent_black_src(\n    uint8_t* dst_ptr,\n    size_t dst_len,\n    uint8_t* dst_palette_ptr,\n    size_t dst_palette_len,\n    uint64_t num_pixels,\n    uint32_t dst_pixfmt_bytes_per_pixel) {\n  uint64_t n = ((uint64_t)dst_len) / dst_pixfmt_bytes_per_pixel;\n  if (n > num_pixels) {\n    n = num_pixels;\n  }\n  memset(dst_ptr, 0, ((size_t)(n * dst_pixfmt_bytes_per_pixel)));\n  return n;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__transparent_black_src_over(\n    uint8_t* dst_ptr,\n    size_t dst_len,\n    uint8_t* dst_palette_ptr,\n    size_t dst_palette_len,\n    uint64_t num_pixels,\n    uint32_t dst_pixfmt_bytes_per_pixel) {\n  uint64_t n = ((uint64_t)dst_len) / dst_pixfmt_bytes_per_pixel;\n  if (n > num_pixels) {\n    n = num_pixels;\n  }\n  return n;\n}\n\n" +
 	"" +
 	"// --------\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__y(wuffs_base__pixel_swizzler* p,\n                                       wuffs_base__pixel_format dst_pixfmt,\n                                       wuffs_base__slice_u8 dst_palette,\n                                       wuffs_base__slice_u8 src_palette,\n                                       wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__Y:\n      return wuffs_base__pixel_swizzler__copy_1_1;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      return wuffs_base__pixel_swizzler__bgr_565__y;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      return wuffs_base__pixel_swizzler__xxx__y;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n    case WUFFS_BA" +
-	"SE__PIXEL_FORMAT__RGBA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n#if defined(WUFFS_BASE__CPU_ARCH__X86_64)\n      if (wuffs_base__cpu_arch__have_x86_sse42()) {\n        return wuffs_base__pixel_swizzler__xxxx__y__sse42;\n      }\n#endif\n      return wuffs_base__pixel_swizzler__xxxx__y;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__y_16be(wuffs_base__pixel_swizzler* p,\n                                            wuffs_base__pixel_format dst_pixfmt,\n                                            wuffs_base__slice_u8 dst_palette,\n                                            wuffs_base__slice_u8 src_palette,\n                                            wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__Y:\n      return wuffs_base__pixel_swizzler__y__y_16be;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      return wuffs_base__pixel_swizzler__bgr_565__y_16be;\n\n    case" +
-	" WUFFS_BASE__PIXEL_FORMAT__BGR:\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      return wuffs_base__pixel_swizzler__xxx__y_16be;\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n      return wuffs_base__pixel_swizzler__xxxx__y_16be;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__indexed__bgra_nonpremul(\n    wuffs_base__pixel_swizzler* p,\n    wuffs_base__pixel_format dst_pixfmt,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src_palette,\n    wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_NONPREMUL:\n      if (wuffs_base__slice_u8__copy_from_slice" +
-	"(dst_palette, src_palette) !=\n          1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__copy_1_1;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          if (wuffs_base__pixel_swizzler__squash_align4_bgr_565_8888(\n                  dst_palette.ptr, dst_palette.len, src_palette.ptr,\n                  src_palette.len, true) != 256) {\n            return NULL;\n          }\n          return wuffs_base__pixel_swizzler__bgr_565__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=\n              1024) {\n            return NULL;\n          }\n          return wuffs_base__pixel_swizzler__bgr_565__index_bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL" +
-	"_BLEND__SRC:\n          if (wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src(\n                  dst_palette.ptr, dst_palette.len, NULL, 0, src_palette.ptr,\n                  src_palette.len) != 256) {\n            return NULL;\n          }\n          return wuffs_base__pixel_swizzler__xxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=\n              1024) {\n            return NULL;\n          }\n          return wuffs_base__pixel_swizzler__xxx__index_bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=\n          1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__xxxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__in" +
-	"dex_bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          if (wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src(\n                  dst_palette.ptr, dst_palette.len, NULL, 0, src_palette.ptr,\n                  src_palette.len) != 256) {\n            return NULL;\n          }\n          return wuffs_base__pixel_swizzler__xxxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=\n              1024) {\n            return NULL;\n          }\n          return wuffs_base__pixel_swizzler__bgra_premul__index_bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      // TODO.\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n      if (wuffs_base__pixel_swizzler__swap_rgbx_bgrx(\n              dst_palette.ptr, dst_palette.len, NULL, 0, src_palette.ptr," +
-	"\n              src_palette.len) != 256) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__xxxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__index_bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          if (wuffs_base__pixel_swizzler__bgra_premul__rgba_nonpremul__src(\n                  dst_palette.ptr, dst_palette.len, NULL, 0, src_palette.ptr,\n                  src_palette.len) != 256) {\n            return NULL;\n          }\n          return wuffs_base__pixel_swizzler__xxxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          if (wuffs_base__pixel_swizzler__swap_rgbx_bgrx(\n                  dst_palette.ptr, dst_palette.len, NULL, 0, src_palette.ptr,\n                  src_palette.len) != 256) {\n            r" +
-	"eturn NULL;\n          }\n          return wuffs_base__pixel_swizzler__bgra_premul__index_bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n      // TODO.\n      break;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__indexed__bgra_binary(\n    wuffs_base__pixel_swizzler* p,\n    wuffs_base__pixel_format dst_pixfmt,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src_palette,\n    wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_BINARY:\n      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=\n          1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__copy_1_1;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PI" +
-	"XEL_FORMAT__BGR_565:\n      if (wuffs_base__pixel_swizzler__squash_align4_bgr_565_8888(\n              dst_palette.ptr, dst_palette.len, src_palette.ptr,\n              src_palette.len, false) != 256) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr_565__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr_565__index_binary_alpha__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=\n          1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__xxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__xxx__index_binary_alpha__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n    " +
-	"case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=\n          1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__xxxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__xxxx__index_binary_alpha__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      if (wuffs_base__pixel_swizzler__swap_rgbx_bgrx(\n              dst_palette.ptr, dst_palette.len, NULL, 0, src_palette.ptr,\n              src_palette.len) != 256) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__xxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__xxx__index_binary_alpha__src_over;\n      }\n      return NULL;\n\n" +
-	"    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n      if (wuffs_base__pixel_swizzler__swap_rgbx_bgrx(\n              dst_palette.ptr, dst_palette.len, NULL, 0, src_palette.ptr,\n              src_palette.len) != 256) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__xxxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__xxxx__index_binary_alpha__src_over;\n      }\n      return NULL;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__bgr(wuffs_base__pixel_swizzler* p,\n                                         wuffs_base__pixel_format dst_pixfmt,\n                                         wuffs_base__slice_u8 dst_palette,\n                                         wuffs_base__slice_u8 src_palette,\n                            " +
-	"             wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      return wuffs_base__pixel_swizzler__bgr_565__bgr;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      return wuffs_base__pixel_swizzler__copy_3_3;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n      return wuffs_base__pixel_swizzler__bgrw__bgr;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      return wuffs_base__pixel_swizzler__swap_rgb_bgr;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n#if defined(WUFFS_BASE__CPU_ARCH__X86_64)\n      if (wuffs_base__cpu_arch__have_x86_sse42()) {\n        return wuffs_base__pixel_swizzler__bgrw__rgb__sse42;\n      }\n#endif\n      return wuffs_base__pixel_swizzler__bgrw__rgb;\n  }\n  retu" +
-	"rn NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__bgra_nonpremul(\n    wuffs_base__pixel_swizzler* p,\n    wuffs_base__pixel_format dst_pixfmt,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src_palette,\n    wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_" +
-	"FORMAT__BGRA_NONPREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__copy_4_4;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n      // TODO.\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      // TODO.\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n#if defined(WUFFS_BASE__CPU_ARCH__X86_64)\n          if (wuf" +
-	"fs_base__cpu_arch__have_x86_sse42()) {\n            return wuffs_base__pixel_swizzler__swap_rgbx_bgrx__sse42;\n          }\n#endif\n          return wuffs_base__pixel_swizzler__swap_rgbx_bgrx;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__rgba_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_premul__rgba_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_premul__rgba_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n      // TODO.\n      break;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__bgra_nonpremul_4x16le(\n    wuffs_base__pixel_swizzler* p,\n    wuffs_base__pixel_format dst_pixfmt,\n" +
-	"    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src_palette,\n    wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul_4x16le__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul_4x16le__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr__bgra_nonpremul_4x16le__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr__bgra_nonpremul_4x16le__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_n" +
-	"onpremul_4x16le__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_nonpremul_4x16le__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul_4x16le__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul_4x16le__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n      // TODO.\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      // TODO.\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__rgba_nonpremul__bgra_nonpremul_4x16le__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_b" +
-	"ase__pixel_swizzler__rgba_nonpremul__bgra_nonpremul_4x16le__src_over;\n      }\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_premul__rgba_nonpremul_4x16le__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_premul__rgba_nonpremul_4x16le__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n      // TODO.\n      break;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__bgrx(wuffs_base__pixel_swizzler* p,\n                                          wuffs_base__pixel_format dst_pixfmt,\n                                          wuffs_base__slice_u8 dst_palette,\n                                          wuffs_base__slice_u8 src_palette,\n                                          wuffs_base__pixel_blend blend) {\n  s" +
-	"witch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      return wuffs_base__pixel_swizzler__bgr_565__bgrx;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      return wuffs_base__pixel_swizzler__xxx__xxxx;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n      return wuffs_base__pixel_swizzler__bgrw__bgrx;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n      return wuffs_base__pixel_swizzler__copy_4_4;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      // TODO.\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n      return wuffs_base__pixel_swizzler__bgrw__rgbx;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__rgb(wuffs_base__pixel_swizzler* p,\n                                         wuffs_base__pixel_for" +
-	"mat dst_pixfmt,\n                                         wuffs_base__slice_u8 dst_palette,\n                                         wuffs_base__slice_u8 src_palette,\n                                         wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      return wuffs_base__pixel_swizzler__bgr_565__rgb;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      return wuffs_base__pixel_swizzler__swap_rgb_bgr;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n#if defined(WUFFS_BASE__CPU_ARCH__X86_64)\n      if (wuffs_base__cpu_arch__have_x86_sse42()) {\n        return wuffs_base__pixel_swizzler__bgrw__rgb__sse42;\n      }\n#endif\n      return wuffs_base__pixel_swizzler__bgrw__rgb;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      return wuffs_base__pixel_swizzler__copy_3_3;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n    c" +
-	"ase WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n      return wuffs_base__pixel_swizzler__bgrw__bgr;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__rgba_nonpremul(\n    wuffs_base__pixel_swizzler* p,\n    wuffs_base__pixel_format dst_pixfmt,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src_palette,\n    wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr_565__rgba_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr_565__rgba_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr__rgba_n" +
-	"onpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr__rgba_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n#if defined(WUFFS_BASE__CPU_ARCH__X86_64)\n          if (wuffs_base__cpu_arch__have_x86_sse42()) {\n            return wuffs_base__pixel_swizzler__swap_rgbx_bgrx__sse42;\n          }\n#endif\n          return wuffs_base__pixel_swizzler__swap_rgbx_bgrx;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__rgba_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_premul__rgba_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_premul__rgba_nonpremul__src_over;\n    " +
-	"  }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n      // TODO.\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      // TODO.\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__copy_4_4;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n      // TODO.\n      break;\n  }\n  re" +
-	"turn NULL;\n}\n\n" +
+	"SE__PIXEL_FORMAT__RGBA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n#if defined(WUFFS_BASE__CPU_ARCH__X86_64)\n      if (wuffs_base__cpu_arch__have_x86_sse42()) {\n        return wuffs_base__pixel_swizzler__xxxx__y__sse42;\n      }\n#endif\n      return wuffs_base__pixel_swizzler__xxxx__y;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL_4X16LE:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL_4X16LE:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL_4X16LE:\n      return wuffs_base__pixel_swizzler__xxxxxxxx__y;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__y_16be(wuffs_base__pixel_swizzler* p,\n                                            wuffs_base__pixel_format dst_pixfmt,\n                                            wuffs_base__slice_u8 dst_palette,\n                                            wuffs_base__slice_u8 src_palette,\n                           " +
+	"                 wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__Y:\n      return wuffs_base__pixel_swizzler__y__y_16be;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      return wuffs_base__pixel_swizzler__bgr_565__y_16be;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      return wuffs_base__pixel_swizzler__xxx__y_16be;\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n      return wuffs_base__pixel_swizzler__xxxx__y_16be;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL_4X16LE:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL_4X16LE:\n" +
+	"    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL_4X16LE:\n      return wuffs_base__pixel_swizzler__xxxxxxxx__y_16be;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__indexed__bgra_nonpremul(\n    wuffs_base__pixel_swizzler* p,\n    wuffs_base__pixel_format dst_pixfmt,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src_palette,\n    wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_NONPREMUL:\n      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=\n          1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__copy_1_1;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          if (wuffs_base__pixel_swizzler__squash_align4_bgr_565_8888(\n                  dst_palette.ptr, dst_palette.len," +
+	" src_palette.ptr,\n                  src_palette.len, true) != 256) {\n            return NULL;\n          }\n          return wuffs_base__pixel_swizzler__bgr_565__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=\n              1024) {\n            return NULL;\n          }\n          return wuffs_base__pixel_swizzler__bgr_565__index_bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          if (wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src(\n                  dst_palette.ptr, dst_palette.len, NULL, 0, src_palette.ptr,\n                  src_palette.len) != 256) {\n            return NULL;\n          }\n          return wuffs_base__pixel_swizzler__xxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=\n              102" +
+	"4) {\n            return NULL;\n          }\n          return wuffs_base__pixel_swizzler__xxx__index_bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=\n          1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__xxxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__index_bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:\n      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=\n          1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__xxxxxxxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_s" +
+	"wizzler__bgra_nonpremul_4x16le__index_bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          if (wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src(\n                  dst_palette.ptr, dst_palette.len, NULL, 0, src_palette.ptr,\n                  src_palette.len) != 256) {\n            return NULL;\n          }\n          return wuffs_base__pixel_swizzler__xxxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=\n              1024) {\n            return NULL;\n          }\n          return wuffs_base__pixel_swizzler__bgra_premul__index_bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      // TODO.\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n      if (wuffs_base__pixel_swizzler__swap_rgbx_bgrx(\n              dst_palette.ptr, dst_pale" +
+	"tte.len, NULL, 0, src_palette.ptr,\n              src_palette.len) != 256) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__xxxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__index_bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          if (wuffs_base__pixel_swizzler__bgra_premul__rgba_nonpremul__src(\n                  dst_palette.ptr, dst_palette.len, NULL, 0, src_palette.ptr,\n                  src_palette.len) != 256) {\n            return NULL;\n          }\n          return wuffs_base__pixel_swizzler__xxxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          if (wuffs_base__pixel_swizzler__swap_rgbx_bgrx(\n                  dst_palette.ptr, dst_palette.len, NULL, 0, src_palette.ptr,\n                  src_pa" +
+	"lette.len) != 256) {\n            return NULL;\n          }\n          return wuffs_base__pixel_swizzler__bgra_premul__index_bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n      // TODO.\n      break;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__indexed__bgra_binary(\n    wuffs_base__pixel_swizzler* p,\n    wuffs_base__pixel_format dst_pixfmt,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src_palette,\n    wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_BINARY:\n      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=\n          1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__copy_1_1;\n      }\n      ret" +
+	"urn NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      if (wuffs_base__pixel_swizzler__squash_align4_bgr_565_8888(\n              dst_palette.ptr, dst_palette.len, src_palette.ptr,\n              src_palette.len, false) != 256) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr_565__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr_565__index_binary_alpha__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=\n          1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__xxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__xxx__index_binary_alpha__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__" +
+	"PIXEL_FORMAT__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=\n          1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__xxxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__xxxx__index_binary_alpha__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL_4X16LE:\n      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=\n          1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__xxxxxxxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__xxxxxxxx__index_binary_al" +
+	"pha__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      if (wuffs_base__pixel_swizzler__swap_rgbx_bgrx(\n              dst_palette.ptr, dst_palette.len, NULL, 0, src_palette.ptr,\n              src_palette.len) != 256) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__xxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__xxx__index_binary_alpha__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n      if (wuffs_base__pixel_swizzler__swap_rgbx_bgrx(\n              dst_palette.ptr, dst_palette.len, NULL, 0, src_palette.ptr,\n              src_palette.len) != 256) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__xxx" +
+	"x__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__xxxx__index_binary_alpha__src_over;\n      }\n      return NULL;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__bgr(wuffs_base__pixel_swizzler* p,\n                                         wuffs_base__pixel_format dst_pixfmt,\n                                         wuffs_base__slice_u8 dst_palette,\n                                         wuffs_base__slice_u8 src_palette,\n                                         wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      return wuffs_base__pixel_swizzler__bgr_565__bgr;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      return wuffs_base__pixel_swizzler__copy_3_3;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n    " +
+	"  return wuffs_base__pixel_swizzler__bgrw__bgr;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL_4X16LE:\n      return wuffs_base__pixel_swizzler__bgrw_4x16le__bgr;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      return wuffs_base__pixel_swizzler__swap_rgb_bgr;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n#if defined(WUFFS_BASE__CPU_ARCH__X86_64)\n      if (wuffs_base__cpu_arch__have_x86_sse42()) {\n        return wuffs_base__pixel_swizzler__bgrw__rgb__sse42;\n      }\n#endif\n      return wuffs_base__pixel_swizzler__bgrw__rgb;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__bgra_nonpremul(\n    wuffs_base__pixel_swizzler* p,\n    wuffs_base__pixel_format dst_pixfmt,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src_palette,\n    wuffs_base__pixel_ble" +
+	"nd blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__copy_4_4;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_nonpremul__src_over;\n      " +
+	"}\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__bgra_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n      // TODO.\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      // TODO.\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n      switch (blend) {\n     " +
+	"   case WUFFS_BASE__PIXEL_BLEND__SRC:\n#if defined(WUFFS_BASE__CPU_ARCH__X86_64)\n          if (wuffs_base__cpu_arch__have_x86_sse42()) {\n            return wuffs_base__pixel_swizzler__swap_rgbx_bgrx__sse42;\n          }\n#endif\n          return wuffs_base__pixel_swizzler__swap_rgbx_bgrx;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__rgba_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_premul__rgba_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_premul__rgba_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n      // TODO.\n      break;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__bg" +
+	"ra_nonpremul_4x16le(\n    wuffs_base__pixel_swizzler* p,\n    wuffs_base__pixel_format dst_pixfmt,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src_palette,\n    wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul_4x16le__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul_4x16le__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr__bgra_nonpremul_4x16le__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr__bgra_nonpremul_4x16le__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n      switch (blend) {\n        case " +
+	"WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_nonpremul_4x16le__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_nonpremul_4x16le__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__copy_8_8;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__bgra_nonpremul_4x16le__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul_4x16le__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul_4x16le__src_over;\n      }\n      return NULL;\n\n    case WUF" +
+	"FS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n      // TODO.\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      // TODO.\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__rgba_nonpremul__bgra_nonpremul_4x16le__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__rgba_nonpremul__bgra_nonpremul_4x16le__src_over;\n      }\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_premul__rgba_nonpremul_4x16le__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_premul__rgba_nonpremul_4x16le__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n      // TODO.\n      br" +
+	"eak;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__bgrx(wuffs_base__pixel_swizzler* p,\n                                          wuffs_base__pixel_format dst_pixfmt,\n                                          wuffs_base__slice_u8 dst_palette,\n                                          wuffs_base__slice_u8 src_palette,\n                                          wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      return wuffs_base__pixel_swizzler__bgr_565__bgrx;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      return wuffs_base__pixel_swizzler__xxx__xxxx;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n      return wuffs_base__pixel_swizzler__bgrw__bgrx;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:\n      return wuffs_base__pixel_swizzler__bgrw_4x16le__bgrx;\n\n    case WUFFS_BASE__PIXEL_FORM" +
+	"AT__BGRX:\n      return wuffs_base__pixel_swizzler__copy_4_4;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      // TODO.\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n      return wuffs_base__pixel_swizzler__bgrw__rgbx;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__rgb(wuffs_base__pixel_swizzler* p,\n                                         wuffs_base__pixel_format dst_pixfmt,\n                                         wuffs_base__slice_u8 dst_palette,\n                                         wuffs_base__slice_u8 src_palette,\n                                         wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      return wuffs_base__pixel_swizzler__bgr_565__rgb;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      return wuffs_base__pixel_swizzler__sw" +
+	"ap_rgb_bgr;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n#if defined(WUFFS_BASE__CPU_ARCH__X86_64)\n      if (wuffs_base__cpu_arch__have_x86_sse42()) {\n        return wuffs_base__pixel_swizzler__bgrw__rgb__sse42;\n      }\n#endif\n      return wuffs_base__pixel_swizzler__bgrw__rgb;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:\n      return wuffs_base__pixel_swizzler__bgrw_4x16le__rgb;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      return wuffs_base__pixel_swizzler__copy_3_3;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n      return wuffs_base__pixel_swizzler__bgrw__bgr;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__rgba_nonpremul(\n    wuffs_base__pixel_swizzler*" +
+	" p,\n    wuffs_base__pixel_format dst_pixfmt,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src_palette,\n    wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr_565__rgba_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr_565__rgba_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr__rgba_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr__rgba_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n#if defined(WUFFS_BASE__CPU_ARCH__X86_64)\n        " +
+	"  if (wuffs_base__cpu_arch__have_x86_sse42()) {\n            return wuffs_base__pixel_swizzler__swap_rgbx_bgrx__sse42;\n          }\n#endif\n          return wuffs_base__pixel_swizzler__swap_rgbx_bgrx;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__rgba_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__rgba_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__rgba_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_premul__rgba_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swiz" +
+	"zler__bgra_premul__rgba_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n      // TODO.\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      // TODO.\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__copy_4_4;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WUFFS_BASE__PIXEL_F" +
+	"ORMAT__RGBX:\n      // TODO.\n      break;\n  }\n  return NULL;\n}\n\n" +
 	"" +
 	"// --------\n\nWUFFS_BASE__MAYBE_STATIC wuffs_base__status  //\nwuffs_base__pixel_swizzler__prepare(wuffs_base__pixel_swizzler* p,\n                                    wuffs_base__pixel_format dst_pixfmt,\n                                    wuffs_base__slice_u8 dst_palette,\n                                    wuffs_base__pixel_format src_pixfmt,\n                                    wuffs_base__slice_u8 src_palette,\n                                    wuffs_base__pixel_blend blend) {\n  if (!p) {\n    return wuffs_base__make_status(wuffs_base__error__bad_receiver);\n  }\n  p->private_impl.func = NULL;\n  p->private_impl.transparent_black_func = NULL;\n  p->private_impl.dst_pixfmt_bytes_per_pixel = 0;\n  p->private_impl.src_pixfmt_bytes_per_pixel = 0;\n\n  wuffs_base__pixel_swizzler__func func = NULL;\n  wuffs_base__pixel_swizzler__transparent_black_func transparent_black_func =\n      NULL;\n\n  uint32_t dst_pixfmt_bits_per_pixel =\n      wuffs_base__pixel_format__bits_per_pixel(&dst_pixfmt);\n  if ((dst_pixfmt_bits_per_pixel == " +
 	"0) ||\n      ((dst_pixfmt_bits_per_pixel & 7) != 0)) {\n    return wuffs_base__make_status(\n        wuffs_base__error__unsupported_pixel_swizzler_option);\n  }\n\n  uint32_t src_pixfmt_bits_per_pixel =\n      wuffs_base__pixel_format__bits_per_pixel(&src_pixfmt);\n  if ((src_pixfmt_bits_per_pixel == 0) ||\n      ((src_pixfmt_bits_per_pixel & 7) != 0)) {\n    return wuffs_base__make_status(\n        wuffs_base__error__unsupported_pixel_swizzler_option);\n  }\n\n  // TODO: support many more formats.\n\n  switch (blend) {\n    case WUFFS_BASE__PIXEL_BLEND__SRC:\n      transparent_black_func =\n          wuffs_base__pixel_swizzler__transparent_black_src;\n      break;\n\n    case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n      transparent_black_func =\n          wuffs_base__pixel_swizzler__transparent_black_src_over;\n      break;\n  }\n\n  switch (src_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__Y:\n      func = wuffs_base__pixel_swizzler__prepare__y(p, dst_pixfmt, dst_palette,\n                                                    src_palette" +
@@ -764,24 +783,24 @@
 	" (io_buf.writer_length() == 0)) {\n          fourcc = 0;\n          break;\n        }\n        std::string error_message = input.CopyIn(&io_buf);\n        if (!error_message.empty()) {\n          return DecodeImageResult(std::move(error_message));\n        }\n      }\n    } else {\n      wuffs_base__io_buffer empty = wuffs_base__empty_io_buffer();\n      wuffs_base__more_information minfo = wuffs_base__empty_more_information();\n      wuffs_base__status tmm_status =\n          image_decoder->tell_me_more(&empty, &minfo, &io_buf);\n      if (tmm_status.repr != nullptr) {\n        return DecodeImageResult(tmm_status.message());\n      }\n      if (minfo.flavor != WUFFS_BASE__MORE_INFORMATION__FLAVOR__IO_REDIRECT) {\n        return DecodeImageResult(DecodeImage_UnsupportedImageFormat);\n      }\n      uint64_t pos = minfo.io_redirect__range().min_incl;\n      std::string error_message = DecodeImageAdvanceIOBuf(\n          input, io_buf, !input.BringsItsOwnIOBuffer(), start_pos, pos);\n      if (!error_message.empty()) {\n        return" +
 	" DecodeImageResult(std::move(error_message));\n      }\n      fourcc = (int32_t)(minfo.io_redirect__fourcc());\n      if (fourcc == 0) {\n        return DecodeImageResult(DecodeImage_UnsupportedImageFormat);\n      }\n      image_decoder.reset();\n    }\n\n    // Select the image decoder.\n    image_decoder = callbacks.SelectDecoder(\n        (uint32_t)fourcc,\n        fourcc ? wuffs_base__empty_slice_u8() : io_buf.reader_slice());\n    if (!image_decoder) {\n      return DecodeImageResult(DecodeImage_UnsupportedImageFormat);\n    }\n\n    // Decode the image config.\n    while (true) {\n      wuffs_base__status id_dic_status =\n          image_decoder->decode_image_config(&image_config, &io_buf);\n      if (id_dic_status.repr == nullptr) {\n        break;\n      } else if (id_dic_status.repr == wuffs_base__note__i_o_redirect) {\n        if (redirected) {\n          return DecodeImageResult(DecodeImage_UnsupportedImageFormat);\n        }\n        redirected = true;\n        goto redirect;\n      } else if (id_dic_status.repr != wuffs_bas" +
 	"e__suspension__short_read) {\n        return DecodeImageResult(id_dic_status.message());\n      } else if (io_buf.meta.closed) {\n        return DecodeImageResult(DecodeImage_UnexpectedEndOfFile);\n      } else {\n        std::string error_message = input.CopyIn(&io_buf);\n        if (!error_message.empty()) {\n          return DecodeImageResult(std::move(error_message));\n        }\n      }\n    }\n  } while (false);\n\n  // Select the pixel format.\n  uint32_t w = image_config.pixcfg.width();\n  uint32_t h = image_config.pixcfg.height();\n  if ((w > max_incl_dimension) || (h > max_incl_dimension)) {\n    return DecodeImageResult(DecodeImage_MaxInclDimensionExceeded);\n  }\n  wuffs_base__pixel_format pixel_format = callbacks.SelectPixfmt(image_config);\n  if (pixel_format.repr != image_config.pixcfg.pixel_format().repr) {\n    switch (pixel_format.repr) {\n      case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n      case WUFFS_BASE__PIXEL_FORMAT" +
-	"__BGRA_PREMUL:\n      case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n      case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n        break;\n      default:\n        return DecodeImageResult(DecodeImage_UnsupportedPixelFormat);\n    }\n    image_config.pixcfg.set(pixel_format.repr,\n                            WUFFS_BASE__PIXEL_SUBSAMPLING__NONE, w, h);\n  }\n\n  // Allocate the pixel buffer.\n  uint64_t pixbuf_len_min_incl = 0;\n  if ((w > 0) && (h > 0)) {\n    pixbuf_len_min_incl = image_config.pixcfg.pixbuf_len();\n    if (pixbuf_len_min_incl == 0) {\n      return DecodeImageResult(DecodeImage_UnsupportedPixelFormat);\n    }\n  }\n  bool valid_background_color =\n      wuffs_base__color_u32_argb_premul__is_valid(background_color);\n  DecodeImageCallbacks::AllocResult alloc_pixbuf_result =\n      callbacks.AllocPixbuf(image_config, valid_background_color);\n  if (!alloc_pixbuf_result.error_message.empty()) {\n    return DecodeImageResult(std::move(alloc_pixbuf_result.error_message));\n  } else if (alloc_pixbuf_result.mem_slice.len < pix" +
-	"buf_len_min_incl) {\n    return DecodeImageResult(DecodeImage_BufferIsTooShort);\n  }\n  wuffs_base__pixel_buffer pixel_buffer;\n  wuffs_base__status pb_sfs_status = pixel_buffer.set_from_slice(\n      &image_config.pixcfg, alloc_pixbuf_result.mem_slice);\n  if (!pb_sfs_status.is_ok()) {\n    return DecodeImageResult(pb_sfs_status.message());\n  }\n  if (valid_background_color) {\n    wuffs_base__status pb_scufr_status = pixel_buffer.set_color_u32_fill_rect(\n        pixel_buffer.pixcfg.bounds(), background_color);\n    if (pb_scufr_status.repr != nullptr) {\n      return DecodeImageResult(pb_scufr_status.message());\n    }\n  }\n\n  // Allocate the work buffer. Wuffs' decoders conventionally assume that this\n  // can be uninitialized memory.\n  wuffs_base__range_ii_u64 workbuf_len = image_decoder->workbuf_len();\n  DecodeImageCallbacks::AllocResult alloc_workbuf_result =\n      callbacks.AllocWorkbuf(workbuf_len, true);\n  if (!alloc_workbuf_result.error_message.empty()) {\n    return DecodeImageResult(std::move(alloc_workbuf_res" +
-	"ult.error_message));\n  } else if (alloc_workbuf_result.mem_slice.len < workbuf_len.min_incl) {\n    return DecodeImageResult(DecodeImage_BufferIsTooShort);\n  }\n\n  // Decode the frame config.\n  wuffs_base__frame_config frame_config = wuffs_base__null_frame_config();\n  while (true) {\n    wuffs_base__status id_dfc_status =\n        image_decoder->decode_frame_config(&frame_config, &io_buf);\n    if (id_dfc_status.repr == nullptr) {\n      break;\n    } else if (id_dfc_status.repr != wuffs_base__suspension__short_read) {\n      return DecodeImageResult(id_dfc_status.message());\n    } else if (io_buf.meta.closed) {\n      return DecodeImageResult(DecodeImage_UnexpectedEndOfFile);\n    } else {\n      std::string error_message = input.CopyIn(&io_buf);\n      if (!error_message.empty()) {\n        return DecodeImageResult(std::move(error_message));\n      }\n    }\n  }\n\n  // Decode the frame (the pixels).\n  //\n  // From here on, always returns the pixel_buffer. If we get this far, we can\n  // still display a partial image, even i" +
-	"f we encounter an error.\n  std::string message(\"\");\n  if ((pixel_blend == WUFFS_BASE__PIXEL_BLEND__SRC_OVER) &&\n      frame_config.overwrite_instead_of_blend()) {\n    pixel_blend = WUFFS_BASE__PIXEL_BLEND__SRC;\n  }\n  while (true) {\n    wuffs_base__status id_df_status =\n        image_decoder->decode_frame(&pixel_buffer, &io_buf, pixel_blend,\n                                    alloc_workbuf_result.mem_slice, nullptr);\n    if (id_df_status.repr == nullptr) {\n      break;\n    } else if (id_df_status.repr != wuffs_base__suspension__short_read) {\n      message = id_df_status.message();\n      break;\n    } else if (io_buf.meta.closed) {\n      message = DecodeImage_UnexpectedEndOfFile;\n      break;\n    } else {\n      std::string error_message = input.CopyIn(&io_buf);\n      if (!error_message.empty()) {\n        message = std::move(error_message);\n        break;\n      }\n    }\n  }\n  return DecodeImageResult(std::move(alloc_pixbuf_result.mem_owner),\n                           alloc_pixbuf_result.mem_slice, pixel_buffer,\n" +
-	"                           std::move(message));\n}\n\n}  // namespace\n\nDecodeImageResult  //\nDecodeImage(DecodeImageCallbacks& callbacks,\n            sync_io::Input& input,\n            wuffs_base__pixel_blend pixel_blend,\n            wuffs_base__color_u32_argb_premul background_color,\n            uint32_t max_incl_dimension) {\n  wuffs_base__io_buffer* io_buf = input.BringsItsOwnIOBuffer();\n  wuffs_base__io_buffer fallback_io_buf = wuffs_base__empty_io_buffer();\n  std::unique_ptr<uint8_t[]> fallback_io_array(nullptr);\n  if (!io_buf) {\n    fallback_io_array = std::unique_ptr<uint8_t[]>(new uint8_t[32768]);\n    fallback_io_buf =\n        wuffs_base__ptr_u8__writer(fallback_io_array.get(), 32768);\n    io_buf = &fallback_io_buf;\n  }\n\n  wuffs_base__image_decoder::unique_ptr image_decoder(nullptr, &free);\n  DecodeImageResult result =\n      DecodeImage0(image_decoder, callbacks, input, *io_buf, pixel_blend,\n                   background_color, max_incl_dimension);\n  callbacks.Done(result, input, *io_buf, std::move(image_" +
-	"decoder));\n  return result;\n}\n\n}  // namespace wuffs_aux\n\n#endif  // !defined(WUFFS_CONFIG__MODULES) ||\n        // defined(WUFFS_CONFIG__MODULE__AUX__IMAGE)\n" +
+	"__BGRA_NONPREMUL_4X16LE:\n      case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n      case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n      case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n        break;\n      default:\n        return DecodeImageResult(DecodeImage_UnsupportedPixelFormat);\n    }\n    image_config.pixcfg.set(pixel_format.repr,\n                            WUFFS_BASE__PIXEL_SUBSAMPLING__NONE, w, h);\n  }\n\n  // Allocate the pixel buffer.\n  uint64_t pixbuf_len_min_incl = 0;\n  if ((w > 0) && (h > 0)) {\n    pixbuf_len_min_incl = image_config.pixcfg.pixbuf_len();\n    if (pixbuf_len_min_incl == 0) {\n      return DecodeImageResult(DecodeImage_UnsupportedPixelFormat);\n    }\n  }\n  bool valid_background_color =\n      wuffs_base__color_u32_argb_premul__is_valid(background_color);\n  DecodeImageCallbacks::AllocResult alloc_pixbuf_result =\n      callbacks.AllocPixbuf(image_config, valid_background_color);\n  if (!alloc_pixbuf_result.error_message.empty()) {\n    return DecodeImageResult(std::move(alloc_pixbuf_result.error_mes" +
+	"sage));\n  } else if (alloc_pixbuf_result.mem_slice.len < pixbuf_len_min_incl) {\n    return DecodeImageResult(DecodeImage_BufferIsTooShort);\n  }\n  wuffs_base__pixel_buffer pixel_buffer;\n  wuffs_base__status pb_sfs_status = pixel_buffer.set_from_slice(\n      &image_config.pixcfg, alloc_pixbuf_result.mem_slice);\n  if (!pb_sfs_status.is_ok()) {\n    return DecodeImageResult(pb_sfs_status.message());\n  }\n  if (valid_background_color) {\n    wuffs_base__status pb_scufr_status = pixel_buffer.set_color_u32_fill_rect(\n        pixel_buffer.pixcfg.bounds(), background_color);\n    if (pb_scufr_status.repr != nullptr) {\n      return DecodeImageResult(pb_scufr_status.message());\n    }\n  }\n\n  // Allocate the work buffer. Wuffs' decoders conventionally assume that this\n  // can be uninitialized memory.\n  wuffs_base__range_ii_u64 workbuf_len = image_decoder->workbuf_len();\n  DecodeImageCallbacks::AllocResult alloc_workbuf_result =\n      callbacks.AllocWorkbuf(workbuf_len, true);\n  if (!alloc_workbuf_result.error_message.empty()" +
+	") {\n    return DecodeImageResult(std::move(alloc_workbuf_result.error_message));\n  } else if (alloc_workbuf_result.mem_slice.len < workbuf_len.min_incl) {\n    return DecodeImageResult(DecodeImage_BufferIsTooShort);\n  }\n\n  // Decode the frame config.\n  wuffs_base__frame_config frame_config = wuffs_base__null_frame_config();\n  while (true) {\n    wuffs_base__status id_dfc_status =\n        image_decoder->decode_frame_config(&frame_config, &io_buf);\n    if (id_dfc_status.repr == nullptr) {\n      break;\n    } else if (id_dfc_status.repr != wuffs_base__suspension__short_read) {\n      return DecodeImageResult(id_dfc_status.message());\n    } else if (io_buf.meta.closed) {\n      return DecodeImageResult(DecodeImage_UnexpectedEndOfFile);\n    } else {\n      std::string error_message = input.CopyIn(&io_buf);\n      if (!error_message.empty()) {\n        return DecodeImageResult(std::move(error_message));\n      }\n    }\n  }\n\n  // Decode the frame (the pixels).\n  //\n  // From here on, always returns the pixel_buffer. If we get" +
+	" this far, we can\n  // still display a partial image, even if we encounter an error.\n  std::string message(\"\");\n  if ((pixel_blend == WUFFS_BASE__PIXEL_BLEND__SRC_OVER) &&\n      frame_config.overwrite_instead_of_blend()) {\n    pixel_blend = WUFFS_BASE__PIXEL_BLEND__SRC;\n  }\n  while (true) {\n    wuffs_base__status id_df_status =\n        image_decoder->decode_frame(&pixel_buffer, &io_buf, pixel_blend,\n                                    alloc_workbuf_result.mem_slice, nullptr);\n    if (id_df_status.repr == nullptr) {\n      break;\n    } else if (id_df_status.repr != wuffs_base__suspension__short_read) {\n      message = id_df_status.message();\n      break;\n    } else if (io_buf.meta.closed) {\n      message = DecodeImage_UnexpectedEndOfFile;\n      break;\n    } else {\n      std::string error_message = input.CopyIn(&io_buf);\n      if (!error_message.empty()) {\n        message = std::move(error_message);\n        break;\n      }\n    }\n  }\n  return DecodeImageResult(std::move(alloc_pixbuf_result.mem_owner),\n            " +
+	"               alloc_pixbuf_result.mem_slice, pixel_buffer,\n                           std::move(message));\n}\n\n}  // namespace\n\nDecodeImageResult  //\nDecodeImage(DecodeImageCallbacks& callbacks,\n            sync_io::Input& input,\n            wuffs_base__pixel_blend pixel_blend,\n            wuffs_base__color_u32_argb_premul background_color,\n            uint32_t max_incl_dimension) {\n  wuffs_base__io_buffer* io_buf = input.BringsItsOwnIOBuffer();\n  wuffs_base__io_buffer fallback_io_buf = wuffs_base__empty_io_buffer();\n  std::unique_ptr<uint8_t[]> fallback_io_array(nullptr);\n  if (!io_buf) {\n    fallback_io_array = std::unique_ptr<uint8_t[]>(new uint8_t[32768]);\n    fallback_io_buf =\n        wuffs_base__ptr_u8__writer(fallback_io_array.get(), 32768);\n    io_buf = &fallback_io_buf;\n  }\n\n  wuffs_base__image_decoder::unique_ptr image_decoder(nullptr, &free);\n  DecodeImageResult result =\n      DecodeImage0(image_decoder, callbacks, input, *io_buf, pixel_blend,\n                   background_color, max_incl_dimension" +
+	");\n  callbacks.Done(result, input, *io_buf, std::move(image_decoder));\n  return result;\n}\n\n}  // namespace wuffs_aux\n\n#endif  // !defined(WUFFS_CONFIG__MODULES) ||\n        // defined(WUFFS_CONFIG__MODULE__AUX__IMAGE)\n" +
 	""
 
 const AuxImageHh = "" +
 	"// ---------------- Auxiliary - Image\n\nnamespace wuffs_aux {\n\nstruct DecodeImageResult {\n  DecodeImageResult(MemOwner&& pixbuf_mem_owner0,\n                    wuffs_base__slice_u8 pixbuf_mem_slice0,\n                    wuffs_base__pixel_buffer pixbuf0,\n                    std::string&& error_message0);\n  DecodeImageResult(std::string&& error_message0);\n\n  MemOwner pixbuf_mem_owner;\n  wuffs_base__slice_u8 pixbuf_mem_slice;\n  wuffs_base__pixel_buffer pixbuf;\n  std::string error_message;\n};\n\n// DecodeImageCallbacks are the callbacks given to DecodeImage. They are always\n// called in this order:\n//  1. SelectDecoder\n//  2. SelectPixfmt\n//  3. AllocPixbuf\n//  4. AllocWorkbuf\n//  5. Done\n//\n// It may return early - the third callback might not be invoked if the second\n// one fails - but the final callback (Done) is always invoked.\nclass DecodeImageCallbacks {\n public:\n  // AllocResult holds a memory allocation (the result of malloc or new, a\n  // statically allocated pointer, etc), or an error message. The memory i" +
 	"s\n  // de-allocated when mem_owner goes out of scope and is destroyed.\n  struct AllocResult {\n    AllocResult(MemOwner&& mem_owner0, wuffs_base__slice_u8 mem_slice0);\n    AllocResult(std::string&& error_message0);\n\n    MemOwner mem_owner;\n    wuffs_base__slice_u8 mem_slice;\n    std::string error_message;\n  };\n\n  // SelectDecoder returns the image decoder for the input data's file format.\n  // Returning a nullptr means failure (DecodeImage_UnsupportedImageFormat).\n  //\n  // Common formats will have a FourCC value in the range [1 ..= 0x7FFF_FFFF],\n  // such as WUFFS_BASE__FOURCC__JPEG. A zero FourCC value means that the\n  // caller is responsible for examining the opening bytes (a prefix) of the\n  // input data. SelectDecoder implementations should not modify those bytes.\n  //\n  // SelectDecoder might be called more than once, since some image file\n  // formats can wrap others. For example, a nominal BMP file can actually\n  // contain a JPEG or a PNG.\n  //\n  // The default SelectDecoder accepts the FOURCC codes" +
-	" listed below. For\n  // modular builds (i.e. when #define'ing WUFFS_CONFIG__MODULES), acceptance\n  // of the ETC file format is optional (for each value of ETC) and depends on\n  // the corresponding module to be enabled at compile time (i.e. #define'ing\n  // WUFFS_CONFIG__MODULE__ETC).\n  //  - WUFFS_BASE__FOURCC__BMP\n  //  - WUFFS_BASE__FOURCC__GIF\n  //  - WUFFS_BASE__FOURCC__NIE\n  //  - WUFFS_BASE__FOURCC__PNG\n  //  - WUFFS_BASE__FOURCC__WBMP\n  virtual wuffs_base__image_decoder::unique_ptr  //\n  SelectDecoder(uint32_t fourcc, wuffs_base__slice_u8 prefix);\n\n  // SelectPixfmt returns the destination pixel format for AllocPixbuf. It\n  // should return wuffs_base__make_pixel_format(etc) called with one of:\n  //  - WUFFS_BASE__PIXEL_FORMAT__BGR_565\n  //  - WUFFS_BASE__PIXEL_FORMAT__BGR\n  //  - WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL\n  //  - WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL\n  //  - WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL\n  //  - WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL\n  // or return image_config.pixcfg.pixel_" +
-	"format(). The latter means to use the\n  // image file's natural pixel format. For example, GIF images' natural pixel\n  // format is an indexed one.\n  //\n  // Returning otherwise means failure (DecodeImage_UnsupportedPixelFormat).\n  //\n  // The default SelectPixfmt implementation returns\n  // wuffs_base__make_pixel_format(WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL)\n  // which is 4 bytes per pixel (8 bits per channel × 4 channels).\n  virtual wuffs_base__pixel_format  //\n  SelectPixfmt(const wuffs_base__image_config& image_config);\n\n  // AllocPixbuf allocates the pixel buffer.\n  //\n  // allow_uninitialized_memory will be true if a valid background_color was\n  // passed to DecodeImage, since the pixel buffer's contents will be\n  // overwritten with that color after AllocPixbuf returns.\n  //\n  // The default AllocPixbuf implementation allocates either uninitialized or\n  // zeroed memory. Zeroed memory typically corresponds to filling with opaque\n  // black or transparent black, depending on the pixel format.\n  virt" +
-	"ual AllocResult  //\n  AllocPixbuf(const wuffs_base__image_config& image_config,\n              bool allow_uninitialized_memory);\n\n  // AllocWorkbuf allocates the work buffer. The allocated buffer's length\n  // should be at least len_range.min_incl, but larger allocations (up to\n  // len_range.max_incl) may have better performance (by using more memory).\n  //\n  // The default AllocWorkbuf implementation allocates len_range.max_incl bytes\n  // of either uninitialized or zeroed memory.\n  virtual AllocResult  //\n  AllocWorkbuf(wuffs_base__range_ii_u64 len_range,\n               bool allow_uninitialized_memory);\n\n  // Done is always the last Callback method called by DecodeImage, whether or\n  // not parsing the input encountered an error. Even when successful, trailing\n  // data may remain in input and buffer.\n  //\n  // The image_decoder is the one returned by SelectDecoder (if SelectDecoder\n  // was successful), or a no-op unique_ptr otherwise. Like any unique_ptr,\n  // ownership moves to the Done implementation.\n " +
-	" //\n  // Do not keep a reference to buffer or buffer.data.ptr after Done returns,\n  // as DecodeImage may then de-allocate the backing array.\n  //\n  // The default Done implementation is a no-op, other than running the\n  // image_decoder unique_ptr destructor.\n  virtual void  //\n  Done(DecodeImageResult& result,\n       sync_io::Input& input,\n       IOBuffer& buffer,\n       wuffs_base__image_decoder::unique_ptr image_decoder);\n};\n\nextern const char DecodeImage_BufferIsTooShort[];\nextern const char DecodeImage_MaxInclDimensionExceeded[];\nextern const char DecodeImage_OutOfMemory[];\nextern const char DecodeImage_UnexpectedEndOfFile[];\nextern const char DecodeImage_UnsupportedImageFormat[];\nextern const char DecodeImage_UnsupportedPixelBlend[];\nextern const char DecodeImage_UnsupportedPixelConfiguration[];\nextern const char DecodeImage_UnsupportedPixelFormat[];\n\n// DecodeImage decodes the image data in input. A variety of image file formats\n// can be decoded, depending on what callbacks.SelectDecoder returns.\n//\n" +
-	"// For animated formats, only the first frame is returned, since the API is\n// simpler for synchronous I/O and having DecodeImage only return when\n// completely done, but rendering animation often involves handling other\n// events in between animation frames. To decode multiple frames of animated\n// images, or for asynchronous I/O (e.g. when decoding an image streamed over\n// the network), use Wuffs' lower level C API instead of its higher level,\n// simplified C++ API (the wuffs_aux API).\n//\n// The DecodeImageResult's fields depend on whether decoding succeeded:\n//  - On total success, the error_message is empty and pixbuf.pixcfg.is_valid()\n//    is true.\n//  - On partial success (e.g. the input file was truncated but we are still\n//    able to decode some of the pixels), error_message is non-empty but\n//    pixbuf.pixcfg.is_valid() is still true. It is up to the caller whether to\n//    accept or reject partial success.\n//  - On failure, the error_message is non_empty and pixbuf.pixcfg.is_valid()\n//    is fal" +
-	"se.\n//\n// The callbacks allocate the pixel buffer memory and work buffer memory. On\n// success, pixel buffer memory ownership is passed to the DecodeImage caller\n// as the returned pixbuf_mem_owner. Regardless of success or failure, the work\n// buffer memory is deleted.\n//\n// The pixel_blend (one of the constants listed below) determines how to\n// composite the decoded image over the pixel buffer's original pixels (as\n// returned by callbacks.AllocPixbuf):\n//  - WUFFS_BASE__PIXEL_BLEND__SRC\n//  - WUFFS_BASE__PIXEL_BLEND__SRC_OVER\n//\n// The background_color is used to fill the pixel buffer after\n// callbacks.AllocPixbuf returns, if it is valid in the\n// wuffs_base__color_u32_argb_premul__is_valid sense. The default value,\n// 0x0000_0001, is not valid since its Blue channel value (0x01) is greater\n// than its Alpha channel value (0x00). A valid background_color will typically\n// be overwritten when pixel_blend is WUFFS_BASE__PIXEL_BLEND__SRC, but might\n// still be visible on partial (not total) success or when " +
-	"pixel_blend is\n// WUFFS_BASE__PIXEL_BLEND__SRC_OVER and the decoded image is not fully opaque.\n//\n// Decoding fails (with DecodeImage_MaxInclDimensionExceeded) if the image's\n// width or height is greater than max_incl_dimension.\nDecodeImageResult  //\nDecodeImage(DecodeImageCallbacks& callbacks,\n            sync_io::Input& input,\n            wuffs_base__pixel_blend pixel_blend = WUFFS_BASE__PIXEL_BLEND__SRC,\n            wuffs_base__color_u32_argb_premul background_color = 1,  // Invalid.\n            uint32_t max_incl_dimension = 1048575);  // 0x000F_FFFF\n\n}  // namespace wuffs_aux\n" +
+	" listed below. For\n  // modular builds (i.e. when #define'ing WUFFS_CONFIG__MODULES), acceptance\n  // of the ETC file format is optional (for each value of ETC) and depends on\n  // the corresponding module to be enabled at compile time (i.e. #define'ing\n  // WUFFS_CONFIG__MODULE__ETC).\n  //  - WUFFS_BASE__FOURCC__BMP\n  //  - WUFFS_BASE__FOURCC__GIF\n  //  - WUFFS_BASE__FOURCC__NIE\n  //  - WUFFS_BASE__FOURCC__PNG\n  //  - WUFFS_BASE__FOURCC__WBMP\n  virtual wuffs_base__image_decoder::unique_ptr  //\n  SelectDecoder(uint32_t fourcc, wuffs_base__slice_u8 prefix);\n\n  // SelectPixfmt returns the destination pixel format for AllocPixbuf. It\n  // should return wuffs_base__make_pixel_format(etc) called with one of:\n  //  - WUFFS_BASE__PIXEL_FORMAT__BGR_565\n  //  - WUFFS_BASE__PIXEL_FORMAT__BGR\n  //  - WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL\n  //  - WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE\n  //  - WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL\n  //  - WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL\n  //  - WUFFS_BASE__PIXEL_FORMA" +
+	"T__RGBA_PREMUL\n  // or return image_config.pixcfg.pixel_format(). The latter means to use the\n  // image file's natural pixel format. For example, GIF images' natural pixel\n  // format is an indexed one.\n  //\n  // Returning otherwise means failure (DecodeImage_UnsupportedPixelFormat).\n  //\n  // The default SelectPixfmt implementation returns\n  // wuffs_base__make_pixel_format(WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL)\n  // which is 4 bytes per pixel (8 bits per channel × 4 channels).\n  virtual wuffs_base__pixel_format  //\n  SelectPixfmt(const wuffs_base__image_config& image_config);\n\n  // AllocPixbuf allocates the pixel buffer.\n  //\n  // allow_uninitialized_memory will be true if a valid background_color was\n  // passed to DecodeImage, since the pixel buffer's contents will be\n  // overwritten with that color after AllocPixbuf returns.\n  //\n  // The default AllocPixbuf implementation allocates either uninitialized or\n  // zeroed memory. Zeroed memory typically corresponds to filling with opaque\n  // black or " +
+	"transparent black, depending on the pixel format.\n  virtual AllocResult  //\n  AllocPixbuf(const wuffs_base__image_config& image_config,\n              bool allow_uninitialized_memory);\n\n  // AllocWorkbuf allocates the work buffer. The allocated buffer's length\n  // should be at least len_range.min_incl, but larger allocations (up to\n  // len_range.max_incl) may have better performance (by using more memory).\n  //\n  // The default AllocWorkbuf implementation allocates len_range.max_incl bytes\n  // of either uninitialized or zeroed memory.\n  virtual AllocResult  //\n  AllocWorkbuf(wuffs_base__range_ii_u64 len_range,\n               bool allow_uninitialized_memory);\n\n  // Done is always the last Callback method called by DecodeImage, whether or\n  // not parsing the input encountered an error. Even when successful, trailing\n  // data may remain in input and buffer.\n  //\n  // The image_decoder is the one returned by SelectDecoder (if SelectDecoder\n  // was successful), or a no-op unique_ptr otherwise. Like any unique" +
+	"_ptr,\n  // ownership moves to the Done implementation.\n  //\n  // Do not keep a reference to buffer or buffer.data.ptr after Done returns,\n  // as DecodeImage may then de-allocate the backing array.\n  //\n  // The default Done implementation is a no-op, other than running the\n  // image_decoder unique_ptr destructor.\n  virtual void  //\n  Done(DecodeImageResult& result,\n       sync_io::Input& input,\n       IOBuffer& buffer,\n       wuffs_base__image_decoder::unique_ptr image_decoder);\n};\n\nextern const char DecodeImage_BufferIsTooShort[];\nextern const char DecodeImage_MaxInclDimensionExceeded[];\nextern const char DecodeImage_OutOfMemory[];\nextern const char DecodeImage_UnexpectedEndOfFile[];\nextern const char DecodeImage_UnsupportedImageFormat[];\nextern const char DecodeImage_UnsupportedPixelBlend[];\nextern const char DecodeImage_UnsupportedPixelConfiguration[];\nextern const char DecodeImage_UnsupportedPixelFormat[];\n\n// DecodeImage decodes the image data in input. A variety of image file formats\n// can be decoded" +
+	", depending on what callbacks.SelectDecoder returns.\n//\n// For animated formats, only the first frame is returned, since the API is\n// simpler for synchronous I/O and having DecodeImage only return when\n// completely done, but rendering animation often involves handling other\n// events in between animation frames. To decode multiple frames of animated\n// images, or for asynchronous I/O (e.g. when decoding an image streamed over\n// the network), use Wuffs' lower level C API instead of its higher level,\n// simplified C++ API (the wuffs_aux API).\n//\n// The DecodeImageResult's fields depend on whether decoding succeeded:\n//  - On total success, the error_message is empty and pixbuf.pixcfg.is_valid()\n//    is true.\n//  - On partial success (e.g. the input file was truncated but we are still\n//    able to decode some of the pixels), error_message is non-empty but\n//    pixbuf.pixcfg.is_valid() is still true. It is up to the caller whether to\n//    accept or reject partial success.\n//  - On failure, the error_messag" +
+	"e is non_empty and pixbuf.pixcfg.is_valid()\n//    is false.\n//\n// The callbacks allocate the pixel buffer memory and work buffer memory. On\n// success, pixel buffer memory ownership is passed to the DecodeImage caller\n// as the returned pixbuf_mem_owner. Regardless of success or failure, the work\n// buffer memory is deleted.\n//\n// The pixel_blend (one of the constants listed below) determines how to\n// composite the decoded image over the pixel buffer's original pixels (as\n// returned by callbacks.AllocPixbuf):\n//  - WUFFS_BASE__PIXEL_BLEND__SRC\n//  - WUFFS_BASE__PIXEL_BLEND__SRC_OVER\n//\n// The background_color is used to fill the pixel buffer after\n// callbacks.AllocPixbuf returns, if it is valid in the\n// wuffs_base__color_u32_argb_premul__is_valid sense. The default value,\n// 0x0000_0001, is not valid since its Blue channel value (0x01) is greater\n// than its Alpha channel value (0x00). A valid background_color will typically\n// be overwritten when pixel_blend is WUFFS_BASE__PIXEL_BLEND__SRC, but might\n// " +
+	"still be visible on partial (not total) success or when pixel_blend is\n// WUFFS_BASE__PIXEL_BLEND__SRC_OVER and the decoded image is not fully opaque.\n//\n// Decoding fails (with DecodeImage_MaxInclDimensionExceeded) if the image's\n// width or height is greater than max_incl_dimension.\nDecodeImageResult  //\nDecodeImage(DecodeImageCallbacks& callbacks,\n            sync_io::Input& input,\n            wuffs_base__pixel_blend pixel_blend = WUFFS_BASE__PIXEL_BLEND__SRC,\n            wuffs_base__color_u32_argb_premul background_color = 1,  // Invalid.\n            uint32_t max_incl_dimension = 1048575);  // 0x000F_FFFF\n\n}  // namespace wuffs_aux\n" +
 	""
 
 const AuxJsonCc = "" +
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 293f5bc..1b17f92 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -9556,6 +9556,7 @@
   //  - WUFFS_BASE__PIXEL_FORMAT__BGR_565
   //  - WUFFS_BASE__PIXEL_FORMAT__BGR
   //  - WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL
+  //  - WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE
   //  - WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL
   //  - WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL
   //  - WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL
@@ -15504,12 +15505,12 @@
 
   size_t n = len;
   while (n--) {
-    uint8_t b0 = s[0];
-    uint8_t b1 = s[1];
-    uint8_t b2 = s[2];
-    d[0] = b2;
-    d[1] = b1;
-    d[2] = b0;
+    uint8_t s0 = s[0];
+    uint8_t s1 = s[1];
+    uint8_t s2 = s[2];
+    d[0] = s2;
+    d[1] = s1;
+    d[2] = s0;
     s += 3;
     d += 3;
   }
@@ -15547,14 +15548,14 @@
   }
 
   while (n--) {
-    uint8_t b0 = s[0];
-    uint8_t b1 = s[1];
-    uint8_t b2 = s[2];
-    uint8_t b3 = s[3];
-    d[0] = b2;
-    d[1] = b1;
-    d[2] = b0;
-    d[3] = b3;
+    uint8_t s0 = s[0];
+    uint8_t s1 = s[1];
+    uint8_t s2 = s[2];
+    uint8_t s3 = s[3];
+    d[0] = s2;
+    d[1] = s1;
+    d[2] = s0;
+    d[3] = s3;
     s += 4;
     d += 4;
   }
@@ -15575,14 +15576,14 @@
 
   size_t n = len;
   while (n--) {
-    uint8_t b0 = s[0];
-    uint8_t b1 = s[1];
-    uint8_t b2 = s[2];
-    uint8_t b3 = s[3];
-    d[0] = b2;
-    d[1] = b1;
-    d[2] = b0;
-    d[3] = b3;
+    uint8_t s0 = s[0];
+    uint8_t s1 = s[1];
+    uint8_t s2 = s[2];
+    uint8_t s3 = s[3];
+    d[0] = s2;
+    d[1] = s1;
+    d[2] = s0;
+    d[3] = s3;
     s += 4;
     d += 4;
   }
@@ -15637,6 +15638,22 @@
   return len;
 }
 
+static uint64_t  // Copies whole 8-byte units from src to dst.
+wuffs_base__pixel_swizzler__copy_8_8(uint8_t* dst_ptr,
+                                     size_t dst_len,
+                                     uint8_t* dst_palette_ptr,  // Unused.
+                                     size_t dst_palette_len,  // Unused.
+                                     const uint8_t* src_ptr,
+                                     size_t src_len) {
+  size_t dst_len8 = dst_len / 8;  // Whole 8-byte units that fit in dst.
+  size_t src_len8 = src_len / 8;  // Whole 8-byte units available in src.
+  size_t len = (dst_len8 < src_len8) ? dst_len8 : src_len8;  // The smaller.
+  if (len > 0) {  // Nothing to copy: skip the memmove call entirely.
+    memmove(dst_ptr, src_ptr, len * 8);  // Trailing partial units are left.
+  }
+  return len;  // A count of 8-byte units, not of bytes.
+}
+
 // --------
 
 static uint64_t  //
@@ -16574,6 +16591,204 @@
 // --------
 
 static uint64_t  //
+wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__bgra_nonpremul__src(
+    uint8_t* dst_ptr,  // Receives 8 bytes per pixel.
+    size_t dst_len,
+    uint8_t* dst_palette_ptr,  // Unused.
+    size_t dst_palette_len,  // Unused.
+    const uint8_t* src_ptr,  // Supplies 4 bytes per pixel.
+    size_t src_len) {
+  size_t dst_len8 = dst_len / 8;  // Whole pixels that fit in dst.
+  size_t src_len4 = src_len / 4;  // Whole pixels available in src.
+  size_t len = (dst_len8 < src_len4) ? dst_len8 : src_len4;  // The smaller.
+  uint8_t* d = dst_ptr;
+  const uint8_t* s = src_ptr;
+
+  size_t n = len;  // Pixels still to convert.
+  while (n >= 1) {
+    uint8_t s0 = s[0];  // All four src bytes are read before any dst write.
+    uint8_t s1 = s[1];
+    uint8_t s2 = s[2];
+    uint8_t s3 = s[3];
+    d[0] = s0;  // Each src byte is written twice, to two adjacent dst bytes,
+    d[1] = s0;  // so a byte v becomes the 16-bit value (v * 0x0101).
+    d[2] = s1;
+    d[3] = s1;
+    d[4] = s2;
+    d[5] = s2;
+    d[6] = s3;  // The byte order within the pixel is kept as-is.
+    d[7] = s3;
+
+    s += 1 * 4;  // Advance one src pixel.
+    d += 1 * 8;  // Advance one dst pixel.
+    n -= 1;
+  }
+  return len;  // The number of pixels converted.
+}
+
+static uint64_t  // Blends 4-byte src pixels onto 8-byte dst pixels.
+wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__bgra_nonpremul__src_over(
+    uint8_t* dst_ptr,  // Read and then overwritten, 8 bytes per pixel.
+    size_t dst_len,
+    uint8_t* dst_palette_ptr,  // Unused.
+    size_t dst_palette_len,  // Unused.
+    const uint8_t* src_ptr,  // Supplies 4 bytes per pixel.
+    size_t src_len) {
+  size_t dst_len8 = dst_len / 8;  // Whole pixels that fit in dst.
+  size_t src_len4 = src_len / 4;  // Whole pixels available in src.
+  size_t len = (dst_len8 < src_len4) ? dst_len8 : src_len4;  // The smaller.
+  uint8_t* d = dst_ptr;
+  const uint8_t* s = src_ptr;
+
+  size_t n = len;  // Pixels still to blend.
+  while (n >= 1) {
+    uint64_t d0 = wuffs_base__peek_u64le__no_bounds_check(d + (0 * 8));  // Old.
+    uint64_t s0 = wuffs_base__color_u32__as__color_u64(  // Widen src to 64 bits.
+        wuffs_base__peek_u32le__no_bounds_check(s + (0 * 4)));
+    wuffs_base__poke_u64le__no_bounds_check(
+        d + (0 * 8),  // The composite of (d0, s0) replaces the old dst pixel.
+        wuffs_base__composite_nonpremul_nonpremul_u64_axxx(d0, s0));
+
+    s += 1 * 4;  // Advance one src pixel.
+    d += 1 * 8;  // Advance one dst pixel.
+    n -= 1;
+  }
+  return len;  // The number of pixels blended.
+}
+
+static uint64_t  // Blends 8-byte src pixels onto 8-byte dst pixels.
+wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__bgra_nonpremul_4x16le__src_over(
+    uint8_t* dst_ptr,  // Read and then overwritten, 8 bytes per pixel.
+    size_t dst_len,
+    uint8_t* dst_palette_ptr,  // Unused.
+    size_t dst_palette_len,  // Unused.
+    const uint8_t* src_ptr,  // Supplies 8 bytes per pixel.
+    size_t src_len) {
+  size_t dst_len8 = dst_len / 8;  // Whole pixels that fit in dst.
+  size_t src_len8 = src_len / 8;  // Whole pixels available in src.
+  size_t len = (dst_len8 < src_len8) ? dst_len8 : src_len8;  // The smaller.
+  uint8_t* d = dst_ptr;
+  const uint8_t* s = src_ptr;
+
+  size_t n = len;  // Pixels still to blend.
+  while (n >= 1) {
+    uint64_t d0 = wuffs_base__peek_u64le__no_bounds_check(d + (0 * 8));  // Old.
+    uint64_t s0 = wuffs_base__peek_u64le__no_bounds_check(s + (0 * 8));  // New.
+    wuffs_base__poke_u64le__no_bounds_check(
+        d + (0 * 8),  // The composite of (d0, s0) replaces the old dst pixel.
+        wuffs_base__composite_nonpremul_nonpremul_u64_axxx(d0, s0));
+
+    s += 1 * 8;  // Advance one src pixel.
+    d += 1 * 8;  // Advance one dst pixel.
+    n -= 1;
+  }
+  return len;  // The number of pixels blended.
+}
+
+static uint64_t  // Blends palette-indexed src onto 8-byte dst pixels.
+wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__index_bgra_nonpremul__src_over(
+    uint8_t* dst_ptr,  // Read and then overwritten, 8 bytes per pixel.
+    size_t dst_len,
+    uint8_t* dst_palette_ptr,  // Lookup table: 4 bytes per index value.
+    size_t dst_palette_len,  // Must be exactly 1024.
+    const uint8_t* src_ptr,  // Supplies one index byte per pixel.
+    size_t src_len) {
+  if (dst_palette_len != 1024) {  // 256 possible index bytes * 4 bytes each.
+    return 0;  // Nothing is converted when the palette has the wrong size.
+  }
+  size_t dst_len8 = dst_len / 8;  // Whole pixels that fit in dst.
+  size_t len = (dst_len8 < src_len) ? dst_len8 : src_len;  // The smaller.
+  uint8_t* d = dst_ptr;
+  const uint8_t* s = src_ptr;
+  size_t n = len;  // Pixels still to blend.
+
+  while (n >= 1) {
+    uint64_t d0 = wuffs_base__peek_u64le__no_bounds_check(d + (0 * 8));  // Old.
+    uint64_t s0 = wuffs_base__color_u32__as__color_u64(  // Widen to 64 bits.
+        wuffs_base__peek_u32le__no_bounds_check(dst_palette_ptr +
+                                                ((size_t)s[0] * 4)));  // Entry.
+    wuffs_base__poke_u64le__no_bounds_check(
+        d + (0 * 8),  // The composite of (d0, s0) replaces the old dst pixel.
+        wuffs_base__composite_nonpremul_nonpremul_u64_axxx(d0, s0));
+
+    s += 1 * 1;  // Advance one src index byte.
+    d += 1 * 8;  // Advance one dst pixel.
+    n -= 1;
+  }
+
+  return len;  // The number of pixels blended.
+}
+
+static uint64_t  // Widens 4-byte pixels to 8 bytes, swapping bytes 0 and 2.
+wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__rgba_nonpremul__src(
+    uint8_t* dst_ptr,  // Receives 8 bytes per pixel.
+    size_t dst_len,
+    uint8_t* dst_palette_ptr,  // Unused.
+    size_t dst_palette_len,  // Unused.
+    const uint8_t* src_ptr,  // Supplies 4 bytes per pixel.
+    size_t src_len) {
+  size_t dst_len8 = dst_len / 8;  // Whole pixels that fit in dst.
+  size_t src_len4 = src_len / 4;  // Whole pixels available in src.
+  size_t len = (dst_len8 < src_len4) ? dst_len8 : src_len4;  // The smaller.
+  uint8_t* d = dst_ptr;
+  const uint8_t* s = src_ptr;
+
+  size_t n = len;  // Pixels still to convert.
+  while (n >= 1) {
+    uint8_t s0 = s[0];  // All four src bytes are read before any dst write.
+    uint8_t s1 = s[1];
+    uint8_t s2 = s[2];
+    uint8_t s3 = s[3];
+    d[0] = s2;  // src byte 2 goes first in dst (and src byte 0 goes third).
+    d[1] = s2;  // Each src byte is written twice: v becomes (v * 0x0101).
+    d[2] = s1;
+    d[3] = s1;
+    d[4] = s0;
+    d[5] = s0;
+    d[6] = s3;  // src byte 3 keeps its position.
+    d[7] = s3;
+
+    s += 1 * 4;  // Advance one src pixel.
+    d += 1 * 8;  // Advance one dst pixel.
+    n -= 1;
+  }
+  return len;  // The number of pixels converted.
+}
+
+static uint64_t  // Blends channel-swapped 4-byte src onto 8-byte dst pixels.
+wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__rgba_nonpremul__src_over(
+    uint8_t* dst_ptr,  // Read and then overwritten, 8 bytes per pixel.
+    size_t dst_len,
+    uint8_t* dst_palette_ptr,  // Unused.
+    size_t dst_palette_len,  // Unused.
+    const uint8_t* src_ptr,  // Supplies 4 bytes per pixel.
+    size_t src_len) {
+  size_t dst_len8 = dst_len / 8;  // Whole pixels that fit in dst.
+  size_t src_len4 = src_len / 4;  // Whole pixels available in src.
+  size_t len = (dst_len8 < src_len4) ? dst_len8 : src_len4;  // The smaller.
+  uint8_t* d = dst_ptr;
+  const uint8_t* s = src_ptr;
+
+  size_t n = len;  // Pixels still to blend.
+  while (n >= 1) {
+    uint64_t d0 = wuffs_base__peek_u64le__no_bounds_check(d + (0 * 8));  // Old.
+    uint64_t s0 =  // src: read as u32le, argb/abgr swapped, then widened.
+        wuffs_base__color_u32__as__color_u64(wuffs_base__swap_u32_argb_abgr(
+            wuffs_base__peek_u32le__no_bounds_check(s + (0 * 4))));
+    wuffs_base__poke_u64le__no_bounds_check(
+        d + (0 * 8),  // The composite of (d0, s0) replaces the old dst pixel.
+        wuffs_base__composite_nonpremul_nonpremul_u64_axxx(d0, s0));
+
+    s += 1 * 4;  // Advance one src pixel.
+    d += 1 * 8;  // Advance one dst pixel.
+    n -= 1;
+  }
+  return len;  // The number of pixels blended.
+}
+
+// --------
+
+static uint64_t  //
 wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src(
     uint8_t* dst_ptr,
     size_t dst_len,
@@ -17048,6 +17263,113 @@
 // --------
 
 static uint64_t  //
+wuffs_base__pixel_swizzler__bgrw_4x16le__bgr(uint8_t* dst_ptr,
+                                             size_t dst_len,
+                                             uint8_t* dst_palette_ptr,  // Unused.
+                                             size_t dst_palette_len,  // Unused.
+                                             const uint8_t* src_ptr,
+                                             size_t src_len) {
+  size_t dst_len8 = dst_len / 8;  // Whole 8-byte pixels that fit in dst.
+  size_t src_len3 = src_len / 3;  // Whole 3-byte pixels available in src.
+  size_t len = (dst_len8 < src_len3) ? dst_len8 : src_len3;  // The smaller.
+  uint8_t* d = dst_ptr;
+  const uint8_t* s = src_ptr;
+  size_t n = len;  // Pixels still to convert.
+
+  while (n >= 1) {
+    uint8_t s0 = s[0];  // All three src bytes are read before any dst write.
+    uint8_t s1 = s[1];
+    uint8_t s2 = s[2];
+    d[0] = s0;  // Each src byte is written twice, to two adjacent dst bytes,
+    d[1] = s0;  // so a byte v becomes the 16-bit value (v * 0x0101).
+    d[2] = s1;
+    d[3] = s1;
+    d[4] = s2;
+    d[5] = s2;
+    d[6] = 0xFF;  // src has no fourth byte: the last two dst bytes are
+    d[7] = 0xFF;  // always all-ones.
+
+    s += 1 * 3;  // Advance one src pixel.
+    d += 1 * 8;  // Advance one dst pixel.
+    n -= 1;
+  }
+
+  return len;  // The number of pixels converted.
+}
+
+static uint64_t  // Widens 4-byte pixels to 8 bytes, ignoring src byte 3.
+wuffs_base__pixel_swizzler__bgrw_4x16le__bgrx(uint8_t* dst_ptr,
+                                              size_t dst_len,
+                                              uint8_t* dst_palette_ptr,  // Unused.
+                                              size_t dst_palette_len,  // Unused.
+                                              const uint8_t* src_ptr,
+                                              size_t src_len) {
+  size_t dst_len8 = dst_len / 8;  // Whole 8-byte pixels that fit in dst.
+  size_t src_len4 = src_len / 4;  // Whole 4-byte pixels available in src.
+  size_t len = (dst_len8 < src_len4) ? dst_len8 : src_len4;  // The smaller.
+  uint8_t* d = dst_ptr;
+  const uint8_t* s = src_ptr;
+  size_t n = len;  // Pixels still to convert.
+
+  while (n >= 1) {
+    uint8_t s0 = s[0];  // Only the first three src bytes are read; s[3] is
+    uint8_t s1 = s[1];  // skipped over by the 4-byte stride below.
+    uint8_t s2 = s[2];
+    d[0] = s0;  // Each src byte is written twice, to two adjacent dst bytes,
+    d[1] = s0;  // so a byte v becomes the 16-bit value (v * 0x0101).
+    d[2] = s1;
+    d[3] = s1;
+    d[4] = s2;
+    d[5] = s2;
+    d[6] = 0xFF;  // The last two dst bytes are always all-ones, whatever
+    d[7] = 0xFF;  // the fourth src byte holds.
+
+    s += 1 * 4;  // Advance one src pixel.
+    d += 1 * 8;  // Advance one dst pixel.
+    n -= 1;
+  }
+
+  return len;  // The number of pixels converted.
+}
+
+static uint64_t  // Widens 3-byte pixels to 8 bytes, swapping bytes 0 and 2.
+wuffs_base__pixel_swizzler__bgrw_4x16le__rgb(uint8_t* dst_ptr,
+                                             size_t dst_len,
+                                             uint8_t* dst_palette_ptr,  // Unused.
+                                             size_t dst_palette_len,  // Unused.
+                                             const uint8_t* src_ptr,
+                                             size_t src_len) {
+  size_t dst_len8 = dst_len / 8;  // Whole 8-byte pixels that fit in dst.
+  size_t src_len3 = src_len / 3;  // Whole 3-byte pixels available in src.
+  size_t len = (dst_len8 < src_len3) ? dst_len8 : src_len3;  // The smaller.
+  uint8_t* d = dst_ptr;
+  const uint8_t* s = src_ptr;
+  size_t n = len;  // Pixels still to convert.
+
+  while (n >= 1) {
+    uint8_t s0 = s[0];  // All three src bytes are read before any dst write.
+    uint8_t s1 = s[1];
+    uint8_t s2 = s[2];
+    d[0] = s2;  // src byte 2 goes first in dst (and src byte 0 goes third).
+    d[1] = s2;  // Each src byte is written twice: v becomes (v * 0x0101).
+    d[2] = s1;
+    d[3] = s1;
+    d[4] = s0;
+    d[5] = s0;
+    d[6] = 0xFF;  // src has no fourth byte: the last two dst bytes are
+    d[7] = 0xFF;  // always all-ones.
+
+    s += 1 * 3;  // Advance one src pixel.
+    d += 1 * 8;  // Advance one dst pixel.
+    n -= 1;
+  }
+
+  return len;  // The number of pixels converted.
+}
+
+// --------
+
+static uint64_t  //
 wuffs_base__pixel_swizzler__rgba_nonpremul__bgra_nonpremul_4x16le__src(
     uint8_t* dst_ptr,
     size_t dst_len,
@@ -17565,6 +17887,124 @@
 // --------
 
 static uint64_t  //
+wuffs_base__pixel_swizzler__xxxxxxxx__index__src(uint8_t* dst_ptr,
+                                                 size_t dst_len,
+                                                 uint8_t* dst_palette_ptr,  // Lookup table.
+                                                 size_t dst_palette_len,  // Must be 1024.
+                                                 const uint8_t* src_ptr,
+                                                 size_t src_len) {
+  if (dst_palette_len != 1024) {  // 256 possible index bytes * 4 bytes each.
+    return 0;  // Nothing is converted when the palette has the wrong size.
+  }
+  size_t dst_len8 = dst_len / 8;  // Whole 8-byte pixels that fit in dst.
+  size_t len = (dst_len8 < src_len) ? dst_len8 : src_len;  // 1 src byte each.
+  uint8_t* d = dst_ptr;
+  const uint8_t* s = src_ptr;
+  size_t n = len;  // Pixels still to convert.
+
+  while (n >= 1) {
+    wuffs_base__poke_u64le__no_bounds_check(  // Overwrites dst; no blending.
+        d + (0 * 8), wuffs_base__color_u32__as__color_u64(  // Widen to 64 bits.
+                         wuffs_base__peek_u32le__no_bounds_check(
+                             dst_palette_ptr + ((size_t)s[0] * 4))));  // Entry.
+
+    s += 1 * 1;  // Advance one src index byte.
+    d += 1 * 8;  // Advance one dst pixel.
+    n -= 1;
+  }
+
+  return len;  // The number of pixels converted.
+}
+
+static uint64_t  // Like a palette lookup, but all-zero entries keep dst.
+wuffs_base__pixel_swizzler__xxxxxxxx__index_binary_alpha__src_over(
+    uint8_t* dst_ptr,  // Conditionally overwritten, 8 bytes per pixel.
+    size_t dst_len,
+    uint8_t* dst_palette_ptr,  // Lookup table: 4 bytes per index value.
+    size_t dst_palette_len,  // Must be exactly 1024.
+    const uint8_t* src_ptr,  // Supplies one index byte per pixel.
+    size_t src_len) {
+  if (dst_palette_len != 1024) {  // 256 possible index bytes * 4 bytes each.
+    return 0;  // Nothing is converted when the palette has the wrong size.
+  }
+  size_t dst_len8 = dst_len / 8;  // Whole 8-byte pixels that fit in dst.
+  size_t len = (dst_len8 < src_len) ? dst_len8 : src_len;  // The smaller.
+  uint8_t* d = dst_ptr;
+  const uint8_t* s = src_ptr;
+  size_t n = len;  // Pixels still to process.
+
+  while (n >= 1) {
+    uint32_t s0 = wuffs_base__peek_u32le__no_bounds_check(dst_palette_ptr +
+                                                          ((size_t)s[0] * 4));
+    if (s0) {  // A palette entry of four zero bytes leaves dst untouched.
+      wuffs_base__poke_u64le__no_bounds_check(  // Otherwise: plain overwrite.
+          d + (0 * 8), wuffs_base__color_u32__as__color_u64(s0));
+    }
+
+    s += 1 * 1;  // Advance one src index byte.
+    d += 1 * 8;  // Advance one dst pixel, whether or not it was written.
+    n -= 1;
+  }
+
+  return len;  // Pixels processed, including those left untouched.
+}
+
+static uint64_t  // Expands each src byte into one 8-byte dst pixel.
+wuffs_base__pixel_swizzler__xxxxxxxx__y(uint8_t* dst_ptr,
+                                        size_t dst_len,
+                                        uint8_t* dst_palette_ptr,  // Unused.
+                                        size_t dst_palette_len,  // Unused.
+                                        const uint8_t* src_ptr,
+                                        size_t src_len) {
+  size_t dst_len8 = dst_len / 8;  // Whole 8-byte pixels that fit in dst.
+  size_t len = (dst_len8 < src_len) ? dst_len8 : src_len;  // 1 src byte each.
+  uint8_t* d = dst_ptr;
+  const uint8_t* s = src_ptr;
+  size_t n = len;  // Pixels still to convert.
+
+  while (n >= 1) {
+    wuffs_base__poke_u64le__no_bounds_check(  // Low 48 bits: 6 copies of s[0].
+        d + (0 * 8), 0xFFFF000000000000 | (0x010101010101 * (uint64_t)s[0]));
+
+    s += 1 * 1;  // Advance one src byte.
+    d += 1 * 8;  // Advance one dst pixel.
+    n -= 1;
+  }
+
+  return len;  // The number of pixels converted.
+}
+
+static uint64_t  // Expands each 2-byte src value into one 8-byte dst pixel.
+wuffs_base__pixel_swizzler__xxxxxxxx__y_16be(uint8_t* dst_ptr,
+                                             size_t dst_len,
+                                             uint8_t* dst_palette_ptr,  // Unused.
+                                             size_t dst_palette_len,  // Unused.
+                                             const uint8_t* src_ptr,
+                                             size_t src_len) {
+  size_t dst_len8 = dst_len / 8;  // Whole 8-byte pixels that fit in dst.
+  size_t src_len2 = src_len / 2;  // Whole 2-byte values available in src.
+  size_t len = (dst_len8 < src_len2) ? dst_len8 : src_len2;  // The smaller.
+  uint8_t* d = dst_ptr;
+  const uint8_t* s = src_ptr;
+  size_t n = len;  // Pixels still to convert.
+
+  while (n >= 1) {
+    uint64_t s0 =  // src is read big-endian; dst below is written little-endian.
+        ((uint64_t)(wuffs_base__peek_u16be__no_bounds_check(s + (0 * 2))));
+    wuffs_base__poke_u64le__no_bounds_check(  // Low 48 bits: 3 copies of s0.
+        d + (0 * 8), 0xFFFF000000000000 | (0x000100010001 * s0));
+
+    s += 1 * 2;  // Advance one src value.
+    d += 1 * 8;  // Advance one dst pixel.
+    n -= 1;
+  }
+
+  return len;  // The number of pixels converted.
+}
+
+// --------
+
+static uint64_t  //
 wuffs_base__pixel_swizzler__y__y_16be(uint8_t* dst_ptr,
                                       size_t dst_len,
                                       uint8_t* dst_palette_ptr,
@@ -17654,6 +18094,12 @@
       }
 #endif
       return wuffs_base__pixel_swizzler__xxxx__y;
+
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL_4X16LE:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL_4X16LE:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL_4X16LE:
+      return wuffs_base__pixel_swizzler__xxxxxxxx__y;
   }
   return NULL;
 }
@@ -17685,6 +18131,12 @@
     case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:
     case WUFFS_BASE__PIXEL_FORMAT__RGBX:
       return wuffs_base__pixel_swizzler__xxxx__y_16be;
+
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL_4X16LE:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL_4X16LE:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL_4X16LE:
+      return wuffs_base__pixel_swizzler__xxxxxxxx__y_16be;
   }
   return NULL;
 }
@@ -17757,6 +18209,19 @@
       }
       return NULL;
 
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:
+      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=
+          1024) {
+        return NULL;
+      }
+      switch (blend) {
+        case WUFFS_BASE__PIXEL_BLEND__SRC:
+          return wuffs_base__pixel_swizzler__xxxxxxxx__index__src;
+        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
+          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__index_bgra_nonpremul__src_over;
+      }
+      return NULL;
+
     case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
       switch (blend) {
         case WUFFS_BASE__PIXEL_BLEND__SRC:
@@ -17882,6 +18347,20 @@
       }
       return NULL;
 
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL_4X16LE:
+      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=
+          1024) {
+        return NULL;
+      }
+      switch (blend) {
+        case WUFFS_BASE__PIXEL_BLEND__SRC:
+          return wuffs_base__pixel_swizzler__xxxxxxxx__index__src;
+        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
+          return wuffs_base__pixel_swizzler__xxxxxxxx__index_binary_alpha__src_over;
+      }
+      return NULL;
+
     case WUFFS_BASE__PIXEL_FORMAT__RGB:
       if (wuffs_base__pixel_swizzler__swap_rgbx_bgrx(
               dst_palette.ptr, dst_palette.len, NULL, 0, src_palette.ptr,
@@ -17934,6 +18413,10 @@
     case WUFFS_BASE__PIXEL_FORMAT__BGRX:
       return wuffs_base__pixel_swizzler__bgrw__bgr;
 
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL_4X16LE:
+      return wuffs_base__pixel_swizzler__bgrw_4x16le__bgr;
+
     case WUFFS_BASE__PIXEL_FORMAT__RGB:
       return wuffs_base__pixel_swizzler__swap_rgb_bgr;
 
@@ -17986,6 +18469,15 @@
       }
       return NULL;
 
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:
+      switch (blend) {
+        case WUFFS_BASE__PIXEL_BLEND__SRC:
+          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__bgra_nonpremul__src;
+        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
+          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__bgra_nonpremul__src_over;
+      }
+      return NULL;
+
     case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
       switch (blend) {
         case WUFFS_BASE__PIXEL_BLEND__SRC:
@@ -18070,6 +18562,15 @@
       }
       return NULL;
 
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:
+      switch (blend) {
+        case WUFFS_BASE__PIXEL_BLEND__SRC:
+          return wuffs_base__pixel_swizzler__copy_8_8;
+        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
+          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__bgra_nonpremul_4x16le__src_over;
+      }
+      return NULL;
+
     case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
       switch (blend) {
         case WUFFS_BASE__PIXEL_BLEND__SRC:
@@ -18132,6 +18633,9 @@
     case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:
       return wuffs_base__pixel_swizzler__bgrw__bgrx;
 
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:
+      return wuffs_base__pixel_swizzler__bgrw_4x16le__bgrx;
+
     case WUFFS_BASE__PIXEL_FORMAT__BGRX:
       return wuffs_base__pixel_swizzler__copy_4_4;
 
@@ -18172,6 +18676,9 @@
 #endif
       return wuffs_base__pixel_swizzler__bgrw__rgb;
 
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:
+      return wuffs_base__pixel_swizzler__bgrw_4x16le__rgb;
+
     case WUFFS_BASE__PIXEL_FORMAT__RGB:
       return wuffs_base__pixel_swizzler__copy_3_3;
 
@@ -18224,6 +18731,15 @@
       }
       return NULL;
 
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:
+      switch (blend) {
+        case WUFFS_BASE__PIXEL_BLEND__SRC:
+          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__rgba_nonpremul__src;
+        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
+          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__rgba_nonpremul__src_over;
+      }
+      return NULL;
+
     case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
       switch (blend) {
         case WUFFS_BASE__PIXEL_BLEND__SRC:
@@ -37588,6 +38104,7 @@
       case WUFFS_BASE__PIXEL_FORMAT__BGR_565:
       case WUFFS_BASE__PIXEL_FORMAT__BGR:
       case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
+      case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:
       case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
       case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:
       case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:
diff --git a/test/c/std/wbmp.c b/test/c/std/wbmp.c
index e1e8e73..0ce8fd5 100644
--- a/test/c/std/wbmp.c
+++ b/test/c/std/wbmp.c
@@ -285,6 +285,10 @@
           .pixfmt_repr = WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL,
       },
       {
+          .color = 0x80123456,
+          .pixfmt_repr = WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE,
+      },
+      {
           .color = 0x80000040,
           .pixfmt_repr = WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL,
       },