Add pixel swizzler support for src_pixfmt = BGR_565

Binary size, before:
 59472 gen/lib/c/clang-9-dynamic/wuffs-base-pixconv.lo
 62072 gen/lib/c/clang-9-static/wuffs-base-pixconv.o
102936 gen/lib/c/gcc-dynamic/wuffs-base-pixconv.lo
102904 gen/lib/c/gcc-static/wuffs-base-pixconv.o

After:
 66064 gen/lib/c/clang-9-dynamic/wuffs-base-pixconv.lo
 68768 gen/lib/c/clang-9-static/wuffs-base-pixconv.o
109256 gen/lib/c/gcc-dynamic/wuffs-base-pixconv.lo
109208 gen/lib/c/gcc-static/wuffs-base-pixconv.o
diff --git a/internal/cgen/base/pixconv-submodule.c b/internal/cgen/base/pixconv-submodule.c
index cc1acfb..08d8a58 100644
--- a/internal/cgen/base/pixconv-submodule.c
+++ b/internal/cgen/base/pixconv-submodule.c
@@ -901,6 +901,22 @@
 }
 
 static uint64_t  //
+wuffs_base__pixel_swizzler__copy_2_2(uint8_t* dst_ptr,
+                                     size_t dst_len,
+                                     uint8_t* dst_palette_ptr,  // Unused.
+                                     size_t dst_palette_len,    // Unused.
+                                     const uint8_t* src_ptr,
+                                     size_t src_len) {
+  size_t dst_len2 = dst_len / 2;  // Whole 2-byte units that fit in dst.
+  size_t src_len2 = src_len / 2;  // Whole 2-byte units available in src.
+  size_t len = (dst_len2 < src_len2) ? dst_len2 : src_len2;
+  if (len > 0) {  // Skip the memmove call when there is nothing to copy.
+    memmove(dst_ptr, src_ptr, len * 2);  // Copies len units of 2 bytes each.
+  }
+  return len;  // A count of 2-byte units copied, not a byte count.
+}
+
+static uint64_t  //
 wuffs_base__pixel_swizzler__copy_3_3(uint8_t* dst_ptr,
                                      size_t dst_len,
                                      uint8_t* dst_palette_ptr,
@@ -1663,6 +1679,35 @@
 // --------
 
 static uint64_t  //
+wuffs_base__pixel_swizzler__bgr__bgr_565(uint8_t* dst_ptr,
+                                         size_t dst_len,
+                                         uint8_t* dst_palette_ptr,  // Unused.
+                                         size_t dst_palette_len,    // Unused.
+                                         const uint8_t* src_ptr,
+                                         size_t src_len) {
+  size_t dst_len3 = dst_len / 3;  // Whole 3-byte dst pixels that fit.
+  size_t src_len2 = src_len / 2;  // Whole 2-byte src pixels available.
+  size_t len = (dst_len3 < src_len2) ? dst_len3 : src_len2;
+  uint8_t* d = dst_ptr;
+  const uint8_t* s = src_ptr;
+  size_t n = len;  // Pixels still to convert.
+
+  // TODO: unroll.
+
+  while (n >= 1) {  // Per pass: one 2-byte src pixel -> one 3-byte dst pixel.
+    uint32_t s0 = wuffs_base__color_u16_rgb_565__as__color_u32_argb_premul(
+        wuffs_base__peek_u16le__no_bounds_check(s + (0 * 2)));
+    wuffs_base__poke_u24le__no_bounds_check(d + (0 * 3), s0);
+
+    s += 1 * 2;
+    d += 1 * 3;
+    n -= 1;
+  }
+
+  return len;  // Number of pixels converted, not a byte count.
+}
+
+static uint64_t  //
 wuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src(uint8_t* dst_ptr,
                                                      size_t dst_len,
                                                      uint8_t* dst_palette_ptr,
@@ -3026,6 +3071,35 @@
 }
 
 static uint64_t  //
+wuffs_base__pixel_swizzler__bgrw__bgr_565(uint8_t* dst_ptr,
+                                          size_t dst_len,
+                                          uint8_t* dst_palette_ptr,  // Unused.
+                                          size_t dst_palette_len,    // Unused.
+                                          const uint8_t* src_ptr,
+                                          size_t src_len) {
+  size_t dst_len4 = dst_len / 4;  // Whole 4-byte dst pixels that fit.
+  size_t src_len2 = src_len / 2;  // Whole 2-byte src pixels available.
+  size_t len = (dst_len4 < src_len2) ? dst_len4 : src_len2;
+  uint8_t* d = dst_ptr;
+  const uint8_t* s = src_ptr;
+  size_t n = len;  // Pixels still to convert.
+
+  // TODO: unroll.
+
+  while (n >= 1) {  // Per pass: one 2-byte src pixel -> one 4-byte dst pixel.
+    wuffs_base__poke_u32le__no_bounds_check(
+        d + (0 * 4), wuffs_base__color_u16_rgb_565__as__color_u32_argb_premul(
+                         wuffs_base__peek_u16le__no_bounds_check(s + (0 * 2))));
+
+    s += 1 * 2;
+    d += 1 * 4;
+    n -= 1;
+  }
+
+  return len;  // Number of pixels converted, not a byte count.
+}
+
+static uint64_t  //
 wuffs_base__pixel_swizzler__bgrw__bgrx(uint8_t* dst_ptr,
                                        size_t dst_len,
                                        uint8_t* dst_palette_ptr,
@@ -3211,6 +3285,35 @@
 }
 
 static uint64_t  //
+wuffs_base__pixel_swizzler__bgrw_4x16le__bgr_565(uint8_t* dst_ptr,
+                                                 size_t dst_len,
+                                                 uint8_t* dst_palette_ptr,
+                                                 size_t dst_palette_len,
+                                                 const uint8_t* src_ptr,
+                                                 size_t src_len) {
+  size_t dst_len8 = dst_len / 8;  // Whole 8-byte dst pixels that fit.
+  size_t src_len2 = src_len / 2;  // Whole 2-byte src pixels available.
+  size_t len = (dst_len8 < src_len2) ? dst_len8 : src_len2;
+  uint8_t* d = dst_ptr;
+  const uint8_t* s = src_ptr;
+  size_t n = len;  // Pixels still to convert; the palette args are unused.
+
+  while (n >= 1) {  // Per pass: one 2-byte src pixel -> one 8-byte dst pixel.
+    wuffs_base__poke_u64le__no_bounds_check(
+        d + (0 * 8),
+        wuffs_base__color_u32__as__color_u64(
+            wuffs_base__color_u16_rgb_565__as__color_u32_argb_premul(
+                wuffs_base__peek_u16le__no_bounds_check(s + (0 * 2)))));
+
+    s += 1 * 2;
+    d += 1 * 8;
+    n -= 1;
+  }
+
+  return len;  // Number of pixels converted, not a byte count.
+}
+
+static uint64_t  //
 wuffs_base__pixel_swizzler__bgrw_4x16le__bgrx(uint8_t* dst_ptr,
                                               size_t dst_len,
                                               uint8_t* dst_palette_ptr,
@@ -3345,6 +3448,39 @@
 // --------
 
 static uint64_t  //
+wuffs_base__pixel_swizzler__rgbw__bgr_565(uint8_t* dst_ptr,
+                                          size_t dst_len,
+                                          uint8_t* dst_palette_ptr,  // Unused.
+                                          size_t dst_palette_len,    // Unused.
+                                          const uint8_t* src_ptr,
+                                          size_t src_len) {
+  size_t dst_len4 = dst_len / 4;  // Whole 4-byte dst pixels that fit.
+  size_t src_len2 = src_len / 2;  // Whole 2-byte src pixels available.
+  size_t len = (dst_len4 < src_len2) ? dst_len4 : src_len2;
+  uint8_t* d = dst_ptr;
+  const uint8_t* s = src_ptr;
+  size_t n = len;  // Pixels still to convert.
+
+  // TODO: unroll.
+
+  while (n >= 1) {  // Per pass: one 2-byte src pixel -> one 4-byte dst pixel.
+    wuffs_base__poke_u32le__no_bounds_check(
+        d + (0 * 4),
+        wuffs_base__swap_u32_argb_abgr(
+            wuffs_base__color_u16_rgb_565__as__color_u32_argb_premul(
+                wuffs_base__peek_u16le__no_bounds_check(s + (0 * 2)))));
+
+    s += 1 * 2;
+    d += 1 * 4;
+    n -= 1;
+  }
+
+  return len;  // Number of pixels converted, not a byte count.
+}
+
+// --------
+
+static uint64_t  //
 wuffs_base__pixel_swizzler__xxx__index__src(uint8_t* dst_ptr,
                                             size_t dst_len,
                                             uint8_t* dst_palette_ptr,
@@ -4308,6 +4444,39 @@
 }
 
 static wuffs_base__pixel_swizzler__func  //
+wuffs_base__pixel_swizzler__prepare__bgr_565(
+    wuffs_base__pixel_swizzler* p,
+    wuffs_base__pixel_format dst_pixfmt,
+    wuffs_base__slice_u8 dst_palette,
+    wuffs_base__slice_u8 src_palette,
+    wuffs_base__pixel_blend blend) {
+  switch (dst_pixfmt.repr) {  // p, palettes and blend are unused here.
+    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:
+      return wuffs_base__pixel_swizzler__copy_2_2;  // 2-byte copy.
+
+    case WUFFS_BASE__PIXEL_FORMAT__BGR:
+      return wuffs_base__pixel_swizzler__bgr__bgr_565;  // 3-byte dst pixels.
+
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRX:
+      return wuffs_base__pixel_swizzler__bgrw__bgr_565;  // 4-byte dst pixels.
+
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL_4X16LE:
+      return wuffs_base__pixel_swizzler__bgrw_4x16le__bgr_565;  // 8-byte dst.
+
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBX:
+      return wuffs_base__pixel_swizzler__rgbw__bgr_565;  // 4-byte dst pixels.
+  }
+  return NULL;  // dst_pixfmt.repr matched none of the cases above.
+}
+
+static wuffs_base__pixel_swizzler__func  //
 wuffs_base__pixel_swizzler__prepare__bgr(wuffs_base__pixel_swizzler* p,
                                          wuffs_base__pixel_format dst_pixfmt,
                                          wuffs_base__slice_u8 dst_palette,
@@ -4927,6 +5096,11 @@
           p, dst_pixfmt, dst_palette, src_palette, blend);
       break;
 
+    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:
+      func = wuffs_base__pixel_swizzler__prepare__bgr_565(
+          p, dst_pixfmt, dst_palette, src_palette, blend);
+      break;
+
     case WUFFS_BASE__PIXEL_FORMAT__BGR:
       func = wuffs_base__pixel_swizzler__prepare__bgr(
           p, dst_pixfmt, dst_palette, src_palette, blend);
diff --git a/internal/cgen/data/data.go b/internal/cgen/data/data.go
index 68d540f..f76908e 100644
--- a/internal/cgen/data/data.go
+++ b/internal/cgen/data/data.go
@@ -588,9 +588,9 @@
 	"_ptr,\n                                                  size_t dst_palette_len,\n                                                  const uint8_t* src_ptr,\n                                                  size_t src_len) {\n  size_t len = (dst_len < src_len ? dst_len : src_len) / 4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  __m128i shuffle = _mm_set_epi8(+0x0F, +0x0C, +0x0D, +0x0E,  //\n                                 +0x0B, +0x08, +0x09, +0x0A,  //\n                                 +0x07, +0x04, +0x05, +0x06,  //\n                                 +0x03, +0x00, +0x01, +0x02);\n\n  while (n >= 4) {\n    __m128i x;\n    x = _mm_lddqu_si128((const __m128i*)(const void*)s);\n    x = _mm_shuffle_epi8(x, shuffle);\n    _mm_storeu_si128((__m128i*)(void*)d, x);\n\n    s += 4 * 4;\n    d += 4 * 4;\n    n -= 4;\n  }\n\n  while (n--) {\n    uint8_t s0 = s[0];\n    uint8_t s1 = s[1];\n    uint8_t s2 = s[2];\n    uint8_t s3 = s[3];\n    d[0] = s2;\n    d[1] = s1;\n    d[2] = s0;\n    d[3] = s3;\n    s += 4;\n    d +" +
 	"= 4;\n  }\n  return len;\n}\n#endif  // defined(WUFFS_BASE__CPU_ARCH__X86_64)\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__swap_rgbx_bgrx(uint8_t* dst_ptr,\n                                           size_t dst_len,\n                                           uint8_t* dst_palette_ptr,\n                                           size_t dst_palette_len,\n                                           const uint8_t* src_ptr,\n                                           size_t src_len) {\n  size_t len = (dst_len < src_len ? dst_len : src_len) / 4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n\n  size_t n = len;\n  while (n--) {\n    uint8_t s0 = s[0];\n    uint8_t s1 = s[1];\n    uint8_t s2 = s[2];\n    uint8_t s3 = s[3];\n    d[0] = s2;\n    d[1] = s1;\n    d[2] = s0;\n    d[3] = s3;\n    s += 4;\n    d += 4;\n  }\n  return len;\n}\n\n" +
 	"" +
-	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__copy_1_1(uint8_t* dst_ptr,\n                                     size_t dst_len,\n                                     uint8_t* dst_palette_ptr,\n                                     size_t dst_palette_len,\n                                     const uint8_t* src_ptr,\n                                     size_t src_len) {\n  size_t len = (dst_len < src_len) ? dst_len : src_len;\n  if (len > 0) {\n    memmove(dst_ptr, src_ptr, len);\n  }\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__copy_3_3(uint8_t* dst_ptr,\n                                     size_t dst_len,\n                                     uint8_t* dst_palette_ptr,\n                                     size_t dst_palette_len,\n                                     const uint8_t* src_ptr,\n                                     size_t src_len) {\n  size_t dst_len3 = dst_len / 3;\n  size_t src_len3 = src_len / 3;\n  size_t len = (dst_len3 < src_len3) ? dst_len3 : src_len3;\n  if (len > 0) {\n  " +
-	"  memmove(dst_ptr, src_ptr, len * 3);\n  }\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__copy_4_4(uint8_t* dst_ptr,\n                                     size_t dst_len,\n                                     uint8_t* dst_palette_ptr,\n                                     size_t dst_palette_len,\n                                     const uint8_t* src_ptr,\n                                     size_t src_len) {\n  size_t dst_len4 = dst_len / 4;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len4 < src_len4) ? dst_len4 : src_len4;\n  if (len > 0) {\n    memmove(dst_ptr, src_ptr, len * 4);\n  }\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__copy_8_8(uint8_t* dst_ptr,\n                                     size_t dst_len,\n                                     uint8_t* dst_palette_ptr,\n                                     size_t dst_palette_len,\n                                     const uint8_t* src_ptr,\n                                     size_t src_len) {\n  size_t dst_len8 = dst_l" +
-	"en / 8;\n  size_t src_len8 = src_len / 8;\n  size_t len = (dst_len8 < src_len8) ? dst_len8 : src_len8;\n  if (len > 0) {\n    memmove(dst_ptr, src_ptr, len * 8);\n  }\n  return len;\n}\n\n" +
+	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__copy_1_1(uint8_t* dst_ptr,\n                                     size_t dst_len,\n                                     uint8_t* dst_palette_ptr,\n                                     size_t dst_palette_len,\n                                     const uint8_t* src_ptr,\n                                     size_t src_len) {\n  size_t len = (dst_len < src_len) ? dst_len : src_len;\n  if (len > 0) {\n    memmove(dst_ptr, src_ptr, len);\n  }\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__copy_2_2(uint8_t* dst_ptr,\n                                     size_t dst_len,\n                                     uint8_t* dst_palette_ptr,\n                                     size_t dst_palette_len,\n                                     const uint8_t* src_ptr,\n                                     size_t src_len) {\n  size_t dst_len2 = dst_len / 2;\n  size_t src_len2 = src_len / 2;\n  size_t len = (dst_len2 < src_len2) ? dst_len2 : src_len2;\n  if (len > 0) {\n  " +
+	"  memmove(dst_ptr, src_ptr, len * 2);\n  }\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__copy_3_3(uint8_t* dst_ptr,\n                                     size_t dst_len,\n                                     uint8_t* dst_palette_ptr,\n                                     size_t dst_palette_len,\n                                     const uint8_t* src_ptr,\n                                     size_t src_len) {\n  size_t dst_len3 = dst_len / 3;\n  size_t src_len3 = src_len / 3;\n  size_t len = (dst_len3 < src_len3) ? dst_len3 : src_len3;\n  if (len > 0) {\n    memmove(dst_ptr, src_ptr, len * 3);\n  }\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__copy_4_4(uint8_t* dst_ptr,\n                                     size_t dst_len,\n                                     uint8_t* dst_palette_ptr,\n                                     size_t dst_palette_len,\n                                     const uint8_t* src_ptr,\n                                     size_t src_len) {\n  size_t dst_len4 = dst_l" +
+	"en / 4;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len4 < src_len4) ? dst_len4 : src_len4;\n  if (len > 0) {\n    memmove(dst_ptr, src_ptr, len * 4);\n  }\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__copy_8_8(uint8_t* dst_ptr,\n                                     size_t dst_len,\n                                     uint8_t* dst_palette_ptr,\n                                     size_t dst_palette_len,\n                                     const uint8_t* src_ptr,\n                                     size_t src_len) {\n  size_t dst_len8 = dst_len / 8;\n  size_t src_len8 = src_len / 8;\n  size_t len = (dst_len8 < src_len8) ? dst_len8 : src_len8;\n  if (len > 0) {\n    memmove(dst_ptr, src_ptr, len * 8);\n  }\n  return len;\n}\n\n" +
 	"" +
 	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr_565__bgr(uint8_t* dst_ptr,\n                                         size_t dst_len,\n                                         uint8_t* dst_palette_ptr,\n                                         size_t dst_palette_len,\n                                         const uint8_t* src_ptr,\n                                         size_t src_len) {\n  size_t dst_len2 = dst_len / 2;\n  size_t src_len3 = src_len / 3;\n  size_t len = (dst_len2 < src_len3) ? dst_len2 : src_len3;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint32_t b5 = s[0] >> 3;\n    uint32_t g6 = s[1] >> 2;\n    uint32_t r5 = s[2] >> 3;\n    uint32_t rgb_565 = (r5 << 11) | (g6 << 5) | (b5 << 0);\n    wuffs_base__poke_u16le__no_bounds_check(d + (0 * 2), (uint16_t)rgb_565);\n\n    s += 1 * 3;\n    d += 1 * 2;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr_565__bgrx(uint8_t* dst_ptr,\n           " +
 	"                               size_t dst_len,\n                                          uint8_t* dst_palette_ptr,\n                                          size_t dst_palette_len,\n                                          const uint8_t* src_ptr,\n                                          size_t src_len) {\n  size_t dst_len2 = dst_len / 2;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len2 < src_len4) ? dst_len2 : src_len4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint32_t b5 = s[0] >> 3;\n    uint32_t g6 = s[1] >> 2;\n    uint32_t r5 = s[2] >> 3;\n    uint32_t rgb_565 = (r5 << 11) | (g6 << 5) | (b5 << 0);\n    wuffs_base__poke_u16le__no_bounds_check(d + (0 * 2), (uint16_t)rgb_565);\n\n    s += 1 * 4;\n    d += 1 * 2;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul__src(\n    uint8_t* dst_ptr,\n    size_t dst_len,\n    uint8_t* dst_palette_ptr,\n    size_t dst_palette_len,\n    const u" +
@@ -615,18 +615,19 @@
 	"uffs_base__peek_u16le__no_bounds_check(d + (0 * 2)));\n    uint32_t s0 = wuffs_base__peek_u32le__no_bounds_check(dst_palette_ptr +\n                                                          ((size_t)s[0] * 4));\n    wuffs_base__poke_u16le__no_bounds_check(\n        d + (0 * 2),\n        wuffs_base__color_u32_argb_premul__as__color_u16_rgb_565(\n            wuffs_base__composite_premul_nonpremul_u32_axxx(d0, s0)));\n\n    s += 1 * 1;\n    d += 1 * 2;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr_565__index_binary_alpha__src_over(\n    uint8_t* dst_ptr,\n    size_t dst_len,\n    uint8_t* dst_palette_ptr,\n    size_t dst_palette_len,\n    const uint8_t* src_ptr,\n    size_t src_len) {\n  if (dst_palette_len != 1024) {\n    return 0;\n  }\n  size_t dst_len2 = dst_len / 2;\n  size_t len = (dst_len2 < src_len) ? dst_len2 : src_len;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint32_t s0 = wuffs_base__peek_u32le__no_bounds_ch" +
 	"eck(dst_palette_ptr +\n                                                          ((size_t)s[0] * 4));\n    if (s0) {\n      wuffs_base__poke_u16le__no_bounds_check(d + (0 * 2), (uint16_t)s0);\n    }\n\n    s += 1 * 1;\n    d += 1 * 2;\n    n -= 1;\n  }\n\n  return len;\n}\n\n" +
 	"" +
-	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src(uint8_t* dst_ptr,\n                                                     size_t dst_len,\n                                                     uint8_t* dst_palette_ptr,\n                                                     size_t dst_palette_len,\n                                                     const uint8_t* src_ptr,\n                                                     size_t src_len) {\n  size_t dst_len3 = dst_len / 3;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len3 < src_len4) ? dst_len3 : src_len4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint32_t s0 =\n        wuffs_base__color_u32_argb_nonpremul__as__color_u32_argb_premul(\n            wuffs_base__peek_u32le__no_bounds_check(s + (0 * 4)));\n    wuffs_base__poke_u24le__no_bounds_check(d + (0 * 3), s0);\n\n    s += 1 * 4;\n    d += 1 * 3;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwu" +
-	"ffs_base__pixel_swizzler__bgr__bgra_nonpremul_4x16le__src(\n    uint8_t* dst_ptr,\n    size_t dst_len,\n    uint8_t* dst_palette_ptr,\n    size_t dst_palette_len,\n    const uint8_t* src_ptr,\n    size_t src_len) {\n  size_t dst_len3 = dst_len / 3;\n  size_t src_len8 = src_len / 8;\n  size_t len = (dst_len3 < src_len8) ? dst_len3 : src_len8;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint32_t s0 =\n        wuffs_base__color_u64_argb_nonpremul__as__color_u32_argb_premul(\n            wuffs_base__peek_u64le__no_bounds_check(s + (0 * 8)));\n    wuffs_base__poke_u24le__no_bounds_check(d + (0 * 3), s0);\n\n    s += 1 * 8;\n    d += 1 * 3;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src_over(\n    uint8_t* dst_ptr,\n    size_t dst_len,\n    uint8_t* dst_palette_ptr,\n    size_t dst_palette_len,\n    const uint8_t* src_ptr,\n    size_t src_len) {\n  size_t dst_len3 = dst_len / 3;\n  size_t src_len4 = src_len " +
-	"/ 4;\n  size_t len = (dst_len3 < src_len4) ? dst_len3 : src_len4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    // Extract 16-bit color components.\n    uint32_t sa = 0x101 * ((uint32_t)s[3]);\n    uint32_t sr = 0x101 * ((uint32_t)s[2]);\n    uint32_t sg = 0x101 * ((uint32_t)s[1]);\n    uint32_t sb = 0x101 * ((uint32_t)s[0]);\n    uint32_t dr = 0x101 * ((uint32_t)d[2]);\n    uint32_t dg = 0x101 * ((uint32_t)d[1]);\n    uint32_t db = 0x101 * ((uint32_t)d[0]);\n\n    // Calculate the inverse of the src-alpha: how much of the dst to keep.\n    uint32_t ia = 0xFFFF - sa;\n\n    // Composite src (nonpremul) over dst (premul).\n    dr = ((sr * sa) + (dr * ia)) / 0xFFFF;\n    dg = ((sg * sa) + (dg * ia)) / 0xFFFF;\n    db = ((sb * sa) + (db * ia)) / 0xFFFF;\n\n    // Convert from 16-bit color to 8-bit color.\n    d[0] = (uint8_t)(db >> 8);\n    d[1] = (uint8_t)(dg >> 8);\n    d[2] = (uint8_t)(dr >> 8);\n\n    s += 1 * 4;\n    d += 1 * 3;\n    n -= 1;\n  }\n\n  return len;\n}\n" +
-	"\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr__bgra_nonpremul_4x16le__src_over(\n    uint8_t* dst_ptr,\n    size_t dst_len,\n    uint8_t* dst_palette_ptr,\n    size_t dst_palette_len,\n    const uint8_t* src_ptr,\n    size_t src_len) {\n  size_t dst_len3 = dst_len / 3;\n  size_t src_len8 = src_len / 8;\n  size_t len = (dst_len3 < src_len8) ? dst_len3 : src_len8;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    // Extract 16-bit color components.\n    uint32_t sa = ((uint32_t)wuffs_base__peek_u16le__no_bounds_check(s + 6));\n    uint32_t sr = ((uint32_t)wuffs_base__peek_u16le__no_bounds_check(s + 4));\n    uint32_t sg = ((uint32_t)wuffs_base__peek_u16le__no_bounds_check(s + 2));\n    uint32_t sb = ((uint32_t)wuffs_base__peek_u16le__no_bounds_check(s + 0));\n    uint32_t dr = 0x101 * ((uint32_t)d[2]);\n    uint32_t dg = 0x101 * ((uint32_t)d[1]);\n    uint32_t db = 0x101 * ((uint32_t)d[0]);\n\n    // Calculate the inverse of the src-alpha: how much of the d" +
-	"st to keep.\n    uint32_t ia = 0xFFFF - sa;\n\n    // Composite src (nonpremul) over dst (premul).\n    dr = ((sr * sa) + (dr * ia)) / 0xFFFF;\n    dg = ((sg * sa) + (dg * ia)) / 0xFFFF;\n    db = ((sb * sa) + (db * ia)) / 0xFFFF;\n\n    // Convert from 16-bit color to 8-bit color.\n    d[0] = (uint8_t)(db >> 8);\n    d[1] = (uint8_t)(dg >> 8);\n    d[2] = (uint8_t)(dr >> 8);\n\n    s += 1 * 8;\n    d += 1 * 3;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr__bgra_premul__src(uint8_t* dst_ptr,\n                                                  size_t dst_len,\n                                                  uint8_t* dst_palette_ptr,\n                                                  size_t dst_palette_len,\n                                                  const uint8_t* src_ptr,\n                                                  size_t src_len) {\n  size_t dst_len3 = dst_len / 3;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len3 < src_len4) ? dst_len3 : src_len4;\n  uint8_t* d = ds" +
-	"t_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    uint8_t s0 = s[0];\n    uint8_t s1 = s[1];\n    uint8_t s2 = s[2];\n    d[0] = s0;\n    d[1] = s1;\n    d[2] = s2;\n    d[3] = 0xFF;\n\n    s += 1 * 4;\n    d += 1 * 3;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr__bgra_premul__src_over(uint8_t* dst_ptr,\n                                                       size_t dst_len,\n                                                       uint8_t* dst_palette_ptr,\n                                                       size_t dst_palette_len,\n                                                       const uint8_t* src_ptr,\n                                                       size_t src_len) {\n  size_t dst_len3 = dst_len / 3;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len3 < src_len4) ? dst_len3 : src_len4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    // Extract 16-bit color components.\n    uint32_t sa =" +
-	" 0x101 * ((uint32_t)s[3]);\n    uint32_t sr = 0x101 * ((uint32_t)s[2]);\n    uint32_t sg = 0x101 * ((uint32_t)s[1]);\n    uint32_t sb = 0x101 * ((uint32_t)s[0]);\n    uint32_t dr = 0x101 * ((uint32_t)d[2]);\n    uint32_t dg = 0x101 * ((uint32_t)d[1]);\n    uint32_t db = 0x101 * ((uint32_t)d[0]);\n\n    // Calculate the inverse of the src-alpha: how much of the dst to keep.\n    uint32_t ia = 0xFFFF - sa;\n\n    // Composite src (premul) over dst (premul).\n    dr = sr + ((dr * ia) / 0xFFFF);\n    dg = sg + ((dg * ia) / 0xFFFF);\n    db = sb + ((db * ia) / 0xFFFF);\n\n    // Convert from 16-bit color to 8-bit color.\n    d[0] = (uint8_t)(db >> 8);\n    d[1] = (uint8_t)(dg >> 8);\n    d[2] = (uint8_t)(dr >> 8);\n\n    s += 1 * 4;\n    d += 1 * 3;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr__rgba_nonpremul__src(uint8_t* dst_ptr,\n                                                     size_t dst_len,\n                                                     uint8_t* dst_palette_ptr,\n                  " +
-	"                                   size_t dst_palette_len,\n                                                     const uint8_t* src_ptr,\n                                                     size_t src_len) {\n  size_t dst_len3 = dst_len / 3;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len3 < src_len4) ? dst_len3 : src_len4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint32_t s0 = wuffs_base__swap_u32_argb_abgr(\n        wuffs_base__color_u32_argb_nonpremul__as__color_u32_argb_premul(\n            wuffs_base__peek_u32le__no_bounds_check(s + (0 * 4))));\n    wuffs_base__poke_u24le__no_bounds_check(d + (0 * 3), s0);\n\n    s += 1 * 4;\n    d += 1 * 3;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr__rgba_nonpremul__src_over(\n    uint8_t* dst_ptr,\n    size_t dst_len,\n    uint8_t* dst_palette_ptr,\n    size_t dst_palette_len,\n    const uint8_t* src_ptr,\n    size_t src_len) {\n  size_t dst_len3 = dst_len / " +
-	"3;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len3 < src_len4) ? dst_len3 : src_len4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    // Extract 16-bit color components.\n    uint32_t sa = 0x101 * ((uint32_t)s[3]);\n    uint32_t sb = 0x101 * ((uint32_t)s[2]);\n    uint32_t sg = 0x101 * ((uint32_t)s[1]);\n    uint32_t sr = 0x101 * ((uint32_t)s[0]);\n    uint32_t dr = 0x101 * ((uint32_t)d[2]);\n    uint32_t dg = 0x101 * ((uint32_t)d[1]);\n    uint32_t db = 0x101 * ((uint32_t)d[0]);\n\n    // Calculate the inverse of the src-alpha: how much of the dst to keep.\n    uint32_t ia = 0xFFFF - sa;\n\n    // Composite src (nonpremul) over dst (premul).\n    dr = ((sr * sa) + (dr * ia)) / 0xFFFF;\n    dg = ((sg * sa) + (dg * ia)) / 0xFFFF;\n    db = ((sb * sa) + (db * ia)) / 0xFFFF;\n\n    // Convert from 16-bit color to 8-bit color.\n    d[0] = (uint8_t)(db >> 8);\n    d[1] = (uint8_t)(dg >> 8);\n    d[2] = (uint8_t)(dr >> 8);\n\n    s += 1 * 4;\n    d += 1 * 3;\n  " +
-	"  n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr__rgba_premul__src(uint8_t* dst_ptr,\n                                                  size_t dst_len,\n                                                  uint8_t* dst_palette_ptr,\n                                                  size_t dst_palette_len,\n                                                  const uint8_t* src_ptr,\n                                                  size_t src_len) {\n  size_t dst_len3 = dst_len / 3;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len3 < src_len4) ? dst_len3 : src_len4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    uint8_t s0 = s[0];\n    uint8_t s1 = s[1];\n    uint8_t s2 = s[2];\n    d[0] = s2;\n    d[1] = s1;\n    d[2] = s0;\n    d[3] = 0xFF;\n\n    s += 1 * 4;\n    d += 1 * 3;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr__rgba_premul__src_over(uint8_t* dst_ptr,\n                                     " +
-	"                  size_t dst_len,\n                                                       uint8_t* dst_palette_ptr,\n                                                       size_t dst_palette_len,\n                                                       const uint8_t* src_ptr,\n                                                       size_t src_len) {\n  size_t dst_len3 = dst_len / 3;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len3 < src_len4) ? dst_len3 : src_len4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    // Extract 16-bit color components.\n    uint32_t sa = 0x101 * ((uint32_t)s[3]);\n    uint32_t sb = 0x101 * ((uint32_t)s[2]);\n    uint32_t sg = 0x101 * ((uint32_t)s[1]);\n    uint32_t sr = 0x101 * ((uint32_t)s[0]);\n    uint32_t dr = 0x101 * ((uint32_t)d[2]);\n    uint32_t dg = 0x101 * ((uint32_t)d[1]);\n    uint32_t db = 0x101 * ((uint32_t)d[0]);\n\n    // Calculate the inverse of the src-alpha: how much of the dst to keep.\n    uint32_t ia = 0xFFFF - sa;\n\n   " +
-	" // Composite src (premul) over dst (premul).\n    dr = sr + ((dr * ia) / 0xFFFF);\n    dg = sg + ((dg * ia) / 0xFFFF);\n    db = sb + ((db * ia) / 0xFFFF);\n\n    // Convert from 16-bit color to 8-bit color.\n    d[0] = (uint8_t)(db >> 8);\n    d[1] = (uint8_t)(dg >> 8);\n    d[2] = (uint8_t)(dr >> 8);\n\n    s += 1 * 4;\n    d += 1 * 3;\n    n -= 1;\n  }\n\n  return len;\n}\n\n" +
+	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr__bgr_565(uint8_t* dst_ptr,\n                                         size_t dst_len,\n                                         uint8_t* dst_palette_ptr,\n                                         size_t dst_palette_len,\n                                         const uint8_t* src_ptr,\n                                         size_t src_len) {\n  size_t dst_len3 = dst_len / 3;\n  size_t src_len2 = src_len / 2;\n  size_t len = (dst_len3 < src_len2) ? dst_len3 : src_len2;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint32_t s0 = wuffs_base__color_u16_rgb_565__as__color_u32_argb_premul(\n        wuffs_base__peek_u16le__no_bounds_check(s + (0 * 2)));\n    wuffs_base__poke_u24le__no_bounds_check(d + (0 * 3), s0);\n\n    s += 1 * 2;\n    d += 1 * 3;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src(uint8_t* dst_ptr,\n                      " +
+	"                               size_t dst_len,\n                                                     uint8_t* dst_palette_ptr,\n                                                     size_t dst_palette_len,\n                                                     const uint8_t* src_ptr,\n                                                     size_t src_len) {\n  size_t dst_len3 = dst_len / 3;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len3 < src_len4) ? dst_len3 : src_len4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint32_t s0 =\n        wuffs_base__color_u32_argb_nonpremul__as__color_u32_argb_premul(\n            wuffs_base__peek_u32le__no_bounds_check(s + (0 * 4)));\n    wuffs_base__poke_u24le__no_bounds_check(d + (0 * 3), s0);\n\n    s += 1 * 4;\n    d += 1 * 3;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr__bgra_nonpremul_4x16le__src(\n    uint8_t* dst_ptr,\n    size_t dst_len,\n    uint8_t* dst_palette_" +
+	"ptr,\n    size_t dst_palette_len,\n    const uint8_t* src_ptr,\n    size_t src_len) {\n  size_t dst_len3 = dst_len / 3;\n  size_t src_len8 = src_len / 8;\n  size_t len = (dst_len3 < src_len8) ? dst_len3 : src_len8;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint32_t s0 =\n        wuffs_base__color_u64_argb_nonpremul__as__color_u32_argb_premul(\n            wuffs_base__peek_u64le__no_bounds_check(s + (0 * 8)));\n    wuffs_base__poke_u24le__no_bounds_check(d + (0 * 3), s0);\n\n    s += 1 * 8;\n    d += 1 * 3;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src_over(\n    uint8_t* dst_ptr,\n    size_t dst_len,\n    uint8_t* dst_palette_ptr,\n    size_t dst_palette_len,\n    const uint8_t* src_ptr,\n    size_t src_len) {\n  size_t dst_len3 = dst_len / 3;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len3 < src_len4) ? dst_len3 : src_len4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_" +
+	"t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    // Extract 16-bit color components.\n    uint32_t sa = 0x101 * ((uint32_t)s[3]);\n    uint32_t sr = 0x101 * ((uint32_t)s[2]);\n    uint32_t sg = 0x101 * ((uint32_t)s[1]);\n    uint32_t sb = 0x101 * ((uint32_t)s[0]);\n    uint32_t dr = 0x101 * ((uint32_t)d[2]);\n    uint32_t dg = 0x101 * ((uint32_t)d[1]);\n    uint32_t db = 0x101 * ((uint32_t)d[0]);\n\n    // Calculate the inverse of the src-alpha: how much of the dst to keep.\n    uint32_t ia = 0xFFFF - sa;\n\n    // Composite src (nonpremul) over dst (premul).\n    dr = ((sr * sa) + (dr * ia)) / 0xFFFF;\n    dg = ((sg * sa) + (dg * ia)) / 0xFFFF;\n    db = ((sb * sa) + (db * ia)) / 0xFFFF;\n\n    // Convert from 16-bit color to 8-bit color.\n    d[0] = (uint8_t)(db >> 8);\n    d[1] = (uint8_t)(dg >> 8);\n    d[2] = (uint8_t)(dr >> 8);\n\n    s += 1 * 4;\n    d += 1 * 3;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr__bgra_nonpremul_4x16le__src_over(\n    uint8_t* dst_ptr,\n    size_t dst_le" +
+	"n,\n    uint8_t* dst_palette_ptr,\n    size_t dst_palette_len,\n    const uint8_t* src_ptr,\n    size_t src_len) {\n  size_t dst_len3 = dst_len / 3;\n  size_t src_len8 = src_len / 8;\n  size_t len = (dst_len3 < src_len8) ? dst_len3 : src_len8;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    // Extract 16-bit color components.\n    uint32_t sa = ((uint32_t)wuffs_base__peek_u16le__no_bounds_check(s + 6));\n    uint32_t sr = ((uint32_t)wuffs_base__peek_u16le__no_bounds_check(s + 4));\n    uint32_t sg = ((uint32_t)wuffs_base__peek_u16le__no_bounds_check(s + 2));\n    uint32_t sb = ((uint32_t)wuffs_base__peek_u16le__no_bounds_check(s + 0));\n    uint32_t dr = 0x101 * ((uint32_t)d[2]);\n    uint32_t dg = 0x101 * ((uint32_t)d[1]);\n    uint32_t db = 0x101 * ((uint32_t)d[0]);\n\n    // Calculate the inverse of the src-alpha: how much of the dst to keep.\n    uint32_t ia = 0xFFFF - sa;\n\n    // Composite src (nonpremul) over dst (premul).\n    dr = ((sr * sa) + (dr * ia" +
+	")) / 0xFFFF;\n    dg = ((sg * sa) + (dg * ia)) / 0xFFFF;\n    db = ((sb * sa) + (db * ia)) / 0xFFFF;\n\n    // Convert from 16-bit color to 8-bit color.\n    d[0] = (uint8_t)(db >> 8);\n    d[1] = (uint8_t)(dg >> 8);\n    d[2] = (uint8_t)(dr >> 8);\n\n    s += 1 * 8;\n    d += 1 * 3;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr__bgra_premul__src(uint8_t* dst_ptr,\n                                                  size_t dst_len,\n                                                  uint8_t* dst_palette_ptr,\n                                                  size_t dst_palette_len,\n                                                  const uint8_t* src_ptr,\n                                                  size_t src_len) {\n  size_t dst_len3 = dst_len / 3;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len3 < src_len4) ? dst_len3 : src_len4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    uint8_t s0 = s[0];\n    uint8_t s1 = s[1];\n    u" +
+	"int8_t s2 = s[2];\n    d[0] = s0;\n    d[1] = s1;\n    d[2] = s2;\n\n    s += 1 * 4;\n    d += 1 * 3;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr__bgra_premul__src_over(uint8_t* dst_ptr,\n                                                       size_t dst_len,\n                                                       uint8_t* dst_palette_ptr,\n                                                       size_t dst_palette_len,\n                                                       const uint8_t* src_ptr,\n                                                       size_t src_len) {\n  size_t dst_len3 = dst_len / 3;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len3 < src_len4) ? dst_len3 : src_len4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    // Extract 16-bit color components.\n    uint32_t sa = 0x101 * ((uint32_t)s[3]);\n    uint32_t sr = 0x101 * ((uint32_t)s[2]);\n    uint32_t sg = 0x101 * ((uint32_t)s[1]);\n    uint32_" +
+	"t sb = 0x101 * ((uint32_t)s[0]);\n    uint32_t dr = 0x101 * ((uint32_t)d[2]);\n    uint32_t dg = 0x101 * ((uint32_t)d[1]);\n    uint32_t db = 0x101 * ((uint32_t)d[0]);\n\n    // Calculate the inverse of the src-alpha: how much of the dst to keep.\n    uint32_t ia = 0xFFFF - sa;\n\n    // Composite src (premul) over dst (premul).\n    dr = sr + ((dr * ia) / 0xFFFF);\n    dg = sg + ((dg * ia) / 0xFFFF);\n    db = sb + ((db * ia) / 0xFFFF);\n\n    // Convert from 16-bit color to 8-bit color.\n    d[0] = (uint8_t)(db >> 8);\n    d[1] = (uint8_t)(dg >> 8);\n    d[2] = (uint8_t)(dr >> 8);\n\n    s += 1 * 4;\n    d += 1 * 3;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr__rgba_nonpremul__src(uint8_t* dst_ptr,\n                                                     size_t dst_len,\n                                                     uint8_t* dst_palette_ptr,\n                                                     size_t dst_palette_len,\n                                                     const uint8_t*" +
+	" src_ptr,\n                                                     size_t src_len) {\n  size_t dst_len3 = dst_len / 3;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len3 < src_len4) ? dst_len3 : src_len4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint32_t s0 = wuffs_base__swap_u32_argb_abgr(\n        wuffs_base__color_u32_argb_nonpremul__as__color_u32_argb_premul(\n            wuffs_base__peek_u32le__no_bounds_check(s + (0 * 4))));\n    wuffs_base__poke_u24le__no_bounds_check(d + (0 * 3), s0);\n\n    s += 1 * 4;\n    d += 1 * 3;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr__rgba_nonpremul__src_over(\n    uint8_t* dst_ptr,\n    size_t dst_len,\n    uint8_t* dst_palette_ptr,\n    size_t dst_palette_len,\n    const uint8_t* src_ptr,\n    size_t src_len) {\n  size_t dst_len3 = dst_len / 3;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len3 < src_len4) ? dst_len3 : src_len4;\n  uint8_t* d = dst_ptr;\n  cons" +
+	"t uint8_t* s = src_ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    // Extract 16-bit color components.\n    uint32_t sa = 0x101 * ((uint32_t)s[3]);\n    uint32_t sb = 0x101 * ((uint32_t)s[2]);\n    uint32_t sg = 0x101 * ((uint32_t)s[1]);\n    uint32_t sr = 0x101 * ((uint32_t)s[0]);\n    uint32_t dr = 0x101 * ((uint32_t)d[2]);\n    uint32_t dg = 0x101 * ((uint32_t)d[1]);\n    uint32_t db = 0x101 * ((uint32_t)d[0]);\n\n    // Calculate the inverse of the src-alpha: how much of the dst to keep.\n    uint32_t ia = 0xFFFF - sa;\n\n    // Composite src (nonpremul) over dst (premul).\n    dr = ((sr * sa) + (dr * ia)) / 0xFFFF;\n    dg = ((sg * sa) + (dg * ia)) / 0xFFFF;\n    db = ((sb * sa) + (db * ia)) / 0xFFFF;\n\n    // Convert from 16-bit color to 8-bit color.\n    d[0] = (uint8_t)(db >> 8);\n    d[1] = (uint8_t)(dg >> 8);\n    d[2] = (uint8_t)(dr >> 8);\n\n    s += 1 * 4;\n    d += 1 * 3;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr__rgba_premul__src(uint8_t* dst_ptr,\n      " +
+	"                                            size_t dst_len,\n                                                  uint8_t* dst_palette_ptr,\n                                                  size_t dst_palette_len,\n                                                  const uint8_t* src_ptr,\n                                                  size_t src_len) {\n  size_t dst_len3 = dst_len / 3;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len3 < src_len4) ? dst_len3 : src_len4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    uint8_t s0 = s[0];\n    uint8_t s1 = s[1];\n    uint8_t s2 = s[2];\n    d[0] = s2;\n    d[1] = s1;\n    d[2] = s0;\n\n    s += 1 * 4;\n    d += 1 * 3;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr__rgba_premul__src_over(uint8_t* dst_ptr,\n                                                       size_t dst_len,\n                                                       uint8_t* dst_palette_ptr,\n           " +
+	"                                            size_t dst_palette_len,\n                                                       const uint8_t* src_ptr,\n                                                       size_t src_len) {\n  size_t dst_len3 = dst_len / 3;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len3 < src_len4) ? dst_len3 : src_len4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    // Extract 16-bit color components.\n    uint32_t sa = 0x101 * ((uint32_t)s[3]);\n    uint32_t sb = 0x101 * ((uint32_t)s[2]);\n    uint32_t sg = 0x101 * ((uint32_t)s[1]);\n    uint32_t sr = 0x101 * ((uint32_t)s[0]);\n    uint32_t dr = 0x101 * ((uint32_t)d[2]);\n    uint32_t dg = 0x101 * ((uint32_t)d[1]);\n    uint32_t db = 0x101 * ((uint32_t)d[0]);\n\n    // Calculate the inverse of the src-alpha: how much of the dst to keep.\n    uint32_t ia = 0xFFFF - sa;\n\n    // Composite src (premul) over dst (premul).\n    dr = sr + ((dr * ia) / 0xFFFF);\n    dg = sg + ((dg * ia) / 0xFFFF);\n    db =" +
+	" sb + ((db * ia) / 0xFFFF);\n\n    // Convert from 16-bit color to 8-bit color.\n    d[0] = (uint8_t)(db >> 8);\n    d[1] = (uint8_t)(dg >> 8);\n    d[2] = (uint8_t)(dr >> 8);\n\n    s += 1 * 4;\n    d += 1 * 3;\n    n -= 1;\n  }\n\n  return len;\n}\n\n" +
 	"" +
 	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgra_nonpremul__bgra_nonpremul__src_over(\n    uint8_t* dst_ptr,\n    size_t dst_len,\n    uint8_t* dst_palette_ptr,\n    size_t dst_palette_len,\n    const uint8_t* src_ptr,\n    size_t src_len) {\n  size_t dst_len4 = dst_len / 4;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len4 < src_len4) ? dst_len4 : src_len4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    uint32_t d0 = wuffs_base__peek_u32le__no_bounds_check(d + (0 * 4));\n    uint32_t s0 = wuffs_base__peek_u32le__no_bounds_check(s + (0 * 4));\n    wuffs_base__poke_u32le__no_bounds_check(\n        d + (0 * 4),\n        wuffs_base__composite_nonpremul_nonpremul_u32_axxx(d0, s0));\n\n    s += 1 * 4;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgra_nonpremul__bgra_nonpremul_4x16le__src(\n    uint8_t* dst_ptr,\n    size_t dst_len,\n    uint8_t* dst_palette_ptr,\n    size_t dst_palette_len,\n    const uint8" +
 	"_t* src_ptr,\n    size_t src_len) {\n  size_t dst_len4 = dst_len / 4;\n  size_t src_len8 = src_len / 8;\n  size_t len = (dst_len4 < src_len8) ? dst_len4 : src_len8;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n\n  size_t n = len;\n  while (n >= 1) {\n    wuffs_base__poke_u32le__no_bounds_check(\n        d + (0 * 4), wuffs_base__color_u64__as__color_u32(\n                         wuffs_base__peek_u64le__no_bounds_check(s + (0 * 8))));\n\n    s += 1 * 8;\n    d += 1 * 4;\n    n -= 1;\n  }\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgra_nonpremul__bgra_nonpremul_4x16le__src_over(\n    uint8_t* dst_ptr,\n    size_t dst_len,\n    uint8_t* dst_palette_ptr,\n    size_t dst_palette_len,\n    const uint8_t* src_ptr,\n    size_t src_len) {\n  size_t dst_len4 = dst_len / 4;\n  size_t src_len8 = src_len / 8;\n  size_t len = (dst_len4 < src_len8) ? dst_len4 : src_len8;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    uint64_t d0 = wuffs_base__color_u32__as__color_u64(" +
@@ -658,19 +659,23 @@
 	"unds_check(\n        d + (0 * 4),\n        wuffs_base__color_u64__as__color_u32(\n            wuffs_base__composite_premul_nonpremul_u64_axxx(d0, s0)));\n\n    s += 1 * 8;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgra_premul__rgba_premul__src_over(\n    uint8_t* dst_ptr,\n    size_t dst_len,\n    uint8_t* dst_palette_ptr,\n    size_t dst_palette_len,\n    const uint8_t* src_ptr,\n    size_t src_len) {\n  size_t dst_len4 = dst_len / 4;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len4 < src_len4) ? dst_len4 : src_len4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    uint32_t d0 = wuffs_base__peek_u32le__no_bounds_check(d + (0 * 4));\n    uint32_t s0 = wuffs_base__swap_u32_argb_abgr(\n        wuffs_base__peek_u32le__no_bounds_check(s + (0 * 4)));\n    wuffs_base__poke_u32le__no_bounds_check(\n        d + (0 * 4), wuffs_base__composite_premul_premul_u32_axxx(d0, s0));\n\n    s += 1 * 4;\n    d += 1 * 4;\n    n -= 1;\n  }\n" +
 	"\n  return len;\n}\n\n" +
 	"" +
-	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgrw__bgr(uint8_t* dst_ptr,\n                                      size_t dst_len,\n                                      uint8_t* dst_palette_ptr,\n                                      size_t dst_palette_len,\n                                      const uint8_t* src_ptr,\n                                      size_t src_len) {\n  size_t dst_len4 = dst_len / 4;\n  size_t src_len3 = src_len / 3;\n  size_t len = (dst_len4 < src_len3) ? dst_len4 : src_len3;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    wuffs_base__poke_u32le__no_bounds_check(\n        d + (0 * 4),\n        0xFF000000 | wuffs_base__peek_u24le__no_bounds_check(s + (0 * 3)));\n\n    s += 1 * 3;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgrw__bgrx(uint8_t* dst_ptr,\n                                       size_t dst_len,\n                                       uint8_t* dst_palett" +
-	"e_ptr,\n                                       size_t dst_palette_len,\n                                       const uint8_t* src_ptr,\n                                       size_t src_len) {\n  size_t dst_len4 = dst_len / 4;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len4 < src_len4) ? dst_len4 : src_len4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    wuffs_base__poke_u32le__no_bounds_check(\n        d + (0 * 4),\n        0xFF000000 | wuffs_base__peek_u32le__no_bounds_check(s + (0 * 4)));\n\n    s += 1 * 4;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\n#if defined(WUFFS_BASE__CPU_ARCH__X86_64)\nWUFFS_BASE__MAYBE_ATTRIBUTE_TARGET(\"sse4.2\")\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgrw__rgb__sse42(uint8_t* dst_ptr,\n                                             size_t dst_len,\n                                             uint8_t* dst_palette_ptr,\n                                             size_t dst_palette_len,\n                " +
-	"                             const uint8_t* src_ptr,\n                                             size_t src_len) {\n  size_t dst_len4 = dst_len / 4;\n  size_t src_len3 = src_len / 3;\n  size_t len = (dst_len4 < src_len3) ? dst_len4 : src_len3;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  __m128i shuffle = _mm_set_epi8(+0x00, +0x09, +0x0A, +0x0B,  //\n                                 +0x00, +0x06, +0x07, +0x08,  //\n                                 +0x00, +0x03, +0x04, +0x05,  //\n                                 +0x00, +0x00, +0x01, +0x02);\n  __m128i or_ff = _mm_set_epi8(-0x01, +0x00, +0x00, +0x00,  //\n                               -0x01, +0x00, +0x00, +0x00,  //\n                               -0x01, +0x00, +0x00, +0x00,  //\n                               -0x01, +0x00, +0x00, +0x00);\n\n  while (n >= 6) {\n    __m128i x;\n    x = _mm_lddqu_si128((const __m128i*)(const void*)s);\n    x = _mm_shuffle_epi8(x, shuffle);\n    x = _mm_or_si128(x, or_ff);\n    _mm_storeu_si128((__m128i*)(void*)d, " +
-	"x);\n\n    s += 4 * 3;\n    d += 4 * 4;\n    n -= 4;\n  }\n\n  while (n >= 1) {\n    uint8_t b0 = s[0];\n    uint8_t b1 = s[1];\n    uint8_t b2 = s[2];\n    d[0] = b2;\n    d[1] = b1;\n    d[2] = b0;\n    d[3] = 0xFF;\n\n    s += 1 * 3;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n#endif  // defined(WUFFS_BASE__CPU_ARCH__X86_64)\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgrw__rgb(uint8_t* dst_ptr,\n                                      size_t dst_len,\n                                      uint8_t* dst_palette_ptr,\n                                      size_t dst_palette_len,\n                                      const uint8_t* src_ptr,\n                                      size_t src_len) {\n  size_t dst_len4 = dst_len / 4;\n  size_t src_len3 = src_len / 3;\n  size_t len = (dst_len4 < src_len3) ? dst_len4 : src_len3;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    uint8_t b0 = s[0];\n    uint8_t b1 = s[1];\n    uint8_t b2 = s[2];\n    d[0] = b2;\n    d[1] = b1;\n    d[2] = b0" +
-	";\n    d[3] = 0xFF;\n\n    s += 1 * 3;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgrw__rgbx(uint8_t* dst_ptr,\n                                       size_t dst_len,\n                                       uint8_t* dst_palette_ptr,\n                                       size_t dst_palette_len,\n                                       const uint8_t* src_ptr,\n                                       size_t src_len) {\n  size_t dst_len4 = dst_len / 4;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len4 < src_len4) ? dst_len4 : src_len4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint8_t b0 = s[0];\n    uint8_t b1 = s[1];\n    uint8_t b2 = s[2];\n    d[0] = b2;\n    d[1] = b1;\n    d[2] = b0;\n    d[3] = 0xFF;\n\n    s += 1 * 4;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\n" +
+	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgrw__bgr(uint8_t* dst_ptr,\n                                      size_t dst_len,\n                                      uint8_t* dst_palette_ptr,\n                                      size_t dst_palette_len,\n                                      const uint8_t* src_ptr,\n                                      size_t src_len) {\n  size_t dst_len4 = dst_len / 4;\n  size_t src_len3 = src_len / 3;\n  size_t len = (dst_len4 < src_len3) ? dst_len4 : src_len3;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    wuffs_base__poke_u32le__no_bounds_check(\n        d + (0 * 4),\n        0xFF000000 | wuffs_base__peek_u24le__no_bounds_check(s + (0 * 3)));\n\n    s += 1 * 3;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgrw__bgr_565(uint8_t* dst_ptr,\n                                          size_t dst_len,\n                                          uint8_t* d" +
+	"st_palette_ptr,\n                                          size_t dst_palette_len,\n                                          const uint8_t* src_ptr,\n                                          size_t src_len) {\n  size_t dst_len4 = dst_len / 4;\n  size_t src_len2 = src_len / 2;\n  size_t len = (dst_len4 < src_len2) ? dst_len4 : src_len2;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    wuffs_base__poke_u32le__no_bounds_check(\n        d + (0 * 4), wuffs_base__color_u16_rgb_565__as__color_u32_argb_premul(\n                         wuffs_base__peek_u16le__no_bounds_check(s + (0 * 2))));\n\n    s += 1 * 2;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgrw__bgrx(uint8_t* dst_ptr,\n                                       size_t dst_len,\n                                       uint8_t* dst_palette_ptr,\n                                       size_t dst_palette_len,\n                                       const u" +
+	"int8_t* src_ptr,\n                                       size_t src_len) {\n  size_t dst_len4 = dst_len / 4;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len4 < src_len4) ? dst_len4 : src_len4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    wuffs_base__poke_u32le__no_bounds_check(\n        d + (0 * 4),\n        0xFF000000 | wuffs_base__peek_u32le__no_bounds_check(s + (0 * 4)));\n\n    s += 1 * 4;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\n#if defined(WUFFS_BASE__CPU_ARCH__X86_64)\nWUFFS_BASE__MAYBE_ATTRIBUTE_TARGET(\"sse4.2\")\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgrw__rgb__sse42(uint8_t* dst_ptr,\n                                             size_t dst_len,\n                                             uint8_t* dst_palette_ptr,\n                                             size_t dst_palette_len,\n                                             const uint8_t* src_ptr,\n                                             size_t src_len) {\n" +
+	"  size_t dst_len4 = dst_len / 4;\n  size_t src_len3 = src_len / 3;\n  size_t len = (dst_len4 < src_len3) ? dst_len4 : src_len3;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  __m128i shuffle = _mm_set_epi8(+0x00, +0x09, +0x0A, +0x0B,  //\n                                 +0x00, +0x06, +0x07, +0x08,  //\n                                 +0x00, +0x03, +0x04, +0x05,  //\n                                 +0x00, +0x00, +0x01, +0x02);\n  __m128i or_ff = _mm_set_epi8(-0x01, +0x00, +0x00, +0x00,  //\n                               -0x01, +0x00, +0x00, +0x00,  //\n                               -0x01, +0x00, +0x00, +0x00,  //\n                               -0x01, +0x00, +0x00, +0x00);\n\n  while (n >= 6) {\n    __m128i x;\n    x = _mm_lddqu_si128((const __m128i*)(const void*)s);\n    x = _mm_shuffle_epi8(x, shuffle);\n    x = _mm_or_si128(x, or_ff);\n    _mm_storeu_si128((__m128i*)(void*)d, x);\n\n    s += 4 * 3;\n    d += 4 * 4;\n    n -= 4;\n  }\n\n  while (n >= 1) {\n    uint8_t b0 = s[0];\n    uint8_t b1 = s[1" +
+	"];\n    uint8_t b2 = s[2];\n    d[0] = b2;\n    d[1] = b1;\n    d[2] = b0;\n    d[3] = 0xFF;\n\n    s += 1 * 3;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n#endif  // defined(WUFFS_BASE__CPU_ARCH__X86_64)\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgrw__rgb(uint8_t* dst_ptr,\n                                      size_t dst_len,\n                                      uint8_t* dst_palette_ptr,\n                                      size_t dst_palette_len,\n                                      const uint8_t* src_ptr,\n                                      size_t src_len) {\n  size_t dst_len4 = dst_len / 4;\n  size_t src_len3 = src_len / 3;\n  size_t len = (dst_len4 < src_len3) ? dst_len4 : src_len3;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    uint8_t b0 = s[0];\n    uint8_t b1 = s[1];\n    uint8_t b2 = s[2];\n    d[0] = b2;\n    d[1] = b1;\n    d[2] = b0;\n    d[3] = 0xFF;\n\n    s += 1 * 3;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base" +
+	"__pixel_swizzler__bgrw__rgbx(uint8_t* dst_ptr,\n                                       size_t dst_len,\n                                       uint8_t* dst_palette_ptr,\n                                       size_t dst_palette_len,\n                                       const uint8_t* src_ptr,\n                                       size_t src_len) {\n  size_t dst_len4 = dst_len / 4;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len4 < src_len4) ? dst_len4 : src_len4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint8_t b0 = s[0];\n    uint8_t b1 = s[1];\n    uint8_t b2 = s[2];\n    d[0] = b2;\n    d[1] = b1;\n    d[2] = b0;\n    d[3] = 0xFF;\n\n    s += 1 * 4;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\n" +
 	"" +
-	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgrw_4x16le__bgr(uint8_t* dst_ptr,\n                                             size_t dst_len,\n                                             uint8_t* dst_palette_ptr,\n                                             size_t dst_palette_len,\n                                             const uint8_t* src_ptr,\n                                             size_t src_len) {\n  size_t dst_len8 = dst_len / 8;\n  size_t src_len3 = src_len / 3;\n  size_t len = (dst_len8 < src_len3) ? dst_len8 : src_len3;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    uint8_t s0 = s[0];\n    uint8_t s1 = s[1];\n    uint8_t s2 = s[2];\n    d[0] = s0;\n    d[1] = s0;\n    d[2] = s1;\n    d[3] = s1;\n    d[4] = s2;\n    d[5] = s2;\n    d[6] = 0xFF;\n    d[7] = 0xFF;\n\n    s += 1 * 3;\n    d += 1 * 8;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgrw_4x16le__bgrx(uint8_t* dst_ptr,\n                                 " +
-	"             size_t dst_len,\n                                              uint8_t* dst_palette_ptr,\n                                              size_t dst_palette_len,\n                                              const uint8_t* src_ptr,\n                                              size_t src_len) {\n  size_t dst_len8 = dst_len / 8;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len8 < src_len4) ? dst_len8 : src_len4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    uint8_t s0 = s[0];\n    uint8_t s1 = s[1];\n    uint8_t s2 = s[2];\n    d[0] = s0;\n    d[1] = s0;\n    d[2] = s1;\n    d[3] = s1;\n    d[4] = s2;\n    d[5] = s2;\n    d[6] = 0xFF;\n    d[7] = 0xFF;\n\n    s += 1 * 4;\n    d += 1 * 8;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgrw_4x16le__rgb(uint8_t* dst_ptr,\n                                             size_t dst_len,\n                                             uint8_t* dst_palette_ptr,\n                          " +
-	"                   size_t dst_palette_len,\n                                             const uint8_t* src_ptr,\n                                             size_t src_len) {\n  size_t dst_len8 = dst_len / 8;\n  size_t src_len3 = src_len / 3;\n  size_t len = (dst_len8 < src_len3) ? dst_len8 : src_len3;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    uint8_t s0 = s[0];\n    uint8_t s1 = s[1];\n    uint8_t s2 = s[2];\n    d[0] = s2;\n    d[1] = s2;\n    d[2] = s1;\n    d[3] = s1;\n    d[4] = s0;\n    d[5] = s0;\n    d[6] = 0xFF;\n    d[7] = 0xFF;\n\n    s += 1 * 3;\n    d += 1 * 8;\n    n -= 1;\n  }\n\n  return len;\n}\n\n" +
+	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgrw_4x16le__bgr(uint8_t* dst_ptr,\n                                             size_t dst_len,\n                                             uint8_t* dst_palette_ptr,\n                                             size_t dst_palette_len,\n                                             const uint8_t* src_ptr,\n                                             size_t src_len) {\n  size_t dst_len8 = dst_len / 8;\n  size_t src_len3 = src_len / 3;\n  size_t len = (dst_len8 < src_len3) ? dst_len8 : src_len3;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    uint8_t s0 = s[0];\n    uint8_t s1 = s[1];\n    uint8_t s2 = s[2];\n    d[0] = s0;\n    d[1] = s0;\n    d[2] = s1;\n    d[3] = s1;\n    d[4] = s2;\n    d[5] = s2;\n    d[6] = 0xFF;\n    d[7] = 0xFF;\n\n    s += 1 * 3;\n    d += 1 * 8;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgrw_4x16le__bgr_565(uint8_t* dst_ptr,\n                              " +
+	"                   size_t dst_len,\n                                                 uint8_t* dst_palette_ptr,\n                                                 size_t dst_palette_len,\n                                                 const uint8_t* src_ptr,\n                                                 size_t src_len) {\n  size_t dst_len8 = dst_len / 8;\n  size_t src_len2 = src_len / 2;\n  size_t len = (dst_len8 < src_len2) ? dst_len8 : src_len2;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    wuffs_base__poke_u64le__no_bounds_check(\n        d + (0 * 8),\n        wuffs_base__color_u32__as__color_u64(\n            wuffs_base__color_u16_rgb_565__as__color_u32_argb_premul(\n                wuffs_base__peek_u16le__no_bounds_check(s + (0 * 2)))));\n\n    s += 1 * 2;\n    d += 1 * 8;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgrw_4x16le__bgrx(uint8_t* dst_ptr,\n                                              size_t dst_len,\n               " +
+	"                               uint8_t* dst_palette_ptr,\n                                              size_t dst_palette_len,\n                                              const uint8_t* src_ptr,\n                                              size_t src_len) {\n  size_t dst_len8 = dst_len / 8;\n  size_t src_len4 = src_len / 4;\n  size_t len = (dst_len8 < src_len4) ? dst_len8 : src_len4;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    uint8_t s0 = s[0];\n    uint8_t s1 = s[1];\n    uint8_t s2 = s[2];\n    d[0] = s0;\n    d[1] = s0;\n    d[2] = s1;\n    d[3] = s1;\n    d[4] = s2;\n    d[5] = s2;\n    d[6] = 0xFF;\n    d[7] = 0xFF;\n\n    s += 1 * 4;\n    d += 1 * 8;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgrw_4x16le__rgb(uint8_t* dst_ptr,\n                                             size_t dst_len,\n                                             uint8_t* dst_palette_ptr,\n                                             size_t dst_palette_len,\n " +
+	"                                            const uint8_t* src_ptr,\n                                             size_t src_len) {\n  size_t dst_len8 = dst_len / 8;\n  size_t src_len3 = src_len / 3;\n  size_t len = (dst_len8 < src_len3) ? dst_len8 : src_len3;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    uint8_t s0 = s[0];\n    uint8_t s1 = s[1];\n    uint8_t s2 = s[2];\n    d[0] = s2;\n    d[1] = s2;\n    d[2] = s1;\n    d[3] = s1;\n    d[4] = s0;\n    d[5] = s0;\n    d[6] = 0xFF;\n    d[7] = 0xFF;\n\n    s += 1 * 3;\n    d += 1 * 8;\n    n -= 1;\n  }\n\n  return len;\n}\n\n" +
 	"" +
 	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__rgba_nonpremul__bgra_nonpremul_4x16le__src(\n    uint8_t* dst_ptr,\n    size_t dst_len,\n    uint8_t* dst_palette_ptr,\n    size_t dst_palette_len,\n    const uint8_t* src_ptr,\n    size_t src_len) {\n  size_t dst_len4 = dst_len / 4;\n  size_t src_len8 = src_len / 8;\n  size_t len = (dst_len4 < src_len8) ? dst_len4 : src_len8;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n\n  size_t n = len;\n  while (n >= 1) {\n    wuffs_base__poke_u32le__no_bounds_check(\n        d + (0 * 4), wuffs_base__color_u64__as__color_u32__swap_u32_argb_abgr(\n                         wuffs_base__peek_u64le__no_bounds_check(s + (0 * 8))));\n\n    s += 1 * 8;\n    d += 1 * 4;\n    n -= 1;\n  }\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__rgba_nonpremul__bgra_nonpremul_4x16le__src_over(\n    uint8_t* dst_ptr,\n    size_t dst_len,\n    uint8_t* dst_palette_ptr,\n    size_t dst_palette_len,\n    const uint8_t* src_ptr,\n    size_t src_len) {\n  size_t dst_len4 = dst_len / 4;\n" +
 	"  size_t src_len8 = src_len / 8;\n  size_t len = (dst_len4 < src_len8) ? dst_len4 : src_len8;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  while (n >= 1) {\n    uint64_t d0 = wuffs_base__color_u32__as__color_u64(\n        wuffs_base__peek_u32le__no_bounds_check(d + (0 * 4)));\n    uint64_t s0 = wuffs_base__swap_u64_argb_abgr(\n        wuffs_base__peek_u64le__no_bounds_check(s + (0 * 8)));\n    wuffs_base__poke_u32le__no_bounds_check(\n        d + (0 * 4),\n        wuffs_base__color_u64__as__color_u32(\n            wuffs_base__composite_nonpremul_nonpremul_u64_axxx(d0, s0)));\n\n    s += 1 * 8;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\n" +
 	"" +
+	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__rgbw__bgr_565(uint8_t* dst_ptr,\n                                          size_t dst_len,\n                                          uint8_t* dst_palette_ptr,\n                                          size_t dst_palette_len,\n                                          const uint8_t* src_ptr,\n                                          size_t src_len) {\n  size_t dst_len4 = dst_len / 4;\n  size_t src_len2 = src_len / 2;\n  size_t len = (dst_len4 < src_len2) ? dst_len4 : src_len2;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    wuffs_base__poke_u32le__no_bounds_check(\n        d + (0 * 4),\n        wuffs_base__swap_u32_argb_abgr(\n            wuffs_base__color_u16_rgb_565__as__color_u32_argb_premul(\n                wuffs_base__peek_u16le__no_bounds_check(s + (0 * 2)))));\n\n    s += 1 * 2;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\n" +
+	"" +
 	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__xxx__index__src(uint8_t* dst_ptr,\n                                            size_t dst_len,\n                                            uint8_t* dst_palette_ptr,\n                                            size_t dst_palette_len,\n                                            const uint8_t* src_ptr,\n                                            size_t src_len) {\n  if (dst_palette_len != 1024) {\n    return 0;\n  }\n  size_t dst_len3 = dst_len / 3;\n  size_t len = (dst_len3 < src_len) ? dst_len3 : src_len;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  const size_t loop_unroll_count = 4;\n\n  // The comparison in the while condition is \">\", not \">=\", because with\n  // \">=\", the last 4-byte store could write past the end of the dst slice.\n  //\n  // Each 4-byte store writes one too many bytes, but a subsequent store\n  // will overwrite that with the correct byte. There is always another\n  // store, whether a 4-byte store in this loop" +
 	" or a 1-byte store in the\n  // next loop.\n  while (n > loop_unroll_count) {\n    wuffs_base__poke_u32le__no_bounds_check(\n        d + (0 * 3), wuffs_base__peek_u32le__no_bounds_check(\n                         dst_palette_ptr + ((size_t)s[0] * 4)));\n    wuffs_base__poke_u32le__no_bounds_check(\n        d + (1 * 3), wuffs_base__peek_u32le__no_bounds_check(\n                         dst_palette_ptr + ((size_t)s[1] * 4)));\n    wuffs_base__poke_u32le__no_bounds_check(\n        d + (2 * 3), wuffs_base__peek_u32le__no_bounds_check(\n                         dst_palette_ptr + ((size_t)s[2] * 4)));\n    wuffs_base__poke_u32le__no_bounds_check(\n        d + (3 * 3), wuffs_base__peek_u32le__no_bounds_check(\n                         dst_palette_ptr + ((size_t)s[3] * 4)));\n\n    s += loop_unroll_count * 1;\n    d += loop_unroll_count * 3;\n    n -= loop_unroll_count;\n  }\n\n  while (n >= 1) {\n    uint32_t s0 = wuffs_base__peek_u32le__no_bounds_check(dst_palette_ptr +\n                                                          ((size_t)" +
 	"s[0] * 4));\n    wuffs_base__poke_u24le__no_bounds_check(d + (0 * 3), s0);\n\n    s += 1 * 1;\n    d += 1 * 3;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__xxx__index_bgra_nonpremul__src_over(\n    uint8_t* dst_ptr,\n    size_t dst_len,\n    uint8_t* dst_palette_ptr,\n    size_t dst_palette_len,\n    const uint8_t* src_ptr,\n    size_t src_len) {\n  if (dst_palette_len != 1024) {\n    return 0;\n  }\n  size_t dst_len3 = dst_len / 3;\n  size_t len = (dst_len3 < src_len) ? dst_len3 : src_len;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint32_t d0 =\n        wuffs_base__peek_u24le__no_bounds_check(d + (0 * 3)) | 0xFF000000;\n    uint32_t s0 = wuffs_base__peek_u32le__no_bounds_check(dst_palette_ptr +\n                                                          ((size_t)s[0] * 4));\n    wuffs_base__poke_u24le__no_bounds_check(\n        d + (0 * 3), wuffs_base__composite_premul_nonpremul_u32_axxx(d0, s0));\n\n    s += 1 * 1;\n    d" +
@@ -709,34 +714,35 @@
 	"urn NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      if (wuffs_base__pixel_swizzler__squash_align4_bgr_565_8888(\n              dst_palette.ptr, dst_palette.len, src_palette.ptr,\n              src_palette.len, false) != 256) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr_565__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr_565__index_binary_alpha__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=\n          1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__xxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__xxx__index_binary_alpha__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__" +
 	"PIXEL_FORMAT__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=\n          1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__xxxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__xxxx__index_binary_alpha__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL_4X16LE:\n      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=\n          1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__xxxxxxxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__xxxxxxxx__index_binary_al" +
 	"pha__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      if (wuffs_base__pixel_swizzler__swap_rgbx_bgrx(\n              dst_palette.ptr, dst_palette.len, NULL, 0, src_palette.ptr,\n              src_palette.len) != 256) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__xxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__xxx__index_binary_alpha__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n      if (wuffs_base__pixel_swizzler__swap_rgbx_bgrx(\n              dst_palette.ptr, dst_palette.len, NULL, 0, src_palette.ptr,\n              src_palette.len) != 256) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__xxx" +
-	"x__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__xxxx__index_binary_alpha__src_over;\n      }\n      return NULL;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__bgr(wuffs_base__pixel_swizzler* p,\n                                         wuffs_base__pixel_format dst_pixfmt,\n                                         wuffs_base__slice_u8 dst_palette,\n                                         wuffs_base__slice_u8 src_palette,\n                                         wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      return wuffs_base__pixel_swizzler__bgr_565__bgr;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      return wuffs_base__pixel_swizzler__copy_3_3;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n    " +
-	"  return wuffs_base__pixel_swizzler__bgrw__bgr;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL_4X16LE:\n      return wuffs_base__pixel_swizzler__bgrw_4x16le__bgr;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      return wuffs_base__pixel_swizzler__swap_rgb_bgr;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n#if defined(WUFFS_BASE__CPU_ARCH__X86_64)\n      if (wuffs_base__cpu_arch__have_x86_sse42()) {\n        return wuffs_base__pixel_swizzler__bgrw__rgb__sse42;\n      }\n#endif\n      return wuffs_base__pixel_swizzler__bgrw__rgb;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__bgra_nonpremul(\n    wuffs_base__pixel_swizzler* p,\n    wuffs_base__pixel_format dst_pixfmt,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src_palette,\n    wuffs_base__pixel_ble" +
-	"nd blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__copy_4_4;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_nonpremul__src_over;\n      " +
-	"}\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__bgra_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n      // TODO.\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      // TODO.\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n      switch (blend) {\n     " +
-	"   case WUFFS_BASE__PIXEL_BLEND__SRC:\n#if defined(WUFFS_BASE__CPU_ARCH__X86_64)\n          if (wuffs_base__cpu_arch__have_x86_sse42()) {\n            return wuffs_base__pixel_swizzler__swap_rgbx_bgrx__sse42;\n          }\n#endif\n          return wuffs_base__pixel_swizzler__swap_rgbx_bgrx;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__rgba_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_premul__rgba_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_premul__rgba_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n      // TODO.\n      break;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__bg" +
-	"ra_nonpremul_4x16le(\n    wuffs_base__pixel_swizzler* p,\n    wuffs_base__pixel_format dst_pixfmt,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src_palette,\n    wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul_4x16le__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul_4x16le__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr__bgra_nonpremul_4x16le__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr__bgra_nonpremul_4x16le__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n      switch (blend) {\n        case " +
-	"WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_nonpremul_4x16le__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_nonpremul_4x16le__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__copy_8_8;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__bgra_nonpremul_4x16le__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul_4x16le__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul_4x16le__src_over;\n      }\n      return NULL;\n\n    case WUF" +
-	"FS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n      // TODO.\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      // TODO.\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__rgba_nonpremul__bgra_nonpremul_4x16le__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__rgba_nonpremul__bgra_nonpremul_4x16le__src_over;\n      }\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_premul__rgba_nonpremul_4x16le__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_premul__rgba_nonpremul_4x16le__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n      // TODO.\n      br" +
-	"eak;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__bgra_premul(\n    wuffs_base__pixel_swizzler* p,\n    wuffs_base__pixel_format dst_pixfmt,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src_palette,\n    wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr_565__bgra_premul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr_565__bgra_premul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr__bgra_premul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr__bgra_premul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_" +
-	"FORMAT__BGRA_NONPREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_premul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_premul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__bgra_premul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__bgra_premul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__copy_4_4;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_premul__bgra_premul__src_over;\n      }\n      return" +
-	" NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__rgba_premul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__rgba_premul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n#if defined(WUFFS_BASE__CPU_ARCH__X86_64)\n          if (wuffs_base__cpu_arch__have_x86_sse42()) {\n            return wuffs_base__pixel_swizzler__swap_rgbx_bgrx__sse42;\n          }\n#endif\n          return wuffs_base__pixel_swizzler__swap_rgbx_bgrx;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_premul__rgba_premul__src_over;\n      }\n      return NULL;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__bgrx(wuffs_base__pixel_swizzler* p,\n   " +
-	"                                       wuffs_base__pixel_format dst_pixfmt,\n                                          wuffs_base__slice_u8 dst_palette,\n                                          wuffs_base__slice_u8 src_palette,\n                                          wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      return wuffs_base__pixel_swizzler__bgr_565__bgrx;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      return wuffs_base__pixel_swizzler__xxx__xxxx;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n      return wuffs_base__pixel_swizzler__bgrw__bgrx;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:\n      return wuffs_base__pixel_swizzler__bgrw_4x16le__bgrx;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n      return wuffs_base__pixel_swizzler__copy_4_4;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      // TODO.\n      break;\n\n    case WUFFS_B" +
-	"ASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n      return wuffs_base__pixel_swizzler__bgrw__rgbx;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__rgb(wuffs_base__pixel_swizzler* p,\n                                         wuffs_base__pixel_format dst_pixfmt,\n                                         wuffs_base__slice_u8 dst_palette,\n                                         wuffs_base__slice_u8 src_palette,\n                                         wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      return wuffs_base__pixel_swizzler__bgr_565__rgb;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      return wuffs_base__pixel_swizzler__swap_rgb_bgr;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__" +
-	"BGRA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n#if defined(WUFFS_BASE__CPU_ARCH__X86_64)\n      if (wuffs_base__cpu_arch__have_x86_sse42()) {\n        return wuffs_base__pixel_swizzler__bgrw__rgb__sse42;\n      }\n#endif\n      return wuffs_base__pixel_swizzler__bgrw__rgb;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:\n      return wuffs_base__pixel_swizzler__bgrw_4x16le__rgb;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      return wuffs_base__pixel_swizzler__copy_3_3;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n      return wuffs_base__pixel_swizzler__bgrw__bgr;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__rgba_nonpremul(\n    wuffs_base__pixel_swizzler* p,\n    wuffs_base__pixel_format dst_pixfmt,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src_palette,\n    wuffs_base__pixel_blen" +
-	"d blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr_565__rgba_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr_565__rgba_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr__rgba_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr__rgba_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n#if defined(WUFFS_BASE__CPU_ARCH__X86_64)\n          if (wuffs_base__cpu_arch__have_x86_sse42()) {\n            return wuffs_base__pixel_swizzler__swap_rgbx_bgrx__sse42;\n          }\n#endif\n          " +
-	"return wuffs_base__pixel_swizzler__swap_rgbx_bgrx;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__rgba_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__rgba_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__rgba_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_premul__rgba_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_premul__rgba_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n    case WUFFS_BASE__PIXEL" +
-	"_FORMAT__BGRX:\n      // TODO.\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      // TODO.\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__copy_4_4;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n      // TODO.\n      break;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__rgb" +
-	"a_premul(\n    wuffs_base__pixel_swizzler* p,\n    wuffs_base__pixel_format dst_pixfmt,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src_palette,\n    wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr_565__rgba_premul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr_565__rgba_premul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr__rgba_premul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr__rgba_premul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuff" +
-	"s_base__pixel_swizzler__bgra_nonpremul__rgba_premul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__rgba_premul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__rgba_premul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__rgba_premul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n#if defined(WUFFS_BASE__CPU_ARCH__X86_64)\n          if (wuffs_base__cpu_arch__have_x86_sse42()) {\n            return wuffs_base__pixel_swizzler__swap_rgbx_bgrx__sse42;\n          }\n#endif\n          return wuffs_base__pixel_swizzler__swap_rgbx_bgrx;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wu" +
-	"ffs_base__pixel_swizzler__bgra_premul__rgba_premul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_premul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_premul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__copy_4_4;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_premul__bgra_premul__src_over;\n      }\n      return NULL;\n  }\n  return NULL;\n}\n\n" +
+	"x__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__xxxx__index_binary_alpha__src_over;\n      }\n      return NULL;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__bgr_565(\n    wuffs_base__pixel_swizzler* p,\n    wuffs_base__pixel_format dst_pixfmt,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src_palette,\n    wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      return wuffs_base__pixel_swizzler__copy_2_2;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      return wuffs_base__pixel_swizzler__bgr__bgr_565;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n      return wuffs_base__pixel_swizzler__bgrw__bgr_565;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:\n    case WUFFS_BASE__PIXEL_F" +
+	"ORMAT__BGRA_PREMUL_4X16LE:\n      return wuffs_base__pixel_swizzler__bgrw_4x16le__bgr_565;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n      return wuffs_base__pixel_swizzler__rgbw__bgr_565;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__bgr(wuffs_base__pixel_swizzler* p,\n                                         wuffs_base__pixel_format dst_pixfmt,\n                                         wuffs_base__slice_u8 dst_palette,\n                                         wuffs_base__slice_u8 src_palette,\n                                         wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      return wuffs_base__pixel_swizzler__bgr_565__bgr;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      return wuffs_base__pixel_swizzler__copy_3_3;\n\n    case WUFFS_BASE__PIXEL_F" +
+	"ORMAT__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n      return wuffs_base__pixel_swizzler__bgrw__bgr;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL_4X16LE:\n      return wuffs_base__pixel_swizzler__bgrw_4x16le__bgr;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      return wuffs_base__pixel_swizzler__swap_rgb_bgr;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n#if defined(WUFFS_BASE__CPU_ARCH__X86_64)\n      if (wuffs_base__cpu_arch__have_x86_sse42()) {\n        return wuffs_base__pixel_swizzler__bgrw__rgb__sse42;\n      }\n#endif\n      return wuffs_base__pixel_swizzler__bgrw__rgb;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__bgra_nonpremul(\n    wuffs_bas" +
+	"e__pixel_swizzler* p,\n    wuffs_base__pixel_format dst_pixfmt,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src_palette,\n    wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pix" +
+	"el_swizzler__copy_4_4;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__bgra_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n      // TODO" +
+	".\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      // TODO.\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n#if defined(WUFFS_BASE__CPU_ARCH__X86_64)\n          if (wuffs_base__cpu_arch__have_x86_sse42()) {\n            return wuffs_base__pixel_swizzler__swap_rgbx_bgrx__sse42;\n          }\n#endif\n          return wuffs_base__pixel_swizzler__swap_rgbx_bgrx;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__rgba_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_premul__rgba_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_premul__rgba_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case W" +
+	"UFFS_BASE__PIXEL_FORMAT__RGBX:\n      // TODO.\n      break;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__bgra_nonpremul_4x16le(\n    wuffs_base__pixel_swizzler* p,\n    wuffs_base__pixel_format dst_pixfmt,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src_palette,\n    wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul_4x16le__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul_4x16le__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr__bgra_nonpremul_4x16le__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel" +
+	"_swizzler__bgr__bgra_nonpremul_4x16le__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_nonpremul_4x16le__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_nonpremul_4x16le__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__copy_8_8;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__bgra_nonpremul_4x16le__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul_4x16le__src;\n        case WU" +
+	"FFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul_4x16le__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n      // TODO.\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      // TODO.\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__rgba_nonpremul__bgra_nonpremul_4x16le__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__rgba_nonpremul__bgra_nonpremul_4x16le__src_over;\n      }\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_premul__rgba_nonpremul_4x16le__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_premul__rgba_nonp" +
+	"remul_4x16le__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n      // TODO.\n      break;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__bgra_premul(\n    wuffs_base__pixel_swizzler* p,\n    wuffs_base__pixel_format dst_pixfmt,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src_palette,\n    wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr_565__bgra_premul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr_565__bgra_premul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr__bgra_premul__src;\n        case" +
+	" WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr__bgra_premul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_premul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_premul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__bgra_premul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__bgra_premul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizz" +
+	"ler__copy_4_4;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_premul__bgra_premul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__rgba_premul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__rgba_premul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n#if defined(WUFFS_BASE__CPU_ARCH__X86_64)\n          if (wuffs_base__cpu_arch__have_x86_sse42()) {\n            return wuffs_base__pixel_swizzler__swap_rgbx_bgrx__sse42;\n          }\n#endif\n          return wuffs_base__pixel_swizzler__swap_rgbx_bgrx;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_premul__rgba_premul__src_over;\n     " +
+	" }\n      return NULL;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__bgrx(wuffs_base__pixel_swizzler* p,\n                                          wuffs_base__pixel_format dst_pixfmt,\n                                          wuffs_base__slice_u8 dst_palette,\n                                          wuffs_base__slice_u8 src_palette,\n                                          wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      return wuffs_base__pixel_swizzler__bgr_565__bgrx;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      return wuffs_base__pixel_swizzler__xxx__xxxx;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n      return wuffs_base__pixel_swizzler__bgrw__bgrx;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:\n      return wuffs_base__pixel_swizzler__bgrw_4x16le__bgrx;\n\n    case WUFFS" +
+	"_BASE__PIXEL_FORMAT__BGRX:\n      return wuffs_base__pixel_swizzler__copy_4_4;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      // TODO.\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n      return wuffs_base__pixel_swizzler__bgrw__rgbx;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__rgb(wuffs_base__pixel_swizzler* p,\n                                         wuffs_base__pixel_format dst_pixfmt,\n                                         wuffs_base__slice_u8 dst_palette,\n                                         wuffs_base__slice_u8 src_palette,\n                                         wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      return wuffs_base__pixel_swizzler__bgr_565__rgb;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      return wuffs_base__p" +
+	"ixel_swizzler__swap_rgb_bgr;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n#if defined(WUFFS_BASE__CPU_ARCH__X86_64)\n      if (wuffs_base__cpu_arch__have_x86_sse42()) {\n        return wuffs_base__pixel_swizzler__bgrw__rgb__sse42;\n      }\n#endif\n      return wuffs_base__pixel_swizzler__bgrw__rgb;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:\n      return wuffs_base__pixel_swizzler__bgrw_4x16le__rgb;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      return wuffs_base__pixel_swizzler__copy_3_3;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n      return wuffs_base__pixel_swizzler__bgrw__bgr;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__rgba_nonpremul(\n    wuffs_base" +
+	"__pixel_swizzler* p,\n    wuffs_base__pixel_format dst_pixfmt,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src_palette,\n    wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr_565__rgba_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr_565__rgba_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr__rgba_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr__rgba_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n#if defined(WUFFS_BASE__CPU_ARCH_" +
+	"_X86_64)\n          if (wuffs_base__cpu_arch__have_x86_sse42()) {\n            return wuffs_base__pixel_swizzler__swap_rgbx_bgrx__sse42;\n          }\n#endif\n          return wuffs_base__pixel_swizzler__swap_rgbx_bgrx;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__rgba_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__rgba_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__rgba_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_premul__rgba_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs" +
+	"_base__pixel_swizzler__bgra_premul__rgba_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n      // TODO.\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      // TODO.\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__copy_4_4;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WU" +
+	"FFS_BASE__PIXEL_FORMAT__RGBX:\n      // TODO.\n      break;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__rgba_premul(\n    wuffs_base__pixel_swizzler* p,\n    wuffs_base__pixel_format dst_pixfmt,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src_palette,\n    wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr_565__rgba_premul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr_565__rgba_premul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr__rgba_premul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr__rgba_premul__src_over;\n  " +
+	"    }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__rgba_premul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__rgba_premul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__rgba_premul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul_4x16le__rgba_premul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n#if defined(WUFFS_BASE__CPU_ARCH__X86_64)\n          if (wuffs_base__cpu_arch__have_x86_sse42()) {\n            return wuffs_base__pixel_swizzler__swap_r" +
+	"gbx_bgrx__sse42;\n          }\n#endif\n          return wuffs_base__pixel_swizzler__swap_rgbx_bgrx;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_premul__rgba_premul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_premul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_premul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__copy_4_4;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_premul__bgra_premul__src_over;\n      }\n      return NULL;\n  }\n  return NULL;\n}\n\n" +
 	"" +
 	"// --------\n\nWUFFS_BASE__MAYBE_STATIC wuffs_base__status  //\nwuffs_base__pixel_swizzler__prepare(wuffs_base__pixel_swizzler* p,\n                                    wuffs_base__pixel_format dst_pixfmt,\n                                    wuffs_base__slice_u8 dst_palette,\n                                    wuffs_base__pixel_format src_pixfmt,\n                                    wuffs_base__slice_u8 src_palette,\n                                    wuffs_base__pixel_blend blend) {\n  if (!p) {\n    return wuffs_base__make_status(wuffs_base__error__bad_receiver);\n  }\n  p->private_impl.func = NULL;\n  p->private_impl.transparent_black_func = NULL;\n  p->private_impl.dst_pixfmt_bytes_per_pixel = 0;\n  p->private_impl.src_pixfmt_bytes_per_pixel = 0;\n\n  wuffs_base__pixel_swizzler__func func = NULL;\n  wuffs_base__pixel_swizzler__transparent_black_func transparent_black_func =\n      NULL;\n\n  uint32_t dst_pixfmt_bits_per_pixel =\n      wuffs_base__pixel_format__bits_per_pixel(&dst_pixfmt);\n  if ((dst_pixfmt_bits_per_pixel == " +
 	"0) ||\n      ((dst_pixfmt_bits_per_pixel & 7) != 0)) {\n    return wuffs_base__make_status(\n        wuffs_base__error__unsupported_pixel_swizzler_option);\n  }\n\n  uint32_t src_pixfmt_bits_per_pixel =\n      wuffs_base__pixel_format__bits_per_pixel(&src_pixfmt);\n  if ((src_pixfmt_bits_per_pixel == 0) ||\n      ((src_pixfmt_bits_per_pixel & 7) != 0)) {\n    return wuffs_base__make_status(\n        wuffs_base__error__unsupported_pixel_swizzler_option);\n  }\n\n  // TODO: support many more formats.\n\n  switch (blend) {\n    case WUFFS_BASE__PIXEL_BLEND__SRC:\n      transparent_black_func =\n          wuffs_base__pixel_swizzler__transparent_black_src;\n      break;\n\n    case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n      transparent_black_func =\n          wuffs_base__pixel_swizzler__transparent_black_src_over;\n      break;\n  }\n\n  switch (src_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__Y:\n      func = wuffs_base__pixel_swizzler__prepare__y(p, dst_pixfmt, dst_palette,\n                                                    src_palette" +
-	", blend);\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__Y_16BE:\n      func = wuffs_base__pixel_swizzler__prepare__y_16be(\n          p, dst_pixfmt, dst_palette, src_palette, blend);\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_NONPREMUL:\n      func = wuffs_base__pixel_swizzler__prepare__indexed__bgra_nonpremul(\n          p, dst_pixfmt, dst_palette, src_palette, blend);\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_BINARY:\n      func = wuffs_base__pixel_swizzler__prepare__indexed__bgra_binary(\n          p, dst_pixfmt, dst_palette, src_palette, blend);\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      func = wuffs_base__pixel_swizzler__prepare__bgr(\n          p, dst_pixfmt, dst_palette, src_palette, blend);\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n      func = wuffs_base__pixel_swizzler__prepare__bgra_nonpremul(\n          p, dst_pixfmt, dst_palette, src_palette, blend);\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:\n" +
-	"      func = wuffs_base__pixel_swizzler__prepare__bgra_nonpremul_4x16le(\n          p, dst_pixfmt, dst_palette, src_palette, blend);\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n      func = wuffs_base__pixel_swizzler__prepare__bgra_premul(\n          p, dst_pixfmt, dst_palette, src_palette, blend);\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n      func = wuffs_base__pixel_swizzler__prepare__bgrx(\n          p, dst_pixfmt, dst_palette, src_palette, blend);\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      func = wuffs_base__pixel_swizzler__prepare__rgb(\n          p, dst_pixfmt, dst_palette, src_palette, blend);\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n      func = wuffs_base__pixel_swizzler__prepare__rgba_nonpremul(\n          p, dst_pixfmt, dst_palette, src_palette, blend);\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n      func = wuffs_base__pixel_swizzler__prepare__rgba_premul(\n          p, dst_pixfmt, dst_palette, src_palette, blen" +
-	"d);\n      break;\n  }\n\n  p->private_impl.func = func;\n  p->private_impl.transparent_black_func = transparent_black_func;\n  p->private_impl.dst_pixfmt_bytes_per_pixel = dst_pixfmt_bits_per_pixel / 8;\n  p->private_impl.src_pixfmt_bytes_per_pixel = src_pixfmt_bits_per_pixel / 8;\n  return wuffs_base__make_status(\n      func ? NULL : wuffs_base__error__unsupported_pixel_swizzler_option);\n}\n\nWUFFS_BASE__MAYBE_STATIC uint64_t  //\nwuffs_base__pixel_swizzler__limited_swizzle_u32_interleaved_from_reader(\n    const wuffs_base__pixel_swizzler* p,\n    uint32_t up_to_num_pixels,\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    const uint8_t** ptr_iop_r,\n    const uint8_t* io2_r) {\n  if (p && p->private_impl.func) {\n    const uint8_t* iop_r = *ptr_iop_r;\n    uint64_t src_len = wuffs_base__u64__min(\n        ((uint64_t)up_to_num_pixels) *\n            ((uint64_t)p->private_impl.src_pixfmt_bytes_per_pixel),\n        ((uint64_t)(io2_r - iop_r)));\n    uint64_t n =\n        (*p->private_impl.func)(dst.ptr, dst." +
-	"len, dst_palette.ptr,\n                                dst_palette.len, iop_r, (size_t)src_len);\n    *ptr_iop_r += n * p->private_impl.src_pixfmt_bytes_per_pixel;\n    return n;\n  }\n  return 0;\n}\n\nWUFFS_BASE__MAYBE_STATIC uint64_t  //\nwuffs_base__pixel_swizzler__swizzle_interleaved_from_reader(\n    const wuffs_base__pixel_swizzler* p,\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    const uint8_t** ptr_iop_r,\n    const uint8_t* io2_r) {\n  if (p && p->private_impl.func) {\n    const uint8_t* iop_r = *ptr_iop_r;\n    uint64_t src_len = ((uint64_t)(io2_r - iop_r));\n    uint64_t n =\n        (*p->private_impl.func)(dst.ptr, dst.len, dst_palette.ptr,\n                                dst_palette.len, iop_r, (size_t)src_len);\n    *ptr_iop_r += n * p->private_impl.src_pixfmt_bytes_per_pixel;\n    return n;\n  }\n  return 0;\n}\n\nWUFFS_BASE__MAYBE_STATIC uint64_t  //\nwuffs_base__pixel_swizzler__swizzle_interleaved_from_slice(\n    const wuffs_base__pixel_swizzler* p,\n    wuffs_base__slice_u8 dst,\n    wuffs_" +
-	"base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src) {\n  if (p && p->private_impl.func) {\n    return (*p->private_impl.func)(dst.ptr, dst.len, dst_palette.ptr,\n                                   dst_palette.len, src.ptr, src.len);\n  }\n  return 0;\n}\n\nWUFFS_BASE__MAYBE_STATIC uint64_t  //\nwuffs_base__pixel_swizzler__swizzle_interleaved_transparent_black(\n    const wuffs_base__pixel_swizzler* p,\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    uint64_t num_pixels) {\n  if (p && p->private_impl.transparent_black_func) {\n    return (*p->private_impl.transparent_black_func)(\n        dst.ptr, dst.len, dst_palette.ptr, dst_palette.len, num_pixels,\n        p->private_impl.dst_pixfmt_bytes_per_pixel);\n  }\n  return 0;\n}\n" +
+	", blend);\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__Y_16BE:\n      func = wuffs_base__pixel_swizzler__prepare__y_16be(\n          p, dst_pixfmt, dst_palette, src_palette, blend);\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_NONPREMUL:\n      func = wuffs_base__pixel_swizzler__prepare__indexed__bgra_nonpremul(\n          p, dst_pixfmt, dst_palette, src_palette, blend);\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_BINARY:\n      func = wuffs_base__pixel_swizzler__prepare__indexed__bgra_binary(\n          p, dst_pixfmt, dst_palette, src_palette, blend);\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      func = wuffs_base__pixel_swizzler__prepare__bgr_565(\n          p, dst_pixfmt, dst_palette, src_palette, blend);\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      func = wuffs_base__pixel_swizzler__prepare__bgr(\n          p, dst_pixfmt, dst_palette, src_palette, blend);\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n      func = wuffs_ba" +
+	"se__pixel_swizzler__prepare__bgra_nonpremul(\n          p, dst_pixfmt, dst_palette, src_palette, blend);\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:\n      func = wuffs_base__pixel_swizzler__prepare__bgra_nonpremul_4x16le(\n          p, dst_pixfmt, dst_palette, src_palette, blend);\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n      func = wuffs_base__pixel_swizzler__prepare__bgra_premul(\n          p, dst_pixfmt, dst_palette, src_palette, blend);\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n      func = wuffs_base__pixel_swizzler__prepare__bgrx(\n          p, dst_pixfmt, dst_palette, src_palette, blend);\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      func = wuffs_base__pixel_swizzler__prepare__rgb(\n          p, dst_pixfmt, dst_palette, src_palette, blend);\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n      func = wuffs_base__pixel_swizzler__prepare__rgba_nonpremul(\n          p, dst_pixfmt, dst_palette, src_palette, blend);\n    " +
+	"  break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n      func = wuffs_base__pixel_swizzler__prepare__rgba_premul(\n          p, dst_pixfmt, dst_palette, src_palette, blend);\n      break;\n  }\n\n  p->private_impl.func = func;\n  p->private_impl.transparent_black_func = transparent_black_func;\n  p->private_impl.dst_pixfmt_bytes_per_pixel = dst_pixfmt_bits_per_pixel / 8;\n  p->private_impl.src_pixfmt_bytes_per_pixel = src_pixfmt_bits_per_pixel / 8;\n  return wuffs_base__make_status(\n      func ? NULL : wuffs_base__error__unsupported_pixel_swizzler_option);\n}\n\nWUFFS_BASE__MAYBE_STATIC uint64_t  //\nwuffs_base__pixel_swizzler__limited_swizzle_u32_interleaved_from_reader(\n    const wuffs_base__pixel_swizzler* p,\n    uint32_t up_to_num_pixels,\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    const uint8_t** ptr_iop_r,\n    const uint8_t* io2_r) {\n  if (p && p->private_impl.func) {\n    const uint8_t* iop_r = *ptr_iop_r;\n    uint64_t src_len = wuffs_base__u64__min(\n        ((uint64_t)up_to_num_pi" +
+	"xels) *\n            ((uint64_t)p->private_impl.src_pixfmt_bytes_per_pixel),\n        ((uint64_t)(io2_r - iop_r)));\n    uint64_t n =\n        (*p->private_impl.func)(dst.ptr, dst.len, dst_palette.ptr,\n                                dst_palette.len, iop_r, (size_t)src_len);\n    *ptr_iop_r += n * p->private_impl.src_pixfmt_bytes_per_pixel;\n    return n;\n  }\n  return 0;\n}\n\nWUFFS_BASE__MAYBE_STATIC uint64_t  //\nwuffs_base__pixel_swizzler__swizzle_interleaved_from_reader(\n    const wuffs_base__pixel_swizzler* p,\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    const uint8_t** ptr_iop_r,\n    const uint8_t* io2_r) {\n  if (p && p->private_impl.func) {\n    const uint8_t* iop_r = *ptr_iop_r;\n    uint64_t src_len = ((uint64_t)(io2_r - iop_r));\n    uint64_t n =\n        (*p->private_impl.func)(dst.ptr, dst.len, dst_palette.ptr,\n                                dst_palette.len, iop_r, (size_t)src_len);\n    *ptr_iop_r += n * p->private_impl.src_pixfmt_bytes_per_pixel;\n    return n;\n  }\n  return 0;\n}\n\nWUF" +
+	"FS_BASE__MAYBE_STATIC uint64_t  //\nwuffs_base__pixel_swizzler__swizzle_interleaved_from_slice(\n    const wuffs_base__pixel_swizzler* p,\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src) {\n  if (p && p->private_impl.func) {\n    return (*p->private_impl.func)(dst.ptr, dst.len, dst_palette.ptr,\n                                   dst_palette.len, src.ptr, src.len);\n  }\n  return 0;\n}\n\nWUFFS_BASE__MAYBE_STATIC uint64_t  //\nwuffs_base__pixel_swizzler__swizzle_interleaved_transparent_black(\n    const wuffs_base__pixel_swizzler* p,\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    uint64_t num_pixels) {\n  if (p && p->private_impl.transparent_black_func) {\n    return (*p->private_impl.transparent_black_func)(\n        dst.ptr, dst.len, dst_palette.ptr, dst_palette.len, num_pixels,\n        p->private_impl.dst_pixfmt_bytes_per_pixel);\n  }\n  return 0;\n}\n" +
 	""
 
 const BaseUTF8SubmoduleC = "" +
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 3ed8d40..c9f1acd 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -15645,6 +15645,22 @@
 }
 
 static uint64_t  //
+wuffs_base__pixel_swizzler__copy_2_2(uint8_t* dst_ptr,  // copies 2-byte pixels unchanged from src to dst
+                                     size_t dst_len,
+                                     uint8_t* dst_palette_ptr,  // not referenced in this body
+                                     size_t dst_palette_len,  // not referenced in this body
+                                     const uint8_t* src_ptr,
+                                     size_t src_len) {
+  size_t dst_len2 = dst_len / 2;  // whole 2-byte pixels that fit in dst
+  size_t src_len2 = src_len / 2;  // whole 2-byte pixels available in src
+  size_t len = (dst_len2 < src_len2) ? dst_len2 : src_len2;  // pixels to copy: the smaller of the two
+  if (len > 0) {  // no memmove call at all when nothing fits
+    memmove(dst_ptr, src_ptr, len * 2);  // len is in pixels, so scale to bytes; memmove permits overlapping buffers
+  }
+  return len;  // count of pixels copied, not bytes
+}
+
+static uint64_t  //
 wuffs_base__pixel_swizzler__copy_3_3(uint8_t* dst_ptr,
                                      size_t dst_len,
                                      uint8_t* dst_palette_ptr,
@@ -16407,6 +16423,35 @@
 // --------
 
 static uint64_t  //
+wuffs_base__pixel_swizzler__bgr__bgr_565(uint8_t* dst_ptr,  // converts 2-byte src pixels into 3-byte dst pixels
+                                         size_t dst_len,
+                                         uint8_t* dst_palette_ptr,  // not referenced in this body
+                                         size_t dst_palette_len,  // not referenced in this body
+                                         const uint8_t* src_ptr,
+                                         size_t src_len) {
+  size_t dst_len3 = dst_len / 3;  // whole 3-byte pixels that fit in dst
+  size_t src_len2 = src_len / 2;  // whole 2-byte pixels available in src
+  size_t len = (dst_len3 < src_len2) ? dst_len3 : src_len2;  // pixels to convert: the smaller of the two
+  uint8_t* d = dst_ptr;  // write cursor
+  const uint8_t* s = src_ptr;  // read cursor
+  size_t n = len;  // pixels still to do; len itself is kept for the return value
+
+  // TODO: unroll.
+
+  while (n >= 1) {  // one pixel per iteration
+    uint32_t s0 = wuffs_base__color_u16_rgb_565__as__color_u32_argb_premul(  // NOTE(review): per its name, widens a 565 color to 32-bit ARGB; helper is defined elsewhere
+        wuffs_base__peek_u16le__no_bounds_check(s + (0 * 2)));  // unchecked read is in range because n <= src_len / 2
+    wuffs_base__poke_u24le__no_bounds_check(d + (0 * 3), s0);  // presumably stores only the low 3 bytes of s0 (alpha dropped) -- confirm against helper
+
+    s += 1 * 2;  // advance by one source pixel
+    d += 1 * 3;  // advance by one destination pixel
+    n -= 1;
+  }
+
+  return len;  // count of pixels converted, not bytes
+}
+
+static uint64_t  //
 wuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src(uint8_t* dst_ptr,
                                                      size_t dst_len,
                                                      uint8_t* dst_palette_ptr,
@@ -17770,6 +17815,35 @@
 }
 
 static uint64_t  //
+wuffs_base__pixel_swizzler__bgrw__bgr_565(uint8_t* dst_ptr,  // converts 2-byte src pixels into 4-byte dst pixels
+                                          size_t dst_len,
+                                          uint8_t* dst_palette_ptr,  // not referenced in this body
+                                          size_t dst_palette_len,  // not referenced in this body
+                                          const uint8_t* src_ptr,
+                                          size_t src_len) {
+  size_t dst_len4 = dst_len / 4;  // whole 4-byte pixels that fit in dst
+  size_t src_len2 = src_len / 2;  // whole 2-byte pixels available in src
+  size_t len = (dst_len4 < src_len2) ? dst_len4 : src_len2;  // pixels to convert: the smaller of the two
+  uint8_t* d = dst_ptr;  // write cursor
+  const uint8_t* s = src_ptr;  // read cursor
+  size_t n = len;  // pixels still to do; len itself is kept for the return value
+
+  // TODO: unroll.
+
+  while (n >= 1) {  // one pixel per iteration
+    wuffs_base__poke_u32le__no_bounds_check(  // unchecked write is in range because n <= dst_len / 4
+        d + (0 * 4), wuffs_base__color_u16_rgb_565__as__color_u32_argb_premul(  // NOTE(review): per its name, widens 565 to 32-bit ARGB; all 4 bytes are stored
+                         wuffs_base__peek_u16le__no_bounds_check(s + (0 * 2))));  // unchecked read is in range because n <= src_len / 2
+
+    s += 1 * 2;  // advance by one source pixel
+    d += 1 * 4;  // advance by one destination pixel
+    n -= 1;
+  }
+
+  return len;  // count of pixels converted, not bytes
+}
+
+static uint64_t  //
 wuffs_base__pixel_swizzler__bgrw__bgrx(uint8_t* dst_ptr,
                                        size_t dst_len,
                                        uint8_t* dst_palette_ptr,
@@ -17955,6 +18029,35 @@
 }
 
 static uint64_t  //
+wuffs_base__pixel_swizzler__bgrw_4x16le__bgr_565(uint8_t* dst_ptr,  // converts 2-byte src pixels into 8-byte dst pixels
+                                                 size_t dst_len,
+                                                 uint8_t* dst_palette_ptr,  // not referenced in this body
+                                                 size_t dst_palette_len,  // not referenced in this body
+                                                 const uint8_t* src_ptr,
+                                                 size_t src_len) {
+  size_t dst_len8 = dst_len / 8;  // whole 8-byte pixels that fit in dst
+  size_t src_len2 = src_len / 2;  // whole 2-byte pixels available in src
+  size_t len = (dst_len8 < src_len2) ? dst_len8 : src_len2;  // pixels to convert: the smaller of the two
+  uint8_t* d = dst_ptr;  // write cursor
+  const uint8_t* s = src_ptr;  // read cursor
+  size_t n = len;  // pixels still to do; len itself is kept for the return value
+
+  while (n >= 1) {  // one pixel per iteration
+    wuffs_base__poke_u64le__no_bounds_check(  // unchecked write is in range because n <= dst_len / 8
+        d + (0 * 8),
+        wuffs_base__color_u32__as__color_u64(  // NOTE(review): presumably widens each 8-bit channel to 16 bits -- confirm against helper
+            wuffs_base__color_u16_rgb_565__as__color_u32_argb_premul(  // NOTE(review): per its name, widens 565 to 32-bit ARGB first
+                wuffs_base__peek_u16le__no_bounds_check(s + (0 * 2)))));  // unchecked read is in range because n <= src_len / 2
+
+    s += 1 * 2;  // advance by one source pixel
+    d += 1 * 8;  // advance by one destination pixel
+    n -= 1;
+  }
+
+  return len;  // count of pixels converted, not bytes
+}
+
+static uint64_t  //
 wuffs_base__pixel_swizzler__bgrw_4x16le__bgrx(uint8_t* dst_ptr,
                                               size_t dst_len,
                                               uint8_t* dst_palette_ptr,
@@ -18089,6 +18192,39 @@
 // --------
 
 static uint64_t  //
+wuffs_base__pixel_swizzler__rgbw__bgr_565(uint8_t* dst_ptr,  // converts 2-byte src pixels into 4-byte dst pixels, with a channel swap
+                                          size_t dst_len,
+                                          uint8_t* dst_palette_ptr,  // not referenced in this body
+                                          size_t dst_palette_len,  // not referenced in this body
+                                          const uint8_t* src_ptr,
+                                          size_t src_len) {
+  size_t dst_len4 = dst_len / 4;  // whole 4-byte pixels that fit in dst
+  size_t src_len2 = src_len / 2;  // whole 2-byte pixels available in src
+  size_t len = (dst_len4 < src_len2) ? dst_len4 : src_len2;  // pixels to convert: the smaller of the two
+  uint8_t* d = dst_ptr;  // write cursor
+  const uint8_t* s = src_ptr;  // read cursor
+  size_t n = len;  // pixels still to do; len itself is kept for the return value
+
+  // TODO: unroll.
+
+  while (n >= 1) {  // one pixel per iteration
+    wuffs_base__poke_u32le__no_bounds_check(  // unchecked write is in range because n <= dst_len / 4
+        d + (0 * 4),
+        wuffs_base__swap_u32_argb_abgr(  // NOTE(review): per its name, exchanges the R and B channels -- the only difference from the bgrw variant
+            wuffs_base__color_u16_rgb_565__as__color_u32_argb_premul(  // NOTE(review): per its name, widens 565 to 32-bit ARGB first
+                wuffs_base__peek_u16le__no_bounds_check(s + (0 * 2)))));  // unchecked read is in range because n <= src_len / 2
+
+    s += 1 * 2;  // advance by one source pixel
+    d += 1 * 4;  // advance by one destination pixel
+    n -= 1;
+  }
+
+  return len;  // count of pixels converted, not bytes
+}
+
+// --------
+
+static uint64_t  //
 wuffs_base__pixel_swizzler__xxx__index__src(uint8_t* dst_ptr,
                                             size_t dst_len,
                                             uint8_t* dst_palette_ptr,
@@ -19052,6 +19188,39 @@
 }
 
 static wuffs_base__pixel_swizzler__func  //
+wuffs_base__pixel_swizzler__prepare__bgr_565(
+    wuffs_base__pixel_swizzler* p,
+    wuffs_base__pixel_format dst_pixfmt,
+    wuffs_base__slice_u8 dst_palette,
+    wuffs_base__slice_u8 src_palette,
+    wuffs_base__pixel_blend blend) {  // Only dst_pixfmt is read below.
+  switch (dst_pixfmt.repr) {  // Pick the swizzle func for this dst format.
+    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:
+      return wuffs_base__pixel_swizzler__copy_2_2;  // memmove, 2-byte units.
+
+    case WUFFS_BASE__PIXEL_FORMAT__BGR:
+      return wuffs_base__pixel_swizzler__bgr__bgr_565;  // 3-byte dst pixels.
+
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRX:
+      return wuffs_base__pixel_swizzler__bgrw__bgr_565;
+
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL_4X16LE:
+      return wuffs_base__pixel_swizzler__bgrw_4x16le__bgr_565;  // 8-byte dst.
+
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBX:
+      return wuffs_base__pixel_swizzler__rgbw__bgr_565;  // 4-byte dst pixels.
+  }
+  return NULL;  // No swizzle func here for any other dst format.
+}
+
+static wuffs_base__pixel_swizzler__func  //
 wuffs_base__pixel_swizzler__prepare__bgr(wuffs_base__pixel_swizzler* p,
                                          wuffs_base__pixel_format dst_pixfmt,
                                          wuffs_base__slice_u8 dst_palette,
@@ -19671,6 +19840,11 @@
           p, dst_pixfmt, dst_palette, src_palette, blend);
       break;
 
+    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:
+      func = wuffs_base__pixel_swizzler__prepare__bgr_565(
+          p, dst_pixfmt, dst_palette, src_palette, blend);
+      break;
+
     case WUFFS_BASE__PIXEL_FORMAT__BGR:
       func = wuffs_base__pixel_swizzler__prepare__bgr(
           p, dst_pixfmt, dst_palette, src_palette, blend);
diff --git a/test/c/std/wbmp.c b/test/c/std/wbmp.c
index a12912d..031d155 100644
--- a/test/c/std/wbmp.c
+++ b/test/c/std/wbmp.c
@@ -243,6 +243,10 @@
           .pixfmt_repr = WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_BINARY,
       },
       {
+          .color = 0xFF102031,
+          .pixfmt_repr = WUFFS_BASE__PIXEL_FORMAT__BGR_565,
+      },
+      {
           .color = 0xFF443300,
           .pixfmt_repr = WUFFS_BASE__PIXEL_FORMAT__BGR,
       },