Add swizzle_interleaved_from_reader
diff --git a/internal/cgen/base/image-private.h b/internal/cgen/base/image-private.h
index bd69978..268e29a 100644
--- a/internal/cgen/base/image-private.h
+++ b/internal/cgen/base/image-private.h
@@ -16,6 +16,14 @@
 
 // ---------------- Images
 
+WUFFS_BASE__MAYBE_STATIC uint64_t  //
+wuffs_base__pixel_swizzler__swizzle_interleaved_from_reader(
+    const wuffs_base__pixel_swizzler* p,
+    wuffs_base__slice_u8 dst,
+    wuffs_base__slice_u8 dst_palette,
+    const uint8_t** ptr_iop_r,
+    const uint8_t* io2_r);
+
 // ---------------- Images (Utility)
 
 #define wuffs_base__utility__make_pixel_format wuffs_base__make_pixel_format
diff --git a/internal/cgen/base/image-public.h b/internal/cgen/base/image-public.h
index 8f7a208..74ff433 100644
--- a/internal/cgen/base/image-public.h
+++ b/internal/cgen/base/image-public.h
@@ -1299,12 +1299,13 @@
   // compatibility or safety guarantee if you do so.
   struct {
     wuffs_base__pixel_swizzler__func func;
+    uint32_t src_pixfmt_bytes_per_pixel;
   } private_impl;
 
 #ifdef __cplusplus
-  inline wuffs_base__status prepare(wuffs_base__pixel_format dst_format,
+  inline wuffs_base__status prepare(wuffs_base__pixel_format dst_pixfmt,
                                     wuffs_base__slice_u8 dst_palette,
-                                    wuffs_base__pixel_format src_format,
+                                    wuffs_base__pixel_format src_pixfmt,
                                     wuffs_base__slice_u8 src_palette,
                                     wuffs_base__pixel_blend blend);
   inline uint64_t swizzle_interleaved_from_slice(
@@ -1323,9 +1324,9 @@
 // just WUFFS_CONFIG__MODULE__BASE__CORE.
 WUFFS_BASE__MAYBE_STATIC wuffs_base__status  //
 wuffs_base__pixel_swizzler__prepare(wuffs_base__pixel_swizzler* p,
-                                    wuffs_base__pixel_format dst_format,
+                                    wuffs_base__pixel_format dst_pixfmt,
                                     wuffs_base__slice_u8 dst_palette,
-                                    wuffs_base__pixel_format src_format,
+                                    wuffs_base__pixel_format src_pixfmt,
                                     wuffs_base__slice_u8 src_palette,
                                     wuffs_base__pixel_blend blend);
 
@@ -1345,13 +1346,13 @@
 #ifdef __cplusplus
 
 inline wuffs_base__status  //
-wuffs_base__pixel_swizzler::prepare(wuffs_base__pixel_format dst_format,
+wuffs_base__pixel_swizzler::prepare(wuffs_base__pixel_format dst_pixfmt,
                                     wuffs_base__slice_u8 dst_palette,
-                                    wuffs_base__pixel_format src_format,
+                                    wuffs_base__pixel_format src_pixfmt,
                                     wuffs_base__slice_u8 src_palette,
                                     wuffs_base__pixel_blend blend) {
-  return wuffs_base__pixel_swizzler__prepare(this, dst_format, dst_palette,
-                                             src_format, src_palette, blend);
+  return wuffs_base__pixel_swizzler__prepare(this, dst_pixfmt, dst_palette,
+                                             src_pixfmt, src_palette, blend);
 }
 
 uint64_t  //
diff --git a/internal/cgen/base/pixconv-submodule.c b/internal/cgen/base/pixconv-submodule.c
index 9264ee6..0467346 100644
--- a/internal/cgen/base/pixconv-submodule.c
+++ b/internal/cgen/base/pixconv-submodule.c
@@ -1226,11 +1226,11 @@
 
 static wuffs_base__pixel_swizzler__func  //
 wuffs_base__pixel_swizzler__prepare__y(wuffs_base__pixel_swizzler* p,
-                                       wuffs_base__pixel_format dst_format,
+                                       wuffs_base__pixel_format dst_pixfmt,
                                        wuffs_base__slice_u8 dst_palette,
                                        wuffs_base__slice_u8 src_palette,
                                        wuffs_base__pixel_blend blend) {
-  switch (dst_format.repr) {
+  switch (dst_pixfmt.repr) {
     case WUFFS_BASE__PIXEL_FORMAT__BGR_565:
       return wuffs_base__pixel_swizzler__bgr_565__y;
 
@@ -1254,11 +1254,11 @@
 static wuffs_base__pixel_swizzler__func  //
 wuffs_base__pixel_swizzler__prepare__indexed__bgra_binary(
     wuffs_base__pixel_swizzler* p,
-    wuffs_base__pixel_format dst_format,
+    wuffs_base__pixel_format dst_pixfmt,
     wuffs_base__slice_u8 dst_palette,
     wuffs_base__slice_u8 src_palette,
     wuffs_base__pixel_blend blend) {
-  switch (dst_format.repr) {
+  switch (dst_pixfmt.repr) {
     case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_NONPREMUL:
     case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_PREMUL:
     case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_BINARY:
@@ -1346,11 +1346,11 @@
 
 static wuffs_base__pixel_swizzler__func  //
 wuffs_base__pixel_swizzler__prepare__bgr(wuffs_base__pixel_swizzler* p,
-                                         wuffs_base__pixel_format dst_format,
+                                         wuffs_base__pixel_format dst_pixfmt,
                                          wuffs_base__slice_u8 dst_palette,
                                          wuffs_base__slice_u8 src_palette,
                                          wuffs_base__pixel_blend blend) {
-  switch (dst_format.repr) {
+  switch (dst_pixfmt.repr) {
     case WUFFS_BASE__PIXEL_FORMAT__BGR_565:
       return wuffs_base__pixel_swizzler__bgr_565__bgr;
 
@@ -1377,11 +1377,11 @@
 static wuffs_base__pixel_swizzler__func  //
 wuffs_base__pixel_swizzler__prepare__bgra_nonpremul(
     wuffs_base__pixel_swizzler* p,
-    wuffs_base__pixel_format dst_format,
+    wuffs_base__pixel_format dst_pixfmt,
     wuffs_base__slice_u8 dst_palette,
     wuffs_base__slice_u8 src_palette,
     wuffs_base__pixel_blend blend) {
-  switch (dst_format.repr) {
+  switch (dst_pixfmt.repr) {
     case WUFFS_BASE__PIXEL_FORMAT__BGR_565:
       switch (blend) {
         case WUFFS_BASE__PIXEL_BLEND__SRC:
@@ -1438,47 +1438,74 @@
 
 WUFFS_BASE__MAYBE_STATIC wuffs_base__status  //
 wuffs_base__pixel_swizzler__prepare(wuffs_base__pixel_swizzler* p,
-                                    wuffs_base__pixel_format dst_format,
+                                    wuffs_base__pixel_format dst_pixfmt,
                                     wuffs_base__slice_u8 dst_palette,
-                                    wuffs_base__pixel_format src_format,
+                                    wuffs_base__pixel_format src_pixfmt,
                                     wuffs_base__slice_u8 src_palette,
                                     wuffs_base__pixel_blend blend) {
   if (!p) {
     return wuffs_base__make_status(wuffs_base__error__bad_receiver);
   }
+  p->private_impl.func = NULL;
+  p->private_impl.src_pixfmt_bytes_per_pixel = 0;
+
+  wuffs_base__pixel_swizzler__func func = NULL;
+  uint32_t src_pixfmt_bits_per_pixel =
+      wuffs_base__pixel_format__bits_per_pixel(&src_pixfmt);
+  if ((src_pixfmt_bits_per_pixel == 0) ||
+      ((src_pixfmt_bits_per_pixel & 7) != 0)) {
+    return wuffs_base__make_status(
+        wuffs_base__error__unsupported_pixel_swizzler_option);
+  }
 
   // TODO: support many more formats.
 
-  wuffs_base__pixel_swizzler__func func = NULL;
-
-  switch (src_format.repr) {
+  switch (src_pixfmt.repr) {
     case WUFFS_BASE__PIXEL_FORMAT__Y:
-      func = wuffs_base__pixel_swizzler__prepare__y(p, dst_format, dst_palette,
+      func = wuffs_base__pixel_swizzler__prepare__y(p, dst_pixfmt, dst_palette,
                                                     src_palette, blend);
       break;
 
     case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_BINARY:
       func = wuffs_base__pixel_swizzler__prepare__indexed__bgra_binary(
-          p, dst_format, dst_palette, src_palette, blend);
+          p, dst_pixfmt, dst_palette, src_palette, blend);
       break;
 
     case WUFFS_BASE__PIXEL_FORMAT__BGR:
       func = wuffs_base__pixel_swizzler__prepare__bgr(
-          p, dst_format, dst_palette, src_palette, blend);
+          p, dst_pixfmt, dst_palette, src_palette, blend);
       break;
 
     case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
       func = wuffs_base__pixel_swizzler__prepare__bgra_nonpremul(
-          p, dst_format, dst_palette, src_palette, blend);
+          p, dst_pixfmt, dst_palette, src_palette, blend);
       break;
   }
 
   p->private_impl.func = func;
+  p->private_impl.src_pixfmt_bytes_per_pixel = src_pixfmt_bits_per_pixel / 8;
   return wuffs_base__make_status(
       func ? NULL : wuffs_base__error__unsupported_pixel_swizzler_option);
 }
 
 WUFFS_BASE__MAYBE_STATIC uint64_t  //
+wuffs_base__pixel_swizzler__swizzle_interleaved_from_reader(
+    const wuffs_base__pixel_swizzler* p,
+    wuffs_base__slice_u8 dst,
+    wuffs_base__slice_u8 dst_palette,
+    const uint8_t** ptr_iop_r,
+    const uint8_t* io2_r) {
+  if (p && p->private_impl.func) {
+    const uint8_t* iop_r = *ptr_iop_r;
+    uint64_t n = (*p->private_impl.func)(dst.ptr, dst.len, dst_palette.ptr,
+                                         dst_palette.len, iop_r, io2_r - iop_r);
+    *ptr_iop_r += n * p->private_impl.src_pixfmt_bytes_per_pixel;
+    return n;
+  }
+  return 0;
+}
+
+WUFFS_BASE__MAYBE_STATIC uint64_t  //
 wuffs_base__pixel_swizzler__swizzle_interleaved_from_slice(
     const wuffs_base__pixel_swizzler* p,
     wuffs_base__slice_u8 dst,
diff --git a/internal/cgen/builtin.go b/internal/cgen/builtin.go
index 989706c..65dcd5e 100644
--- a/internal/cgen/builtin.go
+++ b/internal/cgen/builtin.go
@@ -94,6 +94,27 @@
 			return g.writeBuiltinIOReader(b, recv, method.Ident(), n.Args(), depth)
 		case t.IDIOWriter:
 			return g.writeBuiltinIOWriter(b, recv, method.Ident(), n.Args(), depth)
+		case t.IDPixelSwizzler: // Only swizzle_interleaved_from_reader is special-cased for this receiver type.
+			switch method.Ident() {
+			case t.IDSwizzleInterleavedFromReader:
+				b.writes("wuffs_base__pixel_swizzler__swizzle_interleaved_from_reader(&") // The trailing '&' takes the address of the receiver expression written next.
+				if err := g.writeExpr(b, recv, depth); err != nil {
+					return err
+				}
+				args := n.Args()
+				for _, o := range args[:len(args)-1] { // Every argument except the last is emitted as an ordinary comma-prefixed expression.
+					b.writeb(',')
+					if err := g.writeExpr(b, o.AsArg().Value(), depth); err != nil {
+						return err
+					}
+				}
+				readerArgName, err := g.ioRecvName(args[len(args)-1].AsArg().Value()) // The last argument is not written as an expression; only its name is used, below.
+				if err != nil {
+					return err
+				}
+				b.printf(", &%s%s, %s%s)", iopPrefix, readerArgName, io2Prefix, readerArgName) // Final two C arguments: the iopPrefix-named variable by address, the io2Prefix-named one by value.
+				return nil
+			}
 		case t.IDTokenWriter:
 			return g.writeBuiltinTokenWriter(b, recv, method.Ident(), n.Args(), depth)
 		case t.IDUtility:
diff --git a/internal/cgen/data.go b/internal/cgen/data.go
index 7413e62..ccb1442 100644
--- a/internal/cgen/data.go
+++ b/internal/cgen/data.go
@@ -168,17 +168,19 @@
 	"n3 = src_len / 3;\n  size_t len = (dst_len4 < src_len3) ? dst_len4 : src_len3;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (0 * 4),\n        0xFF000000 | wuffs_base__load_u24le__no_bounds_check(s + (0 * 3)));\n\n    s += 1 * 3;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__xxxx__y(uint8_t* dst_ptr,\n                                    size_t dst_len,\n                                    uint8_t* dst_palette_ptr,\n                                    size_t dst_palette_len,\n                                    const uint8_t* src_ptr,\n                                    size_t src_len) {\n  size_t dst_len4 = dst_len / 4;\n  size_t len = (dst_len4 < src_len) ? dst_len4 : src_len;\n  uint8_t* d = dst_ptr;\n  const uint8_t* s = src_ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (" +
 	"0 * 4), 0xFF000000 | (0x010101 * (uint32_t)s[0]));\n\n    s += 1 * 1;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\n" +
 	"" +
-	"// --------\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__y(wuffs_base__pixel_swizzler* p,\n                                       wuffs_base__pixel_format dst_format,\n                                       wuffs_base__slice_u8 dst_palette,\n                                       wuffs_base__slice_u8 src_palette,\n                                       wuffs_base__pixel_blend blend) {\n  switch (dst_format.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      return wuffs_base__pixel_swizzler__bgr_565__y;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      return wuffs_base__pixel_swizzler__xxx__y;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WU" +
-	"FFS_BASE__PIXEL_FORMAT__RGBX:\n      return wuffs_base__pixel_swizzler__xxxx__y;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__indexed__bgra_binary(\n    wuffs_base__pixel_swizzler* p,\n    wuffs_base__pixel_format dst_format,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src_palette,\n    wuffs_base__pixel_blend blend) {\n  switch (dst_format.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_BINARY:\n      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=\n          1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__copy_1_1;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      if (wuffs_base__pixel_swizzler__squash_bgr_565_888(dst_palette,\n                                        " +
+	"// --------\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__y(wuffs_base__pixel_swizzler* p,\n                                       wuffs_base__pixel_format dst_pixfmt,\n                                       wuffs_base__slice_u8 dst_palette,\n                                       wuffs_base__slice_u8 src_palette,\n                                       wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      return wuffs_base__pixel_swizzler__bgr_565__y;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      return wuffs_base__pixel_swizzler__xxx__y;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WU" +
+	"FFS_BASE__PIXEL_FORMAT__RGBX:\n      return wuffs_base__pixel_swizzler__xxxx__y;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__indexed__bgra_binary(\n    wuffs_base__pixel_swizzler* p,\n    wuffs_base__pixel_format dst_pixfmt,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src_palette,\n    wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_BINARY:\n      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=\n          1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__copy_1_1;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      if (wuffs_base__pixel_swizzler__squash_bgr_565_888(dst_palette,\n                                        " +
 	"                 src_palette) != 1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr_565__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr_565__index_binary_alpha__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=\n          1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__xxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__xxx__index_binary_alpha__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_pa" +
 	"lette) !=\n          1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__xxxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__xxxx__index_binary_alpha__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      if (wuffs_base__pixel_swizzler__swap_rgbx_bgrx(dst_palette,\n                                                     src_palette) != 1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__xxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__xxx__index_binary_alpha__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n      if (wuffs_base__pixel_swizzler__" +
-	"swap_rgbx_bgrx(dst_palette,\n                                                     src_palette) != 1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__xxxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__xxxx__index_binary_alpha__src_over;\n      }\n      return NULL;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__bgr(wuffs_base__pixel_swizzler* p,\n                                         wuffs_base__pixel_format dst_format,\n                                         wuffs_base__slice_u8 dst_palette,\n                                         wuffs_base__slice_u8 src_palette,\n                                         wuffs_base__pixel_blend blend) {\n  switch (dst_format.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      return wuffs_base__pixel_swizzler__bgr_565__bgr;\n\n    case WUFFS_BASE__PIXEL_FORMAT__B" +
-	"GR:\n      return wuffs_base__pixel_swizzler__copy_3_3;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n      return wuffs_base__pixel_swizzler__xxxx__xxx;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n      // TODO.\n      break;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__bgra_nonpremul(\n    wuffs_base__pixel_swizzler* p,\n    wuffs_base__pixel_format dst_format,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src_palette,\n    wuffs_base__pixel_blend blend) {\n  switch (dst_format.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_b" +
+	"swap_rgbx_bgrx(dst_palette,\n                                                     src_palette) != 1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__xxxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__xxxx__index_binary_alpha__src_over;\n      }\n      return NULL;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__bgr(wuffs_base__pixel_swizzler* p,\n                                         wuffs_base__pixel_format dst_pixfmt,\n                                         wuffs_base__slice_u8 dst_palette,\n                                         wuffs_base__slice_u8 src_palette,\n                                         wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      return wuffs_base__pixel_swizzler__bgr_565__bgr;\n\n    case WUFFS_BASE__PIXEL_FORMAT__B" +
+	"GR:\n      return wuffs_base__pixel_swizzler__copy_3_3;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n      return wuffs_base__pixel_swizzler__xxxx__xxx;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n      // TODO.\n      break;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__bgra_nonpremul(\n    wuffs_base__pixel_swizzler* p,\n    wuffs_base__pixel_format dst_pixfmt,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src_palette,\n    wuffs_base__pixel_blend blend) {\n  switch (dst_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_b" +
 	"ase__pixel_swizzler__bgr_565__bgra_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__copy_4_4;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swiz" +
 	"zler__bgra_premul__bgra_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n      // TODO.\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n      // TODO.\n      break;\n  }\n  return NULL;\n}\n\n" +
 	"" +
-	"// --------\n\nWUFFS_BASE__MAYBE_STATIC wuffs_base__status  //\nwuffs_base__pixel_swizzler__prepare(wuffs_base__pixel_swizzler* p,\n                                    wuffs_base__pixel_format dst_format,\n                                    wuffs_base__slice_u8 dst_palette,\n                                    wuffs_base__pixel_format src_format,\n                                    wuffs_base__slice_u8 src_palette,\n                                    wuffs_base__pixel_blend blend) {\n  if (!p) {\n    return wuffs_base__make_status(wuffs_base__error__bad_receiver);\n  }\n\n  // TODO: support many more formats.\n\n  wuffs_base__pixel_swizzler__func func = NULL;\n\n  switch (src_format.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__Y:\n      func = wuffs_base__pixel_swizzler__prepare__y(p, dst_format, dst_palette,\n                                                    src_palette, blend);\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_BINARY:\n      func = wuffs_base__pixel_swizzler__prepare__indexed__bgra_binary(\n    " +
-	"      p, dst_format, dst_palette, src_palette, blend);\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      func = wuffs_base__pixel_swizzler__prepare__bgr(\n          p, dst_format, dst_palette, src_palette, blend);\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n      func = wuffs_base__pixel_swizzler__prepare__bgra_nonpremul(\n          p, dst_format, dst_palette, src_palette, blend);\n      break;\n  }\n\n  p->private_impl.func = func;\n  return wuffs_base__make_status(\n      func ? NULL : wuffs_base__error__unsupported_pixel_swizzler_option);\n}\n\nWUFFS_BASE__MAYBE_STATIC uint64_t  //\nwuffs_base__pixel_swizzler__swizzle_interleaved_from_slice(\n    const wuffs_base__pixel_swizzler* p,\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src) {\n  if (p && p->private_impl.func) {\n    return (*p->private_impl.func)(dst.ptr, dst.len, dst_palette.ptr,\n                                   dst_palette.len, src.ptr, src.len);\n  }\n  return 0;\n}\n" +
+	"// --------\n\nWUFFS_BASE__MAYBE_STATIC wuffs_base__status  //\nwuffs_base__pixel_swizzler__prepare(wuffs_base__pixel_swizzler* p,\n                                    wuffs_base__pixel_format dst_pixfmt,\n                                    wuffs_base__slice_u8 dst_palette,\n                                    wuffs_base__pixel_format src_pixfmt,\n                                    wuffs_base__slice_u8 src_palette,\n                                    wuffs_base__pixel_blend blend) {\n  if (!p) {\n    return wuffs_base__make_status(wuffs_base__error__bad_receiver);\n  }\n  p->private_impl.func = NULL;\n  p->private_impl.src_pixfmt_bytes_per_pixel = 0;\n\n  wuffs_base__pixel_swizzler__func func = NULL;\n  uint32_t src_pixfmt_bits_per_pixel =\n      wuffs_base__pixel_format__bits_per_pixel(&src_pixfmt);\n  if ((src_pixfmt_bits_per_pixel == 0) ||\n      ((src_pixfmt_bits_per_pixel & 7) != 0)) {\n    return wuffs_base__make_status(\n        wuffs_base__error__unsupported_pixel_swizzler_option);\n  }\n\n  // TODO: support many more for" +
+	"mats.\n\n  switch (src_pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__Y:\n      func = wuffs_base__pixel_swizzler__prepare__y(p, dst_pixfmt, dst_palette,\n                                                    src_palette, blend);\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_BINARY:\n      func = wuffs_base__pixel_swizzler__prepare__indexed__bgra_binary(\n          p, dst_pixfmt, dst_palette, src_palette, blend);\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      func = wuffs_base__pixel_swizzler__prepare__bgr(\n          p, dst_pixfmt, dst_palette, src_palette, blend);\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n      func = wuffs_base__pixel_swizzler__prepare__bgra_nonpremul(\n          p, dst_pixfmt, dst_palette, src_palette, blend);\n      break;\n  }\n\n  p->private_impl.func = func;\n  p->private_impl.src_pixfmt_bytes_per_pixel = src_pixfmt_bits_per_pixel / 8;\n  return wuffs_base__make_status(\n      func ? NULL : wuffs_base__error__unsupported_pixel_swizzler_option);\n}" +
+	"\n\nWUFFS_BASE__MAYBE_STATIC uint64_t  //\nwuffs_base__pixel_swizzler__swizzle_interleaved_from_reader(\n    const wuffs_base__pixel_swizzler* p,\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    const uint8_t** ptr_iop_r,\n    const uint8_t* io2_r) {\n  if (p && p->private_impl.func) {\n    const uint8_t* iop_r = *ptr_iop_r;\n    uint64_t n = (*p->private_impl.func)(dst.ptr, dst.len, dst_palette.ptr,\n                                         dst_palette.len, iop_r, io2_r - iop_r);\n    *ptr_iop_r += n * p->private_impl.src_pixfmt_bytes_per_pixel;\n    return n;\n  }\n  return 0;\n}\n\nWUFFS_BASE__MAYBE_STATIC uint64_t  //\nwuffs_base__pixel_swizzler__swizzle_interleaved_from_slice(\n    const wuffs_base__pixel_swizzler* p,\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src) {\n  if (p && p->private_impl.func) {\n    return (*p->private_impl.func)(dst.ptr, dst.len, dst_palette.ptr,\n                                   dst_palette.len, src.ptr, src.len);\n  }\n  retu" +
+	"rn 0;\n}\n" +
 	""
 
 const baseFundamentalPrivateH = "" +
@@ -255,7 +257,7 @@
 	""
 
 const baseImagePrivateH = "" +
-	"// ---------------- Images\n\n" +
+	"// ---------------- Images\n\nWUFFS_BASE__MAYBE_STATIC uint64_t  //\nwuffs_base__pixel_swizzler__swizzle_interleaved_from_reader(\n    const wuffs_base__pixel_swizzler* p,\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    const uint8_t** ptr_iop_r,\n    const uint8_t* io2_r);\n\n" +
 	"" +
 	"// ---------------- Images (Utility)\n\n#define wuffs_base__utility__make_pixel_format wuffs_base__make_pixel_format\n" +
 	""
@@ -320,10 +322,10 @@
 	"" +
 	"// --------\n\n// wuffs_base__pixel_palette__closest_element returns the index of the palette\n// element that minimizes the sum of squared differences of the four ARGB\n// channels, working in premultiplied alpha. Ties favor the smaller index.\n//\n// The palette_slice.len may equal (N*4), for N less than 256, which means that\n// only the first N palette elements are considered. It returns 0 when N is 0.\n//\n// Applying this function on a per-pixel basis will not produce whole-of-image\n// dithering.\nWUFFS_BASE__MAYBE_STATIC uint8_t  //\nwuffs_base__pixel_palette__closest_element(\n    wuffs_base__slice_u8 palette_slice,\n    wuffs_base__pixel_format palette_format,\n    wuffs_base__color_u32_argb_premul c);\n\n" +
 	"" +
-	"// --------\n\n// TODO: should the func type take restrict pointers?\ntypedef uint64_t (*wuffs_base__pixel_swizzler__func)(uint8_t* dst_ptr,\n                                                     size_t dst_len,\n                                                     uint8_t* dst_palette_ptr,\n                                                     size_t dst_palette_len,\n                                                     const uint8_t* src_ptr,\n                                                     size_t src_len);\n\ntypedef struct {\n  // Do not access the private_impl's fields directly. There is no API/ABI\n  // compatibility or safety guarantee if you do so.\n  struct {\n    wuffs_base__pixel_swizzler__func func;\n  } private_impl;\n\n#ifdef __cplusplus\n  inline wuffs_base__status prepare(wuffs_base__pixel_format dst_format,\n                                    wuffs_base__slice_u8 dst_palette,\n                                    wuffs_base__pixel_format src_format,\n                                    wuffs_base__slice_u8 src" +
-	"_palette,\n                                    wuffs_base__pixel_blend blend);\n  inline uint64_t swizzle_interleaved_from_slice(\n      wuffs_base__slice_u8 dst,\n      wuffs_base__slice_u8 dst_palette,\n      wuffs_base__slice_u8 src) const;\n#endif  // __cplusplus\n\n} wuffs_base__pixel_swizzler;\n\n// wuffs_base__pixel_swizzler__prepare readies the pixel swizzler so that its\n// other methods may be called.\n//\n// For modular builds that divide the base module into sub-modules, using this\n// function requires the WUFFS_CONFIG__MODULE__BASE__PIXCONV sub-module, not\n// just WUFFS_CONFIG__MODULE__BASE__CORE.\nWUFFS_BASE__MAYBE_STATIC wuffs_base__status  //\nwuffs_base__pixel_swizzler__prepare(wuffs_base__pixel_swizzler* p,\n                                    wuffs_base__pixel_format dst_format,\n                                    wuffs_base__slice_u8 dst_palette,\n                                    wuffs_base__pixel_format src_format,\n                                    wuffs_base__slice_u8 src_palette,\n                  " +
-	"                  wuffs_base__pixel_blend blend);\n\n// wuffs_base__pixel_swizzler__swizzle_interleaved_from_slice converts pixels\n// from a source format to a destination format.\n//\n// For modular builds that divide the base module into sub-modules, using this\n// function requires the WUFFS_CONFIG__MODULE__BASE__PIXCONV sub-module, not\n// just WUFFS_CONFIG__MODULE__BASE__CORE.\nWUFFS_BASE__MAYBE_STATIC uint64_t  //\nwuffs_base__pixel_swizzler__swizzle_interleaved_from_slice(\n    const wuffs_base__pixel_swizzler* p,\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src);\n\n#ifdef __cplusplus\n\ninline wuffs_base__status  //\nwuffs_base__pixel_swizzler::prepare(wuffs_base__pixel_format dst_format,\n                                    wuffs_base__slice_u8 dst_palette,\n                                    wuffs_base__pixel_format src_format,\n                                    wuffs_base__slice_u8 src_palette,\n                                    wuffs_base__pixel_blend blend) {\n " +
-	" return wuffs_base__pixel_swizzler__prepare(this, dst_format, dst_palette,\n                                             src_format, src_palette, blend);\n}\n\nuint64_t  //\nwuffs_base__pixel_swizzler::swizzle_interleaved_from_slice(\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src) const {\n  return wuffs_base__pixel_swizzler__swizzle_interleaved_from_slice(\n      this, dst, dst_palette, src);\n}\n\n#endif  // __cplusplus\n" +
+	"// --------\n\n// TODO: should the func type take restrict pointers?\ntypedef uint64_t (*wuffs_base__pixel_swizzler__func)(uint8_t* dst_ptr,\n                                                     size_t dst_len,\n                                                     uint8_t* dst_palette_ptr,\n                                                     size_t dst_palette_len,\n                                                     const uint8_t* src_ptr,\n                                                     size_t src_len);\n\ntypedef struct {\n  // Do not access the private_impl's fields directly. There is no API/ABI\n  // compatibility or safety guarantee if you do so.\n  struct {\n    wuffs_base__pixel_swizzler__func func;\n    uint32_t src_pixfmt_bytes_per_pixel;\n  } private_impl;\n\n#ifdef __cplusplus\n  inline wuffs_base__status prepare(wuffs_base__pixel_format dst_pixfmt,\n                                    wuffs_base__slice_u8 dst_palette,\n                                    wuffs_base__pixel_format src_pixfmt,\n                   " +
+	"                 wuffs_base__slice_u8 src_palette,\n                                    wuffs_base__pixel_blend blend);\n  inline uint64_t swizzle_interleaved_from_slice(\n      wuffs_base__slice_u8 dst,\n      wuffs_base__slice_u8 dst_palette,\n      wuffs_base__slice_u8 src) const;\n#endif  // __cplusplus\n\n} wuffs_base__pixel_swizzler;\n\n// wuffs_base__pixel_swizzler__prepare readies the pixel swizzler so that its\n// other methods may be called.\n//\n// For modular builds that divide the base module into sub-modules, using this\n// function requires the WUFFS_CONFIG__MODULE__BASE__PIXCONV sub-module, not\n// just WUFFS_CONFIG__MODULE__BASE__CORE.\nWUFFS_BASE__MAYBE_STATIC wuffs_base__status  //\nwuffs_base__pixel_swizzler__prepare(wuffs_base__pixel_swizzler* p,\n                                    wuffs_base__pixel_format dst_pixfmt,\n                                    wuffs_base__slice_u8 dst_palette,\n                                    wuffs_base__pixel_format src_pixfmt,\n                                    wuffs_base_" +
+	"_slice_u8 src_palette,\n                                    wuffs_base__pixel_blend blend);\n\n// wuffs_base__pixel_swizzler__swizzle_interleaved_from_slice converts pixels\n// from a source format to a destination format.\n//\n// For modular builds that divide the base module into sub-modules, using this\n// function requires the WUFFS_CONFIG__MODULE__BASE__PIXCONV sub-module, not\n// just WUFFS_CONFIG__MODULE__BASE__CORE.\nWUFFS_BASE__MAYBE_STATIC uint64_t  //\nwuffs_base__pixel_swizzler__swizzle_interleaved_from_slice(\n    const wuffs_base__pixel_swizzler* p,\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src);\n\n#ifdef __cplusplus\n\ninline wuffs_base__status  //\nwuffs_base__pixel_swizzler::prepare(wuffs_base__pixel_format dst_pixfmt,\n                                    wuffs_base__slice_u8 dst_palette,\n                                    wuffs_base__pixel_format src_pixfmt,\n                                    wuffs_base__slice_u8 src_palette,\n                             " +
+	"       wuffs_base__pixel_blend blend) {\n  return wuffs_base__pixel_swizzler__prepare(this, dst_pixfmt, dst_palette,\n                                             src_pixfmt, src_palette, blend);\n}\n\nuint64_t  //\nwuffs_base__pixel_swizzler::swizzle_interleaved_from_slice(\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src) const {\n  return wuffs_base__pixel_swizzler__swizzle_interleaved_from_slice(\n      this, dst, dst_palette, src);\n}\n\n#endif  // __cplusplus\n" +
 	""
 
 const baseIOPrivateH = "" +
diff --git a/lang/builtin/builtin.go b/lang/builtin/builtin.go
index 1ee48b7..af50b03 100644
--- a/lang/builtin/builtin.go
+++ b/lang/builtin/builtin.go
@@ -384,6 +384,8 @@
 	"pixel_swizzler.prepare!(" +
 		"dst_pixfmt: pixel_format, dst_palette: slice u8," +
 		"src_pixfmt: pixel_format, src_palette: slice u8, blend: pixel_blend) status",
+	"pixel_swizzler.swizzle_interleaved_from_reader!(" +
+		"dst: slice u8, dst_palette: slice u8, src: io_reader) u64",
 	"pixel_swizzler.swizzle_interleaved_from_slice!(" +
 		"dst: slice u8, dst_palette: slice u8, src: slice u8) u64",
 }
diff --git a/lang/token/list.go b/lang/token/list.go
index 4e5ea4f..6e23c66 100644
--- a/lang/token/list.go
+++ b/lang/token/list.go
@@ -662,6 +662,8 @@
 	IDIO        = ID(0x248)
 	IDLimit     = ID(0x249)
 	IDData      = ID(0x24A)
+
+	IDSwizzleInterleavedFromReader = ID(0x280)
 )
 
 var builtInsByID = [nBuiltInIDs]string{
@@ -1019,6 +1021,8 @@
 	IDIO:        "io",
 	IDLimit:     "limit",
 	IDData:      "data",
+
+	IDSwizzleInterleavedFromReader: "swizzle_interleaved_from_reader",
 }
 
 var builtInsByName = map[string]ID{}
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index c8a62f3..ee41931 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -3792,12 +3792,13 @@
   // compatibility or safety guarantee if you do so.
   struct {
     wuffs_base__pixel_swizzler__func func;
+    uint32_t src_pixfmt_bytes_per_pixel;
   } private_impl;
 
 #ifdef __cplusplus
-  inline wuffs_base__status prepare(wuffs_base__pixel_format dst_format,
+  inline wuffs_base__status prepare(wuffs_base__pixel_format dst_pixfmt,
                                     wuffs_base__slice_u8 dst_palette,
-                                    wuffs_base__pixel_format src_format,
+                                    wuffs_base__pixel_format src_pixfmt,
                                     wuffs_base__slice_u8 src_palette,
                                     wuffs_base__pixel_blend blend);
   inline uint64_t swizzle_interleaved_from_slice(
@@ -3816,9 +3817,9 @@
 // just WUFFS_CONFIG__MODULE__BASE__CORE.
 WUFFS_BASE__MAYBE_STATIC wuffs_base__status  //
 wuffs_base__pixel_swizzler__prepare(wuffs_base__pixel_swizzler* p,
-                                    wuffs_base__pixel_format dst_format,
+                                    wuffs_base__pixel_format dst_pixfmt,
                                     wuffs_base__slice_u8 dst_palette,
-                                    wuffs_base__pixel_format src_format,
+                                    wuffs_base__pixel_format src_pixfmt,
                                     wuffs_base__slice_u8 src_palette,
                                     wuffs_base__pixel_blend blend);
 
@@ -3838,13 +3839,13 @@
 #ifdef __cplusplus
 
 inline wuffs_base__status  //
-wuffs_base__pixel_swizzler::prepare(wuffs_base__pixel_format dst_format,
+wuffs_base__pixel_swizzler::prepare(wuffs_base__pixel_format dst_pixfmt,
                                     wuffs_base__slice_u8 dst_palette,
-                                    wuffs_base__pixel_format src_format,
+                                    wuffs_base__pixel_format src_pixfmt,
                                     wuffs_base__slice_u8 src_palette,
                                     wuffs_base__pixel_blend blend) {
-  return wuffs_base__pixel_swizzler__prepare(this, dst_format, dst_palette,
-                                             src_format, src_palette, blend);
+  return wuffs_base__pixel_swizzler__prepare(this, dst_pixfmt, dst_palette,
+                                             src_pixfmt, src_palette, blend);
 }
 
 uint64_t  //
@@ -4837,26 +4838,22 @@
     uint32_t f_height;
     uint8_t f_call_sequence;
     bool f_top_down;
-    uint8_t f_bits_per_pixel;
     uint32_t f_pad_per_row;
     uint64_t f_bytes_per_row;
-    uint64_t f_bytes_total;
     wuffs_base__pixel_format f_pixfmt;
     uint32_t f_io_redirect_fourcc;
     uint64_t f_io_redirect_pos;
+    uint64_t f_frame_config_io_position;
     uint32_t f_padding;
     uint32_t f_mask_r;
     uint32_t f_mask_g;
     uint32_t f_mask_b;
     uint32_t f_mask_a;
-    uint64_t f_frame_config_io_position;
     uint32_t f_dst_x;
     uint32_t f_dst_y;
     uint32_t f_dst_y_end;
     uint32_t f_dst_y_inc;
-    uint8_t f_stash[4];
-    uint8_t f_num_stashed;
-    uint8_t f_pending_pad;
+    uint32_t f_pending_pad;
     wuffs_base__pixel_swizzler f_swizzler;
 
     uint32_t p_decode_image_config[1];
@@ -4873,7 +4870,6 @@
       uint64_t scratch;
     } s_decode_image_config[1];
     struct {
-      uint64_t v_bytes_remaining;
       uint64_t scratch;
     } s_decode_frame[1];
     struct {
@@ -7899,23 +7895,31 @@
 #define wuffs_base__utility__empty_io_reader wuffs_base__empty_io_reader
 #define wuffs_base__utility__empty_io_writer wuffs_base__empty_io_writer
 
-  // ---------------- Tokens
+// ---------------- Tokens
 
-  // ---------------- Tokens (Utility)
+// ---------------- Tokens (Utility)
 
-  // ---------------- Memory Allocation
+// ---------------- Memory Allocation
 
-  // ---------------- Images
+// ---------------- Images
 
-  // ---------------- Images (Utility)
+WUFFS_BASE__MAYBE_STATIC uint64_t  //
+wuffs_base__pixel_swizzler__swizzle_interleaved_from_reader(
+    const wuffs_base__pixel_swizzler* p,
+    wuffs_base__slice_u8 dst,
+    wuffs_base__slice_u8 dst_palette,
+    const uint8_t** ptr_iop_r,
+    const uint8_t* io2_r);
+
+// ---------------- Images (Utility)
 
 #define wuffs_base__utility__make_pixel_format wuffs_base__make_pixel_format
 
-  // ---------------- String Conversions
+// ---------------- String Conversions
 
-  // ---------------- Unicode and UTF-8
+// ---------------- Unicode and UTF-8
 
-  // ----------------
+// ----------------
 
 #if !defined(WUFFS_CONFIG__MODULES) || defined(WUFFS_CONFIG__MODULE__BASE) || \
     defined(WUFFS_CONFIG__MODULE__BASE__CORE)
@@ -11586,11 +11590,11 @@
 
 static wuffs_base__pixel_swizzler__func  //
 wuffs_base__pixel_swizzler__prepare__y(wuffs_base__pixel_swizzler* p,
-                                       wuffs_base__pixel_format dst_format,
+                                       wuffs_base__pixel_format dst_pixfmt,
                                        wuffs_base__slice_u8 dst_palette,
                                        wuffs_base__slice_u8 src_palette,
                                        wuffs_base__pixel_blend blend) {
-  switch (dst_format.repr) {
+  switch (dst_pixfmt.repr) {
     case WUFFS_BASE__PIXEL_FORMAT__BGR_565:
       return wuffs_base__pixel_swizzler__bgr_565__y;
 
@@ -11614,11 +11618,11 @@
 static wuffs_base__pixel_swizzler__func  //
 wuffs_base__pixel_swizzler__prepare__indexed__bgra_binary(
     wuffs_base__pixel_swizzler* p,
-    wuffs_base__pixel_format dst_format,
+    wuffs_base__pixel_format dst_pixfmt,
     wuffs_base__slice_u8 dst_palette,
     wuffs_base__slice_u8 src_palette,
     wuffs_base__pixel_blend blend) {
-  switch (dst_format.repr) {
+  switch (dst_pixfmt.repr) {
     case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_NONPREMUL:
     case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_PREMUL:
     case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_BINARY:
@@ -11706,11 +11710,11 @@
 
 static wuffs_base__pixel_swizzler__func  //
 wuffs_base__pixel_swizzler__prepare__bgr(wuffs_base__pixel_swizzler* p,
-                                         wuffs_base__pixel_format dst_format,
+                                         wuffs_base__pixel_format dst_pixfmt,
                                          wuffs_base__slice_u8 dst_palette,
                                          wuffs_base__slice_u8 src_palette,
                                          wuffs_base__pixel_blend blend) {
-  switch (dst_format.repr) {
+  switch (dst_pixfmt.repr) {
     case WUFFS_BASE__PIXEL_FORMAT__BGR_565:
       return wuffs_base__pixel_swizzler__bgr_565__bgr;
 
@@ -11737,11 +11741,11 @@
 static wuffs_base__pixel_swizzler__func  //
 wuffs_base__pixel_swizzler__prepare__bgra_nonpremul(
     wuffs_base__pixel_swizzler* p,
-    wuffs_base__pixel_format dst_format,
+    wuffs_base__pixel_format dst_pixfmt,
     wuffs_base__slice_u8 dst_palette,
     wuffs_base__slice_u8 src_palette,
     wuffs_base__pixel_blend blend) {
-  switch (dst_format.repr) {
+  switch (dst_pixfmt.repr) {
     case WUFFS_BASE__PIXEL_FORMAT__BGR_565:
       switch (blend) {
         case WUFFS_BASE__PIXEL_BLEND__SRC:
@@ -11798,47 +11802,74 @@
 
 WUFFS_BASE__MAYBE_STATIC wuffs_base__status  //
 wuffs_base__pixel_swizzler__prepare(wuffs_base__pixel_swizzler* p,
-                                    wuffs_base__pixel_format dst_format,
+                                    wuffs_base__pixel_format dst_pixfmt,
                                     wuffs_base__slice_u8 dst_palette,
-                                    wuffs_base__pixel_format src_format,
+                                    wuffs_base__pixel_format src_pixfmt,
                                     wuffs_base__slice_u8 src_palette,
                                     wuffs_base__pixel_blend blend) {
   if (!p) {
     return wuffs_base__make_status(wuffs_base__error__bad_receiver);
   }
+  p->private_impl.func = NULL;
+  p->private_impl.src_pixfmt_bytes_per_pixel = 0;
+
+  wuffs_base__pixel_swizzler__func func = NULL;
+  uint32_t src_pixfmt_bits_per_pixel =
+      wuffs_base__pixel_format__bits_per_pixel(&src_pixfmt);
+  if ((src_pixfmt_bits_per_pixel == 0) ||
+      ((src_pixfmt_bits_per_pixel & 7) != 0)) {
+    return wuffs_base__make_status(
+        wuffs_base__error__unsupported_pixel_swizzler_option);
+  }
 
   // TODO: support many more formats.
 
-  wuffs_base__pixel_swizzler__func func = NULL;
-
-  switch (src_format.repr) {
+  switch (src_pixfmt.repr) {
     case WUFFS_BASE__PIXEL_FORMAT__Y:
-      func = wuffs_base__pixel_swizzler__prepare__y(p, dst_format, dst_palette,
+      func = wuffs_base__pixel_swizzler__prepare__y(p, dst_pixfmt, dst_palette,
                                                     src_palette, blend);
       break;
 
     case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_BINARY:
       func = wuffs_base__pixel_swizzler__prepare__indexed__bgra_binary(
-          p, dst_format, dst_palette, src_palette, blend);
+          p, dst_pixfmt, dst_palette, src_palette, blend);
       break;
 
     case WUFFS_BASE__PIXEL_FORMAT__BGR:
       func = wuffs_base__pixel_swizzler__prepare__bgr(
-          p, dst_format, dst_palette, src_palette, blend);
+          p, dst_pixfmt, dst_palette, src_palette, blend);
       break;
 
     case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
       func = wuffs_base__pixel_swizzler__prepare__bgra_nonpremul(
-          p, dst_format, dst_palette, src_palette, blend);
+          p, dst_pixfmt, dst_palette, src_palette, blend);
       break;
   }
 
   p->private_impl.func = func;
+  p->private_impl.src_pixfmt_bytes_per_pixel = src_pixfmt_bits_per_pixel / 8;
   return wuffs_base__make_status(
       func ? NULL : wuffs_base__error__unsupported_pixel_swizzler_option);
 }
 
 WUFFS_BASE__MAYBE_STATIC uint64_t  //
+wuffs_base__pixel_swizzler__swizzle_interleaved_from_reader(
+    const wuffs_base__pixel_swizzler* p,
+    wuffs_base__slice_u8 dst,
+    wuffs_base__slice_u8 dst_palette,
+    const uint8_t** ptr_iop_r,
+    const uint8_t* io2_r) {
+  if (p && p->private_impl.func) {
+    const uint8_t* iop_r = *ptr_iop_r;
+    uint64_t n = (*p->private_impl.func)(dst.ptr, dst.len, dst_palette.ptr,
+                                         dst_palette.len, iop_r, io2_r - iop_r);
+    *ptr_iop_r += n * p->private_impl.src_pixfmt_bytes_per_pixel;
+    return n;
+  }
+  return 0;
+}
+
+WUFFS_BASE__MAYBE_STATIC uint64_t  //
 wuffs_base__pixel_swizzler__swizzle_interleaved_from_slice(
     const wuffs_base__pixel_swizzler* p,
     wuffs_base__slice_u8 dst,
@@ -12048,8 +12079,8 @@
 const char* wuffs_bmp__error__bad_header = "#bmp: bad header";
 const char* wuffs_bmp__error__unsupported_bmp_file =
     "#bmp: unsupported BMP file";
-const char* wuffs_bmp__error__internal_error_inconsistent_swizzle_count =
-    "#bmp: internal error: inconsistent swizzle count";
+const char* wuffs_bmp__note__internal_note_short_read =
+    "@bmp: internal note: short read";
 
 // ---------------- Private Consts
 
@@ -12060,7 +12091,7 @@
 static wuffs_base__status  //
 wuffs_bmp__decoder__swizzle(wuffs_bmp__decoder* self,
                             wuffs_base__pixel_buffer* a_dst,
-                            wuffs_base__slice_u8 a_src);
+                            wuffs_base__io_buffer* a_src);
 
 static wuffs_base__status  //
 wuffs_bmp__decoder__skip_frame(wuffs_bmp__decoder* self,
@@ -12568,14 +12599,12 @@
       status = wuffs_base__make_status(wuffs_bmp__error__unsupported_bmp_file);
       goto exit;
     } else if (v_bits_per_pixel == 24) {
-      self->private_impl.f_bits_per_pixel = 24;
       self->private_impl.f_bytes_per_row =
           ((((((uint64_t)(self->private_impl.f_width)) * 3) + 3) >> 2) << 2);
       self->private_impl.f_pad_per_row = (self->private_impl.f_width & 3);
       self->private_impl.f_pixfmt =
           wuffs_base__utility__make_pixel_format(2147485832);
     } else if (v_bits_per_pixel == 32) {
-      self->private_impl.f_bits_per_pixel = 32;
       self->private_impl.f_bytes_per_row =
           (((uint64_t)(self->private_impl.f_width)) * 4);
       self->private_impl.f_pad_per_row = 0;
@@ -12585,9 +12614,6 @@
       status = wuffs_base__make_status(wuffs_bmp__error__unsupported_bmp_file);
       goto exit;
     }
-    self->private_impl.f_bytes_total =
-        (self->private_impl.f_bytes_per_row *
-         ((uint64_t)(self->private_impl.f_height)));
     self->private_data.s_decode_image_config[0].scratch = 20;
     WUFFS_BASE__COROUTINE_SUSPENSION_POINT(18);
     if (self->private_data.s_decode_image_config[0].scratch >
@@ -12935,9 +12961,6 @@
   wuffs_base__status status = wuffs_base__make_status(NULL);
 
   wuffs_base__status v_status = wuffs_base__make_status(NULL);
-  uint64_t v_bytes_remaining = 0;
-  uint64_t v_n = 0;
-  wuffs_base__slice_u8 v_src = {0};
 
   const uint8_t* iop_a_src = NULL;
   const uint8_t* io0_a_src WUFFS_BASE__POTENTIALLY_UNUSED = NULL;
@@ -12951,9 +12974,6 @@
   }
 
   uint32_t coro_susp_point = self->private_impl.p_decode_frame[0];
-  if (coro_susp_point) {
-    v_bytes_remaining = self->private_data.s_decode_frame[0].v_bytes_remaining;
-  }
   switch (coro_susp_point) {
     WUFFS_BASE__COROUTINE_SUSPENSION_POINT_0;
 
@@ -13012,23 +13032,20 @@
         }
         goto ok;
       }
-      v_bytes_remaining = self->private_impl.f_bytes_total;
       while (true) {
-        v_n = ((uint64_t)(io2_a_src - iop_a_src));
-        if (v_bytes_remaining >= v_n) {
-          v_bytes_remaining -= v_n;
-        } else {
-          v_n = v_bytes_remaining;
-          v_bytes_remaining = 0;
+        if (a_src) {
+          a_src->meta.ri = ((size_t)(iop_a_src - a_src->data.ptr));
         }
-        v_src = wuffs_base__io_reader__take(&iop_a_src, io2_a_src, v_n);
-        v_status = wuffs_bmp__decoder__swizzle(self, a_dst, v_src);
+        v_status = wuffs_bmp__decoder__swizzle(self, a_dst, a_src);
+        if (a_src) {
+          iop_a_src = a_src->data.ptr + a_src->meta.ri;
+        }
         if (wuffs_base__status__is_ok(&v_status)) {
           goto label__0__break;
-        } else if (wuffs_base__status__is_suspension(&v_status)) {
-          status = v_status;
-          WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(3);
-        } else {
+        } else if (v_status.repr !=
+                   wuffs_base__make_status(
+                       wuffs_bmp__note__internal_note_short_read)
+                       .repr) {
           status = v_status;
           if (wuffs_base__status__is_error(&status)) {
             goto exit;
@@ -13039,6 +13056,8 @@
           }
           goto ok;
         }
+        status = wuffs_base__make_status(wuffs_base__suspension__short_read);
+        WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(3);
       }
     label__0__break:;
     }
@@ -13056,7 +13075,6 @@
       wuffs_base__status__is_suspension(&status) ? coro_susp_point : 0;
   self->private_impl.active_coroutine =
       wuffs_base__status__is_suspension(&status) ? 3 : 0;
-  self->private_data.s_decode_frame[0].v_bytes_remaining = v_bytes_remaining;
 
   goto exit;
 exit:
@@ -13075,135 +13093,96 @@
 static wuffs_base__status  //
 wuffs_bmp__decoder__swizzle(wuffs_bmp__decoder* self,
                             wuffs_base__pixel_buffer* a_dst,
-                            wuffs_base__slice_u8 a_src) {
+                            wuffs_base__io_buffer* a_src) {
+  wuffs_base__status status = wuffs_base__make_status(NULL);
+
   wuffs_base__pixel_format v_dst_pixfmt = {0};
   uint32_t v_dst_bits_per_pixel = 0;
   uint64_t v_dst_bytes_per_pixel = 0;
   uint64_t v_dst_bytes_per_row = 0;
-  uint8_t v_src_bytes_per_pixel = 0;
   wuffs_base__table_u8 v_tab = {0};
   wuffs_base__slice_u8 v_dst = {0};
   uint64_t v_i = 0;
   uint64_t v_n = 0;
 
+  const uint8_t* iop_a_src = NULL;
+  const uint8_t* io0_a_src WUFFS_BASE__POTENTIALLY_UNUSED = NULL;
+  const uint8_t* io1_a_src WUFFS_BASE__POTENTIALLY_UNUSED = NULL;
+  const uint8_t* io2_a_src WUFFS_BASE__POTENTIALLY_UNUSED = NULL;
+  if (a_src) {
+    io0_a_src = a_src->data.ptr;
+    io1_a_src = io0_a_src + a_src->meta.ri;
+    iop_a_src = io1_a_src;
+    io2_a_src = io0_a_src + a_src->meta.wi;
+  }
+
   v_dst_pixfmt = wuffs_base__pixel_buffer__pixel_format(a_dst);
   v_dst_bits_per_pixel =
       wuffs_base__pixel_format__bits_per_pixel(&v_dst_pixfmt);
   if ((v_dst_bits_per_pixel & 7) != 0) {
-    return wuffs_base__make_status(wuffs_base__error__unsupported_option);
+    status = wuffs_base__make_status(wuffs_base__error__unsupported_option);
+    goto exit;
   }
   v_dst_bytes_per_pixel = ((uint64_t)((v_dst_bits_per_pixel / 8)));
   v_dst_bytes_per_row =
       (((uint64_t)(self->private_impl.f_width)) * v_dst_bytes_per_pixel);
-  v_src_bytes_per_pixel =
-      ((uint8_t)((self->private_impl.f_bits_per_pixel / 8)));
   v_tab = wuffs_base__pixel_buffer__plane(a_dst, 0);
-  while (self->private_impl.f_pending_pad > 0) {
-    if (((uint64_t)(a_src.len)) <= 0) {
-      return wuffs_base__make_status(wuffs_base__suspension__short_read);
-    }
-#if defined(__GNUC__)
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wconversion"
-#endif
-    self->private_impl.f_pending_pad -= 1;
-#if defined(__GNUC__)
-#pragma GCC diagnostic pop
-#endif
-    a_src = wuffs_base__slice_u8__subslice_i(a_src, 1);
-  }
-  if (self->private_impl.f_num_stashed != 0) {
-    while (self->private_impl.f_num_stashed < v_src_bytes_per_pixel) {
-      if (((uint64_t)(a_src.len)) <= 0) {
-        return wuffs_base__make_status(wuffs_base__suspension__short_read);
-      }
-      self->private_impl.f_stash[self->private_impl.f_num_stashed] =
-          a_src.ptr[0];
-#if defined(__GNUC__)
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wconversion"
-#endif
-      self->private_impl.f_num_stashed += 1;
-#if defined(__GNUC__)
-#pragma GCC diagnostic pop
-#endif
-      a_src = wuffs_base__slice_u8__subslice_i(a_src, 1);
-    }
-    v_dst = wuffs_base__table_u8__row(v_tab, self->private_impl.f_dst_y);
-    if (v_dst_bytes_per_row < ((uint64_t)(v_dst.len))) {
-      v_dst = wuffs_base__slice_u8__subslice_j(v_dst, v_dst_bytes_per_row);
-    }
-    v_i = (((uint64_t)(self->private_impl.f_dst_x)) * v_dst_bytes_per_pixel);
-    if (v_i < ((uint64_t)(v_dst.len))) {
-      wuffs_base__pixel_swizzler__swizzle_interleaved_from_slice(
-          &self->private_impl.f_swizzler,
-          wuffs_base__slice_u8__subslice_i(v_dst, v_i),
-          wuffs_base__utility__empty_slice_u8(),
-          wuffs_base__slice_u8__subslice_j(
-              wuffs_base__make_slice_u8(self->private_impl.f_stash, 4),
-              self->private_impl.f_num_stashed));
-      wuffs_base__u32__sat_add_indirect(&self->private_impl.f_dst_x, 1);
-    }
-    self->private_impl.f_num_stashed = 0;
-  }
+label__outer__continue:;
   while (true) {
-    if (self->private_impl.f_dst_x == self->private_impl.f_width) {
-      self->private_impl.f_dst_x = 0;
-      self->private_impl.f_dst_y += self->private_impl.f_dst_y_inc;
-      if (self->private_impl.f_pad_per_row != 0) {
-        v_n = ((uint64_t)(self->private_impl.f_pad_per_row));
-        if (v_n <= ((uint64_t)(a_src.len))) {
-          a_src = wuffs_base__slice_u8__subslice_i(a_src, v_n);
-        } else {
-          self->private_impl.f_pending_pad =
-              ((uint8_t)(((v_n - ((uint64_t)(a_src.len))) & 3)));
-          return wuffs_base__make_status(wuffs_base__suspension__short_read);
+    while (self->private_impl.f_pending_pad > 0) {
+      if (((uint64_t)(io2_a_src - iop_a_src)) <= 0) {
+        status =
+            wuffs_base__make_status(wuffs_bmp__note__internal_note_short_read);
+        goto ok;
+      }
+      self->private_impl.f_pending_pad -= 1;
+      (iop_a_src += 1, wuffs_base__make_empty_struct());
+    }
+  label__inner__continue:;
+    while (true) {
+      if (self->private_impl.f_dst_x == self->private_impl.f_width) {
+        self->private_impl.f_dst_x = 0;
+        self->private_impl.f_dst_y += self->private_impl.f_dst_y_inc;
+        if (self->private_impl.f_dst_y == self->private_impl.f_dst_y_end) {
+          goto label__outer__break;
+        } else if (self->private_impl.f_pad_per_row != 0) {
+          self->private_impl.f_pending_pad = self->private_impl.f_pad_per_row;
+          goto label__outer__continue;
         }
       }
-    }
-    if (self->private_impl.f_dst_y == self->private_impl.f_dst_y_end) {
-      goto label__0__break;
-    }
-    v_dst = wuffs_base__table_u8__row(v_tab, self->private_impl.f_dst_y);
-    if (v_dst_bytes_per_row < ((uint64_t)(v_dst.len))) {
-      v_dst = wuffs_base__slice_u8__subslice_j(v_dst, v_dst_bytes_per_row);
-    }
-    v_i = (((uint64_t)(self->private_impl.f_dst_x)) * v_dst_bytes_per_pixel);
-    if (v_i < ((uint64_t)(v_dst.len))) {
-      v_n = wuffs_base__pixel_swizzler__swizzle_interleaved_from_slice(
+      v_dst = wuffs_base__table_u8__row(v_tab, self->private_impl.f_dst_y);
+      if (v_dst_bytes_per_row < ((uint64_t)(v_dst.len))) {
+        v_dst = wuffs_base__slice_u8__subslice_j(v_dst, v_dst_bytes_per_row);
+      }
+      v_i = (((uint64_t)(self->private_impl.f_dst_x)) * v_dst_bytes_per_pixel);
+      if (v_i >= ((uint64_t)(v_dst.len))) {
+        goto label__inner__continue;
+      }
+      v_n = wuffs_base__pixel_swizzler__swizzle_interleaved_from_reader(
           &self->private_impl.f_swizzler,
           wuffs_base__slice_u8__subslice_i(v_dst, v_i),
-          wuffs_base__utility__empty_slice_u8(), a_src);
+          wuffs_base__utility__empty_slice_u8(), &iop_a_src, io2_a_src);
+      if (v_n == 0) {
+        status =
+            wuffs_base__make_status(wuffs_bmp__note__internal_note_short_read);
+        goto ok;
+      }
       wuffs_base__u32__sat_add_indirect(&self->private_impl.f_dst_x,
                                         ((uint32_t)((v_n & 4294967295))));
-      v_n = ((v_n & 4294967295) * ((uint64_t)(v_src_bytes_per_pixel)));
-      if (v_n <= ((uint64_t)(a_src.len))) {
-        a_src = wuffs_base__slice_u8__subslice_i(a_src, v_n);
-      } else {
-        return wuffs_base__make_status(
-            wuffs_bmp__error__internal_error_inconsistent_swizzle_count);
-      }
-    }
-    if (self->private_impl.f_dst_x < self->private_impl.f_width) {
-      while ((self->private_impl.f_num_stashed < v_src_bytes_per_pixel) &&
-             (((uint64_t)(a_src.len)) > 0)) {
-        self->private_impl.f_stash[self->private_impl.f_num_stashed] =
-            a_src.ptr[0];
-#if defined(__GNUC__)
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wconversion"
-#endif
-        self->private_impl.f_num_stashed += 1;
-#if defined(__GNUC__)
-#pragma GCC diagnostic pop
-#endif
-        a_src = wuffs_base__slice_u8__subslice_i(a_src, 1);
-      }
-      return wuffs_base__make_status(wuffs_base__suspension__short_read);
     }
   }
-label__0__break:;
-  return wuffs_base__make_status(NULL);
+label__outer__break:;
+  status = wuffs_base__make_status(NULL);
+  goto ok;
+  goto ok;
+ok:
+  goto exit;
+exit:
+  if (a_src) {
+    a_src->meta.ri = ((size_t)(iop_a_src - a_src->data.ptr));
+  }
+
+  return status;
 }
 
 // -------- func bmp.decoder.skip_frame
@@ -13240,7 +13219,8 @@
     }
     iop_a_src += self->private_data.s_skip_frame[0].scratch;
     self->private_data.s_skip_frame[0].scratch =
-        self->private_impl.f_bytes_total;
+        (self->private_impl.f_bytes_per_row *
+         ((uint64_t)(self->private_impl.f_height)));
     WUFFS_BASE__COROUTINE_SUSPENSION_POINT(2);
     if (self->private_data.s_skip_frame[0].scratch >
         ((uint64_t)(io2_a_src - iop_a_src))) {
diff --git a/std/bmp/decode_bmp.wuffs b/std/bmp/decode_bmp.wuffs
index af88ffc..394b2d7 100644
--- a/std/bmp/decode_bmp.wuffs
+++ b/std/bmp/decode_bmp.wuffs
@@ -15,7 +15,7 @@
 pub status "#bad header"
 pub status "#unsupported BMP file"
 
-pri status "#internal error: inconsistent swizzle count"
+pri status "@internal note: short read"
 
 pub const DECODER_WORKBUF_LEN_MAX_INCL_WORST_CASE : base.u64 = 0
 
@@ -25,16 +25,16 @@
 
 	call_sequence : base.u8,
 
-	top_down       : base.bool,
-	bits_per_pixel : base.u8[..= 32],
-	pad_per_row    : base.u32[..= 3],
-	bytes_per_row  : base.u64[..= 0x0000_0001_FFFF_FFFC],  // 4 * 0x7FFF_FFFF
-	bytes_total    : base.u64[..= 0xFFFF_FFFC_0000_0004],  // 4 * 0x7FFF_FFFF * 0x7FFF_FFFF
-	pixfmt         : base.pixel_format,
+	top_down      : base.bool,
+	pad_per_row   : base.u32[..= 3],
+	bytes_per_row : base.u64[..= 0x0000_0001_FFFF_FFFC],  // 4 * 0x7FFF_FFFF
+	pixfmt        : base.pixel_format,
 
 	io_redirect_fourcc : base.u32,
 	io_redirect_pos    : base.u64,
 
+	frame_config_io_position : base.u64,
+
 	padding : base.u32,
 
 	mask_r : base.u32,
@@ -42,16 +42,12 @@
 	mask_b : base.u32,
 	mask_a : base.u32,
 
-	frame_config_io_position : base.u64,
-
 	dst_x     : base.u32,
 	dst_y     : base.u32,
 	dst_y_end : base.u32,
 	dst_y_inc : base.u32,
 
-	stash       : array[4] base.u8,
-	num_stashed : base.u8[..= 4],
-	pending_pad : base.u8[..= 3],
+	pending_pad : base.u32[..= 3],
 
 	swizzler : base.pixel_swizzler,
 	util     : base.utility,
@@ -137,7 +133,6 @@
 		}
 		return "#unsupported BMP file"
 	} else if bits_per_pixel == 24 {
-		this.bits_per_pixel = 24
 		// 3 bytes per pixel, but row lengths are rounded up to multiples of 4.
 		// The "((x + 3) >> 2) << 2" dance rounds x up.
 		this.bytes_per_row = ((((this.width as base.u64) * 3) + 3) >> 2) << 2
@@ -146,7 +141,6 @@
 		// WUFFS_BASE__PIXEL_FORMAT__BGR magic pixfmt constant.
 		this.pixfmt = this.util.make_pixel_format(repr: 0x8000_0888)
 	} else if bits_per_pixel == 32 {
-		this.bits_per_pixel = 32
 		this.bytes_per_row = (this.width as base.u64) * 4
 		this.pad_per_row = 0
 		// TODO: a Wuffs (not just C) name for the
@@ -156,7 +150,6 @@
 		// TODO: support other bits_per_pixel's.
 		return "#unsupported BMP file"
 	}
-	this.bytes_total = this.bytes_per_row * (this.height as base.u64)
 
 	// We've already read 20 bytes from the BITMAPINFOHEADER: size (4), width
 	// (4), height (4), planes (2), bpp (2), compression (4). Skip the rest of
@@ -239,10 +232,7 @@
 }
 
 pub func decoder.decode_frame?(dst: ptr base.pixel_buffer, src: base.io_reader, blend: base.pixel_blend, workbuf: slice base.u8, opts: nptr base.decode_frame_options) {
-	var status          : base.status
-	var bytes_remaining : base.u64
-	var n               : base.u64
-	var src             : slice base.u8
+	var status : base.status
 
 	if this.call_sequence < 2 {
 		this.decode_frame_config?(dst: nullptr, src: args.src)
@@ -276,36 +266,25 @@
 			return status
 		}
 
-		bytes_remaining = this.bytes_total
 		while true {
-			n = args.src.available()
-			if bytes_remaining >= n {
-				bytes_remaining -= n
-			} else {
-				n = bytes_remaining
-				bytes_remaining = 0
-			}
-			src = args.src.take!(n: n)
-			status = this.swizzle!(dst: args.dst, src: src)
+			status = this.swizzle!(dst: args.dst, src: args.src)
 			if status.is_ok() {
 				break
-			} else if status.is_suspension() {
-				yield? status
-			} else {
+			} else if status <> "@internal note: short read" {
 				return status
 			}
+			yield? base."$short read"
 		} endwhile
 	}
 
 	this.call_sequence = 3
 }
 
-pri func decoder.swizzle!(dst: ptr base.pixel_buffer, src: slice base.u8) base.status {
+pri func decoder.swizzle!(dst: ptr base.pixel_buffer, src: base.io_reader) base.status {
 	var dst_pixfmt          : base.pixel_format
 	var dst_bits_per_pixel  : base.u32[..= 256]
 	var dst_bytes_per_pixel : base.u64[..= 32]
 	var dst_bytes_per_row   : base.u64
-	var src_bytes_per_pixel : base.u8[..= 4]
 	var tab                 : table base.u8
 	var dst                 : slice base.u8
 	var i                   : base.u64
@@ -320,107 +299,56 @@
 	}
 	dst_bytes_per_pixel = (dst_bits_per_pixel / 8) as base.u64
 	dst_bytes_per_row = (this.width as base.u64) * dst_bytes_per_pixel
-	src_bytes_per_pixel = (this.bits_per_pixel / 8) as base.u8
 	tab = args.dst.plane(p: 0)
 
-	// Handle the case where the I/O suspension occurred in the middle of the
-	// end-of-row padding.
-	while this.pending_pad > 0 {
-		if args.src.length() <= 0 {
-			return base."$short read"
-		}
-		this.pending_pad -= 1
-		args.src = args.src[1 ..]
-	} endwhile
-
-	// Handle the case where the I/O suspension occurred in the middle of a
-	// source pixel.
-	if this.num_stashed <> 0 {
-		while this.num_stashed < src_bytes_per_pixel {
-			assert this.num_stashed < 4 via "a < b: a < c; c <= b"(c: src_bytes_per_pixel)
-			if args.src.length() <= 0 {
-				return base."$short read"
+	while.outer true {
+		while this.pending_pad > 0 {
+			if args.src.available() <= 0 {
+				return "@internal note: short read"
 			}
-			this.stash[this.num_stashed] = args.src[0]
-			this.num_stashed += 1
-			args.src = args.src[1 ..]
+			this.pending_pad -= 1
+			args.src.skip_u32_fast!(actual: 1, worst_case: 1)
 		} endwhile
 
-		// Write the single pixel.
-		dst = tab.row(y: this.dst_y)
-		if dst_bytes_per_row < dst.length() {
-			dst = dst[.. dst_bytes_per_row]
-		}
-		i = (this.dst_x as base.u64) * dst_bytes_per_pixel
-		if i < dst.length() {
-			this.swizzler.swizzle_interleaved_from_slice!(
-				dst: dst[i ..],
-				dst_palette: this.util.empty_slice_u8(),
-				src: this.stash[.. this.num_stashed])
-			this.dst_x ~sat+= 1
-		}
-
-		this.num_stashed = 0
-	}
-
-	while true {
-		if this.dst_x == this.width {
-			this.dst_x = 0
-			this.dst_y ~mod+= this.dst_y_inc
-
-			if this.pad_per_row <> 0 {
-				n = this.pad_per_row as base.u64
-				if n <= args.src.length() {
-					args.src = args.src[n ..]
-				} else {
-					this.pending_pad = ((n - args.src.length()) & 3) as base.u8
-					return base."$short read"
+		while.inner true {
+			if this.dst_x == this.width {
+				this.dst_x = 0
+				this.dst_y ~mod+= this.dst_y_inc
+				if this.dst_y == this.dst_y_end {
+					break.outer
+				} else if this.pad_per_row <> 0 {
+					this.pending_pad = this.pad_per_row
+					continue.outer
 				}
 			}
-		}
 
-		if this.dst_y == this.dst_y_end {
-			break
-		}
-
-		dst = tab.row(y: this.dst_y)
-		if dst_bytes_per_row < dst.length() {
-			dst = dst[.. dst_bytes_per_row]
-		}
-		i = (this.dst_x as base.u64) * dst_bytes_per_pixel
-		if i < dst.length() {
-			n = this.swizzler.swizzle_interleaved_from_slice!(
+			dst = tab.row(y: this.dst_y)
+			if dst_bytes_per_row < dst.length() {
+				dst = dst[.. dst_bytes_per_row]
+			}
+			i = (this.dst_x as base.u64) * dst_bytes_per_pixel
+			if i >= dst.length() {
+				// TODO: advance args.src if the dst pixel_buffer's bounds are smaller than this BMP's
+				// image bounds? NOTE(review): until then this looks like it repeats with unchanged state - confirm.
+				continue.inner
+			}
+			n = this.swizzler.swizzle_interleaved_from_reader!(
 				dst: dst[i ..],
 				dst_palette: this.util.empty_slice_u8(),
 				src: args.src)
-			this.dst_x ~sat+= (n & 0xFFFF_FFFF) as base.u32
-			n = (n & 0xFFFF_FFFF) * (src_bytes_per_pixel as base.u64)
-			if n <= args.src.length() {
-				args.src = args.src[n ..]
-			} else {
-				return "#internal error: inconsistent swizzle count"
+			if n == 0 {
+				return "@internal note: short read"
 			}
-		}
-
-		// Suspend if we didn't complete the row, potentially in the middle of
-		// a source pixel.
-		if this.dst_x < this.width {
-			while (this.num_stashed < src_bytes_per_pixel) and (args.src.length() > 0) {
-				assert this.num_stashed < 4 via "a < b: a < c; c <= b"(c: src_bytes_per_pixel)
-				this.stash[this.num_stashed] = args.src[0]
-				this.num_stashed += 1
-				args.src = args.src[1 ..]
-			} endwhile
-			return base."$short read"
-		}
-	} endwhile
+			this.dst_x ~sat+= (n & 0xFFFF_FFFF) as base.u32
+		} endwhile.inner
+	} endwhile.outer
 
 	return ok
 }
 
 pri func decoder.skip_frame?(src: base.io_reader) {
 	args.src.skip_u32?(n: this.padding)
-	args.src.skip?(n: this.bytes_total)
+	args.src.skip?(n: this.bytes_per_row * (this.height as base.u64))
 
 	this.call_sequence = 3
 }