Fix std/wbmp decode_frame across a suspension
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 76de842..72a4d01 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -6979,6 +6979,7 @@
       uint32_t v_x32;
     } s_decode_image_config[1];
     struct {
+      uint64_t v_dst_bytes_per_pixel;
       uint32_t v_dst_x;
       uint32_t v_dst_y;
       uint8_t v_src[1];
@@ -24824,6 +24825,10 @@
   wuffs_base__status status = wuffs_base__make_status(NULL);
 
   wuffs_base__status v_status = wuffs_base__make_status(NULL);
+  wuffs_base__pixel_format v_dst_pixfmt = {0};
+  uint32_t v_dst_bits_per_pixel = 0;
+  uint64_t v_dst_bytes_per_pixel = 0;
+  uint64_t v_dst_x_in_bytes = 0;
   uint32_t v_dst_x = 0;
   uint32_t v_dst_y = 0;
   wuffs_base__table_u8 v_tab = {0};
@@ -24844,6 +24849,8 @@
 
   uint32_t coro_susp_point = self->private_impl.p_decode_frame[0];
   if (coro_susp_point) {
+    v_dst_bytes_per_pixel =
+        self->private_data.s_decode_frame[0].v_dst_bytes_per_pixel;
     v_dst_x = self->private_data.s_decode_frame[0].v_dst_x;
     v_dst_y = self->private_data.s_decode_frame[0].v_dst_y;
     memcpy(v_src, self->private_data.s_decode_frame[0].v_src, sizeof(v_src));
@@ -24886,6 +24893,14 @@
       }
       goto ok;
     }
+    v_dst_pixfmt = wuffs_base__pixel_buffer__pixel_format(a_dst);
+    v_dst_bits_per_pixel =
+        wuffs_base__pixel_format__bits_per_pixel(&v_dst_pixfmt);
+    if ((v_dst_bits_per_pixel & 7) != 0) {
+      status = wuffs_base__make_status(wuffs_base__error__unsupported_option);
+      goto exit;
+    }
+    v_dst_bytes_per_pixel = ((uint64_t)((v_dst_bits_per_pixel / 8)));
     if (self->private_impl.f_width > 0) {
       v_tab = wuffs_base__pixel_buffer__plane(a_dst, 0);
       while (v_dst_y < self->private_impl.f_height) {
@@ -24893,16 +24908,21 @@
         v_dst_x = 0;
         while (v_dst_x < self->private_impl.f_width) {
           if ((v_dst_x & 7) == 0) {
-            {
-              WUFFS_BASE__COROUTINE_SUSPENSION_POINT(2);
-              if (WUFFS_BASE__UNLIKELY(iop_a_src == io2_a_src)) {
-                status =
-                    wuffs_base__make_status(wuffs_base__suspension__short_read);
-                goto suspend;
+            while (((uint64_t)(io2_a_src - iop_a_src)) <= 0) {
+              status =
+                  wuffs_base__make_status(wuffs_base__suspension__short_read);
+              WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(2);
+              v_tab = wuffs_base__pixel_buffer__plane(a_dst, 0);
+              v_dst = wuffs_base__table_u8__row(v_tab, v_dst_y);
+              v_dst_x_in_bytes =
+                  (((uint64_t)(v_dst_x)) * v_dst_bytes_per_pixel);
+              if (v_dst_x_in_bytes <= ((uint64_t)(v_dst.len))) {
+                v_dst =
+                    wuffs_base__slice_u8__subslice_i(v_dst, v_dst_x_in_bytes);
               }
-              uint8_t t_0 = *iop_a_src++;
-              v_c = t_0;
             }
+            v_c = wuffs_base__load_u8be__no_bounds_check(iop_a_src);
+            (iop_a_src += 1, wuffs_base__make_empty_struct());
           }
           if ((v_c & 128) == 0) {
             v_src[0] = 0;
@@ -24914,8 +24934,9 @@
               &self->private_impl.f_swizzler, v_dst,
               wuffs_base__utility__empty_slice_u8(),
               wuffs_base__make_slice_u8(v_src, 1));
-          if (((uint64_t)(v_dst.len)) >= 4) {
-            v_dst = wuffs_base__slice_u8__subslice_i(v_dst, 4);
+          if (v_dst_bytes_per_pixel <= ((uint64_t)(v_dst.len))) {
+            v_dst =
+                wuffs_base__slice_u8__subslice_i(v_dst, v_dst_bytes_per_pixel);
           }
           v_dst_x += 1;
         }
@@ -24936,6 +24957,8 @@
       wuffs_base__status__is_suspension(&status) ? coro_susp_point : 0;
   self->private_impl.active_coroutine =
       wuffs_base__status__is_suspension(&status) ? 3 : 0;
+  self->private_data.s_decode_frame[0].v_dst_bytes_per_pixel =
+      v_dst_bytes_per_pixel;
   self->private_data.s_decode_frame[0].v_dst_x = v_dst_x;
   self->private_data.s_decode_frame[0].v_dst_y = v_dst_y;
   memcpy(self->private_data.s_decode_frame[0].v_src, v_src, sizeof(v_src));
diff --git a/std/wbmp/decode_wbmp.wuffs b/std/wbmp/decode_wbmp.wuffs
index b91ac9c..b0be49c 100644
--- a/std/wbmp/decode_wbmp.wuffs
+++ b/std/wbmp/decode_wbmp.wuffs
@@ -129,13 +129,17 @@
 }
 
 pub func decoder.decode_frame?(dst: ptr base.pixel_buffer, src: base.io_reader, blend: base.pixel_blend, workbuf: slice base.u8, opts: nptr base.decode_frame_options) {
-	var status : base.status
-	var dst_x  : base.u32
-	var dst_y  : base.u32
-	var tab    : table base.u8
-	var dst    : slice base.u8
-	var src    : array[1] base.u8
-	var c      : base.u8
+	var status              : base.status
+	var dst_pixfmt          : base.pixel_format
+	var dst_bits_per_pixel  : base.u32[..= 256]
+	var dst_bytes_per_pixel : base.u64[..= 32]
+	var dst_x_in_bytes      : base.u64
+	var dst_x               : base.u32
+	var dst_y               : base.u32
+	var tab                 : table base.u8
+	var dst                 : slice base.u8
+	var src                 : array[1] base.u8
+	var c                   : base.u8
 
 	if this.call_sequence < 2 {
 		this.decode_frame_config?(dst: nullptr, src: args.src)
@@ -157,6 +161,15 @@
 		return status
 	}
 
+	// TODO: the dst_pixfmt variable shouldn't be necessary. We should be able
+	// to chain the two calls: "args.dst.pixel_format().bits_per_pixel()".
+	dst_pixfmt = args.dst.pixel_format()
+	dst_bits_per_pixel = dst_pixfmt.bits_per_pixel()
+	if (dst_bits_per_pixel & 7) <> 0 {
+		return base."#unsupported option"
+	}
+	dst_bytes_per_pixel = (dst_bits_per_pixel / 8) as base.u64
+
 	// TODO: be more efficient than reading one byte at a time.
 	if this.width > 0 {
 		tab = args.dst.plane(p: 0)
@@ -171,7 +184,21 @@
 				assert dst_x < 0xFFFF_FFFF via "a < b: a < c; c <= b"(c: this.width)
 
 				if (dst_x & 7) == 0 {
-					c = args.src.read_u8?()
+					while args.src.available() <= 0,
+						inv dst_x < 0xFFFF_FFFF,
+						inv dst_y < 0xFFFF_FFFF,
+						post args.src.available() > 0,
+					{
+						yield? base."$short read"
+						tab = args.dst.plane(p: 0)
+						dst = tab.row(y: dst_y)
+						dst_x_in_bytes = (dst_x as base.u64) * dst_bytes_per_pixel
+						if dst_x_in_bytes <= dst.length() {
+							dst = dst[dst_x_in_bytes ..]
+						}
+					} endwhile
+					c = args.src.peek_u8()
+					args.src.skip32_fast!(actual: 1, worst_case: 1)
 				}
 				if (c & 0x80) == 0 {
 					src[0] = 0x00
@@ -188,8 +215,8 @@
 				this.swizzler.swizzle_interleaved!(
 					dst: dst, dst_palette: this.util.empty_slice_u8(), src: src[..])
 
-				if dst.length() >= 4 {
-					dst = dst[4 ..]
+				if dst_bytes_per_pixel <= dst.length() {
+					dst = dst[dst_bytes_per_pixel ..]
 				}
 
 				dst_x += 1