Have std/bmp consume the final per-row padding
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index d1ad3bc..502d2c4 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -17506,6 +17506,16 @@
         WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(3);
       }
       label__0__break:;
+      self->private_data.s_decode_frame[0].scratch = self->private_impl.f_pending_pad;
+      WUFFS_BASE__COROUTINE_SUSPENSION_POINT(4);
+      if (self->private_data.s_decode_frame[0].scratch > ((uint64_t)(io2_a_src - iop_a_src))) {
+        self->private_data.s_decode_frame[0].scratch -= ((uint64_t)(io2_a_src - iop_a_src));
+        iop_a_src = io2_a_src;
+        status = wuffs_base__make_status(wuffs_base__suspension__short_read);
+        goto suspend;
+      }
+      iop_a_src += self->private_data.s_decode_frame[0].scratch;
+      self->private_impl.f_pending_pad = 0;
     }
     self->private_impl.f_call_sequence = 3;
 
@@ -17589,6 +17599,9 @@
         self->private_impl.f_dst_x = 0;
         self->private_impl.f_dst_y += self->private_impl.f_dst_y_inc;
         if (self->private_impl.f_dst_y >= self->private_impl.f_height) {
+          if (self->private_impl.f_height > 0) {
+            self->private_impl.f_pending_pad = self->private_impl.f_pad_per_row;
+          }
           goto label__outer__break;
         } else if (self->private_impl.f_pad_per_row != 0) {
           self->private_impl.f_pending_pad = self->private_impl.f_pad_per_row;
@@ -17957,6 +17970,9 @@
         self->private_impl.f_dst_x = 0;
         self->private_impl.f_dst_y += self->private_impl.f_dst_y_inc;
         if (self->private_impl.f_dst_y >= self->private_impl.f_height) {
+          if (self->private_impl.f_height > 0) {
+            self->private_impl.f_pending_pad = self->private_impl.f_pad_per_row;
+          }
           goto label__outer__break;
         } else if (self->private_impl.f_pad_per_row != 0) {
           self->private_impl.f_pending_pad = self->private_impl.f_pad_per_row;
diff --git a/std/bmp/decode_bmp.wuffs b/std/bmp/decode_bmp.wuffs
index cd51a47..b32c53b 100644
--- a/std/bmp/decode_bmp.wuffs
+++ b/std/bmp/decode_bmp.wuffs
@@ -458,6 +458,9 @@
 			}
 			yield? base."$short read"
 		} endwhile
+
+		args.src.skip_u32?(n: this.pending_pad)
+		this.pending_pad = 0
 	}
 
 	this.call_sequence = 3
@@ -500,6 +503,9 @@
 				this.dst_x = 0
 				this.dst_y ~mod+= this.dst_y_inc
 				if this.dst_y >= this.height {
+					if this.height > 0 {
+						this.pending_pad = this.pad_per_row
+					}
 					break.outer
 				} else if this.pad_per_row <> 0 {
 					this.pending_pad = this.pad_per_row
@@ -838,6 +844,9 @@
 				this.dst_x = 0
 				this.dst_y ~mod+= this.dst_y_inc
 				if this.dst_y >= this.height {
+					if this.height > 0 {
+						this.pending_pad = this.pad_per_row
+					}
 					break.outer
 				} else if this.pad_per_row <> 0 {
 					this.pending_pad = this.pad_per_row