Fix std/png Average filter for the top row
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index e80aa47..35ab8f5 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -31000,16 +31000,28 @@
         v_i += 1;
       }
     } else if (v_filter == 3) {
-      v_i = 0;
-      while ((v_i < ((uint64_t)(v_curr_row.len))) && (v_i < ((uint64_t)(v_prev_row.len)))) {
-        if (v_i >= v_filter_distance) {
-          if ((v_i - v_filter_distance) < ((uint64_t)(v_curr_row.len))) {
-            v_curr_row.ptr[v_i] += ((uint8_t)(((((uint32_t)(v_curr_row.ptr[(v_i - v_filter_distance)])) + ((uint32_t)(v_prev_row.ptr[v_i]))) / 2)));
+      if (v_y == 0) {
+        v_i = v_filter_distance;
+        while (v_i < ((uint64_t)(v_curr_row.len))) {
+          if (v_i >= v_filter_distance) {
+            if ((v_i - v_filter_distance) < ((uint64_t)(v_curr_row.len))) {
+              v_curr_row.ptr[v_i] += (v_curr_row.ptr[(v_i - v_filter_distance)] / 2);
+            }
           }
-        } else {
-          v_curr_row.ptr[v_i] += (v_prev_row.ptr[v_i] / 2);
+          v_i += 1;
         }
-        v_i += 1;
+      } else {
+        v_i = 0;
+        while ((v_i < ((uint64_t)(v_curr_row.len))) && (v_i < ((uint64_t)(v_prev_row.len)))) {
+          if (v_i >= v_filter_distance) {
+            if ((v_i - v_filter_distance) < ((uint64_t)(v_curr_row.len))) {
+              v_curr_row.ptr[v_i] += ((uint8_t)(((((uint32_t)(v_curr_row.ptr[(v_i - v_filter_distance)])) + ((uint32_t)(v_prev_row.ptr[v_i]))) / 2)));
+            }
+          } else {
+            v_curr_row.ptr[v_i] += (v_prev_row.ptr[v_i] / 2);
+          }
+          v_i += 1;
+        }
       }
     } else if (v_filter == 4) {
       v_i = 0;
diff --git a/std/png/decode_png.wuffs b/std/png/decode_png.wuffs
index 6298704..64bdc1d 100644
--- a/std/png/decode_png.wuffs
+++ b/std/png/decode_png.wuffs
@@ -471,22 +471,37 @@
 				i += 1
 			} endwhile
 		} else if filter == 3 {
-			i = 0
-			while (i < curr_row.length()) and (i < prev_row.length()),
-				inv y < 0xFFFF_FFFF,
-			{
-				assert i < 0xFFFF_FFFF_FFFF_FFFF via "a < b: a < c; c <= b"(c: curr_row.length())
-				if i >= filter_distance {
-					if (i - filter_distance) < curr_row.length() {
-						curr_row[i] ~mod+= ((
-							(curr_row[i - filter_distance] as base.u32) +
-							(prev_row[i] as base.u32)) / 2) as base.u8
+			if y == 0 {
+				i = filter_distance
+				while i < curr_row.length(),
+					inv y < 0xFFFF_FFFF,
+				{
+					assert i < 0xFFFF_FFFF_FFFF_FFFF via "a < b: a < c; c <= b"(c: curr_row.length())
+					if i >= filter_distance {
+						if (i - filter_distance) < curr_row.length() {
+							curr_row[i] ~mod+= curr_row[i - filter_distance] / 2
+						}
 					}
-				} else {
-					curr_row[i] ~mod+= prev_row[i] / 2
-				}
-				i += 1
-			} endwhile
+					i += 1
+				} endwhile
+			} else {
+				i = 0
+				while (i < curr_row.length()) and (i < prev_row.length()),
+					inv y < 0xFFFF_FFFF,
+				{
+					assert i < 0xFFFF_FFFF_FFFF_FFFF via "a < b: a < c; c <= b"(c: curr_row.length())
+					if i >= filter_distance {
+						if (i - filter_distance) < curr_row.length() {
+							curr_row[i] ~mod+= ((
+								(curr_row[i - filter_distance] as base.u32) +
+								(prev_row[i] as base.u32)) / 2) as base.u8
+						}
+					} else {
+						curr_row[i] ~mod+= prev_row[i] / 2
+					}
+					i += 1
+				} endwhile
+			}
 		} else if filter == 4 {
 			i = 0
 			while (i < curr_row.length()) and (i < prev_row.length()),
diff --git a/test/c/std/png.c b/test/c/std/png.c
index 09fd305..fd372a7 100644
--- a/test/c/std/png.c
+++ b/test/c/std/png.c
@@ -88,6 +88,39 @@
       n_bytes_out, dst, pixfmt, src);
 }
 
+const char*  // Test helper: runs filter_and_swizzle over a pre-filled workbuf.
+do_wuffs_png_swizzle(wuffs_png__decoder* dec,
+                     uint32_t width,
+                     uint32_t height,
+                     uint8_t filter_distance,
+                     wuffs_base__slice_u8 dst,
+                     wuffs_base__slice_u8 workbuf) {
+  dec->private_impl.f_width = width;  // Decoder private state is set directly.
+  dec->private_impl.f_height = height;
+  dec->private_impl.f_bytes_per_row = width;  // Presumably 1 byte per Y pixel.
+  dec->private_impl.f_filter_distance = filter_distance;
+
+  CHECK_STATUS("prepare",  // Same pixel format (Y) passed for both sides.
+               wuffs_base__pixel_swizzler__prepare(
+                   &dec->private_impl.f_swizzler,
+                   wuffs_base__make_pixel_format(WUFFS_BASE__PIXEL_FORMAT__Y),
+                   wuffs_base__empty_slice_u8(),
+                   wuffs_base__make_pixel_format(WUFFS_BASE__PIXEL_FORMAT__Y),
+                   wuffs_base__empty_slice_u8(), WUFFS_BASE__PIXEL_BLEND__SRC));
+
+  wuffs_base__pixel_config pc = ((wuffs_base__pixel_config){});
+  wuffs_base__pixel_config__set(&pc, WUFFS_BASE__PIXEL_FORMAT__Y,
+                                WUFFS_BASE__PIXEL_SUBSAMPLING__NONE, width,
+                                height);
+  wuffs_base__pixel_buffer pb = ((wuffs_base__pixel_buffer){});
+
+  CHECK_STATUS("set_from_slice",  // The pixel buffer is backed by dst.
+               wuffs_base__pixel_buffer__set_from_slice(&pb, &pc, dst));
+  CHECK_STATUS("filter_and_swizzle",  // Reads workbuf, writes through pb.
+               wuffs_png__decoder__filter_and_swizzle(dec, &pb, workbuf));
+  return NULL;  // Presumably "no error"; CHECK_STATUS is defined elsewhere.
+}
+
 // --------
 
 const char*  //
@@ -104,6 +137,113 @@
 }
 
 const char*  //
+test_wuffs_png_decode_filters() {  // Filters 1..4 at filter_distance 1..4.
+  CHECK_FOCUS(__func__);
+
+  uint8_t src_rows[2][12] = {  // Filtered input: 2 rows of 12 bytes each.
+      // ASCII "WhatsInAName".
+      {0x57, 0x68, 0x61, 0x74, 0x73, 0x49, 0x6E, 0x41, 0x4E, 0x61, 0x6D, 0x65},
+      // ASCII "SmellAsSweet".
+      {0x53, 0x6D, 0x65, 0x6C, 0x6C, 0x41, 0x73, 0x53, 0x77, 0x65, 0x65, 0x74},
+  };
+
+  uint8_t want_rows[4 * 4 * 2][12] = {  // 4 filters x 4 distances x 2 rows.
+      // Sub:1. Labels below are "filter name : filter_distance".
+      {0x57, 0xBF, 0x20, 0x94, 0x07, 0x50, 0xBE, 0xFF, 0x4D, 0xAE, 0x1B, 0x80},
+      {0x53, 0xC0, 0x25, 0x91, 0xFD, 0x3E, 0xB1, 0x04, 0x7B, 0xE0, 0x45, 0xB9},
+      // Sub:2.
+      {0x57, 0x68, 0xB8, 0xDC, 0x2B, 0x25, 0x99, 0x66, 0xE7, 0xC7, 0x54, 0x2C},
+      {0x53, 0x6D, 0xB8, 0xD9, 0x24, 0x1A, 0x97, 0x6D, 0x0E, 0xD2, 0x73, 0x46},
+      // Sub:3.
+      {0x57, 0x68, 0x61, 0xCB, 0xDB, 0xAA, 0x39, 0x1C, 0xF8, 0x9A, 0x89, 0x5D},
+      {0x53, 0x6D, 0x65, 0xBF, 0xD9, 0xA6, 0x32, 0x2C, 0x1D, 0x97, 0x91, 0x91},
+      // Sub:4.
+      {0x57, 0x68, 0x61, 0x74, 0xCA, 0xB1, 0xCF, 0xB5, 0x18, 0x12, 0x3C, 0x1A},
+      {0x53, 0x6D, 0x65, 0x6C, 0xBF, 0xAE, 0xD8, 0xBF, 0x36, 0x13, 0x3D, 0x33},
+      // Up:1. The top row equals src_rows[0] for every Up entry.
+      {0x57, 0x68, 0x61, 0x74, 0x73, 0x49, 0x6E, 0x41, 0x4E, 0x61, 0x6D, 0x65},
+      {0xAA, 0xD5, 0xC6, 0xE0, 0xDF, 0x8A, 0xE1, 0x94, 0xC5, 0xC6, 0xD2, 0xD9},
+      // Up:2.
+      {0x57, 0x68, 0x61, 0x74, 0x73, 0x49, 0x6E, 0x41, 0x4E, 0x61, 0x6D, 0x65},
+      {0xAA, 0xD5, 0xC6, 0xE0, 0xDF, 0x8A, 0xE1, 0x94, 0xC5, 0xC6, 0xD2, 0xD9},
+      // Up:3.
+      {0x57, 0x68, 0x61, 0x74, 0x73, 0x49, 0x6E, 0x41, 0x4E, 0x61, 0x6D, 0x65},
+      {0xAA, 0xD5, 0xC6, 0xE0, 0xDF, 0x8A, 0xE1, 0x94, 0xC5, 0xC6, 0xD2, 0xD9},
+      // Up:4.
+      {0x57, 0x68, 0x61, 0x74, 0x73, 0x49, 0x6E, 0x41, 0x4E, 0x61, 0x6D, 0x65},
+      {0xAA, 0xD5, 0xC6, 0xE0, 0xDF, 0x8A, 0xE1, 0x94, 0xC5, 0xC6, 0xD2, 0xD9},
+      // Average:1. Top row: e.g. 0x93 == 0x68 + (0x57 / 2), no row above.
+      {0x57, 0x93, 0xAA, 0xC9, 0xD7, 0xB4, 0xC8, 0xA5, 0xA0, 0xB1, 0xC5, 0xC7},
+      {0x7E, 0xF5, 0x34, 0xEA, 0x4C, 0xC1, 0x37, 0xC1, 0x27, 0xD1, 0x30, 0xEF},
+      // Average:2.
+      {0x57, 0x68, 0x8C, 0xA8, 0xB9, 0x9D, 0xCA, 0x8F, 0xB3, 0xA8, 0xC6, 0xB9},
+      {0x7E, 0xA1, 0xEA, 0x10, 0x3D, 0x97, 0xF6, 0xE6, 0x4B, 0x2C, 0xED, 0xE6},
+      // Average:3.
+      {0x57, 0x68, 0x61, 0x9F, 0xA7, 0x79, 0xBD, 0x94, 0x8A, 0xBF, 0xB7, 0xAA},
+      {0x7E, 0xA1, 0x95, 0xFA, 0x10, 0xC8, 0x4E, 0xA5, 0x20, 0xEB, 0x13, 0xD9},
+      // Average:4.
+      {0x57, 0x68, 0x61, 0x74, 0x9E, 0x7D, 0x9E, 0x7B, 0x9D, 0x9F, 0xBC, 0xA2},
+      {0x7E, 0xA1, 0x95, 0xA6, 0xFA, 0xD0, 0x0C, 0xE3, 0x42, 0x1C, 0xC9, 0x36},
+      // Paeth:1. Top rows match the Sub rows (top row is filtered as Sub).
+      {0x57, 0xBF, 0x20, 0x94, 0x07, 0x50, 0xBE, 0xFF, 0x4D, 0xAE, 0x1B, 0x80},
+      {0xAA, 0x2C, 0x85, 0x00, 0x6C, 0xAD, 0x31, 0x84, 0xC4, 0x29, 0x80, 0xF4},
+      // Paeth:2.
+      {0x57, 0x68, 0xB8, 0xDC, 0x2B, 0x25, 0x99, 0x66, 0xE7, 0xC7, 0x54, 0x2C},
+      {0xAA, 0xD5, 0x1D, 0x48, 0x89, 0x66, 0x0C, 0xB9, 0x10, 0x2C, 0x75, 0xA0},
+      // Paeth:3.
+      {0x57, 0x68, 0x61, 0xCB, 0xDB, 0xAA, 0x39, 0x1C, 0xF8, 0x9A, 0x89, 0x5D},
+      {0xAA, 0xD5, 0xC6, 0x37, 0x47, 0x07, 0xAA, 0x6F, 0x7E, 0x0F, 0xEE, 0xD1},
+      // Paeth:4.
+      {0x57, 0x68, 0x61, 0x74, 0xCA, 0xB1, 0xCF, 0xB5, 0x18, 0x12, 0x3C, 0x1A},
+      {0xAA, 0xD5, 0xC6, 0xE0, 0x36, 0x16, 0x42, 0x33, 0x8F, 0x77, 0xA1, 0x8E},
+  };
+
+  wuffs_png__decoder dec;
+  CHECK_STATUS("initialize", wuffs_png__decoder__initialize(
+                                 &dec, sizeof dec, WUFFS_VERSION,
+                                 WUFFS_INITIALIZE__DEFAULT_OPTIONS));
+
+  int filter;
+  for (filter = 1; filter <= 4; filter++) {  // 1=Sub 2=Up 3=Average 4=Paeth.
+    int filter_distance;
+    for (filter_distance = 1; filter_distance <= 4; filter_distance++) {
+      // For the top row, the Paeth filter (4) is equivalent to the Sub filter
+      // (1), but the Paeth implementation is simpler if it can assume that
+      // there is a previous row.
+      uint8_t top_row_filter = (filter != 4) ? filter : 1;
+
+      g_work_slice_u8.ptr[13 * 0] = top_row_filter;  // 13 = 1 filter + 12 data.
+      memcpy(g_work_slice_u8.ptr + (13 * 0) + 1, src_rows[0], 12);
+      g_work_slice_u8.ptr[13 * 1] = filter;
+      memcpy(g_work_slice_u8.ptr + (13 * 1) + 1, src_rows[1], 12);
+
+      CHECK_STRING(do_wuffs_png_swizzle(
+          &dec, 12, 2, filter_distance, g_have_slice_u8,
+          wuffs_base__make_slice_u8(g_work_slice_u8.ptr, 13 * 2)));
+
+      wuffs_base__io_buffer have =
+          wuffs_base__ptr_u8__reader(g_have_slice_u8.ptr, 12 * 2, true);
+      have.meta.ri = have.meta.wi;
+
+      int index = (8 * (filter - 1)) + (2 * (filter_distance - 1));  // Top row.
+      memcpy(g_want_slice_u8.ptr + (12 * 0), want_rows[index + 0], 12);
+      memcpy(g_want_slice_u8.ptr + (12 * 1), want_rows[index + 1], 12);
+
+      wuffs_base__io_buffer want =
+          wuffs_base__ptr_u8__reader(g_want_slice_u8.ptr, 12 * 2, true);
+      want.meta.ri = want.meta.wi;
+
+      char prefix_buf[256];  // Holds the label passed to the comparison helper.
+      sprintf(prefix_buf, "filter=%d, filter_distance=%d ", filter,
+              filter_distance);
+      CHECK_STRING(check_io_buffers_equal(prefix_buf, &have, &want));
+    }
+  }
+
+  return NULL;
+}
+
+const char*  //
 test_wuffs_png_decode_frame_config() {
   CHECK_FOCUS(__func__);
   wuffs_png__decoder dec;
@@ -307,6 +447,7 @@
 
 proc g_tests[] = {
 
+    test_wuffs_png_decode_filters,
     test_wuffs_png_decode_frame_config,
     test_wuffs_png_decode_interface,