Factor out png.decoder.filter_N functions

wuffs_png_decode_19k_8bpp/clang9          92.1MB/s ± 0%  95.7MB/s ± 0%   +3.89%  (p=0.008 n=5+5)
wuffs_png_decode_40k_24bpp/clang9          103MB/s ± 0%   110MB/s ± 0%   +6.81%  (p=0.008 n=5+5)
wuffs_png_decode_77k_8bpp/clang9           348MB/s ± 0%   358MB/s ± 0%   +2.90%  (p=0.008 n=5+5)
wuffs_png_decode_552k_32bpp/clang9         138MB/s ± 0%   136MB/s ± 0%   -1.92%  (p=0.008 n=5+5)
wuffs_png_decode_4002k_24bpp/clang9        103MB/s ± 0%   111MB/s ± 0%   +8.04%  (p=0.008 n=5+5)

wuffs_png_decode_filter_1_sub/clang9       738MB/s ± 0%   646MB/s ± 0%  -12.51%  (p=0.008 n=5+5)
wuffs_png_decode_filter_2_up/clang9       13.4GB/s ± 0%  13.1GB/s ± 0%   -2.51%  (p=0.008 n=5+5)
wuffs_png_decode_filter_3_average/clang9   329MB/s ± 0%   481MB/s ± 0%  +46.25%  (p=0.008 n=5+5)
wuffs_png_decode_filter_4_paeth/clang9    70.0MB/s ± 0%  65.3MB/s ± 0%   -6.80%  (p=0.008 n=5+5)

wuffs_png_decode_19k_8bpp/gcc10           98.0MB/s ± 0%  98.0MB/s ± 0%     ~     (p=0.690 n=5+5)
wuffs_png_decode_40k_24bpp/gcc10           118MB/s ± 0%   118MB/s ± 0%   +0.43%  (p=0.016 n=5+5)
wuffs_png_decode_77k_8bpp/gcc10            318MB/s ± 0%   317MB/s ± 0%   -0.19%  (p=0.008 n=5+5)
wuffs_png_decode_552k_32bpp/gcc10          161MB/s ± 0%   159MB/s ± 0%   -1.38%  (p=0.008 n=5+5)
wuffs_png_decode_4002k_24bpp/gcc10         118MB/s ± 0%   118MB/s ± 0%     ~     (p=0.056 n=5+5)

wuffs_png_decode_filter_1_sub/gcc10        932MB/s ± 0%   931MB/s ± 0%     ~     (p=0.151 n=5+5)
wuffs_png_decode_filter_2_up/gcc10        11.0GB/s ± 1%  11.4GB/s ± 0%   +3.32%  (p=0.016 n=4+5)
wuffs_png_decode_filter_3_average/gcc10    645MB/s ± 0%   643MB/s ± 0%   -0.29%  (p=0.008 n=5+5)
wuffs_png_decode_filter_4_paeth/gcc10     90.1MB/s ± 0%  83.6MB/s ± 0%   -7.24%  (p=0.008 n=5+5)
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 35ab8f5..a79fa07 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -29836,6 +29836,29 @@
     wuffs_base__pixel_buffer* a_dst,
     wuffs_base__slice_u8 a_workbuf);
 
+static wuffs_base__empty_struct
+wuffs_png__decoder__filter_1(
+    wuffs_png__decoder* self,
+    wuffs_base__slice_u8 a_curr);
+
+static wuffs_base__empty_struct
+wuffs_png__decoder__filter_2(
+    wuffs_png__decoder* self,
+    wuffs_base__slice_u8 a_curr,
+    wuffs_base__slice_u8 a_prev);
+
+static wuffs_base__empty_struct
+wuffs_png__decoder__filter_3(
+    wuffs_png__decoder* self,
+    wuffs_base__slice_u8 a_curr,
+    wuffs_base__slice_u8 a_prev);
+
+static wuffs_base__empty_struct
+wuffs_png__decoder__filter_4(
+    wuffs_png__decoder* self,
+    wuffs_base__slice_u8 a_curr,
+    wuffs_base__slice_u8 a_prev);
+
 // ---------------- VTables
 
 const wuffs_base__image_decoder__func_ptrs
@@ -30942,20 +30965,11 @@
   uint64_t v_dst_bytes_per_row = 0;
   wuffs_base__slice_u8 v_dst_palette = {0};
   wuffs_base__table_u8 v_tab = {0};
-  uint64_t v_filter_distance = 0;
   uint32_t v_y = 0;
   wuffs_base__slice_u8 v_dst = {0};
   uint8_t v_filter = 0;
   wuffs_base__slice_u8 v_curr_row = {0};
   wuffs_base__slice_u8 v_prev_row = {0};
-  uint64_t v_i = 0;
-  uint32_t v_fa = 0;
-  uint32_t v_fb = 0;
-  uint32_t v_fc = 0;
-  uint32_t v_pp = 0;
-  uint32_t v_pa = 0;
-  uint32_t v_pb = 0;
-  uint32_t v_pc = 0;
 
   v_dst_pixfmt = wuffs_base__pixel_buffer__pixel_format(a_dst);
   v_dst_bits_per_pixel = wuffs_base__pixel_format__bits_per_pixel(&v_dst_pixfmt);
@@ -30966,7 +30980,6 @@
   v_dst_bytes_per_row = (((uint64_t)(self->private_impl.f_width)) * v_dst_bytes_per_pixel);
   v_dst_palette = wuffs_base__pixel_buffer__palette_or_else(a_dst, wuffs_base__make_slice_u8(self->private_data.f_dst_palette, 1024));
   v_tab = wuffs_base__pixel_buffer__plane(a_dst, 0);
-  v_filter_distance = ((uint64_t)(self->private_impl.f_filter_distance));
   while (v_y < self->private_impl.f_height) {
     v_dst = wuffs_base__table_u8__row(v_tab, v_y);
     if (v_dst_bytes_per_row < ((uint64_t)(v_dst.len))) {
@@ -30984,79 +30997,13 @@
     a_workbuf = wuffs_base__slice_u8__subslice_i(a_workbuf, self->private_impl.f_bytes_per_row);
     if (v_filter == 0) {
     } else if (v_filter == 1) {
-      v_i = v_filter_distance;
-      while (v_i < ((uint64_t)(v_curr_row.len))) {
-        if (v_i >= v_filter_distance) {
-          if ((v_i - v_filter_distance) < ((uint64_t)(v_curr_row.len))) {
-            v_curr_row.ptr[v_i] += v_curr_row.ptr[(v_i - v_filter_distance)];
-          }
-        }
-        v_i += 1;
-      }
+      wuffs_png__decoder__filter_1(self, v_curr_row);
     } else if (v_filter == 2) {
-      v_i = 0;
-      while ((v_i < ((uint64_t)(v_curr_row.len))) && (v_i < ((uint64_t)(v_prev_row.len)))) {
-        v_curr_row.ptr[v_i] += v_prev_row.ptr[v_i];
-        v_i += 1;
-      }
+      wuffs_png__decoder__filter_2(self, v_curr_row, v_prev_row);
     } else if (v_filter == 3) {
-      if (v_y == 0) {
-        v_i = v_filter_distance;
-        while (v_i < ((uint64_t)(v_curr_row.len))) {
-          if (v_i >= v_filter_distance) {
-            if ((v_i - v_filter_distance) < ((uint64_t)(v_curr_row.len))) {
-              v_curr_row.ptr[v_i] += (v_curr_row.ptr[(v_i - v_filter_distance)] / 2);
-            }
-          }
-          v_i += 1;
-        }
-      } else {
-        v_i = 0;
-        while ((v_i < ((uint64_t)(v_curr_row.len))) && (v_i < ((uint64_t)(v_prev_row.len)))) {
-          if (v_i >= v_filter_distance) {
-            if ((v_i - v_filter_distance) < ((uint64_t)(v_curr_row.len))) {
-              v_curr_row.ptr[v_i] += ((uint8_t)(((((uint32_t)(v_curr_row.ptr[(v_i - v_filter_distance)])) + ((uint32_t)(v_prev_row.ptr[v_i]))) / 2)));
-            }
-          } else {
-            v_curr_row.ptr[v_i] += (v_prev_row.ptr[v_i] / 2);
-          }
-          v_i += 1;
-        }
-      }
+      wuffs_png__decoder__filter_3(self, v_curr_row, v_prev_row);
     } else if (v_filter == 4) {
-      v_i = 0;
-      while ((v_i < ((uint64_t)(v_curr_row.len))) && (v_i < ((uint64_t)(v_prev_row.len)))) {
-        if (v_i < v_filter_distance) {
-          v_curr_row.ptr[v_i] += v_prev_row.ptr[v_i];
-        } else {
-          if (((v_i - v_filter_distance) < ((uint64_t)(v_curr_row.len))) && ((v_i - v_filter_distance) < ((uint64_t)(v_prev_row.len)))) {
-            v_fa = ((uint32_t)(v_curr_row.ptr[(v_i - v_filter_distance)]));
-            v_fb = ((uint32_t)(v_prev_row.ptr[v_i]));
-            v_fc = ((uint32_t)(v_prev_row.ptr[(v_i - v_filter_distance)]));
-            v_pp = ((v_fa + v_fb) - v_fc);
-            v_pa = (v_pp - v_fa);
-            if (v_pa >= 2147483648) {
-              v_pa = (0 - v_pa);
-            }
-            v_pb = (v_pp - v_fb);
-            if (v_pb >= 2147483648) {
-              v_pb = (0 - v_pb);
-            }
-            v_pc = (v_pp - v_fc);
-            if (v_pc >= 2147483648) {
-              v_pc = (0 - v_pc);
-            }
-            if ((v_pa <= v_pb) && (v_pa <= v_pc)) {
-              v_curr_row.ptr[v_i] += ((uint8_t)((v_fa & 255)));
-            } else if (v_pb <= v_pc) {
-              v_curr_row.ptr[v_i] += ((uint8_t)((v_fb & 255)));
-            } else {
-              v_curr_row.ptr[v_i] += ((uint8_t)((v_fc & 255)));
-            }
-          }
-        }
-        v_i += 1;
-      }
+      wuffs_png__decoder__filter_4(self, v_curr_row, v_prev_row);
     } else {
       return wuffs_base__make_status(wuffs_png__error__bad_filter);
     }
@@ -31067,6 +31014,136 @@
   return wuffs_base__make_status(NULL);
 }
 
+// -------- func png.decoder.filter_1
+// Undoes PNG filter type 1 ("sub") in place: a_curr[i] += a_curr[i - distance].
+static wuffs_base__empty_struct
+wuffs_png__decoder__filter_1(
+    wuffs_png__decoder* self,
+    wuffs_base__slice_u8 a_curr) {
+  uint64_t v_filter_distance = 0;
+  uint64_t v_i = 0;
+  // The first v_filter_distance bytes are left unchanged: the loop starts after them.
+  v_filter_distance = ((uint64_t)(self->private_impl.f_filter_distance));
+  v_i = v_filter_distance;
+  while (v_i < ((uint64_t)(a_curr.len))) {
+    if (v_i >= v_filter_distance) {  // Always true here: v_i starts at the distance and only grows.
+      if ((v_i - v_filter_distance) < ((uint64_t)(a_curr.len))) {
+        a_curr.ptr[v_i] += a_curr.ptr[(v_i - v_filter_distance)];  // Byte add wraps modulo 256.
+      }
+    }
+    v_i += 1;
+  }
+  return wuffs_base__make_empty_struct();
+}
+
+// -------- func png.decoder.filter_2
+// Undoes PNG filter type 2 ("up") in place: a_curr[i] += a_prev[i]. self is not read.
+static wuffs_base__empty_struct
+wuffs_png__decoder__filter_2(
+    wuffs_png__decoder* self,
+    wuffs_base__slice_u8 a_curr,
+    wuffs_base__slice_u8 a_prev) {
+  uint64_t v_i = 0;
+  // Only the first min(a_curr.len, a_prev.len) bytes are updated.
+  v_i = 0;
+  while ((v_i < ((uint64_t)(a_curr.len))) && (v_i < ((uint64_t)(a_prev.len)))) {
+    a_curr.ptr[v_i] += a_prev.ptr[v_i];  // Byte add wraps modulo 256.
+    v_i += 1;
+  }
+  return wuffs_base__make_empty_struct();
+}
+
+// -------- func png.decoder.filter_3
+// Undoes PNG filter type 3 ("average") in place, using a_prev as the row above.
+static wuffs_base__empty_struct
+wuffs_png__decoder__filter_3(
+    wuffs_png__decoder* self,
+    wuffs_base__slice_u8 a_curr,
+    wuffs_base__slice_u8 a_prev) {
+  uint64_t v_filter_distance = 0;
+  uint64_t v_i = 0;
+  // NOTE(review): an empty a_prev presumably means "first row, nothing above" - confirm against the caller.
+  v_filter_distance = ((uint64_t)(self->private_impl.f_filter_distance));
+  if (((uint64_t)(a_prev.len)) == 0) {  // No row above: only half of the left byte is added.
+    v_i = v_filter_distance;  // Bytes before the distance have no left byte and stay unchanged.
+    while (v_i < ((uint64_t)(a_curr.len))) {
+      if (v_i >= v_filter_distance) {  // Always true here: v_i starts at the distance and only grows.
+        if ((v_i - v_filter_distance) < ((uint64_t)(a_curr.len))) {
+          a_curr.ptr[v_i] += (a_curr.ptr[(v_i - v_filter_distance)] / 2);
+        }
+      }
+      v_i += 1;
+    }
+  } else {  // Row above present: add the average of the left byte and the byte above.
+    v_i = 0;
+    while ((v_i < ((uint64_t)(a_curr.len))) && (v_i < ((uint64_t)(a_prev.len)))) {
+      if (v_i >= v_filter_distance) {
+        if ((v_i - v_filter_distance) < ((uint64_t)(a_curr.len))) {
+          a_curr.ptr[v_i] += ((uint8_t)(((((uint32_t)(a_curr.ptr[(v_i - v_filter_distance)])) + ((uint32_t)(a_prev.ptr[v_i]))) / 2)));  // Sum is formed in 32 bits, halved, then narrowed.
+        }
+      } else {  // No left byte yet: only half of the byte above is added.
+        a_curr.ptr[v_i] += (a_prev.ptr[v_i] / 2);
+      }
+      v_i += 1;
+    }
+  }
+  return wuffs_base__make_empty_struct();
+}
+
+// -------- func png.decoder.filter_4
+// Undoes PNG filter type 4 ("paeth") in place, using a_prev as the row above.
+static wuffs_base__empty_struct
+wuffs_png__decoder__filter_4(
+    wuffs_png__decoder* self,
+    wuffs_base__slice_u8 a_curr,
+    wuffs_base__slice_u8 a_prev) {
+  uint64_t v_filter_distance = 0;
+  uint64_t v_i = 0;
+  uint32_t v_fa = 0;
+  uint32_t v_fb = 0;
+  uint32_t v_fc = 0;
+  uint32_t v_pp = 0;
+  uint32_t v_pa = 0;
+  uint32_t v_pb = 0;
+  uint32_t v_pc = 0;
+  // fa, fb, fc: left, above and upper-left bytes. pa, pb, pc: their absolute distances from pp.
+  v_filter_distance = ((uint64_t)(self->private_impl.f_filter_distance));
+  v_i = 0;
+  while ((v_i < ((uint64_t)(a_curr.len))) && (v_i < ((uint64_t)(a_prev.len)))) {
+    if (v_i < v_filter_distance) {  // No left or upper-left byte yet: only the byte above is added.
+      a_curr.ptr[v_i] += a_prev.ptr[v_i];
+    } else {
+      if (((v_i - v_filter_distance) < ((uint64_t)(a_curr.len))) && ((v_i - v_filter_distance) < ((uint64_t)(a_prev.len)))) {
+        v_fa = ((uint32_t)(a_curr.ptr[(v_i - v_filter_distance)]));
+        v_fb = ((uint32_t)(a_prev.ptr[v_i]));
+        v_fc = ((uint32_t)(a_prev.ptr[(v_i - v_filter_distance)]));
+        v_pp = ((v_fa + v_fb) - v_fc);  // Initial estimate: left + above - upper-left (may wrap in uint32_t).
+        v_pa = (v_pp - v_fa);
+        if (v_pa >= 2147483648) {  // Top bit set: the unsigned difference wrapped, i.e. it is negative.
+          v_pa = (0 - v_pa);  // Negate to obtain the absolute value.
+        }
+        v_pb = (v_pp - v_fb);
+        if (v_pb >= 2147483648) {
+          v_pb = (0 - v_pb);
+        }
+        v_pc = (v_pp - v_fc);
+        if (v_pc >= 2147483648) {
+          v_pc = (0 - v_pc);
+        }
+        if ((v_pa <= v_pb) && (v_pa <= v_pc)) {  // Add the candidate nearest to pp; ties favor fa, then fb.
+          a_curr.ptr[v_i] += ((uint8_t)((v_fa & 255)));
+        } else if (v_pb <= v_pc) {
+          a_curr.ptr[v_i] += ((uint8_t)((v_fb & 255)));
+        } else {
+          a_curr.ptr[v_i] += ((uint8_t)((v_fc & 255)));
+        }
+      }
+    }
+    v_i += 1;
+  }
+  return wuffs_base__make_empty_struct();
+}
+
 // -------- func png.decoder.frame_dirty_rect
 
 WUFFS_BASE__MAYBE_STATIC wuffs_base__rect_ie_u32
diff --git a/std/png/decode_png.wuffs b/std/png/decode_png.wuffs
index 64bdc1d..0e9b4cb 100644
--- a/std/png/decode_png.wuffs
+++ b/std/png/decode_png.wuffs
@@ -398,7 +398,6 @@
 	var dst_bytes_per_row   : base.u64
 	var dst_palette         : slice base.u8
 	var tab                 : table base.u8
-	var filter_distance     : base.u64[..= 8]
 
 	var y        : base.u32
 	var dst      : slice base.u8
@@ -406,15 +405,6 @@
 	var curr_row : slice base.u8
 	var prev_row : slice base.u8
 
-	var i  : base.u64
-	var fa : base.u32
-	var fb : base.u32
-	var fc : base.u32
-	var pp : base.u32
-	var pa : base.u32
-	var pb : base.u32
-	var pc : base.u32
-
 	// TODO: the dst_pixfmt variable shouldn't be necessary. We should be able
 	// to chain the two calls: "args.dst.pixel_format().bits_per_pixel()".
 	dst_pixfmt = args.dst.pixel_format()
@@ -426,7 +416,6 @@
 	dst_bytes_per_row = (this.width as base.u64) * dst_bytes_per_pixel
 	dst_palette = args.dst.palette_or_else(fallback: this.dst_palette[..])
 	tab = args.dst.plane(p: 0)
-	filter_distance = this.filter_distance as base.u64
 
 	while y < this.height {
 		assert y < 0xFFFF_FFFF via "a < b: a < c; c <= b"(c: this.height)
@@ -449,97 +438,13 @@
 		if filter == 0 {
 			// No-op.
 		} else if filter == 1 {
-			i = filter_distance
-			while i < curr_row.length(),
-				inv y < 0xFFFF_FFFF,
-			{
-				assert i < 0xFFFF_FFFF_FFFF_FFFF via "a < b: a < c; c <= b"(c: curr_row.length())
-				if i >= filter_distance {
-					if (i - filter_distance) < curr_row.length() {
-						curr_row[i] ~mod+= curr_row[i - filter_distance]
-					}
-				}
-				i += 1
-			} endwhile
+			this.filter_1!(curr: curr_row)
 		} else if filter == 2 {
-			i = 0
-			while (i < curr_row.length()) and (i < prev_row.length()),
-				inv y < 0xFFFF_FFFF,
-			{
-				assert i < 0xFFFF_FFFF_FFFF_FFFF via "a < b: a < c; c <= b"(c: curr_row.length())
-				curr_row[i] ~mod+= prev_row[i]
-				i += 1
-			} endwhile
+			this.filter_2!(curr: curr_row, prev: prev_row)
 		} else if filter == 3 {
-			if y == 0 {
-				i = filter_distance
-				while i < curr_row.length(),
-					inv y < 0xFFFF_FFFF,
-				{
-					assert i < 0xFFFF_FFFF_FFFF_FFFF via "a < b: a < c; c <= b"(c: curr_row.length())
-					if i >= filter_distance {
-						if (i - filter_distance) < curr_row.length() {
-							curr_row[i] ~mod+= curr_row[i - filter_distance] / 2
-						}
-					}
-					i += 1
-				} endwhile
-			} else {
-				i = 0
-				while (i < curr_row.length()) and (i < prev_row.length()),
-					inv y < 0xFFFF_FFFF,
-				{
-					assert i < 0xFFFF_FFFF_FFFF_FFFF via "a < b: a < c; c <= b"(c: curr_row.length())
-					if i >= filter_distance {
-						if (i - filter_distance) < curr_row.length() {
-							curr_row[i] ~mod+= ((
-								(curr_row[i - filter_distance] as base.u32) +
-								(prev_row[i] as base.u32)) / 2) as base.u8
-						}
-					} else {
-						curr_row[i] ~mod+= prev_row[i] / 2
-					}
-					i += 1
-				} endwhile
-			}
+			this.filter_3!(curr: curr_row, prev: prev_row)
 		} else if filter == 4 {
-			i = 0
-			while (i < curr_row.length()) and (i < prev_row.length()),
-				inv y < 0xFFFF_FFFF,
-			{
-				assert i < 0xFFFF_FFFF_FFFF_FFFF via "a < b: a < c; c <= b"(c: curr_row.length())
-				if i < filter_distance {
-					curr_row[i] ~mod+= prev_row[i]
-				} else {
-					if ((i - filter_distance) < curr_row.length()) and
-						((i - filter_distance) < prev_row.length()) {
-						fa = curr_row[i - filter_distance] as base.u32
-						fb = prev_row[i] as base.u32
-						fc = prev_row[i - filter_distance] as base.u32
-						pp = (fa ~mod+ fb) ~mod- fc
-						pa = pp ~mod- fa
-						if pa >= 0x8000_0000 {
-							pa = 0 ~mod- pa
-						}
-						pb = pp ~mod- fb
-						if pb >= 0x8000_0000 {
-							pb = 0 ~mod- pb
-						}
-						pc = pp ~mod- fc
-						if pc >= 0x8000_0000 {
-							pc = 0 ~mod- pc
-						}
-						if (pa <= pb) and (pa <= pc) {
-							curr_row[i] ~mod+= (fa & 0xFF) as base.u8
-						} else if pb <= pc {
-							curr_row[i] ~mod+= (fb & 0xFF) as base.u8
-						} else {
-							curr_row[i] ~mod+= (fc & 0xFF) as base.u8
-						}
-					}
-				}
-				i += 1
-			} endwhile
+			this.filter_4!(curr: curr_row, prev: prev_row)
 		} else {
 			return "#bad filter"
 		}
@@ -556,6 +461,118 @@
 	return ok
 }
 
+pri func decoder.filter_1!(curr: slice base.u8) {
+	var filter_distance : base.u64[..= 8]
+	var i               : base.u64
+	// Undoes PNG filter type 1 ("sub") in place: curr[i] ~mod+= curr[i - filter_distance].
+	filter_distance = this.filter_distance as base.u64
+	i = filter_distance  // Bytes before filter_distance are left unchanged.
+	while i < args.curr.length() {
+		assert i < 0xFFFF_FFFF_FFFF_FFFF via "a < b: a < c; c <= b"(c: args.curr.length())
+		if i >= filter_distance {  // Always true at run time: i starts at filter_distance and only grows.
+			if (i - filter_distance) < args.curr.length() {  // NOTE(review): presumably kept for the bounds checker - confirm.
+				args.curr[i] ~mod+= args.curr[i - filter_distance]
+			}
+		}
+		i += 1
+	} endwhile
+}
+
+pri func decoder.filter_2!(curr: slice base.u8, prev: slice base.u8) {
+	var i : base.u64
+	// Undoes PNG filter type 2 ("up") in place: curr[i] ~mod+= prev[i], over the shorter of the two slices.
+	i = 0
+	while (i < args.curr.length()) and (i < args.prev.length()) {
+		assert i < 0xFFFF_FFFF_FFFF_FFFF via "a < b: a < c; c <= b"(c: args.curr.length())
+		args.curr[i] ~mod+= args.prev[i]
+		i += 1
+	} endwhile
+}
+
+pri func decoder.filter_3!(curr: slice base.u8, prev: slice base.u8) {
+	var filter_distance : base.u64[..= 8]
+	var i               : base.u64
+	// Undoes PNG filter type 3 ("average") in place, using prev as the row above.
+	filter_distance = this.filter_distance as base.u64
+	if args.prev.length() == 0 {  // NOTE(review): empty prev presumably means "first row" - confirm against the caller.
+		i = filter_distance  // No row above: bytes before filter_distance stay unchanged.
+		while i < args.curr.length() {
+			assert i < 0xFFFF_FFFF_FFFF_FFFF via "a < b: a < c; c <= b"(c: args.curr.length())
+			if i >= filter_distance {  // Always true at run time: i starts at filter_distance and only grows.
+				if (i - filter_distance) < args.curr.length() {
+					args.curr[i] ~mod+= args.curr[i - filter_distance] / 2
+				}
+			}
+			i += 1
+		} endwhile
+	} else {  // Row above present: add the average of the left byte and the byte above.
+		i = 0
+		while (i < args.curr.length()) and (i < args.prev.length()) {
+			assert i < 0xFFFF_FFFF_FFFF_FFFF via "a < b: a < c; c <= b"(c: args.curr.length())
+			if i >= filter_distance {
+				if (i - filter_distance) < args.curr.length() {
+					args.curr[i] ~mod+= ((
+						(args.curr[i - filter_distance] as base.u32) +
+						(args.prev[i] as base.u32)) / 2) as base.u8
+				}
+			} else {  // No left byte yet: only half of the byte above is added.
+				args.curr[i] ~mod+= args.prev[i] / 2
+			}
+			i += 1
+		} endwhile
+	}
+}
+
+pri func decoder.filter_4!(curr: slice base.u8, prev: slice base.u8) {
+	var filter_distance : base.u64[..= 8]
+	var i               : base.u64
+	// fa, fb, fc: left, above and upper-left bytes. pp: estimate. pa, pb, pc: absolute distances from pp.
+	var fa : base.u32
+	var fb : base.u32
+	var fc : base.u32
+	var pp : base.u32
+	var pa : base.u32
+	var pb : base.u32
+	var pc : base.u32
+	// Undoes PNG filter type 4 ("paeth") in place, using prev as the row above.
+	filter_distance = this.filter_distance as base.u64
+	i = 0
+	while (i < args.curr.length()) and (i < args.prev.length()) {
+		assert i < 0xFFFF_FFFF_FFFF_FFFF via "a < b: a < c; c <= b"(c: args.curr.length())
+		if i < filter_distance {  // No left or upper-left byte yet: only the byte above is added.
+			args.curr[i] ~mod+= args.prev[i]
+		} else {
+			if ((i - filter_distance) < args.curr.length()) and
+				((i - filter_distance) < args.prev.length()) {
+				fa = args.curr[i - filter_distance] as base.u32
+				fb = args.prev[i] as base.u32
+				fc = args.prev[i - filter_distance] as base.u32
+				pp = (fa ~mod+ fb) ~mod- fc  // left + above - upper-left, with u32 wrap-around.
+				pa = pp ~mod- fa
+				if pa >= 0x8000_0000 {  // Top bit set: the wrapped difference is negative.
+					pa = 0 ~mod- pa  // Negate to obtain the absolute value.
+				}
+				pb = pp ~mod- fb
+				if pb >= 0x8000_0000 {
+					pb = 0 ~mod- pb
+				}
+				pc = pp ~mod- fc
+				if pc >= 0x8000_0000 {
+					pc = 0 ~mod- pc
+				}
+				if (pa <= pb) and (pa <= pc) {  // Add the candidate nearest to pp; ties favor fa, then fb.
+					args.curr[i] ~mod+= (fa & 0xFF) as base.u8
+				} else if pb <= pc {
+					args.curr[i] ~mod+= (fb & 0xFF) as base.u8
+				} else {
+					args.curr[i] ~mod+= (fc & 0xFF) as base.u8
+				}
+			}
+		}
+		i += 1
+	} endwhile
+}
+
 pub func decoder.frame_dirty_rect() base.rect_ie_u32 {
 	return this.util.make_rect_ie_u32(
 		min_incl_x: 0,