Eliminate temporary variables in std/png filter_1_distance_4_sse128
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 3de64cf..6430220 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -31053,7 +31053,6 @@
     wuffs_png__decoder* self,
     wuffs_base__slice_u8 a_curr) {
   wuffs_base__slice_u8 v_c = {0};
-  uint32_t v_x32 = 0;
   __m128i v_x128 = {0};
   __m128i v_a128 = {0};
 
@@ -31063,12 +31062,10 @@
     v_c.len = 4;
     uint8_t* i_end0_c = i_slice_c.ptr + ((i_slice_c.len / 4) * 4);
     while (v_c.ptr < i_end0_c) {
-      v_x32 = wuffs_base__peek_u32le__no_bounds_check(v_c.ptr);
-      (v_x128 = _mm_cvtsi32_si128((int)(v_x32)), wuffs_base__make_empty_struct());
+      (v_x128 = _mm_cvtsi32_si128((int)(wuffs_base__peek_u32le__no_bounds_check(v_c.ptr))), wuffs_base__make_empty_struct());
       v_x128 = _mm_add_epi8(v_x128, v_a128);
       v_a128 = v_x128;
-      v_x32 = ((uint32_t)(_mm_cvtsi128_si32(v_x128)));
-      (wuffs_base__poke_u32le__no_bounds_check(v_c.ptr, v_x32), wuffs_base__make_empty_struct());
+      (wuffs_base__poke_u32le__no_bounds_check(v_c.ptr, ((uint32_t)(_mm_cvtsi128_si32(v_x128)))), wuffs_base__make_empty_struct());
       v_c.ptr += 4;
     }
   }
diff --git a/std/png/decode_filter_sse128.wuffs b/std/png/decode_filter_sse128.wuffs
index 0338f1c..746afcd 100644
--- a/std/png/decode_filter_sse128.wuffs
+++ b/std/png/decode_filter_sse128.wuffs
@@ -20,18 +20,13 @@
 	choose cpu_arch >= sse128,
 {
 	var c    : slice base.u8
-	var x32  : base.u32
 	var x128 : base.sse128_i
 	var a128 : base.sse128_i
 
 	iterate (c = args.curr)(length: 4, advance: 4, unroll: 1) {
-		x32 = c.peek_u32le()
-		x128.load_u32!(a: x32)
-
+		x128.load_u32!(a: c.peek_u32le())
 		x128 = x128._mm_add_epi8!(b: a128)
 		a128 = x128
-
-		x32 = x128.truncate_u32()
-		c.poke_u32le!(a: x32)
+		c.poke_u32le!(a: x128.truncate_u32())
 	}
 }