Tweak some adler32 hasher.up_x86_sse42 var names

commit: 0a6cce5e0e8896ee7a28bda5ef23ae0cae7f5c05 [log] [tgz]
author: Nigel Tao <nigeltao@golang.org> Wed Apr 28 00:32:32 2021 +1000
committer: Nigel Tao <nigeltao@golang.org> Wed Apr 28 00:32:32 2021 +1000
tree: f7dc06b1f44d9e5ea2aa553013179fd5e6e4faac
parent: abd37fa3f8402facb70e7407651475532e3fbddc [diff]
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index b77f038..3449b11 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c

@@ -20886,8 +20886,8 @@
   __m128i v_ones = {0};
   __m128i v_weights__left = {0};
   __m128i v_weights_right = {0};
-  __m128i v_p__left = {0};
-  __m128i v_p_right = {0};
+  __m128i v_q__left = {0};
+  __m128i v_q_right = {0};
   __m128i v_v1 = {0};
   __m128i v_v2 = {0};
   __m128i v_v2j = {0};
@@ -20918,13 +20918,13 @@
       v_p.len = 32;
       uint8_t* i_end0_p = v_p.ptr + (((i_slice_p.len - (size_t)(v_p.ptr - i_slice_p.ptr)) / 32) * 32);
       while (v_p.ptr < i_end0_p) {
-        v_p__left = _mm_lddqu_si128((const __m128i*)(const void*)(v_p.ptr));
-        v_p_right = _mm_lddqu_si128((const __m128i*)(const void*)(v_p.ptr + 16));
+        v_q__left = _mm_lddqu_si128((const __m128i*)(const void*)(v_p.ptr));
+        v_q_right = _mm_lddqu_si128((const __m128i*)(const void*)(v_p.ptr + 16));
         v_v2j = _mm_add_epi32(v_v2j, v_v1);
-        v_v1 = _mm_add_epi32(v_v1, _mm_sad_epu8(v_p__left, v_zeroes));
-        v_v1 = _mm_add_epi32(v_v1, _mm_sad_epu8(v_p_right, v_zeroes));
-        v_v2k = _mm_add_epi32(v_v2k, _mm_madd_epi16(v_ones, _mm_maddubs_epi16(v_p__left, v_weights__left)));
-        v_v2k = _mm_add_epi32(v_v2k, _mm_madd_epi16(v_ones, _mm_maddubs_epi16(v_p_right, v_weights_right)));
+        v_v1 = _mm_add_epi32(v_v1, _mm_sad_epu8(v_q__left, v_zeroes));
+        v_v1 = _mm_add_epi32(v_v1, _mm_sad_epu8(v_q_right, v_zeroes));
+        v_v2k = _mm_add_epi32(v_v2k, _mm_madd_epi16(v_ones, _mm_maddubs_epi16(v_q__left, v_weights__left)));
+        v_v2k = _mm_add_epi32(v_v2k, _mm_madd_epi16(v_ones, _mm_maddubs_epi16(v_q_right, v_weights_right)));
         v_p.ptr += 32;
       }
       v_p.len = 0;

diff --git a/std/adler32/common_up_x86_sse42.wuffs b/std/adler32/common_up_x86_sse42.wuffs
index 93c5c1b..9599f2f 100644
--- a/std/adler32/common_up_x86_sse42.wuffs
+++ b/std/adler32/common_up_x86_sse42.wuffs

@@ -28,8 +28,8 @@
 	var ones          : base.x86_m128i
 	var weights__left : base.x86_m128i
 	var weights_right : base.x86_m128i
-	var p__left       : base.x86_m128i
-	var p_right       : base.x86_m128i
+	var q__left       : base.x86_m128i
+	var q_right       : base.x86_m128i
 	var v1            : base.x86_m128i
 	var v2            : base.x86_m128i
 	var v2j           : base.x86_m128i
@@ -92,13 +92,13 @@
 
 		// The inner loop.
 		iterate (p = args.x)(length: 32, advance: 32, unroll: 1) {
-			// Split the 32-byte p into left and right halves. SSE4.2 works
-			// with 16-byte registers.
+			// SSE4.2 works with 16-byte registers. Split the 32-byte p into
+			// left and right halves.
 			//
-			// Let p__left = [u8×16: p00, p01, p02, ..., p15]
-			// Let p_right = [u8×16: p16, p17, p18, ..., p31]
-			p__left = util.make_m128i_slice128(a: p[.. 16])
-			p_right = util.make_m128i_slice128(a: p[16 .. 32])
+			// Let q__left = [u8×16: p00, p01, p02, ..., p15]
+			// Let q_right = [u8×16: p16, p17, p18, ..., p31]
+			q__left = util.make_m128i_slice128(a: p[.. 16])
+			q_right = util.make_m128i_slice128(a: p[16 .. 32])
 
 			// For v2j, we need to calculate the sums of the s1j terms for each
 			// of p's 32 elements. This is simply 32 times the same number,
@@ -108,14 +108,14 @@
 
 			// For v1, we need to add the elements of p. Computing the sum of
 			// absolute differences (_mm_sad_epu8) with zero just sums the
-			// elements. p__left._mm_sad_epu8(b: zeroes) equals
+			// elements. q__left._mm_sad_epu8(b: zeroes) equals
 			//   [u64×2: p00 + p01 + ... + p07, p08 + p09 + ... + p15]
 			// This is equivalent (little-endian) to:
 			//   [u32×4: p00 + p01 + ... + p07, 0, p08 + p09 + ... + p15, 0]
-			// We accumulate those "sum of p__left elements" in v1, and ditto
-			// for the p_right elements.
-			v1 = v1._mm_add_epi32(b: p__left._mm_sad_epu8(b: zeroes))
-			v1 = v1._mm_add_epi32(b: p_right._mm_sad_epu8(b: zeroes))
+			// We accumulate those "sum of q__left's elements" in v1, and ditto
+			// for q_right's elements.
+			v1 = v1._mm_add_epi32(b: q__left._mm_sad_epu8(b: zeroes))
+			v1 = v1._mm_add_epi32(b: q_right._mm_sad_epu8(b: zeroes))
 
 			// For v2k, we need to calculate a weighted sum: ((32 * p00) + (31
 			// * p01) + (30 * p02) + ... + (1 * p31)), which splits naturally
@@ -128,19 +128,19 @@
 			//           ...
 			//           ((18*p14)+(17*p15))]
 			//
-			// The ones._mm_madd_epi16(b: etc) call is likewise a multiply-add
-			// (note that it's "madd" not "add"). Multiplying by 1 is a no-op,
-			// so this sums u16 pairs to produce u32 values:
+			// The ones._mm_madd_epi16(b: etc) call is a multiply-add (note
+			// that it's "madd" not "add"). Multiplying by 1 is a no-op, so
+			// this sums u16 pairs to produce u32 values:
 			//   [u32×4: ((32*p00)+(31*p01)+(30*p02)+(29*p03)),
 			//           ((28*p04)+(27*p05)+(26*p06)+(25*p07)),
 			//           ...
 			//           ((20*p12)+(19*p13)+(18*p14)+(17*p15))]
 			//
-			// Ditto again for the p_right elements.
+			// Ditto again for q_right's elements.
 			v2k = v2k._mm_add_epi32(b: ones._mm_madd_epi16(b:
-				p__left._mm_maddubs_epi16(b: weights__left)))
+				q__left._mm_maddubs_epi16(b: weights__left)))
 			v2k = v2k._mm_add_epi32(b: ones._mm_madd_epi16(b:
-				p_right._mm_maddubs_epi16(b: weights_right)))
+				q_right._mm_maddubs_epi16(b: weights_right)))
 		}
 
 		// Merge the four parallel u32 sums (v1) into the single u32 sum (s1).
commit	0a6cce5e0e8896ee7a28bda5ef23ae0cae7f5c05	[log] [tgz]
author	Nigel Tao <nigeltao@golang.org>	Wed Apr 28 00:32:32 2021 +1000
committer	Nigel Tao <nigeltao@golang.org>	Wed Apr 28 00:32:32 2021 +1000
tree	f7dc06b1f44d9e5ea2aa553013179fd5e6e4faac
parent	abd37fa3f8402facb70e7407651475532e3fbddc [diff]