Avoid switching between scalar and SIMD in Affine_vpts.

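Previously the trailing odd point was transformed with scalar math
while the paired points went through skvx. Running the trailing point
through the same skvx expression might avoid a scenario where the math
works out differently between skvx and scalar.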

Change-Id: If5561fc451d27114a79f7d12d3bee381719c1db3
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/638222
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
Reviewed-by: Michael Ludwig <michaelludwig@google.com>
Auto-Submit: John Stiles <johnstiles@google.com>
diff --git a/src/core/SkMatrix.cpp b/src/core/SkMatrix.cpp
index d304da0..50c6e7a 100644
--- a/src/core/SkMatrix.cpp
+++ b/src/core/SkMatrix.cpp
@@ -987,16 +987,20 @@
         skvx::float4  skew4(kx, ky, kx, ky);    // applied to swizzle of src4
         bool trailingElement = (count & 1);
         count >>= 1;
+        skvx::float4 src4;
         for (int i = 0; i < count; ++i) {
-            skvx::float4 src4 = skvx::float4::Load(src);
+            src4 = skvx::float4::Load(src);
             skvx::float4 swz4 = skvx::shuffle<1,0,3,2>(src4);  // y0 x0, y1 x1
             (src4 * scale4 + swz4 * skew4 + trans4).store(dst);
             src += 2;
             dst += 2;
         }
         if (trailingElement) {
-            dst->set(src->fX * sx + src->fY * kx + tx,
-                     src->fX * ky + src->fY * sy + ty);
+            // We use the same logic here to ensure that the math stays consistent throughout, even
+            // though the high float2 is ignored.
+            src4.lo = skvx::float2::Load(src);
+            skvx::float4 swz4 = skvx::shuffle<1,0,3,2>(src4);  // y0 x0, y1 x1
+            (src4 * scale4 + swz4 * skew4 + trans4).lo.store(dst);
         }
     }
 }