fix buffer overflow

The other side of this branch also reads too many bytes,
8 when only 4 are allowed.

Bug: oss-fuzz:17829
Change-Id: I3fe4a9e39acf77abacbae9ab892f1d0794a411a8
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/245184
Reviewed-by: Robert Phillips <robertphillips@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h
index 7c68a3c..e01b029 100644
--- a/src/opts/SkRasterPipeline_opts.h
+++ b/src/opts/SkRasterPipeline_opts.h
@@ -727,6 +727,8 @@
         return {p[ix[0]], p[ix[1]], p[ix[2]], p[ix[3]]};
     }
 
+    // TODO: these loads and stores are incredibly difficult to follow.
+
     SI void load2(const uint16_t* ptr, size_t tail, U16* r, U16* g) {
         __m128i _01;
         if (__builtin_expect(tail,0)) {
@@ -738,7 +740,7 @@
                   _01 = _mm_insert_epi16(_01, *(ptr+5), 5);             // r0 g0 r1 g1 r2 g2 00 00
                 }
             } else {
-                _01 = _mm_loadl_pi(_01, (__m64 const*)ptr + 0);         // r0 g0 00 00 00 00 00 00
+                _01 = _mm_cvtsi32_si128(*(const uint32_t*)ptr);         // r0 g0 00 00 00 00 00 00
             }
         } else {
             _01 = _mm_loadu_si128(((__m128i*)ptr) + 0);  // r0 g0 r1 g1 r2 g2 r3 g3