Implement Sk2f::Store2

Bug: skia:
Change-Id: Ieedd05ced376a7604936e9d2729fc20a8669496e
Reviewed-on: https://skia-review.googlesource.com/115531
Commit-Queue: Chris Dalton <csmartdalton@google.com>
Reviewed-by: Mike Klein <mtklein@google.com>
diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h
index f8170ef..554fc82 100644
--- a/src/opts/SkNx_neon.h
+++ b/src/opts/SkNx_neon.h
@@ -34,6 +34,14 @@
     AI static SkNx Load(const void* ptr) { return vld1_f32((const float*)ptr); }
     AI void store(void* ptr) const { vst1_f32((float*)ptr, fVec); }
 
+    AI static void Store2(void* dst, const SkNx& a, const SkNx& b) {  // dst = a[0], b[0], a[1], b[1]
+        float32x2x2_t ab = {{  // Bundle both 2-lane vectors into the pair type vst2_f32 takes.
+            a.fVec,
+            b.fVec,
+        }};
+        vst2_f32((float*) dst, ab);  // Interleaving store of the pair; writes four floats to dst.
+    }
+
     AI static void Store3(void* dst, const SkNx& a, const SkNx& b, const SkNx& c) {
         float32x2x3_t abc = {{
             a.fVec,
diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h
index b4ae0cb..bd5c58e 100644
--- a/src/opts/SkNx_sse.h
+++ b/src/opts/SkNx_sse.h
@@ -29,6 +29,11 @@
 
     AI void store(void* ptr) const { _mm_storel_pi((__m64*)ptr, fVec); }
 
+    AI static void Store2(void* dst, const SkNx& a, const SkNx& b) {  // dst = a[0], b[0], a[1], b[1]
+        auto vals = _mm_unpacklo_ps(a.fVec, b.fVec);  // Interleave the low two lanes of a and b.
+        _mm_storeu_ps((float*)dst, vals);  // Unaligned store of all four floats.
+    }
+
     AI static void Store3(void* dst, const SkNx& a, const SkNx& b, const SkNx& c) {
         auto lo = _mm_setr_ps(a[0], b[0], c[0], a[1]),
              hi = _mm_setr_ps(b[1], c[1],    0,    0);
diff --git a/tests/SkNxTest.cpp b/tests/SkNxTest.cpp
index 9e2c27e..de21f26 100644
--- a/tests/SkNxTest.cpp
+++ b/tests/SkNxTest.cpp
@@ -424,6 +424,17 @@
     REPORTER_ASSERT(r, y[3] == 7);
 }
 
+DEF_TEST(Sk2f_Store2, r) {  // Verifies Store2 writes its two inputs interleaved lane by lane.
+    Sk2f p0{0, 2};  // Expected at dst[0] and dst[2].
+    Sk2f p1{1, 3};  // Expected at dst[1] and dst[3].
+    float dst[4];
+    Sk2f::Store2(dst, p0, p1);
+    REPORTER_ASSERT(r, dst[0] == 0);  // p0[0]
+    REPORTER_ASSERT(r, dst[1] == 1);  // p1[0]
+    REPORTER_ASSERT(r, dst[2] == 2);  // p0[1]
+    REPORTER_ASSERT(r, dst[3] == 3);  // p1[1]
+}
+
 DEF_TEST(Sk2f_Store3, r) {
     Sk2f p0{0, 3};
     Sk2f p1{1, 4};