Add atan2_ primitive to SkRasterPipeline.

This logic is also adapted directly from SkVM.

Change-Id: I0b8960fb79511fab3cdad826af4439d1e8d2cb31
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/635616
Reviewed-by: Herb Derby <herb@google.com>
Auto-Submit: John Stiles <johnstiles@google.com>
Commit-Queue: John Stiles <johnstiles@google.com>
diff --git a/src/core/SkVM.cpp b/src/core/SkVM.cpp
index 2e742b0..d3e0af3 100644
--- a/src/core/SkVM.cpp
+++ b/src/core/SkVM.cpp
@@ -968,7 +968,7 @@
         // handle quadrant distinctions
         r = select((y0 >= 0) & (x0  < 0), r + SK_ScalarPI, r);
         r = select((y0  < 0) & (x0 <= 0), r - SK_ScalarPI, r);
-        // Note: we don't try to handle 0,0 or infinities (yet)
+        // Note: we don't try to handle 0,0 or infinities
         return r;
     }
 
diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h
index 0ec9146..b0c720d 100644
--- a/src/opts/SkRasterPipeline_opts.h
+++ b/src/opts/SkRasterPipeline_opts.h
@@ -1530,6 +1530,30 @@
     return x;
 }
 
+/*  Approximates atan2(y0, x0) using the identity atan(x) = pi/2 - atan(1/x) for x > 1.
+    By swapping y,x when |y0| > |x0| the ratio is <= 1, so we can safely call approx_atan_unit()
+    which avoids a 2nd divide instruction if we had instead called atan().
+ */
+SI F atan2_(F y0, F x0) {
+    I32 flip = (abs_(y0) > abs_(x0));    // set where the operands must be swapped
+    F   y = if_then_else(flip, x0, y0);
+    F   x = if_then_else(flip, y0, x0);
+    F   arg = y/x;                       // the only divide; |arg| <= 1 after the swap
+
+    I32 neg = (arg < 0.0f);              // remember the sign, then work with |arg|
+    arg = if_then_else(neg, -arg, arg);
+
+    F r = approx_atan_unit(arg);         // presumably expects arg in [0, 1] -- TODO confirm
+    r = if_then_else(flip, SK_ScalarPI/2 - r, r);  // undo the swap: atan(1/t) = pi/2 - atan(t)
+    r = if_then_else(neg, -r, r);                  // restore the sign dropped above
+
+    // handle quadrant distinctions
+    r = if_then_else((y0 >= 0) & (x0  < 0), r + SK_ScalarPI, r);  // target range (pi/2, pi]
+    r = if_then_else((y0  < 0) & (x0 <= 0), r - SK_ScalarPI, r);  // target range (-pi, -pi/2]
+    // Note: we don't try to handle 0,0 or infinities
+    return r;
+}
+
 // Used by gather_ stages to calculate the base pointer and a vector of indices to load.
 template <typename T>
 SI U32 ix_and_ptr(T** ptr, const SkRasterPipeline_GatherCtx* ctx, F x, F y) {
diff --git a/tests/SkRasterPipelineOptsTest.cpp b/tests/SkRasterPipelineOptsTest.cpp
index 438ee6b..4bcc260 100644
--- a/tests/SkRasterPipelineOptsTest.cpp
+++ b/tests/SkRasterPipelineOptsTest.cpp
@@ -134,3 +134,18 @@
         REPORTER_ASSERT(r, SK_OPTS_NS::all(delta < kTolerance));
     }
 }
+
+DEF_TEST(SkRasterPipelineOpts_Atan2, r) {  // checks atan2_ against sk_float_atan2 over a grid
+    using F = SK_OPTS_NS::F;
+    // NOTE(review): atan2_ does not handle 0,0; presumably the 0.1f steps never hit it -- confirm
+    constexpr float kTolerance = 0.00175f;  // largest accepted |expected - result|
+    for (float y = -3.0f; y <= 3.0f; y += 0.1f) {
+        for (float x = -3.0f; x <= 3.0f; x += 0.1f) {
+            F result = SK_OPTS_NS::atan2_(y, x);   // float arguments convert to F
+            F expected = sk_float_atan2(y, x);     // reference value
+            F delta = SK_OPTS_NS::abs_(expected - result);
+
+            REPORTER_ASSERT(r, SK_OPTS_NS::all(delta < kTolerance));
+        }
+    }
+}