Revert "Guarantee that raster pipeline always uses finite stack"

This reverts commit 3809e9bd28ae110271dccb4e7eeacd2b76a0223b.

Reason for revert: FM-TSAN bot is unhappy

Original change's description:
> Guarantee that raster pipeline always uses finite stack
>
> In release builds, clang and gcc do tail call optimization, which
> reuses the same stack space for each stage. In debug builds, that
> doesn't happen. If we're going to start generating arbitrarily long
> pipelines (eg from SkSL), we need to ensure that we're not going to
> overflow the stack.
>
> I first looked at ways to force tail call optimization (Clang's
> musttail, and optimization pragmas). These are all nonportable, and
> don't work on some of our supported toolchains.
>
> Instead, this version includes a new RP execution mode that does away
> with the tail-calling entirely. Instead, each stage just returns the
> next stage to call. This ruins our optimized stage ABI, obviously, but
> we're only doing it in debug builds.
>
> MSVC can do tail call optimization, but testing shows that it falls over
> with raster pipeline (too complex?). As a result, we always use the new
> technique there.
>
> Our new unit test measures stack consumption (which is the thing we're
> worried about anyway) to make sure that TCO or our new technique is
> successfully limiting stack usage.
>
> Change-Id: Id0d419f97b53b0b57c82214c4a2f1f94e5ab6b3c
> Reviewed-on: https://skia-review.googlesource.com/c/skia/+/566896
> Reviewed-by: Herb Derby <herb@google.com>
> Commit-Queue: Brian Osman <brianosman@google.com>

Change-Id: Ic1802d307c919160618826ab7683c05cd448b16e
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/567438
Auto-Submit: Brian Osman <brianosman@google.com>
Bot-Commit: Rubber Stamper <rubber-stamper@appspot.gserviceaccount.com>
Commit-Queue: Rubber Stamper <rubber-stamper@appspot.gserviceaccount.com>
diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h
index 1a04986..625f118 100644
--- a/src/opts/SkRasterPipeline_opts.h
+++ b/src/opts/SkRasterPipeline_opts.h
@@ -1037,20 +1037,6 @@
 #endif
 }
 
-// In debug builds, we can't rely on the compiler to do tail call optimization. To prevent runaway
-// stack growth, we don't do tail calls - instead we change the signatures so each stage returns
-// the next stage pointer, and start_pipeline iterates until we reach the end.
-// This pessimizes everything (it requires bundling all parameters in a struct, so they can be
-// mutated by stages), but we're already in a debug build, so we accept that.
-// Additionally, MSVC ends up consuming stack space, even with optimizations enabled, so we always
-// use our alternate implementation there. This is unfortunate, but if you're using MSVC, you're
-// already getting scalar RP, so performance is going to be terrible regardless.
-#if defined(SK_DEBUG) || defined(_MSC_VER)
-    #define JUMPER_TAIL_CALL 0
-#else
-    #define JUMPER_TAIL_CALL 1
-#endif
-
 // Our fundamental vector depth is our pixel stride.
 static const size_t N = sizeof(F) / sizeof(float);
 
@@ -1082,14 +1068,7 @@
     #define JUMPER_NARROW_STAGES 1
 #endif
 
-#if !JUMPER_TAIL_CALL
-    struct Params {
-        size_t dx, dy, tail;
-        F r,g,b,a, dr,dg,db,da;
-        void** program;
-    };
-    using Stage = void*(ABI*)(Params*);
-#elif JUMPER_NARROW_STAGES
+#if JUMPER_NARROW_STAGES
     struct Params {
         size_t dx, dy, tail;
         F dr,dg,db,da;
@@ -1105,25 +1084,7 @@
     auto start = (Stage)load_and_inc(program);
     const size_t x0 = dx;
     for (; dy < ylimit; dy++) {
-    #if !JUMPER_TAIL_CALL
-        Params params = { x0,dy,0, 0,0,0,0, 0,0,0,0, nullptr };
-        while (params.dx + N <= xlimit) {
-            params.program = program;
-            Stage fn = start;
-            while (fn) {
-                fn = (Stage)fn(&params);
-            }
-            params.dx += N;
-        }
-        if (size_t tail = xlimit - params.dx) {
-            params.program = program;
-            params.tail = tail;
-            Stage fn = start;
-            while (fn) {
-                fn = (Stage)fn(&params);
-            }
-        }
-    #elif JUMPER_NARROW_STAGES
+    #if JUMPER_NARROW_STAGES
         Params params = { x0,dy,0, 0,0,0,0 };
         while (params.dx + N <= xlimit) {
             start(&params,program, 0,0,0,0);
@@ -1146,20 +1107,7 @@
     }
 }
 
-#if !JUMPER_TAIL_CALL
-    #define STAGE(name, ...)                                                    \
-        SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail,        \
-                         F& r, F& g, F& b, F& a, F& dr, F& dg, F& db, F& da);   \
-        static void* ABI name(Params* params) {                                 \
-            name##_k(Ctx{params->program},params->dx,params->dy,params->tail,   \
-                     params->r,params->g,params->b,params->a,                   \
-                     params->dr, params->dg, params->db, params->da);           \
-            void* next = load_and_inc(params->program);                         \
-            return next;                                                        \
-        }                                                                       \
-        SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail,        \
-                         F& r, F& g, F& b, F& a, F& dr, F& dg, F& db, F& da)
-#elif JUMPER_NARROW_STAGES
+#if JUMPER_NARROW_STAGES
     #define STAGE(name, ...)                                                    \
         SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail,        \
                          F& r, F& g, F& b, F& a, F& dr, F& dg, F& db, F& da);   \
@@ -1189,9 +1137,7 @@
 
 // just_return() is a simple no-op stage that only exists to end the chain,
 // returning back up to start_pipeline(), and from there to the caller.
-#if !JUMPER_TAIL_CALL
-    static void* ABI just_return(Params*) { return nullptr; }
-#elif JUMPER_NARROW_STAGES
+#if JUMPER_NARROW_STAGES
     static void ABI just_return(Params*, void**, F,F,F,F) {}
 #else
     static void ABI just_return(size_t, void**, size_t,size_t, F,F,F,F, F,F,F,F) {}
diff --git a/tests/SkRasterPipelineTest.cpp b/tests/SkRasterPipelineTest.cpp
index fea2543..57d84d9 100644
--- a/tests/SkRasterPipelineTest.cpp
+++ b/tests/SkRasterPipelineTest.cpp
@@ -568,45 +568,3 @@
     p.append(SkRasterPipeline::store_8888, &ptr);
     p.run(0,0,1,1);
 }
-
-DEF_TEST(SkRasterPipeline_tail_call, r) {
-    // This test ensures that raster pipeline is ALWAYS performing tail call optimization on stages.
-    // That's important to prevent stack overflows, particularly when generating long and complex
-    // pipelines (eg, from SkSL).
-    //
-    // Note that this test only verifies this behavior in the highp pipeline (callback has no lowp
-    // implemention). SkSL is never going to target the lowp pipeline, so the risk there is minimal.
-
-    struct StackCheckerCtx : SkRasterPipeline_CallbackCtx {
-        std::vector<void*> stack_addrs;
-    };
-    StackCheckerCtx cb;
-    cb.fn = [](SkRasterPipeline_CallbackCtx* self, int active_pixels) {
-        auto ctx = (StackCheckerCtx*)self;
-        ctx->stack_addrs.push_back(&active_pixels);
-    };
-
-    uint32_t rgba = 0xff00ff00;
-    SkRasterPipeline_MemoryCtx ptr = { &rgba, 0 };
-
-    // Our pipeline records the address of the same local variable at numerous points.
-    // If tail call optimization is working, all of those addresses should be the same.
-    // We insert other miscellaneous work, just to be sure.
-    SkRasterPipeline_<256> p;
-    p.append(SkRasterPipeline::callback, &cb);
-    p.append(SkRasterPipeline::load_8888,  &ptr);
-    p.append(SkRasterPipeline::callback, &cb);
-    p.append(SkRasterPipeline::swap_rb);
-    p.append(SkRasterPipeline::callback, &cb);
-    p.append(SkRasterPipeline::store_8888, &ptr);
-    p.run(0,0,1,1);
-
-    REPORTER_ASSERT(r, cb.stack_addrs.size() == 3);
-    if (cb.stack_addrs[0] != cb.stack_addrs[1] || cb.stack_addrs[0] != cb.stack_addrs[2]) {
-        REPORT_FAILURE(
-                r,
-                "stack growth",
-                SkStringPrintf(
-                        "%p : %p : %p", cb.stack_addrs[0], cb.stack_addrs[1], cb.stack_addrs[2]));
-    }
-}