Revert "Guarantee that raster pipeline always uses finite stack"
This reverts commit 3809e9bd28ae110271dccb4e7eeacd2b76a0223b.
Reason for revert: FM-TSAN bot is unhappy
Original change's description:
> Guarantee that raster pipeline always uses finite stack
>
> In release builds, clang and gcc do tail call optimization, which
> reuses the same stack space for each stage. In debug builds, that
> doesn't happen. If we're going to start generating arbitrarily long
> pipelines (eg from SkSL), we need to ensure that we're not going to
> overflow the stack.
>
> I first looked at ways to force tail call optimization (Clang's
> musttail, and optimization pragmas). These are all nonportable, and
> don't work on some of our supported toolchains.
>
> Instead, this version includes a new RP execution mode that does away
> with the tail-calling entirely. Instead, each stage just returns the
> next stage to call. This ruins our optimized stage ABI, obviously, but
> we're only doing it in debug builds.
>
> MSVC can do tail call optimization, but testing shows that it falls over
> with raster pipeline (too complex?). As a result, we always use the new
> technique there.
>
> Our new unit test measures stack consumption (which is the thing we're
> worried about anyway) to make sure that TCO or our new technique is
> successfully limiting stack usage.
>
> Change-Id: Id0d419f97b53b0b57c82214c4a2f1f94e5ab6b3c
> Reviewed-on: https://skia-review.googlesource.com/c/skia/+/566896
> Reviewed-by: Herb Derby <herb@google.com>
> Commit-Queue: Brian Osman <brianosman@google.com>
Change-Id: Ic1802d307c919160618826ab7683c05cd448b16e
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/567438
Auto-Submit: Brian Osman <brianosman@google.com>
Bot-Commit: Rubber Stamper <rubber-stamper@appspot.gserviceaccount.com>
Commit-Queue: Rubber Stamper <rubber-stamper@appspot.gserviceaccount.com>
diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h
index 1a04986..625f118 100644
--- a/src/opts/SkRasterPipeline_opts.h
+++ b/src/opts/SkRasterPipeline_opts.h
@@ -1037,20 +1037,6 @@
#endif
}
-// In debug builds, we can't rely on the compiler to do tail call optimization. To prevent runaway
-// stack growth, we don't do tail calls - instead we change the signatures so each stage returns
-// the next stage pointer, and start_pipeline iterates until we reach the end.
-// This pessimizes everything (it requires bundling all parameters in a struct, so they can be
-// mutated by stages), but we're already in a debug build, so we accept that.
-// Additionally, MSVC ends up consuming stack space, even with optimizations enabled, so we always
-// use our alternate implementation there. This is unfortunate, but if you're using MSVC, you're
-// already getting scalar RP, so performance is going to be terrible regardless.
-#if defined(SK_DEBUG) || defined(_MSC_VER)
- #define JUMPER_TAIL_CALL 0
-#else
- #define JUMPER_TAIL_CALL 1
-#endif
-
// Our fundamental vector depth is our pixel stride.
static const size_t N = sizeof(F) / sizeof(float);
@@ -1082,14 +1068,7 @@
#define JUMPER_NARROW_STAGES 1
#endif
-#if !JUMPER_TAIL_CALL
- struct Params {
- size_t dx, dy, tail;
- F r,g,b,a, dr,dg,db,da;
- void** program;
- };
- using Stage = void*(ABI*)(Params*);
-#elif JUMPER_NARROW_STAGES
+#if JUMPER_NARROW_STAGES
struct Params {
size_t dx, dy, tail;
F dr,dg,db,da;
@@ -1105,25 +1084,7 @@
auto start = (Stage)load_and_inc(program);
const size_t x0 = dx;
for (; dy < ylimit; dy++) {
- #if !JUMPER_TAIL_CALL
- Params params = { x0,dy,0, 0,0,0,0, 0,0,0,0, nullptr };
- while (params.dx + N <= xlimit) {
- params.program = program;
- Stage fn = start;
- while (fn) {
- fn = (Stage)fn(&params);
- }
- params.dx += N;
- }
- if (size_t tail = xlimit - params.dx) {
- params.program = program;
- params.tail = tail;
- Stage fn = start;
- while (fn) {
- fn = (Stage)fn(&params);
- }
- }
- #elif JUMPER_NARROW_STAGES
+ #if JUMPER_NARROW_STAGES
Params params = { x0,dy,0, 0,0,0,0 };
while (params.dx + N <= xlimit) {
start(&params,program, 0,0,0,0);
@@ -1146,20 +1107,7 @@
}
}
-#if !JUMPER_TAIL_CALL
- #define STAGE(name, ...) \
- SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail, \
- F& r, F& g, F& b, F& a, F& dr, F& dg, F& db, F& da); \
- static void* ABI name(Params* params) { \
- name##_k(Ctx{params->program},params->dx,params->dy,params->tail, \
- params->r,params->g,params->b,params->a, \
- params->dr, params->dg, params->db, params->da); \
- void* next = load_and_inc(params->program); \
- return next; \
- } \
- SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail, \
- F& r, F& g, F& b, F& a, F& dr, F& dg, F& db, F& da)
-#elif JUMPER_NARROW_STAGES
+#if JUMPER_NARROW_STAGES
#define STAGE(name, ...) \
SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail, \
F& r, F& g, F& b, F& a, F& dr, F& dg, F& db, F& da); \
@@ -1189,9 +1137,7 @@
// just_return() is a simple no-op stage that only exists to end the chain,
// returning back up to start_pipeline(), and from there to the caller.
-#if !JUMPER_TAIL_CALL
- static void* ABI just_return(Params*) { return nullptr; }
-#elif JUMPER_NARROW_STAGES
+#if JUMPER_NARROW_STAGES
static void ABI just_return(Params*, void**, F,F,F,F) {}
#else
static void ABI just_return(size_t, void**, size_t,size_t, F,F,F,F, F,F,F,F) {}
diff --git a/tests/SkRasterPipelineTest.cpp b/tests/SkRasterPipelineTest.cpp
index fea2543..57d84d9 100644
--- a/tests/SkRasterPipelineTest.cpp
+++ b/tests/SkRasterPipelineTest.cpp
@@ -568,45 +568,3 @@
p.append(SkRasterPipeline::store_8888, &ptr);
p.run(0,0,1,1);
}
-
-DEF_TEST(SkRasterPipeline_tail_call, r) {
- // This test ensures that raster pipeline is ALWAYS performing tail call optimization on stages.
- // That's important to prevent stack overflows, particularly when generating long and complex
- // pipelines (eg, from SkSL).
- //
- // Note that this test only verifies this behavior in the highp pipeline (callback has no lowp
- // implemention). SkSL is never going to target the lowp pipeline, so the risk there is minimal.
-
- struct StackCheckerCtx : SkRasterPipeline_CallbackCtx {
- std::vector<void*> stack_addrs;
- };
- StackCheckerCtx cb;
- cb.fn = [](SkRasterPipeline_CallbackCtx* self, int active_pixels) {
- auto ctx = (StackCheckerCtx*)self;
- ctx->stack_addrs.push_back(&active_pixels);
- };
-
- uint32_t rgba = 0xff00ff00;
- SkRasterPipeline_MemoryCtx ptr = { &rgba, 0 };
-
- // Our pipeline records the address of the same local variable at numerous points.
- // If tail call optimization is working, all of those addresses should be the same.
- // We insert other miscellaneous work, just to be sure.
- SkRasterPipeline_<256> p;
- p.append(SkRasterPipeline::callback, &cb);
- p.append(SkRasterPipeline::load_8888, &ptr);
- p.append(SkRasterPipeline::callback, &cb);
- p.append(SkRasterPipeline::swap_rb);
- p.append(SkRasterPipeline::callback, &cb);
- p.append(SkRasterPipeline::store_8888, &ptr);
- p.run(0,0,1,1);
-
- REPORTER_ASSERT(r, cb.stack_addrs.size() == 3);
- if (cb.stack_addrs[0] != cb.stack_addrs[1] || cb.stack_addrs[0] != cb.stack_addrs[2]) {
- REPORT_FAILURE(
- r,
- "stack growth",
- SkStringPrintf(
- "%p : %p : %p", cb.stack_addrs[0], cb.stack_addrs[1], cb.stack_addrs[2]));
- }
-}