[skottie_tool] GPU backend support

Refactor the implementation to decouple frame generators
(FrameGenerator: cpu, gpu, picture) from frame consumers (FrameSink:
png, skp, mp4, null).

Add a GPU frame generator using async readbacks.  Unlike the other
generators, which execute on a thread pool, the GPU generator forces
single-threaded execution.

Also add a couple of backend-specific build targets (skottie_tool_cpu,
skottie_tool_gpu) to facilitate binary size experiments.

Change-Id: Id59e230b3861afe5bf9b7ecfc710d672f38eeaaf
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/551237
Reviewed-by: Brian Osman <brianosman@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
Commit-Queue: Florin Malita <fmalita@google.com>
diff --git a/BUILD.gn b/BUILD.gn
index ae416b8..236feb6 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -2172,6 +2172,12 @@
       test_app("skottie_tool") {
         deps = [ "modules/skottie:tool" ]
       }
+      test_app("skottie_tool_cpu") {
+        deps = [ "modules/skottie:tool_cpu" ]
+      }
+      test_app("skottie_tool_gpu") {
+        deps = [ "modules/skottie:tool_gpu" ]
+      }
     }
     test_app("svg_tool") {
       deps = [ "modules/svg:tool" ]
diff --git a/modules/skottie/BUILD.gn b/modules/skottie/BUILD.gn
index 0f986e8..aa964dc 100644
--- a/modules/skottie/BUILD.gn
+++ b/modules/skottie/BUILD.gn
@@ -105,6 +105,50 @@
 
         deps = [
           "../..:flags",
+          "../..:gpu_tool_utils",
+          "../..:skia",
+          "../../experimental/ffmpeg:video_encoder",
+        ]
+
+        public_deps = [
+          ":skottie",
+          ":utils",
+        ]
+      }
+
+      # A couple of backend-specific targets, to facilitate binary size experiments.
+      skia_source_set("tool_cpu") {
+        check_includes = false
+        testonly = true
+
+        configs = [ "../..:skia_private" ]
+        sources = [ "src/SkottieTool.cpp" ]
+        defines = [ "CPU_ONLY" ]
+
+        deps = [
+          "../..:flags",
+          "../..:gpu_tool_utils",
+          "../..:skia",
+          "../../experimental/ffmpeg:video_encoder",
+        ]
+
+        public_deps = [
+          ":skottie",
+          ":utils",
+        ]
+      }
+
+      skia_source_set("tool_gpu") {
+        check_includes = false
+        testonly = true
+
+        configs = [ "../..:skia_private" ]
+        sources = [ "src/SkottieTool.cpp" ]
+        defines = [ "GPU_ONLY" ]
+
+        deps = [
+          "../..:flags",
+          "../..:gpu_tool_utils",
           "../..:skia",
           "../../experimental/ffmpeg:video_encoder",
         ]
diff --git a/modules/skottie/src/SkottieTool.cpp b/modules/skottie/src/SkottieTool.cpp
index 8b94094..5eece10 100644
--- a/modules/skottie/src/SkottieTool.cpp
+++ b/modules/skottie/src/SkottieTool.cpp
@@ -11,6 +11,7 @@
 #include "include/core/SkStream.h"
 #include "include/core/SkSurface.h"
 #include "include/encode/SkPngEncoder.h"
+#include "include/gpu/GrContextOptions.h"
 #include "include/private/SkTPin.h"
 #include "modules/skottie/include/Skottie.h"
 #include "modules/skottie/utils/SkottieUtils.h"
@@ -19,6 +20,7 @@
 #include "src/core/SkTaskGroup.h"
 #include "src/utils/SkOSPath.h"
 #include "tools/flags/CommandLineFlags.h"
+#include "tools/gpu/GrContextFactory.h"
 
 #include <algorithm>
 #include <chrono>
@@ -45,60 +47,59 @@
 static DEFINE_int(height, 600, "Render height.");
 static DEFINE_int(threads,  0, "Number of worker threads (0 -> cores count).");
 
+static DEFINE_bool2(gpu, g, false, "Enable GPU rasterization.");
+
 namespace {
 
 static constexpr SkColor kClearColor = SK_ColorWHITE;
 
-std::unique_ptr<SkFILEWStream> MakeFrameStream(size_t idx, const char* ext) {
-    const auto frame_file = SkStringPrintf("0%06zu.%s", idx, ext);
-    auto stream = std::make_unique<SkFILEWStream>(SkOSPath::Join(FLAGS_writePath[0],
-                                                                   frame_file.c_str()).c_str());
-    if (!stream->isValid()) {
-        return nullptr;
-    }
-
-    return stream;
-}
-
-class Sink {
-public:
-    Sink() = default;
-    virtual ~Sink() = default;
-    Sink(const Sink&) = delete;
-    Sink& operator=(const Sink&) = delete;
-
-    virtual SkCanvas* beginFrame(size_t idx) = 0;
-    virtual bool endFrame(size_t idx) = 0;
+enum class OutputFormat {
+    kPNG,
+    kSKP,
+    kNull,
+    kMP4,
 };
 
-class PNGSink final : public Sink {
-public:
-    static std::unique_ptr<Sink> Make(const SkMatrix& scale_matrix) {
-        auto surface = SkSurface::MakeRasterN32Premul(FLAGS_width, FLAGS_height);
-        if (!surface) {
-            SkDebugf("Could not allocate a %d x %d surface.\n", FLAGS_width, FLAGS_height);
-            return nullptr;
-        }
 
-        return std::unique_ptr<Sink>(new PNGSink(std::move(surface), scale_matrix));
-    }
+auto ms_since(std::chrono::steady_clock::time_point start) {
+    const auto elapsed = std::chrono::steady_clock::now() - start;
+    return std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count();
+};
+
+std::unique_ptr<SkFILEWStream> make_file_stream(size_t frame_index, const char* extension) {
+    const auto file = SkStringPrintf("0%06zu.%s", frame_index, extension);
+    const auto path = SkOSPath::Join(FLAGS_writePath[0], file.c_str());
+
+    auto stream = std::make_unique<SkFILEWStream>(path.c_str());
+
+    return stream->isValid() ? std::move(stream) : nullptr;
+}
+
+class FrameSink {
+public:
+    virtual ~FrameSink() = default;
+
+    static std::unique_ptr<FrameSink> Make(OutputFormat fmt, size_t frame_count);
+
+    virtual void writeFrame(sk_sp<SkImage> frame, size_t frame_index) = 0;
+
+    virtual void finalize(double fps) {}
+
+protected:
+    FrameSink() = default;
 
 private:
-    PNGSink(sk_sp<SkSurface> surface, const SkMatrix& scale_matrix)
-        : fSurface(std::move(surface)) {
-        fSurface->getCanvas()->concat(scale_matrix);
-    }
+    FrameSink(const FrameSink&)            = delete;
+    FrameSink& operator=(const FrameSink&) = delete;
+};
 
-    SkCanvas* beginFrame(size_t) override {
-        auto* canvas = fSurface->getCanvas();
-        canvas->clear(kClearColor);
-        return canvas;
-    }
+class PNGSink final : public FrameSink {
+public:
+    void writeFrame(sk_sp<SkImage> frame, size_t frame_index) override {
+        auto stream = make_file_stream(frame_index, "png");
 
-    bool endFrame(size_t idx) override {
-        auto stream = MakeFrameStream(idx, "png");
-        if (!stream) {
-            return false;
+        if (!frame || !stream) {
+            return;
         }
 
         // Set encoding options to favor speed over size.
@@ -106,101 +107,279 @@
         options.fZLibLevel   = 1;
         options.fFilterFlags = SkPngEncoder::FilterFlag::kNone;
 
-        sk_sp<SkImage> img = fSurface->makeImageSnapshot();
         SkPixmap pixmap;
-        return img->peekPixels(&pixmap)
-            && SkPngEncoder::Encode(stream.get(), pixmap, options);
-    }
+        SkAssertResult(frame->peekPixels(&pixmap));
 
-    const sk_sp<SkSurface> fSurface;
+        SkPngEncoder::Encode(stream.get(), pixmap, options);
+    }
 };
 
-class SKPSink final : public Sink {
+class NullSink final : public FrameSink {
 public:
-    static std::unique_ptr<Sink> Make(const SkMatrix& scale_matrix) {
-        return std::unique_ptr<Sink>(new SKPSink(scale_matrix));
+    void writeFrame(sk_sp<SkImage>, size_t) override {}
+};
+
+#if defined(HAVE_VIDEO_ENCODER)
+class MP4Sink final : public FrameSink {
+public:
+    explicit MP4Sink(size_t frame_count) {
+        fFrames.resize(frame_count);
     }
 
-private:
-    explicit SKPSink(const SkMatrix& scale_matrix)
-        : fScaleMatrix(scale_matrix) {}
-
-    SkCanvas* beginFrame(size_t) override {
-        auto canvas = fRecorder.beginRecording(FLAGS_width, FLAGS_height);
-        canvas->concat(fScaleMatrix);
-        return canvas;
+    void writeFrame(sk_sp<SkImage> frame, size_t frame_index) override {
+        fFrames[frame_index].set_value(std::move(frame));
     }
 
-    bool endFrame(size_t idx) override {
-        auto stream = MakeFrameStream(idx, "skp");
-        if (!stream) {
-            return false;
+    // Encodes frames in index order as they become available, then writes the MP4 file.
+    void finalize(double fps) override {
+        SkVideoEncoder encoder;
+        if (!encoder.beginRecording({FLAGS_width, FLAGS_height}, sk_double_round2int(fps))) {
+            fprintf(stderr, "Invalid video stream configuration.\n");
+            return;  // Nothing can be encoded without an active recording.
         }
 
-        fRecorder.finishRecordingAsPicture()->serialize(stream.get());
-        return true;
+        std::vector<double> starved_ms;
+        starved_ms.reserve(fFrames.size());
+
+        for (auto& frame_promise : fFrames) {
+            const auto start = std::chrono::steady_clock::now();
+            auto frame = frame_promise.get_future().get();
+            starved_ms.push_back(ms_since(start));
+
+            if (!frame) continue;
+
+            SkPixmap pixmap;
+            SkAssertResult(frame->peekPixels(&pixmap));
+            encoder.addFrame(pixmap);
+        }
+
+        auto mp4 = encoder.endRecording();
+        SkFILEWStream{FLAGS_writePath[0]}.write(mp4->data(), mp4->size());
+
+        // If everything's going well, the first frame should account for the most,
+        // and ideally nearly all, starvation.
+        double first = starved_ms[0];
+        std::sort(starved_ms.begin(), starved_ms.end());
+        // Use a double initial value so the fold is carried out in double rather than int.
+        double sum = std::accumulate(starved_ms.begin(), starved_ms.end(), 0.0);
+        printf("Encoder starved stats: "
+               "min %gms, med %gms, avg %gms, max %gms, sum %gms, first %gms (%s)\n",
+               starved_ms[0], starved_ms[fFrames.size()/2], sum/fFrames.size(), starved_ms.back(),
+               sum, first, first == starved_ms.back() ? "ok" : "BAD");
     }
 
-    const SkMatrix    fScaleMatrix;
-    SkPictureRecorder fRecorder;
+    std::vector<std::promise<sk_sp<SkImage>>> fFrames;
+};
+#endif // HAVE_VIDEO_ENCODER
+
+std::unique_ptr<FrameSink> FrameSink::Make(OutputFormat fmt, size_t frame_count) {
+    switch (fmt) {
+    case OutputFormat::kPNG:
+        return std::make_unique<PNGSink>();
+    case OutputFormat::kSKP:
+        // The SKP generator does not use a sink.
+        [[fallthrough]];
+    case OutputFormat::kNull:
+        return std::make_unique<NullSink>();
+    case OutputFormat::kMP4:
+#if defined(HAVE_VIDEO_ENCODER)
+        return std::make_unique<MP4Sink>(frame_count);
+#else
+        return nullptr;
+#endif
+    }
+
+    SkUNREACHABLE;
+}
+
+class FrameGenerator {
+public:
+    virtual ~FrameGenerator() = default;
+
+    static std::unique_ptr<FrameGenerator> Make(FrameSink*, OutputFormat, const SkMatrix&);
+
+    virtual void generateFrame(const skottie::Animation*, size_t frame_index) {}
+
+protected:
+    explicit FrameGenerator(FrameSink* sink) : fSink(sink) {}
+
+    FrameSink* fSink;
+
+private:
+    FrameGenerator(const FrameGenerator&)            = delete;
+    FrameGenerator& operator=(const FrameGenerator&) = delete;
 };
 
-class NullSink final : public Sink {
+class CPUGenerator final : public FrameGenerator {
 public:
-    static std::unique_ptr<Sink> Make(const SkMatrix& scale_matrix) {
+#if defined(GPU_ONLY)
+    static std::unique_ptr<FrameGenerator> Make(FrameSink* sink, const SkMatrix& matrix) {
+        return nullptr;
+    }
+#else
+    static std::unique_ptr<FrameGenerator> Make(FrameSink* sink, const SkMatrix& matrix) {
         auto surface = SkSurface::MakeRasterN32Premul(FLAGS_width, FLAGS_height);
         if (!surface) {
             SkDebugf("Could not allocate a %d x %d surface.\n", FLAGS_width, FLAGS_height);
             return nullptr;
         }
 
-        return std::unique_ptr<Sink>(new NullSink(std::move(surface), scale_matrix));
+        return std::unique_ptr<FrameGenerator>(new CPUGenerator(sink, std::move(surface), matrix));
+    }
+
+    void generateFrame(const skottie::Animation* anim, size_t frame_index) override {
+        fSurface->getCanvas()->clear(kClearColor);
+        anim->render(fSurface->getCanvas());
+
+        fSink->writeFrame(fSurface->makeImageSnapshot(), frame_index);
     }
 
 private:
-    NullSink(sk_sp<SkSurface> surface, const SkMatrix& scale_matrix)
-        : fSurface(std::move(surface)) {
+    CPUGenerator(FrameSink* sink, sk_sp<SkSurface> surface, const SkMatrix& scale_matrix)
+        : FrameGenerator(sink)
+        , fSurface(std::move(surface))
+    {
         fSurface->getCanvas()->concat(scale_matrix);
     }
 
-    SkCanvas* beginFrame(size_t) override {
-        auto* canvas = fSurface->getCanvas();
-        canvas->clear(kClearColor);
-        return canvas;
-    }
-
-    bool endFrame(size_t) override {
-        return true;
-    }
-
     const sk_sp<SkSurface> fSurface;
+#endif // !GPU_ONLY
 };
 
-static std::vector<std::promise<sk_sp<SkImage>>> gMP4Frames;
-
-struct MP4Sink final : public Sink {
-    explicit MP4Sink(const SkMatrix& scale_matrix)
-        : fSurface(SkSurface::MakeRasterN32Premul(FLAGS_width, FLAGS_height)) {
-        fSurface->getCanvas()->concat(scale_matrix);
+class SKPGenerator final : public FrameGenerator {
+public:
+#if defined(CPU_ONLY) || defined(GPU_ONLY)
+    static std::unique_ptr<FrameGenerator> Make(FrameSink* sink, const SkMatrix& matrix) {
+        return nullptr;
+    }
+#else
+    static std::unique_ptr<FrameGenerator> Make(FrameSink* sink, const SkMatrix& scale_matrix) {
+        return std::unique_ptr<FrameGenerator>(new SKPGenerator(sink, scale_matrix));
     }
 
-    SkCanvas* beginFrame(size_t) override {
-        SkCanvas* canvas = fSurface->getCanvas();
-        canvas->clear(kClearColor);
-        return canvas;
-    }
+    void generateFrame(const skottie::Animation* anim, size_t frame_index) override {
+        auto* canvas = fRecorder.beginRecording(FLAGS_width, FLAGS_height);
+        canvas->concat(fScaleMatrix);
+        anim->render(canvas);
 
-    bool endFrame(size_t i) override {
-        if (sk_sp<SkImage> img = fSurface->makeImageSnapshot()) {
-            gMP4Frames[i].set_value(std::move(img));
-            return true;
+        auto frame  = fRecorder.finishRecordingAsPicture();
+        auto stream = make_file_stream(frame_index, "skp");
+
+        if (frame && stream) {
+            frame->serialize(stream.get());
         }
-        return false;
     }
 
-    const sk_sp<SkSurface> fSurface;
+private:
+    SKPGenerator(FrameSink* sink, const SkMatrix& scale_matrix)
+        : FrameGenerator(sink)
+        , fScaleMatrix(scale_matrix)
+    {}
+
+    const SkMatrix    fScaleMatrix;
+    SkPictureRecorder fRecorder;
+#endif // !CPU_ONLY && !GPU_ONLY
 };
 
+class GPUGenerator final : public FrameGenerator {
+public:
+#if defined(CPU_ONLY)
+    static std::unique_ptr<FrameGenerator> Make(FrameSink* sink, const SkMatrix& matrix) {
+        return nullptr;
+    }
+#else
+    static std::unique_ptr<FrameGenerator> Make(FrameSink* sink, const SkMatrix& matrix) {
+        auto gpu_generator = std::unique_ptr<GPUGenerator>(new GPUGenerator(sink, matrix));
+
+        return gpu_generator->isValid()
+                ? std::unique_ptr<FrameGenerator>(gpu_generator.release())
+                : nullptr;
+    }
+
+    ~GPUGenerator() override {
+        // Ensure pending reads complete. fCtx may be null when Make() discards a failed init.
+        if (fCtx) fCtx->flushAndSubmit(true);
+    }
+
+    void generateFrame(const skottie::Animation* anim, size_t frame_index) override {
+        fSurface->getCanvas()->clear(kClearColor);
+        anim->render(fSurface->getCanvas());
+
+        auto rec = std::make_unique<AsyncRec>(fSink, frame_index);
+        fSurface->asyncRescaleAndReadPixels(SkImageInfo::MakeN32Premul(FLAGS_width, FLAGS_height),
+                                            {0, 0, FLAGS_width, FLAGS_height},
+                                            SkSurface::RescaleGamma::kSrc,
+                                            SkImage::RescaleMode::kNearest,
+                                            AsyncCallback, rec.release());
+
+        fCtx->submit();
+    }
+
+private:
+    GPUGenerator(FrameSink* sink, const SkMatrix& matrix)
+        : FrameGenerator(sink)
+    {
+        fCtx = fFactory.getContextInfo(sk_gpu_test::GrContextFactory::kGL_ContextType)
+                           .directContext();
+        fSurface =
+            SkSurface::MakeRenderTarget(fCtx,
+                                        SkBudgeted::kNo,
+                                        SkImageInfo::MakeN32Premul(FLAGS_width, FLAGS_height),
+                                        0,
+                                        GrSurfaceOrigin::kTopLeft_GrSurfaceOrigin,
+                                        nullptr);
+        if (fSurface) {
+            fSurface->getCanvas()->concat(matrix);
+        } else {
+            fprintf(stderr, "Could not initialize GL context.\n");
+        }
+    }
+
+    bool isValid() const { return !!fSurface; }
+
+    struct AsyncRec {
+        FrameSink* sink;
+        size_t     index;
+
+        AsyncRec(FrameSink* sink, size_t index) : sink(sink), index(index) {}
+    };
+
+    static void AsyncCallback(SkSurface::ReadPixelsContext ctx,
+                              std::unique_ptr<const SkSurface::AsyncReadResult> result) {
+        std::unique_ptr<const AsyncRec> rec(reinterpret_cast<const AsyncRec*>(ctx));
+        sk_sp<SkImage> frame_image;  // stays null when the readback failed
+        if (result && result->count() == 1) {
+            SkPixmap pm(SkImageInfo::MakeN32Premul(FLAGS_width, FLAGS_height),
+                        result->data(0), result->rowBytes(0));
+            auto release_proc = [](const void*, SkImage::ReleaseContext ctx) {
+                std::unique_ptr<const SkSurface::AsyncReadResult>
+                        adopted(reinterpret_cast<const SkSurface::AsyncReadResult*>(ctx));
+            };
+
+            frame_image = SkImage::MakeFromRaster(pm, release_proc, (void*)result.release());
+        }
+        // Always notify the sink: MP4Sink blocks on a per-frame promise in finalize().
+        rec->sink->writeFrame(std::move(frame_image), rec->index);
+    }
+
+    sk_gpu_test::GrContextFactory fFactory;
+    GrDirectContext*              fCtx;
+    sk_sp<SkSurface>              fSurface;
+#endif // !CPU_ONLY
+};
+
+std::unique_ptr<FrameGenerator> FrameGenerator::Make(FrameSink* sink,
+                                                     OutputFormat fmt,
+                                                     const SkMatrix& matrix) {
+    if (fmt == OutputFormat::kSKP) {
+        return SKPGenerator::Make(sink, matrix);
+    }
+
+    return FLAGS_gpu
+            ? GPUGenerator::Make(sink, matrix)
+            : CPUGenerator::Make(sink, matrix);
+}
+
 class Logger final : public skottie::Logger {
 public:
     struct LogEntry {
@@ -234,16 +413,6 @@
                           fWarnings;
 };
 
-std::unique_ptr<Sink> MakeSink(const char* fmt, const SkMatrix& scale_matrix) {
-    if (0 == strcmp(fmt,  "png")) return  PNGSink::Make(scale_matrix);
-    if (0 == strcmp(fmt,  "skp")) return  SKPSink::Make(scale_matrix);
-    if (0 == strcmp(fmt, "null")) return NullSink::Make(scale_matrix);
-    if (0 == strcmp(fmt,  "mp4")) return std::make_unique<MP4Sink>(scale_matrix);
-
-    SkDebugf("Unknown format: %s\n", FLAGS_format[0]);
-    return nullptr;
-}
-
 } // namespace
 
 extern bool gSkUseThreadLocalStrikeCaches_IAcknowledgeThisIsIncrediblyExperimental;
@@ -258,7 +427,23 @@
         return 1;
     }
 
-    if (!FLAGS_format.contains("mp4") && !sk_mkdir(FLAGS_writePath[0])) {
+    OutputFormat fmt;
+    if (0 == strcmp(FLAGS_format[0],  "png")) {
+        fmt = OutputFormat::kPNG;
+    } else if (0 == strcmp(FLAGS_format[0],  "skp")) {
+        fmt = OutputFormat::kSKP;
+    }  else if (0 == strcmp(FLAGS_format[0], "null")) {
+        fmt = OutputFormat::kNull;
+#if defined(HAVE_VIDEO_ENCODER)
+    } else if (0 == strcmp(FLAGS_format[0],  "mp4")) {
+        fmt = OutputFormat::kMP4;
+#endif
+    } else {
+        fprintf(stderr, "Unknown format: %s\n", FLAGS_format[0]);
+        return 1;
+    }
+
+    if (fmt != OutputFormat::kMP4 && !sk_mkdir(FLAGS_writePath[0])) {
         return 1;
     }
 
@@ -314,83 +499,61 @@
     }
     const auto fps_scale = native_fps / fps;
 
-    SkDebugf("Rendering %f seconds (%d frames @%f fps).\n", duration, frame_count, fps);
+    printf("Rendering %f seconds (%d frames @%f fps).\n", duration, frame_count, fps);
 
-    if (FLAGS_format.contains("mp4")) {
-        gMP4Frames.resize(frame_count);
-    }
+    const auto sink = FrameSink::Make(fmt, frame_count);
 
     std::vector<double> frames_ms(frame_count);
 
-    auto ms_since = [](auto start) {
-        const auto elapsed = std::chrono::steady_clock::now() - start;
-        return std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count();
-    };
-
-    SkTaskGroup::Enabler enabler(FLAGS_threads - 1);
+    const auto thread_count = FLAGS_gpu ? 0 : FLAGS_threads - 1;
+    SkTaskGroup::Enabler enabler(thread_count);
 
     SkTaskGroup tg;
-    tg.batch(frame_count, [&](int i) {
-        // SkTaskGroup::Enabler creates a LIFO work pool,
-        // but we want our early frames to start first.
-        i = frame_count - 1 - i;
-
-        const auto start = std::chrono::steady_clock::now();
-        thread_local static auto* anim =
-                skottie::Animation::Builder()
-                    .setResourceProvider(rp)
-                    .setPrecompInterceptor(precomp_interceptor)
-                    .make(static_cast<const char*>(data->data()), data->size())
-                    .release();
-        thread_local static auto* sink = MakeSink(FLAGS_format[0], scale_matrix).release();
-
-        if (sink && anim) {
-            anim->seekFrame(frame0 + i * fps_scale);
-            anim->render(sink->beginFrame(i));
-            sink->endFrame(i);
+    {
+        // Depending on type (gpu vs. everything else), we use either a single generator
+        // or one generator per worker thread, respectively.
+        // Scoping is important for the single generator case because we want its destructor to
+        // flush out any pending async operations.
+        std::unique_ptr<FrameGenerator> singleton_generator;
+        if (FLAGS_gpu) {
+            singleton_generator = FrameGenerator::Make(sink.get(), fmt, scale_matrix);
         }
 
-        frames_ms[i] = ms_since(start);
-    });
+        tg.batch(frame_count, [&](int i) {
+            // SkTaskGroup::Enabler creates a LIFO work pool,
+            // but we want our early frames to start first.
+            i = frame_count - 1 - i;
 
-#if defined(HAVE_VIDEO_ENCODER)
-    if (FLAGS_format.contains("mp4")) {
-        SkVideoEncoder enc;
-        if (!enc.beginRecording({FLAGS_width, FLAGS_height}, fps)) {
-            SkDEBUGF("Invalid video stream configuration.\n");
-            return -1;
-        }
-
-        std::vector<double> starved_ms;
-        for (std::promise<sk_sp<SkImage>>& frame : gMP4Frames) {
             const auto start = std::chrono::steady_clock::now();
-            sk_sp<SkImage> img = frame.get_future().get();
-            starved_ms.push_back(ms_since(start));
+            thread_local static auto* anim =
+                    skottie::Animation::Builder()
+                        .setResourceProvider(rp)
+                        .setPrecompInterceptor(precomp_interceptor)
+                        .make(static_cast<const char*>(data->data()), data->size())
+                        .release();
+            thread_local static auto* gen = singleton_generator
+                    ? singleton_generator.get()
+                    : FrameGenerator::Make(sink.get(), fmt, scale_matrix).release();
 
-            SkPixmap pm;
-            SkAssertResult(img->peekPixels(&pm));
-            enc.addFrame(pm);
-        }
-        sk_sp<SkData> mp4 = enc.endRecording();
+            if (gen && anim) {
+                anim->seekFrame(frame0 + i * fps_scale);
+                gen->generateFrame(anim, SkToSizeT(i));
+            } else {
+                sink->writeFrame(nullptr, SkToSizeT(i));
+            }
 
-        SkFILEWStream{FLAGS_writePath[0]}
-            .write(mp4->data(), mp4->size());
-
-        // If everything's going well, the first frame should account for the most,
-        // and ideally nearly all, starvation.
-        double first = starved_ms[0];
-        std::sort(starved_ms.begin(), starved_ms.end());
-        double sum = std::accumulate(starved_ms.begin(), starved_ms.end(), 0);
-        SkDebugf("starved min %gms, med %gms, avg %gms, max %gms, sum %gms, first %gms (%s)\n",
-                 starved_ms[0], starved_ms[frame_count/2], sum/frame_count, starved_ms.back(), sum,
-                 first, first == starved_ms.back() ? "ok" : "BAD");
+            frames_ms[i] = ms_since(start);
+        });
     }
-#endif
+
+    sink->finalize(fps);
     tg.wait();
 
+
     std::sort(frames_ms.begin(), frames_ms.end());
     double sum = std::accumulate(frames_ms.begin(), frames_ms.end(), 0);
-    SkDebugf("frame time min %gms, med %gms, avg %gms, max %gms, sum %gms\n",
-             frames_ms[0], frames_ms[frame_count/2], sum/frame_count, frames_ms.back(), sum);
+    printf("Frame time stats: min %gms, med %gms, avg %gms, max %gms, sum %gms\n",
+           frames_ms[0], frames_ms[frame_count/2], sum/frame_count, frames_ms.back(), sum);
+
     return 0;
 }