Adds gpu stats for program cache

With the addition of DDL program pre-compilation, we need stats that show how well it is working.

This CL also fixes some threading bugs.

Bug: skia:9455
Change-Id: I20da58a7f1b19685687fae1d159d4e0db8a4964d
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/273001
Reviewed-by: Greg Daniel <egdaniel@google.com>
Commit-Queue: Robert Phillips <robertphillips@google.com>
diff --git a/dm/DMSrcSink.cpp b/dm/DMSrcSink.cpp
index ac59e68..5b05da7 100644
--- a/dm/DMSrcSink.cpp
+++ b/dm/DMSrcSink.cpp
@@ -1618,7 +1618,7 @@
 
 GPUDDLSink::GPUDDLSink(const SkCommandLineConfigGpu* config, const GrContextOptions& grCtxOptions)
         : INHERITED(config, grCtxOptions)
-    , fRecordingThreadPool(SkExecutor::MakeLIFOThreadPool(2))
+    , fRecordingThreadPool(SkExecutor::MakeLIFOThreadPool(1)) // TODO: this should be at least 2
     , fGPUThread(SkExecutor::MakeFIFOThreadPool(1)) {
 }
 
@@ -1626,7 +1626,22 @@
                            sk_sp<SkSurface> dstSurface,
                            SkTaskGroup* recordingTaskGroup,
                            SkTaskGroup* gpuTaskGroup,
+                           sk_gpu_test::TestContext* gpuTestCtx,
                            GrContext* gpuThreadCtx) const {
+
+    // Characterize here: characterization can hit the SkGpuDevice's thread guard, so deferring
+    // it to the DDLTileHelper ctor would have two threads using the same context (this thread
+    // and the gpuThread, which will be uploading textures).
+    SkSurfaceCharacterization dstCharacterization;
+    SkAssertResult(dstSurface->characterize(&dstCharacterization));
+
+    // 'gpuTestCtx/gpuThreadCtx' is being shifted to the gpuThread. Leave the main (this)
+    // thread w/o a context.
+    gpuTestCtx->makeNotCurrent();
+
+    // Job one for the GPU thread is to make 'gpuTestCtx' current!
+    gpuTaskGroup->add([gpuTestCtx] { gpuTestCtx->makeCurrent(); });
+
     auto size = src.size();
     SkPictureRecorder recorder;
     Result result = src.draw(recorder.beginRecording(SkIntToScalar(size.width()),
@@ -1651,18 +1666,28 @@
     promiseImageHelper.uploadAllToGPU(gpuTaskGroup, gpuThreadCtx);
 
     constexpr int kNumDivisions = 3;
-    DDLTileHelper tiles(dstSurface, viewport, kNumDivisions);
+    DDLTileHelper tiles(dstSurface, dstCharacterization, viewport, kNumDivisions);
 
     // Reinflate the compressed picture individually for each thread.
     tiles.createSKPPerTile(compressedPictureData.get(), promiseImageHelper);
 
     tiles.kickOffThreadedWork(recordingTaskGroup, gpuTaskGroup, gpuThreadCtx);
+    // Apparently adding to a taskGroup isn't thread safe. Wait for the recording task group
+    // to add all its gpuThread work before adding the flush
+    recordingTaskGroup->wait();
 
     // This should be the only explicit flush for the entire DDL draw
     gpuTaskGroup->add([gpuThreadCtx]() { gpuThreadCtx->flush(); });
 
-    // All the work is schedule we just need to wait
-    recordingTaskGroup->wait(); // This should be a no-op at this point
+    // The backend textures are created on the gpuThread by the 'uploadAllToGPU' call.
+    // It is simpler to also delete them at this point on the gpuThread.
+    promiseImageHelper.deleteAllFromGPU(gpuTaskGroup, gpuThreadCtx);
+
+    // A flush has already been scheduled on the gpu thread along with the clean up of the backend
+    // textures so it is safe to schedule making 'gpuTestCtx' not current on the gpuThread.
+    gpuTaskGroup->add([gpuTestCtx] { gpuTestCtx->makeNotCurrent(); });
+
+    // All the work is scheduled on the gpu thread, we just need to wait
     gpuTaskGroup->wait();
 
     return Result::Ok();
@@ -1714,23 +1739,13 @@
         return Result::Fatal("Could not create a surface.");
     }
 
-    // 'mainCtx' is being shifted to the gpuThread. Leave the main thread w/o
-    // a context.
-    mainTestCtx->makeNotCurrent();
-
-    // Job one for the GPU thread is to make 'mainCtx' current!
-    gpuTaskGroup.add([mainTestCtx] { mainTestCtx->makeCurrent(); });
-
-    Result result = this->ddlDraw(src, surface, &recordingTaskGroup, &gpuTaskGroup, mainCtx);
-
-    // ddlDraw schedules a flush on the gpu thread and waits so it is safe to make 'mainCtx'
-    // current here.
-    gpuTaskGroup.add([mainTestCtx] { mainTestCtx->makeNotCurrent(); });
-
+    Result result = this->ddlDraw(src, surface, &recordingTaskGroup, &gpuTaskGroup,
+                                  mainTestCtx, mainCtx);
     if (!result.isOk()) {
         return result;
     }
 
+    // 'ddlDraw' will have made 'mainTestCtx' not current on the gpuThread
     mainTestCtx->makeCurrent();
 
     if (FLAGS_gpuStats) {
@@ -2071,7 +2086,10 @@
         if (!tmp) {
             return Result::Fatal("ViaDDL: cannot get surface from canvas");
         }
-        sk_sp<SkSurface> surface = sk_ref_sp(tmp);
+        sk_sp<SkSurface> dstSurface = sk_ref_sp(tmp);
+
+        SkSurfaceCharacterization dstCharacterization;
+        SkAssertResult(dstSurface->characterize(&dstCharacterization));
 
         promiseImageHelper.createCallbackContexts(context);
 
@@ -2084,7 +2102,7 @@
                 canvas->clear(SK_ColorTRANSPARENT);
             }
             // First, create all the tiles (including their individual dest surfaces)
-            DDLTileHelper tiles(surface, viewport, fNumDivisions);
+            DDLTileHelper tiles(dstSurface, dstCharacterization, viewport, fNumDivisions);
 
             // Second, reinflate the compressed picture individually for each thread
             // This recreates the promise SkImages on each replay iteration. We are currently
diff --git a/dm/DMSrcSink.h b/dm/DMSrcSink.h
index 3c9dcaf..3dd109b 100644
--- a/dm/DMSrcSink.h
+++ b/dm/DMSrcSink.h
@@ -469,7 +469,8 @@
                    sk_sp<SkSurface> dstSurface,
                    SkTaskGroup* recordingTaskGroup,
                    SkTaskGroup* gpuTaskGroup,
-                   GrContext* gpuCtx) const;
+                   sk_gpu_test::TestContext* gpuTestCtx,
+                   GrContext* gpuThreadCtx) const;
 
     std::unique_ptr<SkExecutor> fRecordingThreadPool;
     std::unique_ptr<SkExecutor> fGPUThread;
diff --git a/src/core/SkDeferredDisplayList.cpp b/src/core/SkDeferredDisplayList.cpp
index 112f577..f14aa54 100644
--- a/src/core/SkDeferredDisplayList.cpp
+++ b/src/core/SkDeferredDisplayList.cpp
@@ -23,12 +23,18 @@
                                              sk_sp<LazyProxyData> lazyProxyData)
         : fCharacterization(characterization)
 #if SK_SUPPORT_GPU
-        , fLazyProxyData(std::move(lazyProxyData))
+    , fLazyProxyData(std::move(lazyProxyData))
 #endif
 {
 }
 
-SkDeferredDisplayList::~SkDeferredDisplayList() {}
+SkDeferredDisplayList::~SkDeferredDisplayList() {
+#if SK_SUPPORT_GPU && defined(SK_DEBUG)
+    for (auto& renderTask : fRenderTasks) {
+        SkASSERT(renderTask->unique());
+    }
+#endif
+}
 
 //-------------------------------------------------------------------------------------------------
 #if SK_SUPPORT_GPU
diff --git a/src/gpu/GrDrawingManager.cpp b/src/gpu/GrDrawingManager.cpp
index af1440b..6e919da 100644
--- a/src/gpu/GrDrawingManager.cpp
+++ b/src/gpu/GrDrawingManager.cpp
@@ -104,6 +104,12 @@
 }
 
 void GrDrawingManager::RenderTaskDAG::add(const SkTArray<sk_sp<GrRenderTask>>& renderTasks) {
+#ifdef SK_DEBUG
+    for (auto& renderTask : renderTasks) {
+        SkASSERT(renderTask->unique());
+    }
+#endif
+
     fRenderTasks.push_back_n(renderTasks.count(), renderTasks.begin());
 }
 
@@ -576,8 +582,9 @@
     fActiveOpsTask = nullptr;
 
     fDAG.swap(&ddl->fRenderTasks);
+    SkASSERT(!fDAG.numRenderTasks());
 
-    for (auto renderTask : ddl->fRenderTasks) {
+    for (auto& renderTask : ddl->fRenderTasks) {
         renderTask->prePrepare(fContext);
     }
 
diff --git a/src/gpu/GrGpu.cpp b/src/gpu/GrGpu.cpp
index 0eca489..d6b97a2 100644
--- a/src/gpu/GrGpu.cpp
+++ b/src/gpu/GrGpu.cpp
@@ -702,6 +702,19 @@
 #if GR_TEST_UTILS
 
 #if GR_GPU_STATS
+static const char* cache_result_to_str(int i) {
+    const char* kCacheResultStrings[GrGpu::Stats::kNumProgramCacheResults] = {
+        "hits",
+        "misses",
+        "partials"
+    };
+    static_assert(0 == (int) GrGpu::Stats::ProgramCacheResult::kHit);
+    static_assert(1 == (int) GrGpu::Stats::ProgramCacheResult::kMiss);
+    static_assert(2 == (int) GrGpu::Stats::ProgramCacheResult::kPartial);
+    static_assert(GrGpu::Stats::kNumProgramCacheResults == 3);
+    return kCacheResultStrings[i];
+}
+
 void GrGpu::Stats::dump(SkString* out) {
     out->appendf("Render Target Binds: %d\n", fRenderTargetBinds);
     out->appendf("Shader Compilations: %d\n", fShaderCompilations);
@@ -712,6 +725,26 @@
     out->appendf("Stencil Buffer Creates: %d\n", fStencilAttachmentCreates);
     out->appendf("Number of draws: %d\n", fNumDraws);
     out->appendf("Number of Scratch Textures reused %d\n", fNumScratchTexturesReused);
+
+    SkASSERT(fNumInlineCompilationFailures == 0);
+    out->appendf("Number of Inline compile failures %d\n", fNumInlineCompilationFailures);
+    for (int i = 0; i < Stats::kNumProgramCacheResults-1; ++i) {
+        out->appendf("Inline Program Cache %s %d\n", cache_result_to_str(i),
+                     fInlineProgramCacheStats[i]);
+    }
+
+    SkASSERT(fNumPreCompilationFailures == 0);
+    out->appendf("Number of precompile failures %d\n", fNumPreCompilationFailures);
+    for (int i = 0; i < Stats::kNumProgramCacheResults-1; ++i) {
+        out->appendf("Precompile Program Cache %s %d\n", cache_result_to_str(i),
+                     fPreProgramCacheStats[i]);
+    }
+
+    SkASSERT(fNumCompilationFailures == 0);
+    out->appendf("Total number of compilation failures %d\n", fNumCompilationFailures);
+    out->appendf("Total number of partial compilation successes %d\n",
+                 fNumPartialCompilationSuccesses);
+    out->appendf("Total number of compilation successes %d\n", fNumCompilationSuccesses);
 }
 
 void GrGpu::Stats::dumpKeyValuePairs(SkTArray<SkString>* keys, SkTArray<double>* values) {
diff --git a/src/gpu/GrGpu.h b/src/gpu/GrGpu.h
index c0a1bc1..aa10bd8 100644
--- a/src/gpu/GrGpu.h
+++ b/src/gpu/GrGpu.h
@@ -386,6 +386,16 @@
 
     class Stats {
     public:
+        enum class ProgramCacheResult {
+            kHit,       // the program was found in the cache
+            kMiss,      // the program was not found in the cache (and was, thus, compiled)
+            kPartial,   // a precompiled version was found in the persistent cache
+
+            kLast = kPartial
+        };
+
+        static const int kNumProgramCacheResults = (int)ProgramCacheResult::kLast + 1;
+
 #if GR_GPU_STATS
         Stats() = default;
 
@@ -424,6 +434,35 @@
         int numScratchTexturesReused() const { return fNumScratchTexturesReused; }
         void incNumScratchTexturesReused() { ++fNumScratchTexturesReused; }
 
+        int numInlineCompilationFailures() const { return fNumInlineCompilationFailures; }
+        void incNumInlineCompilationFailures() { ++fNumInlineCompilationFailures; }
+
+        int numInlineProgramCacheResult(ProgramCacheResult stat) const {
+            return fInlineProgramCacheStats[(int) stat];
+        }
+        void incNumInlineProgramCacheResult(ProgramCacheResult stat) {
+            ++fInlineProgramCacheStats[(int) stat];
+        }
+
+        int numPreCompilationFailures() const { return fNumPreCompilationFailures; }
+        void incNumPreCompilationFailures() { ++fNumPreCompilationFailures; }
+
+        int numPreProgramCacheResult(ProgramCacheResult stat) const {
+            return fPreProgramCacheStats[(int) stat];
+        }
+        void incNumPreProgramCacheResult(ProgramCacheResult stat) {
+            ++fPreProgramCacheStats[(int) stat];
+        }
+
+        int numCompilationFailures() const { return fNumCompilationFailures; }
+        void incNumCompilationFailures() { ++fNumCompilationFailures; }
+
+        int numPartialCompilationSuccesses() const { return fNumPartialCompilationSuccesses; }
+        void incNumPartialCompilationSuccesses() { ++fNumPartialCompilationSuccesses; }
+
+        int numCompilationSuccesses() const { return fNumCompilationSuccesses; }
+        void incNumCompilationSuccesses() { ++fNumCompilationSuccesses; }
+
 #if GR_TEST_UTILS
         void dump(SkString*);
         void dumpKeyValuePairs(SkTArray<SkString>* keys, SkTArray<double>* values);
@@ -440,6 +479,17 @@
         int fNumFailedDraws = 0;
         int fNumFinishFlushes = 0;
         int fNumScratchTexturesReused = 0;
+
+        int fNumInlineCompilationFailures = 0;
+        int fInlineProgramCacheStats[kNumProgramCacheResults] = { 0 };
+
+        int fNumPreCompilationFailures = 0;
+        int fPreProgramCacheStats[kNumProgramCacheResults] = { 0 };
+
+        int fNumCompilationFailures = 0;
+        int fNumPartialCompilationSuccesses = 0;
+        int fNumCompilationSuccesses = 0;
+
 #else
 
 #if GR_TEST_UTILS
@@ -455,6 +505,13 @@
         void incNumDraws() {}
         void incNumFailedDraws() {}
         void incNumFinishFlushes() {}
+        void incNumInlineCompilationFailures() {}
+        void incNumInlineProgramCacheResult(ProgramCacheResult stat) {}
+        void incNumPreCompilationFailures() {}
+        void incNumPreProgramCacheResult(ProgramCacheResult stat) {}
+        void incNumCompilationFailures() {}
+        void incNumPartialCompilationSuccesses() {}
+        void incNumCompilationSuccesses() {}
 #endif
     };
 
diff --git a/src/gpu/GrMemoryPool.cpp b/src/gpu/GrMemoryPool.cpp
index b2f627d..b5249e1 100644
--- a/src/gpu/GrMemoryPool.cpp
+++ b/src/gpu/GrMemoryPool.cpp
@@ -51,7 +51,7 @@
     int n = fAllocatedIDs.count();
     fAllocatedIDs.foreach([&i, n] (int32_t id) {
         if (++i == 1) {
-            SkDebugf("Leaked IDs (in no particular order): %d", id);
+            SkDebugf("Leaked %d IDs (in no particular order): %d%s", n, id, (n == i) ? "\n" : "");
         } else if (i < 11) {
             SkDebugf(", %d%s", id, (n == i ? "\n" : ""));
         } else if (i == 11) {
diff --git a/src/gpu/gl/GrGLGpu.h b/src/gpu/gl/GrGLGpu.h
index 315006b..de2c53f 100644
--- a/src/gpu/gl/GrGLGpu.h
+++ b/src/gpu/gl/GrGLGpu.h
@@ -333,7 +333,15 @@
         sk_sp<GrGLProgram> findOrCreateProgram(GrRenderTarget*, const GrProgramInfo&);
         sk_sp<GrGLProgram> findOrCreateProgram(const GrProgramDesc& desc,
                                                const GrProgramInfo& programInfo) {
-            return this->findOrCreateProgram(nullptr, desc, programInfo);
+            Stats::ProgramCacheResult stat;
+            sk_sp<GrGLProgram> tmp = this->findOrCreateProgram(nullptr, desc, programInfo, &stat);
+            if (!tmp) {
+                fGpu->fStats.incNumPreCompilationFailures();
+            } else {
+                fGpu->fStats.incNumPreProgramCacheResult(stat);
+            }
+
+            return tmp;
         }
         bool precompileShader(const SkData& key, const SkData& data);
 
@@ -342,7 +350,8 @@
 
         sk_sp<GrGLProgram> findOrCreateProgram(GrRenderTarget*,
                                                const GrProgramDesc&,
-                                               const GrProgramInfo&);
+                                               const GrProgramInfo&,
+                                               Stats::ProgramCacheResult*);
 
         struct DescHash {
             uint32_t operator()(const GrProgramDesc& desc) const {
diff --git a/src/gpu/gl/GrGLGpuProgramCache.cpp b/src/gpu/gl/GrGLGpuProgramCache.cpp
index 02c75a1..bf54a88 100644
--- a/src/gpu/gl/GrGLGpuProgramCache.cpp
+++ b/src/gpu/gl/GrGLGpuProgramCache.cpp
@@ -55,12 +55,22 @@
         return nullptr;
     }
 
-    return this->findOrCreateProgram(renderTarget, desc, programInfo);
+    Stats::ProgramCacheResult stat;
+    sk_sp<GrGLProgram> tmp = this->findOrCreateProgram(renderTarget, desc, programInfo, &stat);
+    if (!tmp) {
+        fGpu->fStats.incNumInlineCompilationFailures();
+    } else {
+        fGpu->fStats.incNumInlineProgramCacheResult(stat);
+    }
+
+    return tmp;
 }
 
 sk_sp<GrGLProgram> GrGLGpu::ProgramCache::findOrCreateProgram(GrRenderTarget* renderTarget,
                                                               const GrProgramDesc& desc,
-                                                              const GrProgramInfo& programInfo) {
+                                                              const GrProgramInfo& programInfo,
+                                                              Stats::ProgramCacheResult* stat) {
+    *stat = Stats::ProgramCacheResult::kHit;
     std::unique_ptr<Entry>* entry = fMap.find(desc);
     if (entry && !(*entry)->fProgram) {
         // We've pre-compiled the GL program, but don't have the GrGLProgram scaffolding
@@ -71,16 +81,22 @@
         if (!(*entry)->fProgram) {
             // Should we purge the program ID from the cache at this point?
             SkDEBUGFAIL("Couldn't create program from precompiled program");
+            fGpu->fStats.incNumCompilationFailures();
             return nullptr;
         }
+        fGpu->fStats.incNumPartialCompilationSuccesses();
+        *stat = Stats::ProgramCacheResult::kPartial;
     } else if (!entry) {
         // We have a cache miss
         sk_sp<GrGLProgram> program = GrGLProgramBuilder::CreateProgram(fGpu, renderTarget,
                                                                        desc, programInfo);
         if (!program) {
+            fGpu->fStats.incNumCompilationFailures();
             return nullptr;
         }
+        fGpu->fStats.incNumCompilationSuccesses();
         entry = fMap.insert(desc, std::unique_ptr<Entry>(new Entry(std::move(program))));
+        *stat = Stats::ProgramCacheResult::kMiss;
     }
 
     return (*entry)->fProgram;
diff --git a/tools/DDLPromiseImageHelper.cpp b/tools/DDLPromiseImageHelper.cpp
index e068a0d..9d602aa 100644
--- a/tools/DDLPromiseImageHelper.cpp
+++ b/tools/DDLPromiseImageHelper.cpp
@@ -115,6 +115,32 @@
     }
 }
 
+void DDLPromiseImageHelper::DeleteBETexturesForPromiseImage(GrContext* context,
+                                                            PromiseImageInfo* info) {
+    SkASSERT(context->priv().asDirectContext());
+
+    if (info->isYUV()) {
+        int numPixmaps;
+        SkAssertResult(SkYUVAIndex::AreValidIndices(info->yuvaIndices(), &numPixmaps));
+        for (int j = 0; j < numPixmaps; ++j) {
+            PromiseImageCallbackContext* callbackContext = info->callbackContext(j);
+            SkASSERT(callbackContext);
+
+            callbackContext->destroyBackendTexture();
+            SkASSERT(!callbackContext->promiseImageTexture());
+        }
+    } else {
+        PromiseImageCallbackContext* callbackContext = info->callbackContext(0);
+        if (!callbackContext) {
+            // This texture would've been too large to fit on the GPU
+            return;
+        }
+
+        callbackContext->destroyBackendTexture();
+        SkASSERT(!callbackContext->promiseImageTexture());
+    }
+}
+
 void DDLPromiseImageHelper::createCallbackContexts(GrContext* context) {
     const GrCaps* caps = context->priv().caps();
     const int maxDimension = caps->maxTextureSize();
@@ -168,9 +194,7 @@
         for (int i = 0; i < fImageInfo.count(); ++i) {
             PromiseImageInfo* info = &fImageInfo[i];
 
-            taskGroup->add([context, info]() {
-                              CreateBETexturesForPromiseImage(context, info);
-                           });
+            taskGroup->add([context, info]() { CreateBETexturesForPromiseImage(context, info); });
         }
     } else {
         for (int i = 0; i < fImageInfo.count(); ++i) {
@@ -179,6 +203,22 @@
     }
 }
 
+void DDLPromiseImageHelper::deleteAllFromGPU(SkTaskGroup* taskGroup, GrContext* context) {
+    SkASSERT(context->priv().asDirectContext());
+
+    if (taskGroup) {
+        for (int i = 0; i < fImageInfo.count(); ++i) {
+            PromiseImageInfo* info = &fImageInfo[i];
+
+            taskGroup->add([context, info]() { DeleteBETexturesForPromiseImage(context, info); });
+        }
+    } else {
+        for (int i = 0; i < fImageInfo.count(); ++i) {
+            DeleteBETexturesForPromiseImage(context, &fImageInfo[i]);
+        }
+    }
+}
+
 sk_sp<SkPicture> DDLPromiseImageHelper::reinflateSKP(
                                                    SkDeferredDisplayListRecorder* recorder,
                                                    SkData* compressedPictureData,
diff --git a/tools/DDLPromiseImageHelper.h b/tools/DDLPromiseImageHelper.h
index 7fff09e..f8e8dd6 100644
--- a/tools/DDLPromiseImageHelper.h
+++ b/tools/DDLPromiseImageHelper.h
@@ -56,6 +56,7 @@
     void createCallbackContexts(GrContext*);
 
     void uploadAllToGPU(SkTaskGroup*, GrContext*);
+    void deleteAllFromGPU(SkTaskGroup*, GrContext*);
 
     // reinflate a deflated SKP, replacing all the indices with promise images.
     sk_sp<SkPicture> reinflateSKP(SkDeferredDisplayListRecorder*,
@@ -85,6 +86,11 @@
 
         void setBackendTexture(const GrBackendTexture& backendTexture);
 
+        void destroyBackendTexture() {
+            SkASSERT(fPromiseImageTexture && fPromiseImageTexture->unique());
+            fPromiseImageTexture = nullptr;
+        }
+
         sk_sp<SkPromiseImageTexture> fulfill() {
             SkASSERT(fPromiseImageTexture);
             SkASSERT(fUnreleasedFulfills >= 0);
@@ -228,6 +234,7 @@
     };
 
     static void CreateBETexturesForPromiseImage(GrContext*, PromiseImageInfo*);
+    static void DeleteBETexturesForPromiseImage(GrContext*, PromiseImageInfo*);
 
     static sk_sp<SkPromiseImageTexture> PromiseImageFulfillProc(void* textureContext) {
         auto callbackContext = static_cast<PromiseImageCallbackContext*>(textureContext);
diff --git a/tools/DDLTileHelper.cpp b/tools/DDLTileHelper.cpp
index 913f2e7..45ca86d 100644
--- a/tools/DDLTileHelper.cpp
+++ b/tools/DDLTileHelper.cpp
@@ -18,14 +18,15 @@
 #include "src/image/SkImage_Gpu.h"
 #include "tools/DDLPromiseImageHelper.h"
 
-void DDLTileHelper::TileData::init(int id, sk_sp<SkSurface> dstSurface, const SkIRect& clip) {
+void DDLTileHelper::TileData::init(int id,
+                                   sk_sp<SkSurface> dstSurface,
+                                   const SkSurfaceCharacterization& dstSurfaceCharacterization,
+                                   const SkIRect& clip) {
     fID = id;
     fDstSurface = dstSurface;
     fClip = clip;
 
-    SkSurfaceCharacterization tmp;
-    SkAssertResult(fDstSurface->characterize(&tmp));
-    fCharacterization = tmp.createResized(clip.width(), clip.height());
+    fCharacterization = dstSurfaceCharacterization.createResized(clip.width(), clip.height());
     SkASSERT(fCharacterization.isValid());
 }
 
@@ -115,6 +116,7 @@
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
 DDLTileHelper::DDLTileHelper(sk_sp<SkSurface> dstSurface,
+                             const SkSurfaceCharacterization& dstChar,
                              const SkIRect& viewport,
                              int numDivisions)
         : fNumDivisions(numDivisions) {
@@ -135,7 +137,7 @@
 
             SkASSERT(viewport.contains(clip));
 
-            fTiles[y*fNumDivisions+x].init(y*fNumDivisions+x, dstSurface, clip);
+            fTiles[y*fNumDivisions+x].init(y*fNumDivisions+x, dstSurface, dstChar, clip);
         }
     }
 }
diff --git a/tools/DDLTileHelper.h b/tools/DDLTileHelper.h
index f1d2053..4edfe2d 100644
--- a/tools/DDLTileHelper.h
+++ b/tools/DDLTileHelper.h
@@ -29,7 +29,10 @@
         TileData() {}
         ~TileData();
 
-        void init(int id, sk_sp<SkSurface> dstSurface, const SkIRect& clip);
+        void init(int id,
+                  sk_sp<SkSurface> dstSurface,
+                  const SkSurfaceCharacterization& dstChar,
+                  const SkIRect& clip);
 
         // Convert the compressedPictureData into an SkPicture replacing each image-index
         // with a promise image.
@@ -66,6 +69,7 @@
     };
 
     DDLTileHelper(sk_sp<SkSurface> dstSurface,
+                  const SkSurfaceCharacterization& dstChar,
                   const SkIRect& viewport,
                   int numDivisions);
     ~DDLTileHelper() {
diff --git a/tools/flags/CommonFlagsConfig.cpp b/tools/flags/CommonFlagsConfig.cpp
index 7daa3d7..fa0c414 100644
--- a/tools/flags/CommonFlagsConfig.cpp
+++ b/tools/flags/CommonFlagsConfig.cpp
@@ -510,7 +510,7 @@
             extendedOptions.get_option_bool("testThreading", &testThreading) &&
             extendedOptions.get_option_int("testPersistentCache", &testPersistentCache) &&
             extendedOptions.get_option_bool("testPrecompile", &testPrecompile) &&
-            extendedOptions.get_option_bool("useDDLs", &useDDLs) &&
+            extendedOptions.get_option_bool("useDDLSink", &useDDLs) &&
             extendedOptions.get_option_gpu_surf_type("surf", &surfType);
 
     // testing threading and the persistent cache are mutually exclusive.
diff --git a/tools/skpbench/skpbench.cpp b/tools/skpbench/skpbench.cpp
index 5caeccf..4bf2a83 100644
--- a/tools/skpbench/skpbench.cpp
+++ b/tools/skpbench/skpbench.cpp
@@ -228,6 +228,9 @@
     const Sample::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs);
     const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration);
 
+    SkSurfaceCharacterization dstCharacterization;
+    SkAssertResult(surface->characterize(&dstCharacterization));
+
     SkIRect viewport = surface->imageInfo().bounds();
 
     DDLPromiseImageHelper promiseImageHelper;
@@ -240,7 +243,7 @@
 
     promiseImageHelper.uploadAllToGPU(nullptr, context);
 
-    DDLTileHelper tiles(surface, viewport, FLAGS_ddlTilingWidthHeight);
+    DDLTileHelper tiles(surface, dstCharacterization, viewport, FLAGS_ddlTilingWidthHeight);
 
     tiles.createSKPPerTile(compressedPictureData.get(), promiseImageHelper);