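Cache the command buffer objects

Rename GrGpu::createCommandBuffer() to getCommandBuffer() and move submission
from GrGpuCommandBuffer::submit() to a new GrGpu::submit(GrGpuCommandBuffer*).
Op lists no longer hold the command buffer in a std::unique_ptr; they hand it
back to the GrGpu through submit() once recording is finished.

The GL and Vulkan backends now keep one RT command buffer and one texture
command buffer alive and reuse them via set()/reset() instead of allocating a
new object on every request. The mock and Metal backends still allocate in
getCommandBuffer() and delete the buffer in submit().

The GrOpList.cpp hunk additionally returns false when the proxy's surface
reports wasDestroyed().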

Change-Id: I35bc1fab8ed6b72baf75d2e4271a040e0209440d
Reviewed-on: https://skia-review.googlesource.com/145821
Commit-Queue: Robert Phillips <robertphillips@google.com>
Reviewed-by: Greg Daniel <egdaniel@google.com>
diff --git a/src/gpu/GrGpu.h b/src/gpu/GrGpu.h
index e6001b1..783537a 100644
--- a/src/gpu/GrGpu.h
+++ b/src/gpu/GrGpu.h
@@ -239,16 +239,16 @@
                      const SkIPoint& dstPoint,
                      bool canDiscardOutsideDstRect = false);
 
-    // Creates a GrGpuRTCommandBuffer which GrOpLists send draw commands to instead of directly
+    // Returns a GrGpuRTCommandBuffer which GrOpLists send draw commands to instead of directly
     // to the Gpu object.
-    virtual GrGpuRTCommandBuffer* createCommandBuffer(
+    virtual GrGpuRTCommandBuffer* getCommandBuffer(
             GrRenderTarget*, GrSurfaceOrigin,
             const GrGpuRTCommandBuffer::LoadAndStoreInfo&,
             const GrGpuRTCommandBuffer::StencilLoadAndStoreInfo&) = 0;
 
-    // Creates a GrGpuTextureCommandBuffer which GrOpLists send texture commands to instead of
+    // Returns a GrGpuTextureCommandBuffer which GrOpLists send texture commands to instead of
     // directly to the Gpu object.
-    virtual GrGpuTextureCommandBuffer* createCommandBuffer(GrTexture*, GrSurfaceOrigin) = 0;
+    virtual GrGpuTextureCommandBuffer* getCommandBuffer(GrTexture*, GrSurfaceOrigin) = 0;
 
     // Called by GrDrawingManager when flushing.
     // Provides a hook for post-flush actions (e.g. Vulkan command buffer submits). This will also
@@ -256,6 +256,8 @@
     // inserted semaphores.
     GrSemaphoresSubmitted finishFlush(int numSemaphores, GrBackendSemaphore backendSemaphores[]);
 
+    virtual void submit(GrGpuCommandBuffer*) = 0;
+
     virtual GrFence SK_WARN_UNUSED_RESULT insertFence() = 0;
     virtual bool waitFence(GrFence, uint64_t timeout = 1000) = 0;
     virtual void deleteFence(GrFence) const = 0;
diff --git a/src/gpu/GrGpuCommandBuffer.h b/src/gpu/GrGpuCommandBuffer.h
index fe0edc9..c1b44b4 100644
--- a/src/gpu/GrGpuCommandBuffer.h
+++ b/src/gpu/GrGpuCommandBuffer.h
@@ -36,29 +36,27 @@
     virtual void insertEventMarker(const char*) = 0;
 
     virtual GrGpuRTCommandBuffer* asRTCommandBuffer() { return nullptr; }
-
-    // Sends the command buffer off to the GPU object to execute the commands built up in the
-    // buffer. The gpu object is allowed to defer execution of the commands until it is flushed.
-    virtual void submit() = 0;
-
-protected:
-    GrGpuCommandBuffer(GrSurfaceOrigin origin) : fOrigin(origin) {}
-
-    GrSurfaceOrigin fOrigin;
 };
 
 class GrGpuTextureCommandBuffer : public GrGpuCommandBuffer{
 public:
-    virtual ~GrGpuTextureCommandBuffer() {}
+    void set(GrTexture* texture, GrSurfaceOrigin origin) {
+        SkASSERT(!fTexture);
 
-    virtual void submit() = 0;
+        fOrigin = origin;
+        fTexture = texture;
+    }
 
 protected:
-    GrGpuTextureCommandBuffer(GrTexture* texture, GrSurfaceOrigin origin)
-            : INHERITED(origin)
-            , fTexture(texture) {}
+    GrGpuTextureCommandBuffer() : fOrigin(kTopLeft_GrSurfaceOrigin), fTexture(nullptr) {}
 
-    GrTexture* fTexture;
+    GrGpuTextureCommandBuffer(GrTexture* texture, GrSurfaceOrigin origin)
+            : fOrigin(origin)
+            , fTexture(texture) {
+    }
+
+    GrSurfaceOrigin fOrigin;
+    GrTexture*      fTexture;
 
 private:
     typedef GrGpuCommandBuffer INHERITED;
@@ -85,8 +83,6 @@
         GrStoreOp fStoreOp;
     };
 
-    virtual ~GrGpuRTCommandBuffer() {}
-
     GrGpuRTCommandBuffer* asRTCommandBuffer() { return this; }
 
     virtual void begin() = 0;
@@ -122,11 +118,21 @@
     virtual void discard() = 0;
 
 protected:
+    GrGpuRTCommandBuffer() : fOrigin(kTopLeft_GrSurfaceOrigin), fRenderTarget(nullptr) {}
+
     GrGpuRTCommandBuffer(GrRenderTarget* rt, GrSurfaceOrigin origin)
-            : INHERITED(origin)
+            : fOrigin(origin)
             , fRenderTarget(rt) {
     }
 
+    void set(GrRenderTarget* rt, GrSurfaceOrigin origin) {
+        SkASSERT(!fRenderTarget);
+
+        fRenderTarget = rt;
+        fOrigin = origin;
+    }
+
+    GrSurfaceOrigin fOrigin;
     GrRenderTarget* fRenderTarget;
 
 private:
diff --git a/src/gpu/GrOpList.cpp b/src/gpu/GrOpList.cpp
index 9eac091..9a9a671 100644
--- a/src/gpu/GrOpList.cpp
+++ b/src/gpu/GrOpList.cpp
@@ -183,6 +183,11 @@
         }
     }
 
+    GrSurface* surface = proxy->peekSurface();
+    if (surface->wasDestroyed()) {
+        return false;
+    }
+
     return true;
 }
 
diff --git a/src/gpu/GrRenderTargetOpList.cpp b/src/gpu/GrRenderTargetOpList.cpp
index 18136cf..9e8a8f4 100644
--- a/src/gpu/GrRenderTargetOpList.cpp
+++ b/src/gpu/GrRenderTargetOpList.cpp
@@ -107,12 +107,12 @@
     }
 }
 
-static std::unique_ptr<GrGpuRTCommandBuffer> create_command_buffer(GrGpu* gpu,
-                                                                   GrRenderTarget* rt,
-                                                                   GrSurfaceOrigin origin,
-                                                                   GrLoadOp colorLoadOp,
-                                                                   GrColor loadClearColor,
-                                                                   GrLoadOp stencilLoadOp) {
+static GrGpuRTCommandBuffer* create_command_buffer(GrGpu* gpu,
+                                                   GrRenderTarget* rt,
+                                                   GrSurfaceOrigin origin,
+                                                   GrLoadOp colorLoadOp,
+                                                   GrColor loadClearColor,
+                                                   GrLoadOp stencilLoadOp) {
     const GrGpuRTCommandBuffer::LoadAndStoreInfo kColorLoadStoreInfo {
         colorLoadOp,
         GrStoreOp::kStore,
@@ -129,20 +129,7 @@
         GrStoreOp::kStore,
     };
 
-    std::unique_ptr<GrGpuRTCommandBuffer> buffer(
-                            gpu->createCommandBuffer(rt, origin,
-                                                     kColorLoadStoreInfo,
-                                                     stencilLoadAndStoreInfo));
-    return buffer;
-}
-
-static inline void finish_command_buffer(GrGpuRTCommandBuffer* buffer) {
-    if (!buffer) {
-        return;
-    }
-
-    buffer->end();
-    buffer->submit();
+    return gpu->getCommandBuffer(rt, origin, kColorLoadStoreInfo, stencilLoadAndStoreInfo);
 }
 
 // TODO: this is where GrOp::renderTarget is used (which is fine since it
@@ -165,14 +152,14 @@
 
     // TODO: at the very least, we want the stencil store op to always be discard (at this
     // level). In Vulkan, sub-command buffers would still need to load & store the stencil buffer.
-    std::unique_ptr<GrGpuRTCommandBuffer> commandBuffer = create_command_buffer(
+    GrGpuRTCommandBuffer* commandBuffer = create_command_buffer(
                                                     flushState->gpu(),
                                                     fTarget.get()->peekRenderTarget(),
                                                     fTarget.get()->origin(),
                                                     fColorLoadOp,
                                                     fLoadClearColor,
                                                     fStencilLoadOp);
-    flushState->setCommandBuffer(commandBuffer.get());
+    flushState->setCommandBuffer(commandBuffer);
     commandBuffer->begin();
 
     // Draw all the generated geometry.
@@ -196,7 +183,8 @@
         flushState->setOpArgs(nullptr);
     }
 
-    finish_command_buffer(commandBuffer.get());
+    commandBuffer->end();
+    flushState->gpu()->submit(commandBuffer);
     flushState->setCommandBuffer(nullptr);
 
     return true;
diff --git a/src/gpu/GrTextureOpList.cpp b/src/gpu/GrTextureOpList.cpp
index 63e416c..df43bcb 100644
--- a/src/gpu/GrTextureOpList.cpp
+++ b/src/gpu/GrTextureOpList.cpp
@@ -99,10 +99,10 @@
 
     SkASSERT(fTarget.get()->peekTexture());
 
-    std::unique_ptr<GrGpuTextureCommandBuffer> commandBuffer(
-                         flushState->gpu()->createCommandBuffer(fTarget.get()->peekTexture(),
-                                                                fTarget.get()->origin()));
-    flushState->setCommandBuffer(commandBuffer.get());
+    GrGpuTextureCommandBuffer* commandBuffer(
+                         flushState->gpu()->getCommandBuffer(fTarget.get()->peekTexture(),
+                                                             fTarget.get()->origin()));
+    flushState->setCommandBuffer(commandBuffer);
 
     for (int i = 0; i < fRecordedOps.count(); ++i) {
         if (!fRecordedOps[i]) {
@@ -120,7 +120,7 @@
         flushState->setOpArgs(nullptr);
     }
 
-    commandBuffer->submit();
+    flushState->gpu()->submit(commandBuffer);
     flushState->setCommandBuffer(nullptr);
 
     return true;
diff --git a/src/gpu/gl/GrGLGpu.cpp b/src/gpu/gl/GrGLGpu.cpp
index 74ee2be..2df6382 100644
--- a/src/gpu/gl/GrGLGpu.cpp
+++ b/src/gpu/gl/GrGLGpu.cpp
@@ -2156,16 +2156,25 @@
     return true;
 }
 
-GrGpuRTCommandBuffer* GrGLGpu::createCommandBuffer(
+GrGpuRTCommandBuffer* GrGLGpu::getCommandBuffer(
         GrRenderTarget* rt, GrSurfaceOrigin origin,
         const GrGpuRTCommandBuffer::LoadAndStoreInfo& colorInfo,
         const GrGpuRTCommandBuffer::StencilLoadAndStoreInfo& stencilInfo) {
-    return new GrGLGpuRTCommandBuffer(this, rt, origin, colorInfo, stencilInfo);
+    if (!fCachedRTCommandBuffer) {
+        fCachedRTCommandBuffer.reset(new GrGLGpuRTCommandBuffer(this));
+    }
+
+    fCachedRTCommandBuffer->set(rt, origin, colorInfo, stencilInfo);
+    return fCachedRTCommandBuffer.get();
 }
 
-GrGpuTextureCommandBuffer* GrGLGpu::createCommandBuffer(GrTexture* texture,
-                                                        GrSurfaceOrigin origin) {
-    return new GrGLGpuTextureCommandBuffer(this, texture, origin);
+GrGpuTextureCommandBuffer* GrGLGpu::getCommandBuffer(GrTexture* texture, GrSurfaceOrigin origin) {
+    if (!fCachedTexCommandBuffer) {
+        fCachedTexCommandBuffer.reset(new GrGLGpuTextureCommandBuffer(this));
+    }
+
+    fCachedTexCommandBuffer->set(texture, origin);
+    return fCachedTexCommandBuffer.get();
 }
 
 void GrGLGpu::flushRenderTarget(GrGLRenderTarget* target, GrSurfaceOrigin origin,
@@ -4115,6 +4124,16 @@
     }
 }
 
+void GrGLGpu::submit(GrGpuCommandBuffer* buffer) {  // returns the cached buffer for reuse
+    if (buffer->asRTCommandBuffer()) {  // non-null only for RT command buffers
+        SkASSERT(fCachedRTCommandBuffer.get() == buffer);
+        fCachedRTCommandBuffer->reset();  // clears fRenderTarget so set() can run again
+    } else {
+        SkASSERT(fCachedTexCommandBuffer.get() == buffer);
+        fCachedTexCommandBuffer->reset();  // clears fTexture so set() can run again
+    }
+}
+
 GrFence SK_WARN_UNUSED_RESULT GrGLGpu::insertFence() {
     SkASSERT(this->caps()->fenceSyncSupport());
     GrGLsync sync;
diff --git a/src/gpu/gl/GrGLGpu.h b/src/gpu/gl/GrGLGpu.h
index 531207c..5ac3f84 100644
--- a/src/gpu/gl/GrGLGpu.h
+++ b/src/gpu/gl/GrGLGpu.h
@@ -25,6 +25,8 @@
 #include "SkTypes.h"
 
 class GrGLBuffer;
+class GrGLGpuRTCommandBuffer;
+class GrGLGpuTextureCommandBuffer;
 class GrPipeline;
 class GrSwizzle;
 
@@ -118,12 +120,12 @@
 
     void clearStencil(GrRenderTarget*, int clearValue) override;
 
-    GrGpuRTCommandBuffer* createCommandBuffer(
+    GrGpuRTCommandBuffer* getCommandBuffer(
             GrRenderTarget*, GrSurfaceOrigin,
             const GrGpuRTCommandBuffer::LoadAndStoreInfo&,
             const GrGpuRTCommandBuffer::StencilLoadAndStoreInfo&) override;
 
-    GrGpuTextureCommandBuffer* createCommandBuffer(GrTexture*, GrSurfaceOrigin) override;
+    GrGpuTextureCommandBuffer* getCommandBuffer(GrTexture*, GrSurfaceOrigin) override;
 
     void invalidateBoundRenderTarget() {
         fHWBoundRenderTargetUniqueID.makeInvalid();
@@ -150,6 +152,8 @@
     void testingOnly_flushGpuAndSync() override;
 #endif
 
+    void submit(GrGpuCommandBuffer* buffer) override;
+
     GrFence SK_WARN_UNUSED_RESULT insertFence() override;
     bool waitFence(GrFence, uint64_t timeout) override;
     void deleteFence(GrFence) const override;
@@ -566,7 +570,7 @@
         }
     }                                       fHWBlendState;
 
-    TriState fMSAAEnabled;
+    TriState                                fMSAAEnabled;
 
     GrStencilSettings                       fHWStencilSettings;
     TriState                                fHWStencilTestEnabled;
@@ -623,8 +627,12 @@
     GrPrimitiveType fLastPrimitiveType;
     bool fRequiresFlushBeforeNextInstancedDraw = false;
 
-    typedef GrGpu INHERITED;
+    std::unique_ptr<GrGLGpuRTCommandBuffer>      fCachedRTCommandBuffer;
+    std::unique_ptr<GrGLGpuTextureCommandBuffer> fCachedTexCommandBuffer;
+
     friend class GrGLPathRendering; // For accessing setTextureUnit.
+
+    typedef GrGpu INHERITED;
 };
 
 #endif
diff --git a/src/gpu/gl/GrGLGpuCommandBuffer.cpp b/src/gpu/gl/GrGLGpuCommandBuffer.cpp
index 4f08b83..912ffe7 100644
--- a/src/gpu/gl/GrGLGpuCommandBuffer.cpp
+++ b/src/gpu/gl/GrGLGpuCommandBuffer.cpp
@@ -22,3 +22,15 @@
         }
     }
 }
+
+void GrGLGpuRTCommandBuffer::set(GrRenderTarget* rt, GrSurfaceOrigin origin,
+                                 const GrGpuRTCommandBuffer::LoadAndStoreInfo& colorInfo,
+                                 const GrGpuRTCommandBuffer::StencilLoadAndStoreInfo& stencilInfo) {
+    SkASSERT(fGpu);
+    SkASSERT(!fRenderTarget);  // the previous use must have gone through reset()
+    SkASSERT(fGpu == rt->getContext()->contextPriv().getGpu());  // rt's context must use fGpu
+
+    this->INHERITED::set(rt, origin);  // stores fRenderTarget and fOrigin
+    fColorLoadAndStoreInfo = colorInfo;
+    fStencilLoadAndStoreInfo = stencilInfo;
+}
diff --git a/src/gpu/gl/GrGLGpuCommandBuffer.h b/src/gpu/gl/GrGLGpuCommandBuffer.h
index fd3417a7..63aacf3 100644
--- a/src/gpu/gl/GrGLGpuCommandBuffer.h
+++ b/src/gpu/gl/GrGLGpuCommandBuffer.h
@@ -19,14 +19,7 @@
 
 class GrGLGpuTextureCommandBuffer : public GrGpuTextureCommandBuffer {
 public:
-    GrGLGpuTextureCommandBuffer(GrGLGpu* gpu, GrTexture* texture, GrSurfaceOrigin origin)
-        : INHERITED(texture, origin)
-        , fGpu(gpu) {
-    }
-
-    ~GrGLGpuTextureCommandBuffer() override {}
-
-    void submit() override {}
+    GrGLGpuTextureCommandBuffer(GrGLGpu* gpu) : fGpu(gpu) {}
 
     void copy(GrSurface* src, GrSurfaceOrigin srcOrigin, const SkIRect& srcRect,
               const SkIPoint& dstPoint) override {
@@ -37,6 +30,10 @@
         fGpu->insertEventMarker(msg);
     }
 
+    void reset() {
+        fTexture = nullptr;
+    }
+
 private:
     GrGLGpu* fGpu;
 
@@ -50,16 +47,7 @@
  * pass through functions to corresponding calls in the GrGLGpu class.
  */
 public:
-    GrGLGpuRTCommandBuffer(GrGLGpu* gpu, GrRenderTarget* rt, GrSurfaceOrigin origin,
-                           const GrGpuRTCommandBuffer::LoadAndStoreInfo& colorInfo,
-                           const GrGpuRTCommandBuffer::StencilLoadAndStoreInfo& stencilInfo)
-            : INHERITED(rt, origin)
-            , fGpu(gpu)
-            , fColorLoadAndStoreInfo(colorInfo)
-            , fStencilLoadAndStoreInfo(stencilInfo) {
-    }
-
-    ~GrGLGpuRTCommandBuffer() override {}
+    GrGLGpuRTCommandBuffer(GrGLGpu* gpu) : fGpu(gpu) {}
 
     void begin() override;
     void end() override {}
@@ -79,7 +67,13 @@
         fGpu->copySurface(fRenderTarget, fOrigin, src, srcOrigin, srcRect, dstPoint);
     }
 
-    void submit() override {}
+    void set(GrRenderTarget*, GrSurfaceOrigin,
+             const GrGpuRTCommandBuffer::LoadAndStoreInfo&,
+             const GrGpuRTCommandBuffer::StencilLoadAndStoreInfo&);
+
+    void reset() {
+        fRenderTarget = nullptr;
+    }
 
 private:
     GrGpu* gpu() override { return fGpu; }
diff --git a/src/gpu/mock/GrMockGpu.cpp b/src/gpu/mock/GrMockGpu.cpp
index 6543674..8a66b79 100644
--- a/src/gpu/mock/GrMockGpu.cpp
+++ b/src/gpu/mock/GrMockGpu.cpp
@@ -47,19 +47,25 @@
     return sk_sp<GrGpu>(new GrMockGpu(context, *mockOptions, contextOptions));
 }
 
-
-GrGpuRTCommandBuffer* GrMockGpu::createCommandBuffer(
-                                            GrRenderTarget* rt, GrSurfaceOrigin origin,
-                                            const GrGpuRTCommandBuffer::LoadAndStoreInfo&,
-                                            const GrGpuRTCommandBuffer::StencilLoadAndStoreInfo&) {
+GrGpuRTCommandBuffer* GrMockGpu::getCommandBuffer(
+                                GrRenderTarget* rt, GrSurfaceOrigin origin,
+                                const GrGpuRTCommandBuffer::LoadAndStoreInfo&,
+                                const GrGpuRTCommandBuffer::StencilLoadAndStoreInfo&) {
     return new GrMockGpuRTCommandBuffer(this, rt, origin);
 }
 
-GrGpuTextureCommandBuffer* GrMockGpu::createCommandBuffer(GrTexture* texture,
-                                                          GrSurfaceOrigin origin) {
+GrGpuTextureCommandBuffer* GrMockGpu::getCommandBuffer(GrTexture* texture, GrSurfaceOrigin origin) {
     return new GrMockGpuTextureCommandBuffer(texture, origin);
 }
 
+void GrMockGpu::submit(GrGpuCommandBuffer* buffer) {
+    if (buffer->asRTCommandBuffer()) {  // only RT command buffers are forwarded
+        this->submitCommandBuffer(
+                        static_cast<GrMockGpuRTCommandBuffer*>(buffer->asRTCommandBuffer()));
+    }
+
+    delete buffer;  // getCommandBuffer() allocated it with new
+}
 
 void GrMockGpu::submitCommandBuffer(const GrMockGpuRTCommandBuffer* cmdBuffer) {
     for (int i = 0; i < cmdBuffer->numDraws(); ++i) {
diff --git a/src/gpu/mock/GrMockGpu.h b/src/gpu/mock/GrMockGpu.h
index cbd19f3..6f0d5e2 100644
--- a/src/gpu/mock/GrMockGpu.h
+++ b/src/gpu/mock/GrMockGpu.h
@@ -24,12 +24,12 @@
 
     ~GrMockGpu() override {}
 
-    GrGpuRTCommandBuffer* createCommandBuffer(
+    GrGpuRTCommandBuffer* getCommandBuffer(
                                     GrRenderTarget*, GrSurfaceOrigin,
                                     const GrGpuRTCommandBuffer::LoadAndStoreInfo&,
                                     const GrGpuRTCommandBuffer::StencilLoadAndStoreInfo&) override;
 
-    GrGpuTextureCommandBuffer* createCommandBuffer(GrTexture*, GrSurfaceOrigin) override;
+    GrGpuTextureCommandBuffer* getCommandBuffer(GrTexture*, GrSurfaceOrigin) override;
 
     GrFence SK_WARN_UNUSED_RESULT insertFence() override { return 0; }
     bool waitFence(GrFence, uint64_t) override { return true; }
@@ -45,11 +45,13 @@
     void waitSemaphore(sk_sp<GrSemaphore> semaphore) override {}
     sk_sp<GrSemaphore> prepareTextureForCrossContextUsage(GrTexture*) override { return nullptr; }
 
-    void submitCommandBuffer(const GrMockGpuRTCommandBuffer*);
+    void submit(GrGpuCommandBuffer* buffer) override;
 
 private:
     GrMockGpu(GrContext* context, const GrMockOptions&, const GrContextOptions&);
 
+    void submitCommandBuffer(const GrMockGpuRTCommandBuffer*);
+
     void onResetContext(uint32_t resetBits) override {}
 
     void xferBarrier(GrRenderTarget*, GrXferBarrierType) override {}
diff --git a/src/gpu/mock/GrMockGpuCommandBuffer.h b/src/gpu/mock/GrMockGpuCommandBuffer.h
index d3e26fa..cad5836 100644
--- a/src/gpu/mock/GrMockGpuCommandBuffer.h
+++ b/src/gpu/mock/GrMockGpuCommandBuffer.h
@@ -24,8 +24,6 @@
     void insertEventMarker(const char*) override {}
 
 private:
-    void submit() override {}
-
     typedef GrGpuTextureCommandBuffer INHERITED;
 };
 
@@ -47,8 +45,6 @@
 
     int numDraws() const { return fNumDraws; }
 
-    void submit() override { fGpu->submitCommandBuffer(this); }
-
 private:
     void onDraw(const GrPrimitiveProcessor&, const GrPipeline&,
                 const GrPipeline::FixedDynamicState*, const GrPipeline::DynamicStateArrays*,
diff --git a/src/gpu/mtl/GrMtlGpu.h b/src/gpu/mtl/GrMtlGpu.h
index e6e3c05..1ae9878 100644
--- a/src/gpu/mtl/GrMtlGpu.h
+++ b/src/gpu/mtl/GrMtlGpu.h
@@ -85,15 +85,17 @@
                        const SkIPoint& dstPoint,
                        bool canDiscardOutsideDstRect) override;
 
-    GrGpuRTCommandBuffer* createCommandBuffer(
+    GrGpuRTCommandBuffer* getCommandBuffer(
                                     GrRenderTarget*, GrSurfaceOrigin,
                                     const GrGpuRTCommandBuffer::LoadAndStoreInfo&,
                                     const GrGpuRTCommandBuffer::StencilLoadAndStoreInfo&) override;
 
-    GrGpuTextureCommandBuffer* createCommandBuffer(GrTexture*, GrSurfaceOrigin) override;
+    GrGpuTextureCommandBuffer* getCommandBuffer(GrTexture*, GrSurfaceOrigin) override;
 
     SkSL::Compiler* shaderCompiler() const { return fCompiler.get(); }
 
+    void submit(GrGpuCommandBuffer* buffer) override;
+
     GrFence SK_WARN_UNUSED_RESULT insertFence() override { return 0; }
     bool waitFence(GrFence, uint64_t) override { return true; }
     void deleteFence(GrFence) const override {}
diff --git a/src/gpu/mtl/GrMtlGpu.mm b/src/gpu/mtl/GrMtlGpu.mm
index eb0f330..fd1611f 100644
--- a/src/gpu/mtl/GrMtlGpu.mm
+++ b/src/gpu/mtl/GrMtlGpu.mm
@@ -105,18 +105,22 @@
     fCmdBuffer = [fQueue commandBuffer];
 }
 
-GrGpuRTCommandBuffer* GrMtlGpu::createCommandBuffer(
+GrGpuRTCommandBuffer* GrMtlGpu::getCommandBuffer(
             GrRenderTarget* renderTarget, GrSurfaceOrigin origin,
             const GrGpuRTCommandBuffer::LoadAndStoreInfo& colorInfo,
             const GrGpuRTCommandBuffer::StencilLoadAndStoreInfo& stencilInfo) {
     return new GrMtlGpuRTCommandBuffer(this, renderTarget, origin, colorInfo, stencilInfo);
 }
 
-GrGpuTextureCommandBuffer* GrMtlGpu::createCommandBuffer(GrTexture* texture,
-                                                         GrSurfaceOrigin origin) {
+GrGpuTextureCommandBuffer* GrMtlGpu::getCommandBuffer(GrTexture* texture,
+                                                      GrSurfaceOrigin origin) {
     return new GrMtlGpuTextureCommandBuffer(this, texture, origin);
 }
 
+void GrMtlGpu::submit(GrGpuCommandBuffer* buffer) {
+    delete buffer;  // getCommandBuffer() allocated it with new; no other work is done here
+}
+
 void GrMtlGpu::submitCommandBuffer(SyncQueue sync) {
     SkASSERT(fCmdBuffer);
     [fCmdBuffer commit];
@@ -768,3 +772,4 @@
     SkRectMemcpy(buffer, rowBytes, mappedMemory, transBufferRowBytes, transBufferRowBytes, height);
     return true;
 }
+
diff --git a/src/gpu/mtl/GrMtlGpuCommandBuffer.h b/src/gpu/mtl/GrMtlGpuCommandBuffer.h
index af74ead..e5b259c 100644
--- a/src/gpu/mtl/GrMtlGpuCommandBuffer.h
+++ b/src/gpu/mtl/GrMtlGpuCommandBuffer.h
@@ -32,8 +32,6 @@
     void insertEventMarker(const char* msg) override {}
 
 private:
-    void submit() override {}
-
     GrMtlGpu* fGpu;
 
     typedef GrGpuTextureCommandBuffer INHERITED;
@@ -53,8 +51,6 @@
         (void)fStencilLoadAndStoreInfo;
     }
 
-    ~GrMtlGpuRTCommandBuffer() override {}
-
     void begin() override {}
     void end() override {}
 
@@ -69,8 +65,6 @@
         fGpu->copySurface(fRenderTarget, fOrigin, src, srcOrigin, srcRect, dstPoint);
     }
 
-    void submit() override {}
-
 private:
     GrGpu* gpu() override { return fGpu; }
 
diff --git a/src/gpu/vk/GrVkGpu.cpp b/src/gpu/vk/GrVkGpu.cpp
index d792978..b0c5aee 100644
--- a/src/gpu/vk/GrVkGpu.cpp
+++ b/src/gpu/vk/GrVkGpu.cpp
@@ -274,16 +274,25 @@
 
 ///////////////////////////////////////////////////////////////////////////////
 
-GrGpuRTCommandBuffer* GrVkGpu::createCommandBuffer(
+GrGpuRTCommandBuffer* GrVkGpu::getCommandBuffer(
             GrRenderTarget* rt, GrSurfaceOrigin origin,
             const GrGpuRTCommandBuffer::LoadAndStoreInfo& colorInfo,
             const GrGpuRTCommandBuffer::StencilLoadAndStoreInfo& stencilInfo) {
-    return new GrVkGpuRTCommandBuffer(this, rt, origin, colorInfo, stencilInfo);
+    if (!fCachedRTCommandBuffer) {
+        fCachedRTCommandBuffer.reset(new GrVkGpuRTCommandBuffer(this));
+    }
+
+    fCachedRTCommandBuffer->set(rt, origin, colorInfo, stencilInfo);
+    return fCachedRTCommandBuffer.get();
 }
 
-GrGpuTextureCommandBuffer* GrVkGpu::createCommandBuffer(GrTexture* texture,
-                                                        GrSurfaceOrigin origin) {
-    return new GrVkGpuTextureCommandBuffer(this, texture, origin);
+GrGpuTextureCommandBuffer* GrVkGpu::getCommandBuffer(GrTexture* texture, GrSurfaceOrigin origin) {
+    if (!fCachedTexCommandBuffer) {
+        fCachedTexCommandBuffer.reset(new GrVkGpuTextureCommandBuffer(this));
+    }
+
+    fCachedTexCommandBuffer->set(texture, origin);
+    return fCachedTexCommandBuffer.get();
 }
 
 void GrVkGpu::submitCommandBuffer(SyncQueue sync) {
@@ -1954,6 +1963,20 @@
     this->didWriteToSurface(target, origin, &bounds);
 }
 
+void GrVkGpu::submit(GrGpuCommandBuffer* buffer) {
+    if (buffer->asRTCommandBuffer()) {  // non-null only for RT command buffers
+        SkASSERT(fCachedRTCommandBuffer.get() == buffer);
+
+        fCachedRTCommandBuffer->submit();
+        fCachedRTCommandBuffer->reset();  // releases recorded state so set() can run again
+    } else {
+        SkASSERT(fCachedTexCommandBuffer.get() == buffer);
+
+        fCachedTexCommandBuffer->submit();
+        fCachedTexCommandBuffer->reset();  // clears fCopies and fTexture for the next set()
+    }
+}
+
 GrFence SK_WARN_UNUSED_RESULT GrVkGpu::insertFence() {
     VkFenceCreateInfo createInfo;
     memset(&createInfo, 0, sizeof(VkFenceCreateInfo));
diff --git a/src/gpu/vk/GrVkGpu.h b/src/gpu/vk/GrVkGpu.h
index 696394c..6417b91 100644
--- a/src/gpu/vk/GrVkGpu.h
+++ b/src/gpu/vk/GrVkGpu.h
@@ -23,6 +23,8 @@
 class GrPipeline;
 
 class GrVkBufferImpl;
+class GrVkGpuRTCommandBuffer;
+class GrVkGpuTextureCommandBuffer;
 class GrVkMemoryAllocator;
 class GrVkPipeline;
 class GrVkPipelineState;
@@ -89,12 +91,13 @@
 
     void clearStencil(GrRenderTarget* target, int clearValue) override;
 
-    GrGpuRTCommandBuffer* createCommandBuffer(
+    GrGpuRTCommandBuffer* getCommandBuffer(
             GrRenderTarget*, GrSurfaceOrigin,
             const GrGpuRTCommandBuffer::LoadAndStoreInfo&,
             const GrGpuRTCommandBuffer::StencilLoadAndStoreInfo&) override;
 
-    GrGpuTextureCommandBuffer* createCommandBuffer(GrTexture*, GrSurfaceOrigin) override;
+    GrGpuTextureCommandBuffer* getCommandBuffer(GrTexture*, GrSurfaceOrigin) override;
+
 
     void addMemoryBarrier(VkPipelineStageFlags srcStageMask,
                           VkPipelineStageFlags dstStageMask,
@@ -125,6 +128,8 @@
                                       GrVkRenderTarget*, GrSurfaceOrigin,
                                       const SkIRect& bounds);
 
+    void submit(GrGpuCommandBuffer*) override;
+
     GrFence SK_WARN_UNUSED_RESULT insertFence() override;
     bool waitFence(GrFence, uint64_t timeout) override;
     void deleteFence(GrFence) const override;
@@ -252,6 +257,9 @@
     // vulkan context.
     bool                                   fDisconnected;
 
+    std::unique_ptr<GrVkGpuRTCommandBuffer>      fCachedRTCommandBuffer;
+    std::unique_ptr<GrVkGpuTextureCommandBuffer> fCachedTexCommandBuffer;
+
     typedef GrGpu INHERITED;
 };
 
diff --git a/src/gpu/vk/GrVkGpuCommandBuffer.cpp b/src/gpu/vk/GrVkGpuCommandBuffer.cpp
index 3cbc6ad..44cf980 100644
--- a/src/gpu/vk/GrVkGpuCommandBuffer.cpp
+++ b/src/gpu/vk/GrVkGpuCommandBuffer.cpp
@@ -73,22 +73,10 @@
     }
 }
 
-GrVkGpuRTCommandBuffer::GrVkGpuRTCommandBuffer(GrVkGpu* gpu,
-                                               GrRenderTarget* rt, GrSurfaceOrigin origin,
-                                               const LoadAndStoreInfo& colorInfo,
-                                               const StencilLoadAndStoreInfo& stencilInfo)
-        : INHERITED(rt, origin)
+GrVkGpuRTCommandBuffer::GrVkGpuRTCommandBuffer(GrVkGpu* gpu)
+        : fCurrentCmdInfo(-1)
         , fGpu(gpu)
-        , fClearColor(GrColor4f::FromGrColor(colorInfo.fClearColor))
         , fLastPipelineState(nullptr) {
-    get_vk_load_store_ops(colorInfo.fLoadOp, colorInfo.fStoreOp,
-                          &fVkColorLoadOp, &fVkColorStoreOp);
-
-    get_vk_load_store_ops(stencilInfo.fLoadOp, stencilInfo.fStoreOp,
-                          &fVkStencilLoadOp, &fVkStencilStoreOp);
-    fCurrentCmdInfo = -1;
-
-    this->init();
 }
 
 void GrVkGpuRTCommandBuffer::init() {
@@ -137,13 +125,7 @@
 
 
 GrVkGpuRTCommandBuffer::~GrVkGpuRTCommandBuffer() {
-    for (int i = 0; i < fCommandBufferInfos.count(); ++i) {
-        CommandBufferInfo& cbInfo = fCommandBufferInfos[i];
-        for (int j = 0; j < cbInfo.fCommandBuffers.count(); ++j) {
-            cbInfo.fCommandBuffers[j]->unref(fGpu);
-        }
-        cbInfo.fRenderPass->unref(fGpu);
-    }
+    this->reset();
 }
 
 GrGpu* GrVkGpuRTCommandBuffer::gpu() { return fGpu; }
@@ -241,6 +223,44 @@
     }
 }
 
+void GrVkGpuRTCommandBuffer::set(GrRenderTarget* rt, GrSurfaceOrigin origin,
+                                 const GrGpuRTCommandBuffer::LoadAndStoreInfo& colorInfo,
+                                 const GrGpuRTCommandBuffer::StencilLoadAndStoreInfo& stencilInfo) {
+    SkASSERT(!fRenderTarget);  // the previous use must have gone through reset()
+    SkASSERT(fCommandBufferInfos.empty());
+    SkASSERT(-1 == fCurrentCmdInfo);
+    SkASSERT(fGpu == rt->getContext()->contextPriv().getGpu());  // rt's context must use fGpu
+    SkASSERT(!fLastPipelineState);
+
+    this->INHERITED::set(rt, origin);  // stores fRenderTarget and fOrigin
+
+    fClearColor = GrColor4f::FromGrColor(colorInfo.fClearColor);
+
+    get_vk_load_store_ops(colorInfo.fLoadOp, colorInfo.fStoreOp,
+                          &fVkColorLoadOp, &fVkColorStoreOp);
+
+    get_vk_load_store_ops(stencilInfo.fLoadOp, stencilInfo.fStoreOp,
+                          &fVkStencilLoadOp, &fVkStencilStoreOp);
+
+    this->init();  // formerly invoked from the constructor
+}
+
+void GrVkGpuRTCommandBuffer::reset() {  // also called from the destructor
+    for (int i = 0; i < fCommandBufferInfos.count(); ++i) {
+        CommandBufferInfo& cbInfo = fCommandBufferInfos[i];
+        for (int j = 0; j < cbInfo.fCommandBuffers.count(); ++j) {
+            cbInfo.fCommandBuffers[j]->unref(fGpu);
+        }
+        cbInfo.fRenderPass->unref(fGpu);
+    }
+    fCommandBufferInfos.reset();  // set() asserts this is empty
+
+    fCurrentCmdInfo = -1;  // the value set() asserts on
+
+    fLastPipelineState = nullptr;
+    fRenderTarget = nullptr;  // lets the next set() pass its !fRenderTarget assert
+}
+
 void GrVkGpuRTCommandBuffer::discard() {
     GrVkRenderTarget* vkRT = static_cast<GrVkRenderTarget*>(fRenderTarget);
 
diff --git a/src/gpu/vk/GrVkGpuCommandBuffer.h b/src/gpu/vk/GrVkGpuCommandBuffer.h
index 01b2693..c00e4ae 100644
--- a/src/gpu/vk/GrVkGpuCommandBuffer.h
+++ b/src/gpu/vk/GrVkGpuCommandBuffer.h
@@ -23,10 +23,7 @@
 
 class GrVkGpuTextureCommandBuffer : public GrGpuTextureCommandBuffer {
 public:
-    GrVkGpuTextureCommandBuffer(GrVkGpu* gpu, GrTexture* texture, GrSurfaceOrigin origin)
-        : INHERITED(texture, origin)
-        , fGpu(gpu) {
-    }
+    GrVkGpuTextureCommandBuffer(GrVkGpu* gpu) : fGpu(gpu) {}
 
     ~GrVkGpuTextureCommandBuffer() override;
 
@@ -35,9 +32,14 @@
 
     void insertEventMarker(const char*) override;
 
-private:
-    void submit() override;
+    void reset() {
+        fCopies.reset();
+        fTexture = nullptr;
+    }
 
+    void submit();
+
+private:
     struct CopyInfo {
         CopyInfo(GrSurface* src, GrSurfaceOrigin srcOrigin, const SkIRect& srcRect,
                  const SkIPoint& dstPoint)
@@ -57,9 +59,7 @@
 
 class GrVkGpuRTCommandBuffer : public GrGpuRTCommandBuffer, private GrMesh::SendToGpuImpl {
 public:
-    GrVkGpuRTCommandBuffer(GrVkGpu*, GrRenderTarget*, GrSurfaceOrigin,
-                           const LoadAndStoreInfo&,
-                           const StencilLoadAndStoreInfo&);
+    GrVkGpuRTCommandBuffer(GrVkGpu*);
 
     ~GrVkGpuRTCommandBuffer() override;
 
@@ -74,7 +74,12 @@
     void copy(GrSurface* src, GrSurfaceOrigin srcOrigin, const SkIRect& srcRect,
               const SkIPoint& dstPoint) override;
 
-    void submit() override;
+    void set(GrRenderTarget*, GrSurfaceOrigin,
+             const GrGpuRTCommandBuffer::LoadAndStoreInfo&,
+             const GrGpuRTCommandBuffer::StencilLoadAndStoreInfo&);
+    void reset();
+
+    void submit();
 
 private:
     void init();