Add GrStagingBufferManager and use GrGpuBuffers to manage caching.

With this change, if a backend Gr*Gpu wants to use staging buffers,
it just adds a generic GrStagingBufferManager member object. This
object can be used to get slices of upload buffers. Then it just needs
to implement the virtual for taking ownership of buffers during submit.

We rely on our GrResourceCache to handle caching and reuse of these
buffers.

This change allows us to remove all other virtuals on GrGpu around
managing staging buffers.

Change-Id: I5db9a3c52133978ea89d6c0de440f434fbf91a51
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/300226
Commit-Queue: Greg Daniel <egdaniel@google.com>
Reviewed-by: Stephen White <senorblanco@google.com>
diff --git a/dm/DMGpuTestProcs.cpp b/dm/DMGpuTestProcs.cpp
index 6c3eb4c..30249f5 100644
--- a/dm/DMGpuTestProcs.cpp
+++ b/dm/DMGpuTestProcs.cpp
@@ -27,6 +27,9 @@
 bool IsDirect3DContextType(sk_gpu_test::GrContextFactory::ContextType type) {
     return GrBackendApi::kDirect3D == GrContextFactory::ContextTypeBackend(type);
 }
+bool IsDawnContextType(sk_gpu_test::GrContextFactory::ContextType type) {
+    return GrBackendApi::kDawn == GrContextFactory::ContextTypeBackend(type);
+}
 bool IsRenderingGLContextType(sk_gpu_test::GrContextFactory::ContextType type) {
     return IsGLContextType(type) && GrContextFactory::IsRenderingContext(type);
 }
diff --git a/gn/gpu.gni b/gn/gpu.gni
index 5e34d8a..9a9ae22 100644
--- a/gn/gpu.gni
+++ b/gn/gpu.gni
@@ -205,8 +205,8 @@
   "$_src/gpu/GrSimpleMesh.h",
   "$_src/gpu/GrSoftwarePathRenderer.cpp",
   "$_src/gpu/GrSoftwarePathRenderer.h",
-  "$_src/gpu/GrStagingBuffer.cpp",
-  "$_src/gpu/GrStagingBuffer.h",
+  "$_src/gpu/GrStagingBufferManager.cpp",
+  "$_src/gpu/GrStagingBufferManager.h",
   "$_src/gpu/GrStencilAttachment.cpp",
   "$_src/gpu/GrStencilAttachment.h",
   "$_src/gpu/GrStencilClip.h",
@@ -828,8 +828,6 @@
   "$_src/gpu/dawn/GrDawnRenderTarget.h",
   "$_src/gpu/dawn/GrDawnRingBuffer.cpp",
   "$_src/gpu/dawn/GrDawnRingBuffer.h",
-  "$_src/gpu/dawn/GrDawnStagingBuffer.cpp",
-  "$_src/gpu/dawn/GrDawnStagingBuffer.h",
   "$_src/gpu/dawn/GrDawnStencilAttachment.cpp",
   "$_src/gpu/dawn/GrDawnStencilAttachment.h",
   "$_src/gpu/dawn/GrDawnTexture.cpp",
diff --git a/src/gpu/GrGpu.cpp b/src/gpu/GrGpu.cpp
index 6f65b1a..c862d27 100644
--- a/src/gpu/GrGpu.cpp
+++ b/src/gpu/GrGpu.cpp
@@ -27,7 +27,7 @@
 #include "src/gpu/GrResourceCache.h"
 #include "src/gpu/GrResourceProvider.h"
 #include "src/gpu/GrSemaphore.h"
-#include "src/gpu/GrStagingBuffer.h"
+#include "src/gpu/GrStagingBufferManager.h"
 #include "src/gpu/GrStencilAttachment.h"
 #include "src/gpu/GrStencilSettings.h"
 #include "src/gpu/GrSurfacePriv.h"
@@ -36,25 +36,15 @@
 #include "src/gpu/GrTracing.h"
 #include "src/utils/SkJSONWriter.h"
 
-static const size_t kMinStagingBufferSize = 32 * 1024;
-
 ////////////////////////////////////////////////////////////////////////////////
 
 GrGpu::GrGpu(GrDirectContext* direct) : fResetBits(kAll_GrBackendState), fContext(direct) {}
 
 GrGpu::~GrGpu() {
     this->callSubmittedProcs(false);
-    SkASSERT(fBusyStagingBuffers.isEmpty());
 }
 
-void GrGpu::disconnect(DisconnectType type) {
-    if (DisconnectType::kAbandon == type) {
-        fAvailableStagingBuffers.reset();
-        fActiveStagingBuffers.reset();
-        fBusyStagingBuffers.reset();
-    }
-    fStagingBuffers.clear();
-}
+void GrGpu::disconnect(DisconnectType type) {}
 
 ////////////////////////////////////////////////////////////////////////////////
 
@@ -615,35 +605,6 @@
     return fSamplePatternDictionary.findOrAssignSamplePatternKey(sampleLocations);
 }
 
-#ifdef SK_DEBUG
-bool GrGpu::inStagingBuffers(GrStagingBuffer* b) const {
-    for (const auto& i : fStagingBuffers) {
-        if (b == i.get()) {
-            return true;
-        }
-    }
-    return false;
-}
-
-void GrGpu::validateStagingBuffers() const {
-    for (const auto& i : fStagingBuffers) {
-        GrStagingBuffer* buffer = i.get();
-        SkASSERT(fAvailableStagingBuffers.isInList(buffer) ||
-                 fActiveStagingBuffers.isInList(buffer) ||
-                 fBusyStagingBuffers.isInList(buffer));
-    }
-    for (auto b : fAvailableStagingBuffers) {
-        SkASSERT(this->inStagingBuffers(b));
-    }
-    for (auto b : fActiveStagingBuffers) {
-        SkASSERT(this->inStagingBuffers(b));
-    }
-    for (auto b : fBusyStagingBuffers) {
-        SkASSERT(this->inStagingBuffers(b));
-    }
-}
-#endif
-
 void GrGpu::executeFlushInfo(GrSurfaceProxy* proxies[],
                              int numProxies,
                              SkSurface::BackendSurfaceAccess access,
@@ -696,10 +657,9 @@
 bool GrGpu::submitToGpu(bool syncCpu) {
     this->stats()->incNumSubmitToGpus();
 
-#ifdef SK_DEBUG
-    this->validateStagingBuffers();
-#endif
-    this->unmapStagingBuffers();
+    if (auto manager = this->stagingBufferManager()) {
+        manager->detachBuffers();
+    }
 
     bool submitted = this->onSubmitToGpu(syncCpu);
 
@@ -978,62 +938,3 @@
     return this->onUpdateCompressedBackendTexture(backendTexture, std::move(finishedCallback),
                                                   data);
 }
-
-GrStagingBuffer* GrGpu::findStagingBuffer(size_t size) {
-#ifdef SK_DEBUG
-    this->validateStagingBuffers();
-#endif
-    for (auto b : fActiveStagingBuffers) {
-        if (b->remaining() >= size) {
-            return b;
-        }
-    }
-    for (auto b : fAvailableStagingBuffers) {
-        if (b->remaining() >= size) {
-            fAvailableStagingBuffers.remove(b);
-            fActiveStagingBuffers.addToTail(b);
-            return b;
-        }
-    }
-    size = SkNextPow2(size);
-    size = std::max(size, kMinStagingBufferSize);
-    std::unique_ptr<GrStagingBuffer> b = this->createStagingBuffer(size);
-    GrStagingBuffer* stagingBuffer = b.get();
-    fStagingBuffers.push_back(std::move(b));
-    fActiveStagingBuffers.addToTail(stagingBuffer);
-    return stagingBuffer;
-}
-
-GrStagingBuffer::Slice GrGpu::allocateStagingBufferSlice(size_t size) {
-#ifdef SK_DEBUG
-    this->validateStagingBuffers();
-#endif
-    GrStagingBuffer* stagingBuffer = this->findStagingBuffer(size);
-    return stagingBuffer->allocate(size);
-}
-
-void GrGpu::unmapStagingBuffers() {
-#ifdef SK_DEBUG
-    this->validateStagingBuffers();
-#endif
-    // Unmap all active buffers.
-    for (auto buffer : fActiveStagingBuffers) {
-        buffer->unmap();
-    }
-}
-
-void GrGpu::moveStagingBufferFromBusyToAvailable(GrStagingBuffer* buffer) {
-#ifdef SK_DEBUG
-    this->validateStagingBuffers();
-#endif
-    fBusyStagingBuffers.remove(buffer);
-    fAvailableStagingBuffers.addToTail(buffer);
-}
-
-void GrGpu::moveStagingBufferFromActiveToBusy(GrStagingBuffer* buffer) {
-#ifdef SK_DEBUG
-    this->validateStagingBuffers();
-#endif
-    fActiveStagingBuffers.remove(buffer);
-    fBusyStagingBuffers.addToTail(buffer);
-}
diff --git a/src/gpu/GrGpu.h b/src/gpu/GrGpu.h
index 8446bd7..d16439a 100644
--- a/src/gpu/GrGpu.h
+++ b/src/gpu/GrGpu.h
@@ -16,7 +16,6 @@
 #include "src/gpu/GrCaps.h"
 #include "src/gpu/GrOpsRenderPass.h"
 #include "src/gpu/GrSamplePatternDictionary.h"
-#include "src/gpu/GrStagingBuffer.h"
 #include "src/gpu/GrSwizzle.h"
 #include "src/gpu/GrTextureProducer.h"
 #include "src/gpu/GrXferProcessor.h"
@@ -35,6 +34,7 @@
 class GrPrimitiveProcessor;
 class GrRenderTarget;
 class GrSemaphore;
+class GrStagingBufferManager;
 class GrStencilAttachment;
 class GrStencilSettings;
 class GrSurface;
@@ -57,6 +57,8 @@
 
     GrPathRendering* pathRendering() { return fPathRendering.get();  }
 
+    virtual GrStagingBufferManager* stagingBufferManager() { return nullptr; }
+
     enum class DisconnectType {
         // No cleanup should be attempted, immediately cease making backend API calls
         kAbandon,
@@ -382,6 +384,8 @@
 
     virtual void checkFinishProcs() = 0;
 
+    virtual void takeOwnershipOfStagingBuffer(sk_sp<GrGpuBuffer>) {}
+
     /**
      * Checks if we detected an OOM from the underlying 3D API and if so returns true and resets
      * the internal OOM state to false. Otherwise, returns false.
@@ -700,13 +704,6 @@
     // Called before certain draws in order to guarantee coherent results from dst reads.
     virtual void xferBarrier(GrRenderTarget*, GrXferBarrierType) = 0;
 
-    GrStagingBuffer* findStagingBuffer(size_t size);
-    GrStagingBuffer::Slice allocateStagingBufferSlice(size_t size);
-    virtual std::unique_ptr<GrStagingBuffer> createStagingBuffer(size_t size) { return nullptr; }
-    void unmapStagingBuffers();
-    void moveStagingBufferFromActiveToBusy(GrStagingBuffer* buffer);
-    void moveStagingBufferFromBusyToAvailable(GrStagingBuffer* buffer);
-
 protected:
     static bool MipMapsAreCorrect(SkISize dimensions, GrMipMapped, const BackendTextureData*);
     static bool CompressedDataIsCorrect(SkISize dimensions, SkImage::CompressionType,
@@ -718,11 +715,6 @@
 
     void setOOMed() { fOOMed = true; }
 
-    typedef SkTInternalLList<GrStagingBuffer> StagingBufferList;
-    const StagingBufferList& availableStagingBuffers() { return fAvailableStagingBuffers; }
-    const StagingBufferList& activeStagingBuffers() { return fActiveStagingBuffers; }
-    const StagingBufferList& busyStagingBuffers() { return fBusyStagingBuffers; }
-
     Stats                            fStats;
     std::unique_ptr<GrPathRendering> fPathRendering;
     // Subclass must initialize this in its constructor.
@@ -856,10 +848,6 @@
         this->onResetContext(fResetBits);
         fResetBits = 0;
     }
-#ifdef SK_DEBUG
-    bool inStagingBuffers(GrStagingBuffer* b) const;
-    void validateStagingBuffers() const;
-#endif
 
     void callSubmittedProcs(bool success);
 
@@ -868,12 +856,6 @@
     GrDirectContext* fContext;
     GrSamplePatternDictionary fSamplePatternDictionary;
 
-    std::vector<std::unique_ptr<GrStagingBuffer>> fStagingBuffers;
-
-    StagingBufferList                fAvailableStagingBuffers;
-    StagingBufferList                fActiveStagingBuffers;
-    StagingBufferList                fBusyStagingBuffers;
-
     struct SubmittedProc {
         SubmittedProc(GrGpuSubmittedProc proc, GrGpuSubmittedContext context)
                 : fProc(proc), fContext(context) {}
diff --git a/src/gpu/GrStagingBuffer.cpp b/src/gpu/GrStagingBuffer.cpp
deleted file mode 100644
index 2a224f9..0000000
--- a/src/gpu/GrStagingBuffer.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright 2020 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#include "src/gpu/GrGpu.h"
-#include "src/gpu/GrStagingBuffer.h"
-
-#include "src/core/SkMathPriv.h"
-
-void GrStagingBuffer::markAvailable(void* data) {
-    fData = data;
-    fOffset = 0;
-    fGpu->moveStagingBufferFromBusyToAvailable(this);
-}
-
-void GrStagingBuffer::unmap() {
-    this->onUnmap();
-}
-
-GrStagingBuffer::Slice GrStagingBuffer::allocate(size_t size) {
-    size_t offset = fOffset;
-    fOffset += size;
-    char* data = static_cast<char*>(fData) + offset;
-    return Slice(this, offset, data);
-}
diff --git a/src/gpu/GrStagingBuffer.h b/src/gpu/GrStagingBuffer.h
deleted file mode 100644
index a50b1a4..0000000
--- a/src/gpu/GrStagingBuffer.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright 2020 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#ifndef GrStagingBuffer_DEFINED
-#define GrStagingBuffer_DEFINED
-
-#include "src/core/SkTInternalLList.h"
-
-class GrGpu;
-
-class GrStagingBuffer {
-public:
-    GrStagingBuffer(GrGpu* gpu, size_t size, void* data) : fGpu(gpu), fSize(size), fData(data) {}
-    virtual ~GrStagingBuffer() {
-        fGpu = nullptr;
-    }
-    void markAvailable(void* data);
-    struct Slice {
-        Slice(GrStagingBuffer* buffer, int offset, void* data)
-          : fBuffer(buffer), fOffset(offset), fData(data) {}
-        GrStagingBuffer*   fBuffer;
-        int                fOffset;
-        void*              fData;
-    };
-    size_t remaining() const { return fSize - fOffset; }
-    GrGpu* getGpu() const { return fGpu; }
-    void unmap();
-    Slice allocate(size_t size);
-private:
-    virtual void onUnmap() = 0;
-
-    GrGpu*                 fGpu;
-    size_t                 fSize;
-    size_t                 fOffset = 0;
-    void*                  fData;
-
-    SK_DECLARE_INTERNAL_LLIST_INTERFACE(GrStagingBuffer);
-};
-
-#endif
diff --git a/src/gpu/GrStagingBufferManager.cpp b/src/gpu/GrStagingBufferManager.cpp
new file mode 100644
index 0000000..2172250
--- /dev/null
+++ b/src/gpu/GrStagingBufferManager.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2020 Google LLC
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "src/gpu/GrStagingBufferManager.h"
+
+#include "include/gpu/GrDirectContext.h"
+#include "src/gpu/GrContextPriv.h"
+#include "src/gpu/GrGpu.h"
+#include "src/gpu/GrResourceProvider.h"
+
+GrStagingBufferManager::Slice GrStagingBufferManager::allocateStagingBufferSlice(size_t size) {
+    StagingBuffer* buffer = nullptr;
+    for (size_t i = 0; i < fBuffers.size(); ++i) {
+        if (fBuffers[i].remaining() >= size) {
+            buffer = &fBuffers[i];
+            break;
+        }
+    }
+
+    if (!buffer) {
+        GrResourceProvider* resourceProvider = fGpu->getContext()->priv().resourceProvider();
+        size_t bufferSize = std::max(size, kMinStagingBufferSize);
+        sk_sp<GrGpuBuffer> newBuffer = resourceProvider->createBuffer(
+            bufferSize, GrGpuBufferType::kXferCpuToGpu, kDynamic_GrAccessPattern, nullptr);
+        if (!newBuffer) {
+            return {}; // invalid slice
+        }
+        void* mapPtr = newBuffer->map();
+        if (!mapPtr) {
+            return {}; // invalid slice
+        }
+        fBuffers.emplace_back(std::move(newBuffer), mapPtr);
+        buffer = &fBuffers.back();
+    }
+
+    SkASSERT(buffer);
+    SkASSERT(buffer->remaining() >= size);
+
+    size_t sliceOffset = buffer->fOffset;
+    buffer->fOffset += size;
+    char* offsetMapPtr = static_cast<char*>(buffer->fMapPtr) + sliceOffset;
+    return {buffer->fBuffer.get(), sliceOffset, offsetMapPtr};
+}
+
+void GrStagingBufferManager::detachBuffers() {
+    for (size_t i = 0; i < fBuffers.size(); ++i) {
+        fBuffers[i].fBuffer->unmap();
+        fGpu->takeOwnershipOfStagingBuffer(std::move(fBuffers[i].fBuffer));
+    }
+    fBuffers.clear();
+}
diff --git a/src/gpu/GrStagingBufferManager.h b/src/gpu/GrStagingBufferManager.h
new file mode 100644
index 0000000..7b0989e
--- /dev/null
+++ b/src/gpu/GrStagingBufferManager.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright 2020 Google LLC
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef GrStagingBufferManager_DEFINED
+#define GrStagingBufferManager_DEFINED
+
+#include "include/core/SkRefCnt.h"
+#include "src/gpu/GrGpuBuffer.h"
+#include <vector>
+
+class GrGpu;
+
+class GrStagingBufferManager {
+public:
+    GrStagingBufferManager(GrGpu* gpu) : fGpu(gpu) {}
+
+    struct Slice {
+        Slice() {}
+        Slice(GrGpuBuffer* buffer, size_t offset, void* offsetMapPtr)
+                : fBuffer(buffer), fOffset(offset), fOffsetMapPtr(offsetMapPtr) {}
+        GrGpuBuffer* fBuffer = nullptr;
+        size_t fOffset = 0;
+        void* fOffsetMapPtr = nullptr;
+    };
+
+    Slice allocateStagingBufferSlice(size_t size);
+
+    // This call moves all the buffers off of the manager and over to the backend gpu by calling
+    // the virtual GrGpu::takeOwnershipOfStagingBuffer on each buffer. This is called during
+    // submitToGpu. It is up to the backend to take refs to the buffers in its
+    // takeOwnershipOfStagingBuffer implementation if it needs to. After this call returns the
+    // manager will have released all refs to its buffers.
+    void detachBuffers();
+
+    bool hasBuffers() { return !fBuffers.empty(); }
+
+    void reset() {
+        for (size_t i = 0; i < fBuffers.size(); ++i) {
+            fBuffers[i].fBuffer->unmap();
+        }
+        fBuffers.clear();
+    }
+
+private:
+    static constexpr size_t kMinStagingBufferSize = 64 * 1024;
+
+    struct StagingBuffer {
+        StagingBuffer(sk_sp<GrGpuBuffer> buffer, void* mapPtr)
+                : fBuffer(std::move(buffer))
+                , fMapPtr(mapPtr) {}
+
+        sk_sp<GrGpuBuffer> fBuffer;
+        void* fMapPtr;
+        size_t fOffset = 0;
+
+        size_t remaining() { return fBuffer->size() - fOffset; }
+    };
+
+    std::vector<StagingBuffer> fBuffers;
+    GrGpu* fGpu;
+};
+
+#endif
+
diff --git a/src/gpu/dawn/GrDawnBuffer.cpp b/src/gpu/dawn/GrDawnBuffer.cpp
index afa0be1..2ebfa34 100644
--- a/src/gpu/dawn/GrDawnBuffer.cpp
+++ b/src/gpu/dawn/GrDawnBuffer.cpp
@@ -6,7 +6,6 @@
  */
 
 #include "src/gpu/dawn/GrDawnBuffer.h"
-#include "src/gpu/dawn/GrDawnStagingBuffer.h"
 
 #include "src/gpu/dawn/GrDawnGpu.h"
 
@@ -63,10 +62,12 @@
     }
 
     if (fMappable == Mappable::kNot) {
-        GrStagingBuffer::Slice slice = getGpu()->allocateStagingBufferSlice(this->size());
-        fStagingBuffer = static_cast<GrDawnStagingBuffer*>(slice.fBuffer)->buffer();
+        GrStagingBufferManager::Slice slice =
+                this->getDawnGpu()->stagingBufferManager()->allocateStagingBufferSlice(
+                        this->size());
+        fStagingBuffer = static_cast<GrDawnBuffer*>(slice.fBuffer)->get();
         fStagingOffset = slice.fOffset;
-        fMapPtr = slice.fData;
+        fMapPtr = slice.fOffsetMapPtr;
     } else {
         // We always create this buffers mapped or if they've been used on the gpu before we use the
         // async map callback to know when it is safe to reuse them. Thus by the time we get here
diff --git a/src/gpu/dawn/GrDawnGpu.cpp b/src/gpu/dawn/GrDawnGpu.cpp
index 42915d9..dc6bfb1 100644
--- a/src/gpu/dawn/GrDawnGpu.cpp
+++ b/src/gpu/dawn/GrDawnGpu.cpp
@@ -11,6 +11,7 @@
 #include "include/gpu/GrBackendSurface.h"
 #include "include/gpu/GrContextOptions.h"
 #include "include/gpu/GrDirectContext.h"
+#include "src/gpu/GrContextPriv.h"
 #include "src/gpu/GrDataUtils.h"
 #include "src/gpu/GrGeometryProcessor.h"
 #include "src/gpu/GrGpuResourceCacheAccess.h"
@@ -24,7 +25,6 @@
 #include "src/gpu/dawn/GrDawnOpsRenderPass.h"
 #include "src/gpu/dawn/GrDawnProgramBuilder.h"
 #include "src/gpu/dawn/GrDawnRenderTarget.h"
-#include "src/gpu/dawn/GrDawnStagingBuffer.h"
 #include "src/gpu/dawn/GrDawnStencilAttachment.h"
 #include "src/gpu/dawn/GrDawnTexture.h"
 #include "src/gpu/dawn/GrDawnUtil.h"
@@ -114,23 +114,21 @@
         , fQueue(device.GetDefaultQueue())
         , fCompiler(new SkSL::Compiler())
         , fUniformRingBuffer(this, wgpu::BufferUsage::Uniform)
+        , fStagingBufferManager(this)
         , fRenderPipelineCache(kMaxRenderPipelineEntries)
         , fFinishCallbacks(this) {
     fCaps.reset(new GrDawnCaps(options));
 }
 
 GrDawnGpu::~GrDawnGpu() {
-    while (!this->busyStagingBuffers().isEmpty()) {
-        fDevice.Tick();
-    }
+    this->waitOnAllBusyStagingBuffers();
 }
 
 void GrDawnGpu::disconnect(DisconnectType type) {
     if (DisconnectType::kCleanup == type) {
-        while (!this->busyStagingBuffers().isEmpty()) {
-            fDevice.Tick();
-        }
+        this->waitOnAllBusyStagingBuffers();
     }
+    fStagingBufferManager.reset();
     fQueue = nullptr;
     fDevice = nullptr;
     INHERITED::disconnect(type);
@@ -374,12 +372,13 @@
         size_t origRowBytes = bpp * w;
         size_t rowBytes = GrDawnRoundRowBytes(origRowBytes);
         size_t size = rowBytes * h;
-        GrStagingBuffer::Slice stagingBuffer = this->allocateStagingBufferSlice(size);
+        GrStagingBufferManager::Slice stagingBuffer =
+                this->stagingBufferManager()->allocateStagingBufferSlice(size);
         if (rowBytes == origRowBytes) {
-            memcpy(stagingBuffer.fData, pixels, size);
+            memcpy(stagingBuffer.fOffsetMapPtr, pixels, size);
         } else {
             const char* src = static_cast<const char*>(pixels);
-            char* dst = static_cast<char*>(stagingBuffer.fData);
+            char* dst = static_cast<char*>(stagingBuffer.fOffsetMapPtr);
             for (int row = 0; row < h; row++) {
                 memcpy(dst, src, origRowBytes);
                 dst += rowBytes;
@@ -387,7 +386,7 @@
             }
         }
         wgpu::BufferCopyView srcBuffer;
-        srcBuffer.buffer = static_cast<GrDawnStagingBuffer*>(stagingBuffer.fBuffer)->buffer();
+        srcBuffer.buffer = static_cast<GrDawnBuffer*>(stagingBuffer.fBuffer)->get();
         srcBuffer.bytesPerRow = 0; // TODO: remove this once the deprecated fields are gone.
         srcBuffer.layout.offset = stagingBuffer.fOffset;
         srcBuffer.layout.bytesPerRow = rowBytes;
@@ -484,6 +483,26 @@
     fFinishCallbacks.add(finishedProc, finishedContext);
 }
 
+void GrDawnGpu::checkForCompletedStagingBuffers() {
+    // We expect all the buffer maps to trigger in order of submission so we bail after the first
+    // non finished map since we always push new busy buffers to the back of our list.
+    while (!fBusyStagingBuffers.empty() && fBusyStagingBuffers.front()->isMapped()) {
+        fBusyStagingBuffers.pop_front();
+    }
+}
+
+void GrDawnGpu::waitOnAllBusyStagingBuffers() {
+    while (!fBusyStagingBuffers.empty()) {
+        fDevice.Tick();
+        this->checkForCompletedStagingBuffers();
+    }
+}
+
+void GrDawnGpu::takeOwnershipOfStagingBuffer(sk_sp<GrGpuBuffer> buffer) {
+    fSubmittedStagingBuffers.push_back(std::move(buffer));
+}
+
+
 static void callback(WGPUFenceCompletionStatus status, void* userData) {
     *static_cast<bool*>(userData) = true;
 }
@@ -494,7 +513,8 @@
         fQueue.Submit(fCommandBuffers.size(), &fCommandBuffers.front());
         fCommandBuffers.clear();
     }
-    this->mapStagingBuffers();
+
+    this->moveStagingBuffersToBusyAndMapAsync();
     if (syncCpu) {
         wgpu::FenceDescriptor desc;
         wgpu::Fence fence = fQueue.CreateFence(&desc);
@@ -505,6 +525,9 @@
         }
         fFinishCallbacks.callAll(true);
     }
+
+    this->checkForCompletedStagingBuffers();
+
     return true;
 }
 
@@ -698,15 +721,6 @@
     return fUniformRingBuffer.allocate(size);
 }
 
-std::unique_ptr<GrStagingBuffer> GrDawnGpu::createStagingBuffer(size_t size) {
-    wgpu::BufferDescriptor desc;
-    desc.usage = wgpu::BufferUsage::MapWrite | wgpu::BufferUsage::CopySrc;
-    desc.size = size;
-    wgpu::CreateBufferMappedResult result = fDevice.CreateBufferMapped(&desc);
-    auto stagingBuffer = new GrDawnStagingBuffer(this, result.buffer, desc.size, result.data);
-    return std::unique_ptr<GrStagingBuffer>(stagingBuffer);
-}
-
 void GrDawnGpu::appendCommandBuffer(wgpu::CommandBuffer commandBuffer) {
     if (commandBuffer) {
         fCommandBuffers.push_back(commandBuffer);
@@ -727,10 +741,11 @@
     }
 }
 
-void GrDawnGpu::mapStagingBuffers() {
-    // Map all active buffers, so we get a callback when they're done.
-    while (auto buffer = this->activeStagingBuffers().head()) {
-        this->moveStagingBufferFromActiveToBusy(buffer);
-        static_cast<GrDawnStagingBuffer*>(buffer)->mapAsync();
+void GrDawnGpu::moveStagingBuffersToBusyAndMapAsync() {
+    for (size_t i = 0; i < fSubmittedStagingBuffers.size(); ++i) {
+        GrDawnBuffer* buffer = static_cast<GrDawnBuffer*>(fSubmittedStagingBuffers[i].get());
+        buffer->mapWriteAsync();
+        fBusyStagingBuffers.push_back(std::move(fSubmittedStagingBuffers[i]));
     }
+    fSubmittedStagingBuffers.clear();
 }
diff --git a/src/gpu/dawn/GrDawnGpu.h b/src/gpu/dawn/GrDawnGpu.h
index 2be5f284..3805561 100644
--- a/src/gpu/dawn/GrDawnGpu.h
+++ b/src/gpu/dawn/GrDawnGpu.h
@@ -14,6 +14,7 @@
 #include "src/core/SkLRUCache.h"
 #include "src/gpu/GrFinishCallbacks.h"
 #include "src/gpu/GrProgramDesc.h"
+#include "src/gpu/GrStagingBufferManager.h"
 #include "src/gpu/dawn/GrDawnRingBuffer.h"
 
 #include <unordered_map>
@@ -36,6 +37,9 @@
 
     void disconnect(DisconnectType) override;
 
+    GrStagingBufferManager* stagingBufferManager() override { return &fStagingBufferManager; }
+    void takeOwnershipOfStagingBuffer(sk_sp<GrGpuBuffer>) override;
+
     const wgpu::Device& device() const { return fDevice; }
     const wgpu::Queue&  queue() const { return fQueue; }
 
@@ -53,7 +57,6 @@
 
     void testingOnly_flushGpuAndSync() override;
 #endif
-    std::unique_ptr<GrStagingBuffer> createStagingBuffer(size_t size) override;
 
     GrStencilAttachment* createStencilAttachmentForRenderTarget(const GrRenderTarget*,
                                                                 int width,
@@ -97,6 +100,8 @@
     void flushCopyEncoder();
     void appendCommandBuffer(wgpu::CommandBuffer commandBuffer);
 
+    void waitOnAllBusyStagingBuffers();
+
 private:
     GrDawnGpu(GrDirectContext*, const GrContextOptions&, const wgpu::Device&);
 
@@ -187,7 +192,8 @@
 
     bool onSubmitToGpu(bool syncCpu) override;
 
-    void mapStagingBuffers();
+    void moveStagingBuffersToBusyAndMapAsync();
+    void checkForCompletedStagingBuffers();
 
     wgpu::Device                                    fDevice;
     wgpu::Queue                                     fQueue;
@@ -196,6 +202,12 @@
     GrDawnRingBuffer                                fUniformRingBuffer;
     wgpu::CommandEncoder                            fCopyEncoder;
     std::vector<wgpu::CommandBuffer>                fCommandBuffers;
+    GrStagingBufferManager                          fStagingBufferManager;
+    std::list<sk_sp<GrGpuBuffer>>                   fBusyStagingBuffers;
+    // Temporary array of staging buffers to hold refs on the staging buffers between detaching
+    // from the GrStagingBufferManager and moving them to the busy list which must happen after
+    // submission.
+    std::vector<sk_sp<GrGpuBuffer>>                 fSubmittedStagingBuffers;
 
     struct ProgramDescHash {
         uint32_t operator()(const GrProgramDesc& desc) const {
diff --git a/src/gpu/dawn/GrDawnRingBuffer.cpp b/src/gpu/dawn/GrDawnRingBuffer.cpp
index 2e7cb83..72edf85 100644
--- a/src/gpu/dawn/GrDawnRingBuffer.cpp
+++ b/src/gpu/dawn/GrDawnRingBuffer.cpp
@@ -8,11 +8,10 @@
 #include "src/gpu/dawn/GrDawnRingBuffer.h"
 
 #include "src/gpu/dawn/GrDawnGpu.h"
-#include "src/gpu/dawn/GrDawnStagingBuffer.h"
 #include "src/gpu/dawn/GrDawnUtil.h"
 
 namespace {
-    const int kDefaultSize = 32 * 1024;
+    const int kDefaultSize = 64 * 1024;
 }
 
 GrDawnRingBuffer::GrDawnRingBuffer(GrDawnGpu* gpu, wgpu::BufferUsage usage)
@@ -31,12 +30,13 @@
         fOffset = 0;
     }
 
-    GrStagingBuffer::Slice staging = fGpu->allocateStagingBufferSlice(size);
+    GrStagingBufferManager::Slice staging =
+            fGpu->stagingBufferManager()->allocateStagingBufferSlice(size);
     size_t offset = fOffset;
     fOffset += size;
     fOffset = GrDawnRoundRowBytes(fOffset);
-    wgpu::Buffer srcBuffer = static_cast<GrDawnStagingBuffer*>(staging.fBuffer)->buffer();
-    fGpu->getCopyEncoder().CopyBufferToBuffer(srcBuffer, staging.fOffset,
+    GrDawnBuffer* srcBuffer = static_cast<GrDawnBuffer*>(staging.fBuffer);
+    fGpu->getCopyEncoder().CopyBufferToBuffer(srcBuffer->get(), staging.fOffset,
                                               fBuffer, offset, size);
-    return Slice(fBuffer, offset, staging.fData);
+    return Slice(fBuffer, offset, staging.fOffsetMapPtr);
 }
diff --git a/src/gpu/dawn/GrDawnStagingBuffer.cpp b/src/gpu/dawn/GrDawnStagingBuffer.cpp
deleted file mode 100644
index 25b0844..0000000
--- a/src/gpu/dawn/GrDawnStagingBuffer.cpp
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright 2020 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#include "src/gpu/dawn/GrDawnGpu.h"
-#include "src/gpu/dawn/GrDawnStagingBuffer.h"
-
-#include "src/core/SkMathPriv.h"
-
-static void callback(WGPUBufferMapAsyncStatus status, void* data, uint64_t dataLength,
-                     void* userData) {
-    GrDawnStagingBuffer* buffer = static_cast<GrDawnStagingBuffer*>(userData);
-    buffer->markAvailable(data);
-}
-
-GrDawnGpu* GrDawnStagingBuffer::getDawnGpu() const {
-    return static_cast<GrDawnGpu*>(this->getGpu());
-}
-
-void GrDawnStagingBuffer::mapAsync() {
-    fBuffer.MapWriteAsync(callback, this);
-}
-
-void GrDawnStagingBuffer::onUnmap() {
-    fBuffer.Unmap();
-}
diff --git a/src/gpu/dawn/GrDawnStagingBuffer.h b/src/gpu/dawn/GrDawnStagingBuffer.h
deleted file mode 100644
index 82f53b1..0000000
--- a/src/gpu/dawn/GrDawnStagingBuffer.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright 2020 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#ifndef GrDawnStagingBuffer_DEFINED
-#define GrDawnStagingBuffer_DEFINED
-
-#include "dawn/webgpu_cpp.h"
-#include "src/gpu/GrStagingBuffer.h"
-
-class GrDawnStagingBuffer : public GrStagingBuffer {
-public:
-    GrDawnStagingBuffer(GrGpu* gpu, wgpu::Buffer buffer, size_t size, void* data)
-        : INHERITED(gpu, size, data), fBuffer(buffer) {}
-    ~GrDawnStagingBuffer() override {}
-    void           mapAsync();
-    wgpu::Buffer   buffer() const { return fBuffer; }
-    GrDawnGpu*     getDawnGpu() const;
-
-private:
-    void           onUnmap() override;
-
-    wgpu::Buffer   fBuffer;
-    typedef GrStagingBuffer INHERITED;
-};
-
-#endif
diff --git a/src/gpu/dawn/GrDawnTexture.cpp b/src/gpu/dawn/GrDawnTexture.cpp
index 1c74eea..53f0655 100644
--- a/src/gpu/dawn/GrDawnTexture.cpp
+++ b/src/gpu/dawn/GrDawnTexture.cpp
@@ -9,7 +9,6 @@
 
 #include "src/core/SkConvertPixels.h"
 #include "src/gpu/dawn/GrDawnGpu.h"
-#include "src/gpu/dawn/GrDawnStagingBuffer.h"
 #include "src/gpu/dawn/GrDawnTextureRenderTarget.h"
 #include "src/gpu/dawn/GrDawnUtil.h"
 
@@ -152,11 +151,12 @@
         size_t trimRowBytes = width * SkColorTypeBytesPerPixel(colorType);
         size_t dstRowBytes = GrDawnRoundRowBytes(trimRowBytes);
         size_t size = dstRowBytes * height;
-        GrStagingBuffer::Slice slice = getDawnGpu()->allocateStagingBufferSlice(size);
-        SkRectMemcpy(slice.fData, dstRowBytes, src, srcRowBytes, trimRowBytes, height);
+        GrStagingBufferManager::Slice slice =
+                this->getDawnGpu()->stagingBufferManager()->allocateStagingBufferSlice(size);
+        SkRectMemcpy(slice.fOffsetMapPtr, dstRowBytes, src, srcRowBytes, trimRowBytes, height);
 
         wgpu::BufferCopyView srcBuffer;
-        srcBuffer.buffer = static_cast<GrDawnStagingBuffer*>(slice.fBuffer)->buffer();
+        srcBuffer.buffer = static_cast<GrDawnBuffer*>(slice.fBuffer)->get();
         srcBuffer.bytesPerRow = 0; // TODO: remove this once the deprecated fields have been removed.
         srcBuffer.layout.offset = slice.fOffset;
         srcBuffer.layout.bytesPerRow = dstRowBytes;
diff --git a/src/gpu/vk/GrVkCommandBuffer.cpp b/src/gpu/vk/GrVkCommandBuffer.cpp
index c1dfe13..eba29cf 100644
--- a/src/gpu/vk/GrVkCommandBuffer.cpp
+++ b/src/gpu/vk/GrVkCommandBuffer.cpp
@@ -44,6 +44,7 @@
     SkASSERT(!fIsActive);
     SkASSERT(!fTrackedResources.count());
     SkASSERT(!fTrackedRecycledResources.count());
+    SkASSERT(!fTrackedGpuBuffers.count());
     SkASSERT(cmdPool != VK_NULL_HANDLE);
     SkASSERT(!this->isWrapped());
 
@@ -76,6 +77,8 @@
         fTrackedRecycledResources.rewind();
     }
 
+    fTrackedGpuBuffers.reset();
+
     this->invalidateState();
 
     this->onReleaseResources();
@@ -377,41 +380,6 @@
     fHasWork = true;
 }
 
-#ifdef SK_DEBUG
-bool GrVkCommandBuffer::validateNoSharedImageResources(const GrVkCommandBuffer* other) {
-    auto resourceIsInCommandBuffer = [this](const GrManagedResource* resource) {
-        if (!resource->asVkImageResource()) {
-            return false;
-        }
-
-        for (int i = 0; i < fTrackedResources.count(); ++i) {
-            if (resource == fTrackedResources[i]->asVkImageResource()) {
-                return true;
-            }
-        }
-        for (int i = 0; i < fTrackedRecycledResources.count(); ++i) {
-            if (resource == fTrackedRecycledResources[i]->asVkImageResource()) {
-                return true;
-            }
-        }
-        return false;
-    };
-
-    for (int i = 0; i < other->fTrackedResources.count(); ++i) {
-        if (resourceIsInCommandBuffer(other->fTrackedResources[i])) {
-            return false;
-        }
-    }
-
-    for (int i = 0; i < other->fTrackedRecycledResources.count(); ++i) {
-        if (resourceIsInCommandBuffer(other->fTrackedRecycledResources[i])) {
-            return false;
-        }
-    }
-    return true;
-}
-#endif
-
 ///////////////////////////////////////////////////////////////////////////////
 // PrimaryCommandBuffer
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/vk/GrVkCommandBuffer.h b/src/gpu/vk/GrVkCommandBuffer.h
index 309980c..19f5276 100644
--- a/src/gpu/vk/GrVkCommandBuffer.h
+++ b/src/gpu/vk/GrVkCommandBuffer.h
@@ -122,16 +122,16 @@
         fTrackedRecycledResources.append(1, &resource);
     }
 
+    void addGpuBuffer(sk_sp<GrGpuBuffer> buffer) {
+        fTrackedGpuBuffers.push_back(std::move(buffer));
+    }
+
     void releaseResources();
 
     void freeGPUData(const GrGpu* gpu, VkCommandPool pool) const;
 
     bool hasWork() const { return fHasWork; }
 
-#ifdef SK_DEBUG
-    bool validateNoSharedImageResources(const GrVkCommandBuffer* other);
-#endif
-
 protected:
     GrVkCommandBuffer(VkCommandBuffer cmdBuffer, bool isWrapped = false)
             : fCmdBuffer(cmdBuffer)
@@ -147,8 +147,9 @@
 
     void submitPipelineBarriers(const GrVkGpu* gpu);
 
-    SkTDArray<const GrManagedResource*>   fTrackedResources;
-    SkTDArray<const GrRecycledResource*>  fTrackedRecycledResources;
+    SkTDArray<const GrManagedResource*>  fTrackedResources;
+    SkTDArray<const GrRecycledResource*> fTrackedRecycledResources;
+    SkSTArray<16, sk_sp<GrGpuBuffer>>    fTrackedGpuBuffers;
 
     // Tracks whether we are in the middle of a command buffer begin/end calls and thus can add
     // new commands to the buffer;
diff --git a/src/gpu/vk/GrVkGpu.cpp b/src/gpu/vk/GrVkGpu.cpp
index be75458..0b1513e 100644
--- a/src/gpu/vk/GrVkGpu.cpp
+++ b/src/gpu/vk/GrVkGpu.cpp
@@ -178,6 +178,7 @@
         , fQueue(backendContext.fQueue)
         , fQueueIndex(backendContext.fGraphicsQueueIndex)
         , fResourceProvider(this)
+        , fStagingBufferManager(this)
         , fDisconnected(false)
         , fProtectedContext(backendContext.fProtectedContext) {
     SkASSERT(!backendContext.fOwnsInstanceAndDevice);
@@ -280,6 +281,8 @@
     }
     fSemaphoresToSignal.reset();
 
+    fStagingBufferManager.reset();
+
     // must call this just before we destroy the command pool and VkDevice
     fResourceProvider.destroyResources(VK_ERROR_DEVICE_LOST == res);
 }
@@ -2039,6 +2042,10 @@
     fResourceProvider.addFinishedProcToActiveCommandBuffers(std::move(finishedCallback));
 }
 
+void GrVkGpu::takeOwnershipOfStagingBuffer(sk_sp<GrGpuBuffer> buffer) {
+    this->currentCommandBuffer()->addGpuBuffer(std::move(buffer));
+}
+
 bool GrVkGpu::onSubmitToGpu(bool syncCpu) {
     if (syncCpu) {
         return this->submitCommandBuffer(kForce_SyncQueue);
diff --git a/src/gpu/vk/GrVkGpu.h b/src/gpu/vk/GrVkGpu.h
index 32a244d..9d4fd6bf 100644
--- a/src/gpu/vk/GrVkGpu.h
+++ b/src/gpu/vk/GrVkGpu.h
@@ -11,6 +11,7 @@
 #include "include/gpu/vk/GrVkBackendContext.h"
 #include "include/gpu/vk/GrVkTypes.h"
 #include "src/gpu/GrGpu.h"
+#include "src/gpu/GrStagingBufferManager.h"
 #include "src/gpu/vk/GrVkCaps.h"
 #include "src/gpu/vk/GrVkMemory.h"
 #include "src/gpu/vk/GrVkMeshBuffer.h"
@@ -48,6 +49,9 @@
     const GrVkInterface* vkInterface() const { return fInterface.get(); }
     const GrVkCaps& vkCaps() const { return *fVkCaps; }
 
+    GrStagingBufferManager* stagingBufferManager() override { return &fStagingBufferManager; }
+    void takeOwnershipOfStagingBuffer(sk_sp<GrGpuBuffer>) override;
+
     bool isDeviceLost() const override { return fDeviceIsLost; }
     void setDeviceLost() { fDeviceIsLost = true; }
 
@@ -328,6 +332,7 @@
 
     // Created by GrVkGpu
     GrVkResourceProvider                                  fResourceProvider;
+    GrStagingBufferManager                                fStagingBufferManager;
 
     GrVkCommandPool*                                      fMainCmdPool;
     // just a raw pointer; object's lifespan is managed by fCmdPool
diff --git a/tests/Test.h b/tests/Test.h
index 270099e..117b015 100644
--- a/tests/Test.h
+++ b/tests/Test.h
@@ -118,6 +118,7 @@
 extern bool IsGLContextType(GrContextFactoryContextType);
 extern bool IsVulkanContextType(GrContextFactoryContextType);
 extern bool IsMetalContextType(GrContextFactoryContextType);
+extern bool IsDawnContextType(GrContextFactoryContextType);
 extern bool IsDirect3DContextType(GrContextFactoryContextType);
 extern bool IsRenderingGLContextType(GrContextFactoryContextType);
 extern bool IsRenderingGLOrMetalContextType(GrContextFactoryContextType);
@@ -218,6 +219,9 @@
 #define DEF_GPUTEST_FOR_D3D_CONTEXT(name, reporter, context_info)                           \
     DEF_GPUTEST_FOR_CONTEXTS(name, &skiatest::IsDirect3DContextType,                        \
                              reporter, context_info, nullptr)
+#define DEF_GPUTEST_FOR_DAWN_CONTEXT(name, reporter, context_info)                          \
+    DEF_GPUTEST_FOR_CONTEXTS(name, &skiatest::IsDawnContextType,                            \
+                             reporter, context_info, nullptr)
 
 #define REQUIRE_PDF_DOCUMENT(TEST_NAME, REPORTER)                          \
     do {                                                                   \
diff --git a/tests/TextureProxyTest.cpp b/tests/TextureProxyTest.cpp
index 543b70d..7ca9446 100644
--- a/tests/TextureProxyTest.cpp
+++ b/tests/TextureProxyTest.cpp
@@ -21,6 +21,10 @@
 #include "include/core/SkImage.h"
 #include "src/gpu/SkGr.h"
 
+#ifdef SK_DAWN
+#include "src/gpu/dawn/GrDawnGpu.h"
+#endif
+
 int GrProxyProvider::numUniqueKeyProxies_TestOnly() const {
     return fUniquelyKeyedProxies.count();
 }
@@ -226,9 +230,16 @@
         REPORTER_ASSERT(reporter, 0 == cache->getResourceCount());
     }
 
+    // Some of our backends use buffers to do uploads that will live in our resource cache. So we
+    // need to account for those extra resources here.
+    int bufferResources = 0;
+    if (context->backend() == GrBackendApi::kDawn) {
+        bufferResources = 1;
+    }
+
     sk_sp<SkImage> textureImg = rasterImg->makeTextureImage(context);
     REPORTER_ASSERT(reporter, 0 == proxyProvider->numUniqueKeyProxies_TestOnly());
-    REPORTER_ASSERT(reporter, 1 == cache->getResourceCount());
+    REPORTER_ASSERT(reporter, 1 + bufferResources == cache->getResourceCount());
 
     rasterImg = nullptr;        // this invalidates the uniqueKey
 
@@ -237,9 +248,24 @@
     context->setResourceCacheLimit(maxBytes-1);
 
     REPORTER_ASSERT(reporter, 0 == proxyProvider->numUniqueKeyProxies_TestOnly());
-    REPORTER_ASSERT(reporter, 1 == cache->getResourceCount());
+    REPORTER_ASSERT(reporter, 1 + bufferResources == cache->getResourceCount());
 
     textureImg = nullptr;
+
+    // For backends that use buffers to upload lets make sure that work has been submit and done
+    // before we try to purge all resources.
+    context->submit(true);
+
+#ifdef SK_DAWN
+    // The forced cpu sync in dawn doesn't actually mean the async map will finish thus we may
+    // still have a ref on the GrGpuBuffer and it will not get purged by the call below. We dig
+    // deep into the dawn gpu to make sure we wait for the async map to finish.
+    if (context->backend() == GrBackendApi::kDawn) {
+        GrDawnGpu* gpu = static_cast<GrDawnGpu*>(context->priv().getGpu());
+        gpu->waitOnAllBusyStagingBuffers();
+    }
+#endif
+
     context->priv().testingOnly_purgeAllUnlockedResources();
 
     REPORTER_ASSERT(reporter, 0 == proxyProvider->numUniqueKeyProxies_TestOnly());