Metal: use staging buffer for updating non-dynamic buffers

For non-dynamic (i.e., GPU-resident) buffers, onUpdateData() uses a
staging buffer rather than map() and unmap(). In addition, map() and
unmap() are now no-ops for non-dynamic buffers.
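
Roughly, the new non-dynamic upload path looks like the sketch below
(condensed from GrMtlBuffer::onUpdateData in this change; error checks
and the GR_NORETAIN annotation are elided):

    // Stage the data in CPU-visible memory, then blit it into the
    // GPU-resident destination buffer. kMinAlignment is 4 on Mac
    // (copies require 4-byte alignment there) and 1 elsewhere.
    GrStagingBufferManager::Slice slice =
            this->mtlGpu()->stagingBufferManager()->allocateStagingBufferSlice(
                    sizeInBytes, kMinAlignment);
    memcpy(slice.fOffsetMapPtr, src, sizeInBytes);

    id<MTLBlitCommandEncoder> blitCmdEncoder =
            this->mtlGpu()->commandBuffer()->getBlitCommandEncoder();
    id<MTLBuffer> transferBuffer =
            static_cast<GrMtlBuffer*>(slice.fBuffer)->mtlBuffer();
    [blitCmdEncoder copyFromBuffer: transferBuffer
                      sourceOffset: slice.fOffset
                          toBuffer: fMtlBuffer
                 destinationOffset: 0
                              size: sizeInBytes];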

Also removes the fOffset member, as it was never anything but 0, and
the fMappedBuffer member, whose static-buffer upload role is taken over
by the staging buffer.

Change-Id: I8bcd36c10c14d21395693686696462cca79a0e70
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/433678
Reviewed-by: Greg Daniel <egdaniel@google.com>
Commit-Queue: Jim Van Verth <jvanverth@google.com>
diff --git a/src/gpu/mtl/GrMtlBuffer.h b/src/gpu/mtl/GrMtlBuffer.h
index 5d89909..5716779 100644
--- a/src/gpu/mtl/GrMtlBuffer.h
+++ b/src/gpu/mtl/GrMtlBuffer.h
@@ -24,7 +24,6 @@
     ~GrMtlBuffer() override;
 
     id<MTLBuffer> mtlBuffer() const { return fMtlBuffer; }
-    size_t offset() const { return fOffset; }
 
 protected:
     GrMtlBuffer(GrMtlGpu*, size_t size, GrGpuBufferType intendedType, GrAccessPattern);
@@ -48,8 +47,6 @@
 
     bool fIsDynamic;
     id<MTLBuffer> fMtlBuffer;
-    size_t        fOffset;       // offset into shared buffer for dynamic buffers
-    id<MTLBuffer> fMappedBuffer; // buffer used by static buffers for uploads
 
     using INHERITED = GrGpuBuffer;
 };
diff --git a/src/gpu/mtl/GrMtlBuffer.mm b/src/gpu/mtl/GrMtlBuffer.mm
index d942235..ea54618 100644
--- a/src/gpu/mtl/GrMtlBuffer.mm
+++ b/src/gpu/mtl/GrMtlBuffer.mm
@@ -7,6 +7,7 @@
 
 #include "include/private/GrTypesPriv.h"
 #include "src/gpu/GrGpuResourcePriv.h"
+#include "src/gpu/GrStagingBufferManager.h"
 #include "src/gpu/mtl/GrMtlBuffer.h"
 #include "src/gpu/mtl/GrMtlCommandBuffer.h"
 #include "src/gpu/mtl/GrMtlGpu.h"
@@ -46,8 +47,7 @@
 GrMtlBuffer::GrMtlBuffer(GrMtlGpu* gpu, size_t size, GrGpuBufferType intendedType,
                          GrAccessPattern accessPattern)
         : INHERITED(gpu, size, intendedType, accessPattern)
-        , fIsDynamic(accessPattern != kStatic_GrAccessPattern)
-        , fOffset(0) {
+        , fIsDynamic(accessPattern != kStatic_GrAccessPattern) {
     NSUInteger options = 0;
     if (@available(macOS 10.11, iOS 9.0, *)) {
         if (fIsDynamic) {
@@ -76,34 +76,54 @@
 }
 
 GrMtlBuffer::~GrMtlBuffer() {
-    SkASSERT(fMtlBuffer == nil);
-    SkASSERT(fMappedBuffer == nil);
-    SkASSERT(fMapPtr == nullptr);
+    SkASSERT(!fMtlBuffer);
+    SkASSERT(!fMapPtr);
 }
 
-bool GrMtlBuffer::onUpdateData(const void* src, size_t srcInBytes) {
-    if (!fIsDynamic) {
-        if (fMtlBuffer == nil) {
-            return false;
-        }
-        if (srcInBytes > fMtlBuffer.length) {
-            return false;
-        }
-    }
-    VALIDATE();
-
-    this->internalMap(srcInBytes);
-    if (fMapPtr == nil) {
+bool GrMtlBuffer::onUpdateData(const void* src, size_t sizeInBytes) {
+    if (this->wasDestroyed()) {
         return false;
     }
-    SkASSERT(fMappedBuffer);
-    if (!fIsDynamic) {
-        SkASSERT(SkAlign4(srcInBytes) == fMappedBuffer.length);
-    }
-    memcpy(fMapPtr, src, srcInBytes);
-    this->internalUnmap(srcInBytes);
 
-    VALIDATE();
+    if (sizeInBytes > this->size()) {
+        return false;
+    }
+
+    if (fIsDynamic) {
+        this->internalMap(sizeInBytes);
+        if (!fMapPtr) {
+            return false;
+        }
+        memcpy(fMapPtr, src, sizeInBytes);
+        this->internalUnmap(sizeInBytes);
+    } else {
+        // Copy the data to the GPU-resident buffer via a staging slice and a blit.
+        GrStagingBufferManager::Slice slice;
+    #ifdef SK_BUILD_FOR_MAC
+        // Mac requires 4-byte alignment for copies
+        // TODO: true for Apple Silicon?
+        static const size_t kMinAlignment = 4;
+    #else
+        static const size_t kMinAlignment = 1;
+    #endif
+        slice = this->mtlGpu()->stagingBufferManager()->allocateStagingBufferSlice(sizeInBytes,
+                                                                                   kMinAlignment);
+        if (!slice.fBuffer) {
+            return false;
+        }
+        memcpy(slice.fOffsetMapPtr, src, sizeInBytes);
+
+        GrMtlCommandBuffer* cmdBuffer = this->mtlGpu()->commandBuffer();
+        id<MTLBlitCommandEncoder> GR_NORETAIN blitCmdEncoder = cmdBuffer->getBlitCommandEncoder();
+        GrMtlBuffer* mtlBuffer = static_cast<GrMtlBuffer*>(slice.fBuffer);
+        id<MTLBuffer> transferBuffer = mtlBuffer->mtlBuffer();
+        [blitCmdEncoder copyFromBuffer: transferBuffer
+                          sourceOffset: slice.fOffset
+                              toBuffer: fMtlBuffer
+                     destinationOffset: 0
+                                  size: sizeInBytes];
+    }
+
     return true;
 }
 
@@ -114,7 +134,6 @@
 
 void GrMtlBuffer::onAbandon() {
     fMtlBuffer = nil;
-    fMappedBuffer = nil;
     fMapPtr = nullptr;
     VALIDATE();
     INHERITED::onAbandon();
@@ -124,7 +143,6 @@
     if (!this->wasDestroyed()) {
         VALIDATE();
         fMtlBuffer = nil;
-        fMappedBuffer = nil;
         fMapPtr = nullptr;
         VALIDATE();
     }
@@ -132,73 +150,38 @@
 }
 
 void GrMtlBuffer::internalMap(size_t sizeInBytes) {
-    if (this->wasDestroyed()) {
-        return;
-    }
-    VALIDATE();
-    SkASSERT(!this->isMapped());
     if (fIsDynamic) {
-        fMappedBuffer = fMtlBuffer;
-        fMapPtr = static_cast<char*>(fMtlBuffer.contents) + fOffset;
-    } else {
-        SkASSERT(fMtlBuffer);
-        SkASSERT(fMappedBuffer == nil);
-        NSUInteger options = 0;
-        if (@available(macOS 10.11, iOS 9.0, *)) {
-            options |= MTLResourceStorageModeShared;
-        }
-#ifdef SK_BUILD_FOR_MAC
-        // Mac requires 4-byte alignment for copies so we pad this out
-        sizeInBytes = SkAlign4(sizeInBytes);
-#endif
-        fMappedBuffer =
-                [this->mtlGpu()->device() newBufferWithLength: sizeInBytes
-                                                      options: options];
-        fMapPtr = fMappedBuffer.contents;
+        VALIDATE();
+        SkASSERT(sizeInBytes <= this->size());
+        SkASSERT(!this->isMapped());
+        fMapPtr = static_cast<char*>(fMtlBuffer.contents);
+        VALIDATE();
     }
-    VALIDATE();
 }
 
 void GrMtlBuffer::internalUnmap(size_t sizeInBytes) {
     SkASSERT(fMtlBuffer);
-    if (this->wasDestroyed()) {
-        return;
-    }
-    VALIDATE();
-    SkASSERT(this->isMapped());
-    if (fMtlBuffer == nil) {
-        fMappedBuffer = nil;
-        fMapPtr = nullptr;
-        return;
-    }
-#ifdef SK_BUILD_FOR_MAC
-    // In both cases the size needs to be 4-byte aligned on Mac
-    sizeInBytes = SkAlign4(sizeInBytes);
-#endif
     if (fIsDynamic) {
+        VALIDATE();
+        SkASSERT(sizeInBytes <= this->size());
+        SkASSERT(this->isMapped());
 #ifdef SK_BUILD_FOR_MAC
-        SkASSERT(0 == (fOffset & 0x3));  // should be 4-byte aligned
-        [fMtlBuffer didModifyRange: NSMakeRange(fOffset, sizeInBytes)];
+        [fMtlBuffer didModifyRange: NSMakeRange(0, sizeInBytes)];
 #endif
-    } else {
-        GrMtlCommandBuffer* cmdBuffer = this->mtlGpu()->commandBuffer();
-        id<MTLBlitCommandEncoder> GR_NORETAIN blitCmdEncoder = cmdBuffer->getBlitCommandEncoder();
-        [blitCmdEncoder copyFromBuffer: fMappedBuffer
-                          sourceOffset: 0
-                              toBuffer: fMtlBuffer
-                     destinationOffset: 0
-                                  size: sizeInBytes];
+        fMapPtr = nullptr;
     }
-    fMappedBuffer = nil;
-    fMapPtr = nullptr;
 }
 
 void GrMtlBuffer::onMap() {
-    this->internalMap(this->size());
+    if (!this->wasDestroyed()) {
+        this->internalMap(this->size());
+    }
 }
 
 void GrMtlBuffer::onUnmap() {
-    this->internalUnmap(this->size());
+    if (!this->wasDestroyed()) {
+        this->internalUnmap(this->size());
+    }
 }
 
 #ifdef SK_DEBUG
@@ -210,8 +193,7 @@
              this->intendedType() == GrGpuBufferType::kXferGpuToCpu ||
              this->intendedType() == GrGpuBufferType::kDrawIndirect ||
              this->intendedType() == GrGpuBufferType::kUniform);
-    SkASSERT(fMappedBuffer == nil || fMtlBuffer == nil ||
-             fMappedBuffer.length <= fMtlBuffer.length);
+    SkASSERT((fMapPtr && fMtlBuffer) || !fMapPtr);
 }
 #endif
 
diff --git a/src/gpu/mtl/GrMtlGpu.h b/src/gpu/mtl/GrMtlGpu.h
index 9a1b5a4..b6d31df 100644
--- a/src/gpu/mtl/GrMtlGpu.h
+++ b/src/gpu/mtl/GrMtlGpu.h
@@ -48,6 +48,8 @@
 
     GrMtlResourceProvider& resourceProvider() { return fResourceProvider; }
 
+    GrStagingBufferManager* stagingBufferManager() override { return &fStagingBufferManager; }
+
     GrMtlCommandBuffer* commandBuffer();
 
     enum SyncQueue {
@@ -124,7 +126,6 @@
 
     void xferBarrier(GrRenderTarget*, GrXferBarrierType) override {}
 
-    GrStagingBufferManager* stagingBufferManager() override { return &fStagingBufferManager; }
     void takeOwnershipOfBuffer(sk_sp<GrGpuBuffer>) override;
 
     GrBackendTexture onCreateBackendTexture(SkISize dimensions,
diff --git a/src/gpu/mtl/GrMtlGpu.mm b/src/gpu/mtl/GrMtlGpu.mm
index 5d22155..fcb44c4 100644
--- a/src/gpu/mtl/GrMtlGpu.mm
+++ b/src/gpu/mtl/GrMtlGpu.mm
@@ -1419,7 +1419,7 @@
     [blitCmdEncoder pushDebugGroup:@"onTransferPixelsTo"];
 #endif
     [blitCmdEncoder copyFromBuffer: mtlBuffer
-                      sourceOffset: offset + grMtlBuffer->offset()
+                      sourceOffset: offset
                  sourceBytesPerRow: rowBytes
                sourceBytesPerImage: rowBytes*rect.height()
                         sourceSize: MTLSizeMake(rect.width(), rect.height(), 1)
@@ -1465,7 +1465,7 @@
                                       rect,
                                       bufferColorType,
                                       grMtlBuffer->mtlBuffer(),
-                                      offset + grMtlBuffer->offset(),
+                                      offset,
                                       transBufferImageBytes,
                                       transBufferRowBytes);
 }
diff --git a/src/gpu/mtl/GrMtlOpsRenderPass.mm b/src/gpu/mtl/GrMtlOpsRenderPass.mm
index 95353ae..ed7aa7d 100644
--- a/src/gpu/mtl/GrMtlOpsRenderPass.mm
+++ b/src/gpu/mtl/GrMtlOpsRenderPass.mm
@@ -353,7 +353,7 @@
                           fCurrentVertexStride * baseVertex, 0);
 
     auto mtlIndexBuffer = static_cast<const GrMtlBuffer*>(fActiveIndexBuffer.get());
-    size_t indexOffset = mtlIndexBuffer->offset() + sizeof(uint16_t) * baseIndex;
+    size_t indexOffset = sizeof(uint16_t) * baseIndex;
     id<MTLBuffer> indexBuffer = mtlIndexBuffer->mtlBuffer();
     fActiveRenderCmdEncoder->drawIndexedPrimitives(fActivePrimitiveType, indexCount,
                                                    MTLIndexTypeUInt16, indexBuffer, indexOffset);
@@ -405,7 +405,7 @@
     this->setVertexBuffer(fActiveRenderCmdEncoder, fActiveVertexBuffer.get(), 0, 0);
 
     auto mtlIndexBuffer = static_cast<const GrMtlBuffer*>(fActiveIndexBuffer.get());
-    size_t indexOffset = mtlIndexBuffer->offset() + sizeof(uint16_t) * baseIndex;
+    size_t indexOffset = sizeof(uint16_t) * baseIndex;
     if (@available(macOS 10.11, iOS 9.0, *)) {
         fActiveRenderCmdEncoder->drawIndexedPrimitives(fActivePrimitiveType, indexCount,
                                                        MTLIndexTypeUInt16,
@@ -473,7 +473,7 @@
 
     auto mtlIndexBuffer = static_cast<const GrMtlBuffer*>(fActiveIndexBuffer.get());
     auto mtlIndirectBuffer = static_cast<const GrMtlBuffer*>(drawIndirectBuffer);
-    size_t indexOffset = mtlIndexBuffer->offset();
+    size_t indexOffset = 0;
 
     const size_t stride = sizeof(GrDrawIndexedIndirectCommand);
     while (drawCount >= 1) {
@@ -512,7 +512,7 @@
     auto mtlBuffer = static_cast<const GrMtlBuffer*>(buffer);
     id<MTLBuffer> mtlVertexBuffer = mtlBuffer->mtlBuffer();
     SkASSERT(mtlVertexBuffer);
-    size_t offset = mtlBuffer->offset() + vertexOffset;
+    size_t offset = vertexOffset;
     encoder->setVertexBuffer(mtlVertexBuffer, offset, index);
 }