/*
 * Copyright 2021 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
#include "src/gpu/graphite/BufferManager.h"

#include "include/gpu/GpuTypes.h"
#include "include/gpu/graphite/Recording.h"
#include "include/private/base/SkAlign.h"
#include "include/private/base/SkAssert.h"
#include "include/private/base/SkMath.h"
#include "include/private/base/SkTo.h"
#include "src/gpu/graphite/Caps.h"
#include "src/gpu/graphite/GlobalCache.h"
#include "src/gpu/graphite/Log.h"
#include "src/gpu/graphite/QueueManager.h"
#include "src/gpu/graphite/RecordingPriv.h"
#include "src/gpu/graphite/Resource.h"
#include "src/gpu/graphite/ResourceProvider.h"
#include "src/gpu/graphite/UploadBufferManager.h"
#include "src/gpu/graphite/task/ClearBuffersTask.h"
#include "src/gpu/graphite/task/CopyTask.h"
#include "src/gpu/graphite/task/Task.h"
#include "src/gpu/graphite/task/TaskList.h"

#include <algorithm>
#include <cstddef>
#include <cstring>
#include <limits>
#include <numeric>
#include <tuple>

namespace skgpu::graphite {

namespace {

// The limit for all data created by the StaticBufferManager. This data remains alive for the
// entire lifetime of the SharedContext, so we want to keep it small and give clients a concrete
// upper bound on our steady-state memory usage.
// FIXME The current usage is 4732 bytes across static vertex and index buffers, but that includes
// multiple copies of tessellation data, and an unoptimized AnalyticRRect mesh. Once those issues
// are addressed, we can tighten this and decide on the transfer buffer sizing as well.
[[maybe_unused]] static constexpr uint32_t kMaxStaticDataSize = 6 << 10;

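// Computes count*stride as a uint32_t, returning 0 if the count, stride, or product cannot fit in
// 32 bits with enough headroom to later be aligned to `alignment` without overflowing. For
// example, validate_count_and_stride(3, 16, 4) returns 48.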
uint32_t validate_count_and_stride(size_t count, size_t stride, uint32_t alignment) {
    // size_t may just be uint32_t, so this ensures we have enough bits to
    // compute the required byte product.
    uint64_t count64 = SkTo<uint64_t>(count);
    uint64_t stride64 = SkTo<uint64_t>(stride);
    uint64_t bytes64 = count64*stride64;
    if (count64 > std::numeric_limits<uint32_t>::max() ||
        stride64 > std::numeric_limits<uint32_t>::max() ||
        bytes64 > std::numeric_limits<uint32_t>::max() - (alignment + 1)) {
        // Return 0 to skip further allocation attempts.
        return 0;
    }
    // Since count64 and stride64 fit into 32 bits, their product won't overflow a 64-bit multiply,
    // and we've confirmed the product fits into 32 bits with headroom to be aligned w/o overflow.
    return SkTo<uint32_t>(bytes64);
}

// Calculates the LCM of `alignMaybePow2` and `alignProbNonPow2`. Neither value needs to be a
// power of 2, but this is optimized for the case where `alignMaybePow2` is a power of 2. It
// assumes the probability of the 2nd alignment value being a power of 2 is low enough that it is
// not worth checking.
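// For example, lcm_alignment(4, 12) == 12 (the trivial case), while lcm_alignment(8, 12) == 24.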
uint32_t lcm_alignment(uint32_t alignMaybePow2, uint32_t alignProbNonPow2) {
    SkASSERT(alignMaybePow2 != 0 && alignProbNonPow2 != 0);
    if (alignMaybePow2 == 1 ||
        alignMaybePow2 == alignProbNonPow2 ||
        (SkIsPow2(alignMaybePow2) &&
         alignProbNonPow2 > alignMaybePow2 &&
         (alignProbNonPow2 & (alignMaybePow2 - 1)) == 0)) {
        // Trivial LCM since alignProbNonPow2 is the same or a larger multiple of alignMaybePow2
        return alignProbNonPow2;
    } else {
        return std::lcm(alignMaybePow2, alignProbNonPow2);
    }
}

// Helpers for creating a BufferState based on type, options, and caps

AccessPattern get_gpu_access_pattern(bool isGpuOnlyAccess, const DrawBufferManager::Options& opts) {
    if (isGpuOnlyAccess) {
#if defined(GPU_TEST_UTILS)
        if (opts.fAllowCopyingGpuOnly) {
            return AccessPattern::kGpuOnlyCopySrc;
        }
#endif
        return AccessPattern::kGpuOnly;
    } else {
        return AccessPattern::kHostVisible;
    }
}

// This returns the minimum required alignment depending on the type of buffer. This is guaranteed
// to be a power of two.
uint32_t minimum_alignment(BufferType type, bool useTransferBuffers, const Caps* caps) {
    uint32_t alignment = 4;
    if (type == BufferType::kUniform) {
        alignment = SkTo<uint32_t>(caps->requiredUniformBufferAlignment());
    } else if (type == BufferType::kStorage || type == BufferType::kVertexStorage ||
               type == BufferType::kIndexStorage || type == BufferType::kIndirect) {
        alignment = SkTo<uint32_t>(caps->requiredStorageBufferAlignment());
    }
    if (useTransferBuffers) {
        // Both alignment and the requiredTransferBufferAlignment must be powers of two, so max
        // provides the correct alignment semantics
        alignment = std::max(alignment, SkTo<uint32_t>(caps->requiredTransferBufferAlignment()));
    }
    return alignment;
}

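// Returns the initial block size used for a new buffer of `type`, rounded up to `minAlignment`
// (unless tests have requested exact buffer sizes).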
uint32_t min_block_size(BufferType type,
                        uint32_t minAlignment,
                        const DrawBufferManager::Options& opts) {
    uint32_t size;
    if (type == BufferType::kIndex || type == BufferType::kIndexStorage) {
        size = opts.fIndexBufferSize;
    } else if (type == BufferType::kVertex || type == BufferType::kVertexStorage) {
        size = opts.fVertexBufferMinSize;
    } else {
        size = opts.fStorageBufferMinSize;
    }
#if defined(GPU_TEST_UTILS)
    if (opts.fUseExactBuffSizes) {
        return size; // No extra alignment
    }
#endif

    return SkAlignTo(size, minAlignment);
}

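// Returns the upper bound that buffer sizes grow towards for `type`; like min_block_size(), the
// result is rounded up to `minAlignment`.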
uint32_t max_block_size(BufferType type,
                        uint32_t minAlignment,
                        const DrawBufferManager::Options& opts) {
#if defined(GPU_TEST_UTILS)
    if (opts.fUseExactBuffSizes) {
        // Clamp to the minimum size
        return min_block_size(type, minAlignment, opts);
    }
#endif

    uint32_t size;
    if (type == BufferType::kIndex || type == BufferType::kIndexStorage) {
        size = opts.fIndexBufferSize;
    } else if (type == BufferType::kVertex || type == BufferType::kVertexStorage) {
        size = opts.fVertexBufferMaxSize;
    } else {
        size = opts.fStorageBufferMaxSize;
    }

    return SkAlignTo(size, minAlignment);
}

} // anonymous namespace

// ------------------------------------------------------------------------------------------------
// BufferSubAllocator

BufferSubAllocator::BufferSubAllocator(DrawBufferManager* owner,
                                       int stateIndex,
                                       sk_sp<Buffer> buffer,
                                       BindBufferInfo transferBuffer,
                                       void* mappedPtr,
                                       uint32_t xtraAlignment)
        : fOwner(owner)
        , fStateIndex(stateIndex)
        , fBuffer(std::move(buffer))
        , fTransferBuffer(transferBuffer)
        , fMappedPtr(mappedPtr) {
    this->resetForNewBinding(xtraAlignment);
}

BufferSubAllocator& BufferSubAllocator::operator=(BufferSubAllocator&& other) {
    if (this == &other) {
        return *this; // no-op moving into itself
    }

    // Reset the destination allocator first since other's contents will overwrite whatever came
    // beforehand and that must go back to the manager.
    this->reset();

    // Copy fields
    fOwner = other.fOwner;
    fStateIndex = other.fStateIndex;
    fTransferBuffer = other.fTransferBuffer;
    fMappedPtr = other.fMappedPtr;
    fAlignment = other.fAlignment;
    fOffset = other.fOffset;

    // Move buffer (leaving other in an invalid state)
    fBuffer = std::move(other.fBuffer);
    SkASSERT(!other);
    return *this;
}

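// Sub-allocates count*stride bytes from the underlying buffer, after verifying that
// max(count, reservedCount)*stride bytes would fit at the aligned offset. Only count*stride bytes
// are actually consumed. Returns an empty BindBufferInfo on size overflow, insufficient space, or
// if there is no underlying buffer.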
BindBufferInfo BufferSubAllocator::reserve(size_t count, size_t stride, size_t reservedCount) {
    // fAlignment starts as the LCM of the binding alignment and the requested extra alignment.
    // It is reset to 1 after the first reservation so that subsequent suballocations are aligned
    // only to `stride` until resetForNewBinding() is called.
    // NOTE: We do not use SkTo<uint32_t> on stride because we don't want to crash if stride would
    // overflow. If it does overflow, align32 will be incorrect, but validate_count_and_stride will
    // still correctly detect stride's overflow so we won't use it.
    const uint32_t align32 = lcm_alignment(fAlignment, (uint32_t) stride);

    reservedCount = std::max(count, reservedCount);
    uint32_t requiredBytes32 = validate_count_and_stride(reservedCount, stride, align32);
    if (!requiredBytes32 || !fBuffer) {
        return {}; // Size overflowed
    }

    const uint32_t bufferSize = SkTo<uint32_t>(fBuffer->size());
    uint32_t offset = SkAlignNonPow2(fOffset, align32);

    if (bufferSize < offset || requiredBytes32 > bufferSize - offset) {
        // Not enough space left
        return {};
    }

    // count*stride is safe since validate_count_and_stride succeeded with reservedCount. For the
    // actual reservation, we only use count*stride bytes.
    requiredBytes32 = SkTo<uint32_t>(count) * SkTo<uint32_t>(stride);
    fOffset = offset + requiredBytes32;
    fAlignment = 1; // Next reservation will only be affected by its stride
    return {fBuffer.get(), offset, requiredBytes32};
}

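// Returns ownership of the underlying buffer (if any) back to the DrawBufferManager, either
// stashing it as the state's fAvailableBuffer for later reuse or queueing it in fUsedBuffers.
// Scratch buffers are also marked available again for reuse within the manager's scope.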
void BufferSubAllocator::reset() {
    if (fBuffer) {
        SkASSERT(fOwner);

        DrawBufferManager::BufferState& state = fOwner->fCurrentBuffers[fStateIndex];
        if (fBuffer->shareable() == Shareable::kScratch) {
            // TODO: Merge this reuse of scratch resources with the ScratchResourceManager, but
            // currently this is resolved outside of Task::prepareResources().

            // The scratch buffer's availability for reuse (scoped to the owning DrawBufferManager)
            // was tied to this BufferSubAllocator, so when that is reset, we just remove the buffer
            // from the set of unavailable buffers.
            SkASSERT((fOwner->fMappingFailed && state.fUnavailableScratchBuffers.empty()) ||
                     state.fUnavailableScratchBuffers.contains(fBuffer.get()));
            if (!fOwner->fMappingFailed) {
                state.fUnavailableScratchBuffers.remove(fBuffer.get());
            }

            SkASSERT(!fTransferBuffer); // Scratch buffers shouldn't be using transfer buffers
            fOwner->fUsedBuffers.emplace_back(std::move(fBuffer), BindBufferInfo{});
        } else if (state.fAvailableBuffer.fBuffer.get() == fBuffer.get() || // can't stash itself
                   this->remainingBytes() < state.fAvailableBuffer.remainingBytes() || // too small
                   this->remainingBytes() < state.fMinAlignment) { // basically empty
            // Transfer ownership of the buffer (and any transfer buffer) back to the manager, using
            // the current offset as a more restricted limit for copying.
            if (fTransferBuffer) {
                // This alignment ensures we are copying a subset that still respects xfer alignment
                fTransferBuffer.fSize = SkAlignTo(fOffset, state.fMinAlignment);
            }
            fOwner->fUsedBuffers.emplace_back(std::move(fBuffer), fTransferBuffer);
        } else {
            // Save this buffer for later. The move assignment leaves this instance empty and
            // resets the prior value of fAvailableBuffer, which (since it cannot stash itself)
            // takes the branch above and is pushed to fUsedBuffers.
            state.fAvailableBuffer = std::move(*this);
        }

        SkASSERT(!fBuffer);
    } // else nothing to reset
}

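// Restores the extra alignment constraint so that the next reserve() call starts a new binding
// aligned to the LCM of the state's minimum alignment and `alignment`.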
void BufferSubAllocator::resetForNewBinding(size_t alignment) {
    if (fOwner) {
        const uint32_t minAlignment = fOwner->fCurrentBuffers[fStateIndex].fMinAlignment;
        fAlignment = lcm_alignment(minAlignment, SkTo<uint32_t>(alignment));
    } // else an empty BufferSubAllocator so ignore this, all allocations will fail
}

// ------------------------------------------------------------------------------------------------
// DrawBufferManager::BufferState

DrawBufferManager::BufferState::BufferState(BufferType type,
                                            const char* label,
                                            bool isGpuOnly,
                                            const Options& opts,
                                            const Caps* caps)
        : fType(type)
        // The buffer can be GPU-only if
        // a) the caller does not intend to ever upload CPU data to the buffer; or
        // b) CPU data will get uploaded to fBuffer only via a transfer buffer
        , fAccessPattern(get_gpu_access_pattern(isGpuOnly || !caps->drawBufferCanBeMapped(), opts))
        , fUseTransferBuffer(!isGpuOnly && !caps->drawBufferCanBeMapped())
        , fLabel(label)
        , fMinAlignment(minimum_alignment(type, fUseTransferBuffer, caps))
        , fMinBlockSize(min_block_size(type, fMinAlignment, opts))
        , fMaxBlockSize(max_block_size(type, fMinAlignment, opts)) {
    SkASSERT(SkIsPow2(fMinAlignment));
    SkASSERT(fMinBlockSize <= fMaxBlockSize);
}

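// Scratch buffers found here are added to fUnavailableScratchBuffers and removed again when their
// owning BufferSubAllocator is reset; non-shareable buffers always come directly from the
// ResourceProvider.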
sk_sp<Buffer> DrawBufferManager::BufferState::findOrCreateBuffer(ResourceProvider* provider,
                                                                 Shareable shareable,
                                                                 uint32_t byteCount) {
    if (shareable == Shareable::kScratch) {
        sk_sp<Buffer> scratchBuffer = provider->findOrCreateScratchBuffer(
                byteCount, fType, fAccessPattern, fLabel, fUnavailableScratchBuffers);
        if (scratchBuffer) {
            fUnavailableScratchBuffers.add(scratchBuffer.get());
        }
        return scratchBuffer;
    } else {
        return provider->findOrCreateNonShareableBuffer(byteCount, fType, fAccessPattern, fLabel);
    }
}

// ------------------------------------------------------------------------------------------------
// DrawBufferManager

DrawBufferManager::DrawBufferManager(ResourceProvider* resourceProvider,
                                     const Caps* caps,
                                     UploadBufferManager* uploadManager,
                                     Options dbmOpts)
        : fResourceProvider(resourceProvider)
        , fCaps(caps)
        , fUploadManager(uploadManager)
        , fCurrentBuffers{{
                // Mappable buffers
                {BufferType::kVertex, "VertexBuffer", /*isGpuOnly=*/false, dbmOpts, caps},
                {BufferType::kIndex, "IndexBuffer", /*isGpuOnly=*/false, dbmOpts, caps},
                {BufferType::kUniform, "UniformBuffer", /*isGpuOnly=*/false, dbmOpts, caps},
                {BufferType::kStorage, "StorageBuffer", /*isGpuOnly=*/false, dbmOpts, caps},
                // GPU-only buffers
                {BufferType::kStorage, "GPUOnlyStorageBuffer", /*isGpuOnly=*/true, dbmOpts, caps},
                {BufferType::kVertexStorage, "VertexStorageBuffer", /*isGpuOnly=*/true, dbmOpts, caps},
                {BufferType::kIndexStorage, "IndexStorageBuffer", /*isGpuOnly=*/true, dbmOpts, caps},
                {BufferType::kIndirect, "IndirectStorageBuffer", /*isGpuOnly=*/true, dbmOpts, caps}}} {}

DrawBufferManager::~DrawBufferManager() {
    // Reset these before the manager's other members are destroyed, since reset() writes back
    // into the manager's state.
    for (auto& b : fCurrentBuffers) {
        b.fAvailableBuffer.reset();
    }
}

void DrawBufferManager::onFailedBuffer() {
    fMappingFailed = true;

    // Clean up and unmap everything now
    fClearList.clear();
    for (auto& state : fCurrentBuffers) {
        state.fAvailableBuffer.reset();
        // We aren't allocating anything anymore, so don't maintain this list. Outstanding
        // BufferSubAllocators simply skip updating it when they get reset.
        state.fUnavailableScratchBuffers.reset();
        state.fLastBufferSize = 0;
    }

    for (auto& [buffer, _] : fUsedBuffers) {
        if (buffer->isMapped()) {
            buffer->unmap();
        }
    }
    fUsedBuffers.clear();
}

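// Finalizes all outstanding buffers for this Recording: queues any clear and copy tasks, unmaps
// mapped buffers, and transfers buffer refs to the Recording. Returns false (and clears the error
// flag) if a buffer allocation or mapping failure occurred while recording.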
bool DrawBufferManager::transferToRecording(Recording* recording) {
    if (fMappingFailed) {
        // All state should have been reset by onFailedBuffer() except for this error flag.
        SkASSERT(fUsedBuffers.empty() && fClearList.empty());
#if defined(SK_DEBUG)
        for (const auto& state : fCurrentBuffers) {
            SkASSERT(!SkToBool(state.fAvailableBuffer));
            SkASSERT(state.fUnavailableScratchBuffers.empty());
        }
#endif

        fMappingFailed = false;
        return false;
    }

    for (auto& state : fCurrentBuffers) {
        // Reset all available buffer sub-allocators since they won't be allocatable anymore. This
        // pushes the underlying resource and transfer range to fUsedBuffers.
        state.fAvailableBuffer.reset();
        // BufferSubAllocators should have gone out of scope well before Recorder::snap() is called.
        SkASSERT(state.fUnavailableScratchBuffers.empty());

        // We reset the last buffer size back to 0 to keep the buffer growth behavior the same
        // across calls to snap(). If we knew every snap() would be approximately the same workload,
        // we could choose to keep the last alloc size as-is so that subsequent frames create
        // fewer buffers. We choose *not* to do this because:
        //  - Chrome often snaps Recordings with disparate workloads within a frame (e.g. tile vs
        //    canvas2d) and we don't want to overallocate on a small recording.
        //  - It obfuscates the performance cost of the first frame if we reach a steady state that
        //    requires no additional buffer allocations.
        // We could choose to reduce fLastBufferSize (e.g. halve it) to get a head start and reduce
        // the potential for over-allocation, but in performance measurements on buffer-heavy scenes
        // this did not lead to measurable improvements. Thus, we reset so every frame is the same.
        state.fLastBufferSize = 0;
    }

    if (!fClearList.empty()) {
        recording->priv().taskList()->add(ClearBuffersTask::Make(std::move(fClearList)));
    }

    for (auto& [buffer, transferBuffer] : fUsedBuffers) {
        if (transferBuffer) {
            SkASSERT(buffer);
            SkASSERT(!fCaps->drawBufferCanBeMapped());
            // Since the transfer buffer is managed by the UploadManager, we don't manually unmap
            // it here or need to pass a ref into CopyBufferToBufferTask.
            size_t copySize = buffer->size();
            recording->priv().taskList()->add(
                    CopyBufferToBufferTask::Make(transferBuffer.fBuffer,
                                                 transferBuffer.fOffset,
                                                 std::move(buffer),
                                                 /*dstOffset=*/0,
                                                 copySize));
        } else {
            if (buffer->isMapped()) {
                buffer->unmap();
            }
            recording->priv().addResourceRef(std::move(buffer));
        }
    }

    fUsedBuffers.clear();

    return true;
}

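// Returns a BufferSubAllocator whose buffer can hold at least count*stride bytes aligned to
// xtraAlignment, reusing the state's stashed buffer when possible for non-shareable requests and
// otherwise creating a new buffer whose size grows geometrically up to the state's max block size.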
BufferSubAllocator DrawBufferManager::getBuffer(
        int stateIndex,
        size_t count,
        size_t stride,
        size_t xtraAlignment,
        ClearBuffer cleared,
        Shareable shareable) {
    BufferState& state = fCurrentBuffers[stateIndex];
    // The size for a buffer is aligned to the minimum block size for better resource reuse, which
    // is more conservative than fMinAlignment.
    uint32_t requiredBytes32 = validate_count_and_stride(count, stride, state.fMinBlockSize);
    if (fMappingFailed || !requiredBytes32) {
        return {};
    }

    const bool supportCpuUpload = state.fAccessPattern == AccessPattern::kHostVisible ||
                                  state.fUseTransferBuffer;
    // Shareable buffers must be GPU-only to actually share effectively.
    SkASSERT(shareable == Shareable::kNo || !supportCpuUpload);

    // For non-shareable buffers, we keep the largest relinquished non-shareable buffer in case it
    // has room leftover to be used by future allocations. Scratch buffer ownership is entirely
    // managed by the caller, so always create a new BufferSubAllocator.
    if (shareable == Shareable::kNo) {
        state.fAvailableBuffer.resetForNewBinding(xtraAlignment);
        BindBufferInfo nextBinding = state.fAvailableBuffer.reserve(count, stride, count);
        if (nextBinding) {
            // The available buffer has enough room so reuse it. Subtracting the size of the binding
            // ensures the caller's next request for count*stride bytes succeeds, and fOffset will
            // be aligned to xtraAlignment.
            state.fAvailableBuffer.fOffset -= nextBinding.fSize;
            SkASSERT(state.fAvailableBuffer.fOffset % xtraAlignment == 0);
            SkASSERT(state.fAvailableBuffer.fBuffer);
            SkASSERT(state.fAvailableBuffer.fBuffer->shareable() == shareable);
            SkASSERT(SkToBool(state.fAvailableBuffer.fMappedPtr) == supportCpuUpload);
            return std::move(state.fAvailableBuffer);
        }

        // Not enough room in the available buffer so release it and create a new buffer.
        state.fAvailableBuffer.reset();
    }

    // Create the next buffer by doubling the size of the previous buffer, clamped to the min and
    // max block sizes, when `requiredBytes` is less than the max. Otherwise, create a buffer large
    // enough to satisfy `requiredBytes`, aligned to the min block size.
    uint32_t bufferSize = SkAlignTo(requiredBytes32, state.fMinBlockSize);
    if (bufferSize < state.fMaxBlockSize) {
        // fMaxBlockSize should be sufficiently small that there's no risk of overflowing here.
        SkASSERT(std::numeric_limits<uint32_t>::max() / 2 > state.fLastBufferSize);
        bufferSize = std::max(bufferSize, std::min(state.fLastBufferSize * 2, state.fMaxBlockSize));
        state.fLastBufferSize = bufferSize;
        SkASSERT(bufferSize <= state.fMaxBlockSize);
    } else {
        // Jump to the max block size for subsequent amortized allocations if we get a really big
        // buffer request.
        state.fLastBufferSize = state.fMaxBlockSize;
    }
    SkASSERT(bufferSize >= requiredBytes32 && bufferSize >= state.fMinBlockSize);

    sk_sp<Buffer> buffer = state.findOrCreateBuffer(fResourceProvider, shareable, bufferSize);
    if (!buffer) {
        this->onFailedBuffer();
        return {};
    }

    BindBufferInfo transferBuffer;
    void* mappedPtr = nullptr;
    if (supportCpuUpload) {
        if (state.fUseTransferBuffer) {
            std::tie(mappedPtr, transferBuffer) = fUploadManager->makeBindInfo(buffer->size(),
                    fCaps->requiredTransferBufferAlignment(), "TransferForDataBuffer");
        } else {
            mappedPtr = buffer->map();
        }

        if (!mappedPtr) {
            this->onFailedBuffer(); // Either transfer buffer failed or direct mapping failed
            return {};
        }
    }

    if (cleared == ClearBuffer::kYes) {
        fClearList.push_back(BindBufferInfo{buffer.get(), 0, bufferSize});
    }

    // The returned buffer is not set to fAvailableBuffer because it is going to be passed up to
    // the caller for their use first.
    return BufferSubAllocator(this, stateIndex, std::move(buffer),
                              transferBuffer, mappedPtr, xtraAlignment);
}

// ------------------------------------------------------------------------------------------------
// StaticBufferManager

StaticBufferManager::StaticBufferManager(ResourceProvider* resourceProvider,
                                         const Caps* caps)
        : fResourceProvider(resourceProvider)
        , fUploadManager(resourceProvider, caps)
        , fRequiredTransferAlignment(SkTo<uint32_t>(caps->requiredTransferBufferAlignment()))
        , fVertexBufferState(BufferType::kVertex, caps)
        , fIndexBufferState(BufferType::kIndex, caps) {}
StaticBufferManager::~StaticBufferManager() = default;

StaticBufferManager::BufferState::BufferState(BufferType type, const Caps* caps)
        : fBufferType(type)
        , fMinimumAlignment(minimum_alignment(type, /*useTransferBuffers=*/true, caps))
        , fTotalRequiredBytes(0) {}

// ARM hardware (b/399631317) also requires static vertex data to be padded and zeroed out, so we
// always request an offset aligned to four vertices, reserve a 4-aligned amount of space, and
// zero the padding.
VertexWriter StaticBufferManager::getVertexWriter(size_t count,
                                                  size_t stride,
                                                  BindBufferInfo* binding) {
    const size_t size = count * stride;
    const size_t alignedCount = SkAlign4(count);
    void* data = this->prepareStaticData(&fVertexBufferState, size, stride * 4, binding);
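    // Zero the padding between the requested vertex count and the next multiple of 4 so the extra
    // vertices are well-defined (see the ARM workaround above).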
    if (data && alignedCount > count) {
        const uint32_t byteDiff = (alignedCount - count) * stride;
        void* zPtr = SkTAddOffset<void>(data, count * stride);
        memset(zPtr, 0, byteDiff);
    }
    return VertexWriter{data, size};
}

VertexWriter StaticBufferManager::getIndexWriter(size_t size, BindBufferInfo* binding) {
    // The index writer does not have the same alignment requirements as vertex data, so we simply
    // pass in the minimum alignment as the required alignment.
    void* data = this->prepareStaticData(&fIndexBufferState,
                                         size,
                                         fIndexBufferState.fMinimumAlignment,
                                         binding);
    return VertexWriter{data, size};
}

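// Reserves mapped transfer-buffer space for `requiredBytes` and records a CopyRange so that
// createAndUpdateBindings() can later copy the data into the final GPU-only static buffer.
// Returns nullptr on overflow, on a prior failure, or if the transfer buffer could not be created
// or mapped.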
void* StaticBufferManager::prepareStaticData(BufferState* state,
                                             size_t requiredBytes,
                                             size_t requiredAlignment,
                                             BindBufferInfo* target) {
    // Zero out the target binding in the event of any failure in actually transferring data later.
    // Unlike in BufferSubAllocator::reserve(), we do use SkTo<uint32_t> to check
    // `requiredAlignment`. This is not dynamic data and is fully controlled by Graphite, so if it
    // asserts, then there is a bug in the static data for a Renderer that must be fixed.
    const uint32_t align32 = lcm_alignment(state->fMinimumAlignment,
                                           SkTo<uint32_t>(requiredAlignment));

    SkASSERT(target);
    *target = {nullptr, 0};
    uint32_t size32 = validate_count_and_stride(requiredBytes, /*stride=*/1, align32);
    if (!size32 || fMappingFailed) {
        return nullptr;
    }

    // Copy data must be aligned to the transfer alignment, so align the reserved size to the LCM
    // of the minimum alignment (which already accounts for buffer and transfer alignment) and the
    // required alignment.
    size32 = SkAlignNonPow2(size32, align32);
    auto [transferMapPtr, transferBindInfo] =
            fUploadManager.makeBindInfo(size32,
                                        fRequiredTransferAlignment,
                                        "TransferForStaticBuffer");
    if (!transferMapPtr) {
        SKGPU_LOG_E("Failed to create or map transfer buffer that initializes static GPU data.");
        fMappingFailed = true;
        return nullptr;
    }

    state->fData.push_back(
            {transferBindInfo,
             target,
             SkTo<uint32_t>(requiredAlignment),
#if defined(GPU_TEST_UTILS)
             SkTo<uint32_t>(requiredBytes)
#endif
            });

    state->fTotalRequiredBytes = SkAlignNonPow2(state->fTotalRequiredBytes, align32) + size32;

    return transferMapPtr;
}

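// Creates the final GPU-only static buffer for this BufferState and queues one copy task per
// recorded CopyRange, patching each caller's BindBufferInfo to point into the new buffer.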
bool StaticBufferManager::BufferState::createAndUpdateBindings(
        ResourceProvider* resourceProvider,
        Context* context,
        QueueManager* queueManager,
        GlobalCache* globalCache,
        std::string_view label) const {
    if (!fTotalRequiredBytes) {
        return true; // No buffer needed
    }

    // The static buffer is always copyable when testing.
    constexpr AccessPattern gpuAccessPattern =
#if defined(GPU_TEST_UTILS)
            AccessPattern::kGpuOnlyCopySrc;
#else
            AccessPattern::kGpuOnly;
#endif

    sk_sp<Buffer> staticBuffer = resourceProvider->findOrCreateNonShareableBuffer(
            fTotalRequiredBytes,
            fBufferType,
            gpuAccessPattern,
            std::move(label));
    if (!staticBuffer) {
        SKGPU_LOG_E("Failed to create static buffer for type %d of size %u bytes.\n",
                    (int) fBufferType, fTotalRequiredBytes);
        return false;
    }

    uint32_t offset = 0;
    for (const CopyRange& data : fData) {
        // Each copy range's size should be aligned to the lcm of the required alignment and minimum
        // alignment so we can increment the offset in the static buffer.
        const uint32_t alignment = lcm_alignment(fMinimumAlignment, data.fRequiredAlignment);
        offset = SkAlignNonPow2(offset, alignment);
        SkASSERT(!(offset % fMinimumAlignment) && !(offset % data.fRequiredAlignment));
        uint32_t size = data.fSource.fSize;
        data.fTarget->fBuffer = staticBuffer.get();
        data.fTarget->fOffset = offset;
        data.fTarget->fSize = size;

        auto copyTask = CopyBufferToBufferTask::Make(
                data.fSource.fBuffer, data.fSource.fOffset,
                sk_ref_sp(data.fTarget->fBuffer), data.fTarget->fOffset,
                size);
        // For static buffers, we want them all to be optimized as GPU only buffers. If we are in
        // a protected context, this means the buffers must be non-protected since they will be
        // read in the vertex shader which doesn't allow protected memory access. Thus all the
        // uploads to these buffers must be done as non-protected commands.
        if (!queueManager->addTask(copyTask.get(), context, Protected::kNo)) {
            SKGPU_LOG_E("Failed to copy data to static buffer.\n");
            return false;
        }

        offset += size;
    }

    SkASSERT(offset == fTotalRequiredBytes);
    globalCache->addStaticResource(std::move(staticBuffer));
    return true;
}

StaticBufferManager::FinishResult StaticBufferManager::finalize(Context* context,
                                                                QueueManager* queueManager,
                                                                GlobalCache* globalCache) {
    if (fMappingFailed) {
        return FinishResult::kFailure;
    }

    const size_t totalRequiredBytes = fVertexBufferState.fTotalRequiredBytes +
                                      fIndexBufferState.fTotalRequiredBytes;
    SkASSERT(totalRequiredBytes <= kMaxStaticDataSize);
    if (!totalRequiredBytes) {
        return FinishResult::kNoWork;
    }

    if (!fVertexBufferState.createAndUpdateBindings(fResourceProvider,
                                                    context,
                                                    queueManager,
                                                    globalCache,
                                                    "StaticVertexBuffer")) {
        return FinishResult::kFailure;
    }

#if defined(GPU_TEST_UTILS)
    skia_private::TArray<GlobalCache::StaticVertexCopyRanges> statVertCopy;
    for (const CopyRange& data : fVertexBufferState.fData) {
        statVertCopy.push_back({data.fTarget->fOffset,
                                data.fUnalignedSize,
                                data.fTarget->fSize,
                                data.fRequiredAlignment});
    }
    globalCache->testingOnly_SetStaticVertexInfo(
            statVertCopy,
            fVertexBufferState.fData[0].fTarget->fBuffer);
#endif

    if (!fIndexBufferState.createAndUpdateBindings(fResourceProvider,
                                                   context,
                                                   queueManager,
                                                   globalCache,
                                                   "StaticIndexBuffer")) {
        return FinishResult::kFailure;
    }
    queueManager->addUploadBufferManagerRefs(&fUploadManager);

    // Reset the static buffer manager since the Recording's copy tasks now manage ownership of
    // the transfer buffers and the GlobalCache owns the final static buffers.
    fVertexBufferState.reset();
    fIndexBufferState.reset();

    return FinishResult::kSuccess;
}

} // namespace skgpu::graphite