| /* |
| * Copyright 2021 Google Inc. |
| * |
| * Use of this source code is governed by a BSD-style license that can be |
| * found in the LICENSE file. |
| */ |
| #ifndef skgpu_graphite_BufferManager_DEFINED |
| #define skgpu_graphite_BufferManager_DEFINED |
| |
| #include "include/core/SkRefCnt.h" |
| #include "include/private/base/SkTArray.h" |
| #include "src/core/SkTHash.h" |
| #include "src/gpu/BufferWriter.h" |
| #include "src/gpu/graphite/Buffer.h" |
| #include "src/gpu/graphite/ResourceTypes.h" |
| #include "src/gpu/graphite/UploadBufferManager.h" |
| |
| #include <array> |
| #include <cstddef> |
| #include <cstdint> |
| #include <string_view> |
| #include <utility> |
| |
| namespace skgpu::graphite { |
| |
| class Caps; |
| class Context; |
| class DrawBufferManager; |
| class GlobalCache; |
| class QueueManager; |
| class Recording; |
| class ResourceProvider; |
| |
| /** |
| * BufferSubAllocator provides an entire GPU buffer to the caller so that the caller can |
| * sub-allocate intervals within the buffer. Each buffer type has a minimum required alignment |
| * for binding. This alignment is automatically used for the *first* suballocation from an |
| * allocator instance. Scoping the lifetime of an allocator to the period in which its contents |
| * are bound allows these binding requirements to be met automatically, while a tighter alignment |
| * can be used for additional suballocations that are accessed without requiring a new binding. |
| * |
| * When a BufferSubAllocator goes out of scope, its underlying Buffer is returned to the manager. |
| * By default, any remaining space can be reused by subsequent allocation requests, but bytes that |
| * were already written cannot be overwritten by later BufferSubAllocators. The exception is the |
| * BufferSubAllocator instances returned by DrawBufferManager::getScratchStorage(), whose Buffers |
| * are Shareable::kScratch resources and can be fully reused by other Recorders, or by later |
| * allocators once the BufferSubAllocator goes out of scope. |
| * |
| * Buffers created by the DrawBufferManager for an allocator are automatically transferred to the |
| * Recording and CommandBuffers when snapped or inserted. |
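| * |
| * A minimal usage sketch (hypothetical caller code; `bufMgr`, `Vertex`, and the counts are |
| * assumptions, not part of this API): |
| * |
| *   auto [writer, binding, alloc] = bufMgr->getMappedVertexBuffer(kVertCount, sizeof(Vertex)); |
| *   if (!alloc) { return; } // allocation or mapping failed |
| *   // ... fill exactly kVertCount*sizeof(Vertex) bytes through `writer` ... |
| *   // Additional suballocations, when successful, come from the same bound buffer: |
| *   auto [writer2, binding2] = alloc.getMappedSubrange(kMoreVerts, sizeof(Vertex)); |
| *   // When `alloc` goes out of scope, its remaining space is returned to the manager. |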
| */ |
| class BufferSubAllocator final { |
| public: |
| BufferSubAllocator() = default; |
| |
| // Disallow copy |
| BufferSubAllocator(const BufferSubAllocator&) = delete; |
| BufferSubAllocator& operator=(const BufferSubAllocator&) = delete; |
| |
| // Allow move |
| BufferSubAllocator(BufferSubAllocator&& b) { *this = std::move(b); } |
| BufferSubAllocator& operator=(BufferSubAllocator&&); |
| |
| ~BufferSubAllocator() { this->reset(); } |
| |
| // Returns false if the underlying buffer has been returned to the reuse pool or moved from. |
| bool isValid() const { return SkToBool(fBuffer); } |
| explicit operator bool() const { return this->isValid(); } |
| |
| // Returns the number of remaining bytes in the GPU buffer, assuming an alignment of 1. |
| uint32_t remainingBytes() const { |
| return fBuffer ? SkTo<uint32_t>(fBuffer->size()) - fOffset : 0; |
| } |
| |
| /** |
| * Suballocates `count*stride` bytes and returns a pointer (wrapped in a BufferWriter) to the |
| * mapped range, plus the BindBufferInfo defining that range in a GPU-backed Buffer. The returned |
| * subrange will be aligned according to the following rules: |
| * - The first suballocation, or the first after resetForNewBinding(), will be aligned to the |
| * lowest common multiple of `stride`, the binding's required alignment, and any extra base |
| * alignment set in resetForNewBinding() or when the BufferSubAllocator was created. |
| * - Subsequent suballocations will be aligned to just `stride`. |
| * |
| * It is assumed the caller will write all `count*stride` bytes to the returned address. If |
| * `reservedCount` is greater than `count`, the suballocation will only succeed if the buffer |
| * has room for an aligned `reservedCount*stride` bytes. The returned pointer may still only be |
| * used to write `count*stride` bytes; the remaining `(reservedCount - count)*stride` bytes are |
| * available for future suballocations, which are then guaranteed to fit within the same buffer |
| * (assuming the same or a lower alignment). |
| * |
| * An invalid BufferWriter and an empty BindBufferInfo are returned if the buffer does not have |
| * enough room remaining to fulfill the suballocation. |
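| * |
| * A sketch of the `reservedCount` contract (hypothetical counts and stride): |
| * |
| *   // Reserve room for 100 elements but only write 40 of them now. |
| *   auto [w1, b1] = alloc.getMappedSubrange(/*count=*/40, stride, /*reservedCount=*/100); |
| *   // The remaining 60 elements (at the same stride) are guaranteed to land in the |
| *   // same underlying buffer as b1: |
| *   auto [w2, b2] = alloc.getMappedSubrange(/*count=*/60, stride); |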
| */ |
| std::pair<BufferWriter, BindBufferInfo> getMappedSubrange( |
| size_t count, |
| size_t stride, |
| size_t reservedCount = 0) { |
| SkASSERT(fMappedPtr || !fBuffer); // Callers should have checked the allocator's validity before writing |
| BindBufferInfo binding = this->reserve(count, stride, reservedCount); |
| if (binding) { |
| return {this->getWriter(binding), binding}; |
| } else { |
| return {nullptr, BindBufferInfo{}}; |
| } |
| } |
| |
| // Sub-allocates a slice within the underlying buffer object. This variation should be used when |
| // the returned range will be written by the GPU as part of executing a command buffer. |
| // |
| // Other than returning just a buffer slice (to be written later by a GPU task) instead of a |
| // mapped pointer, the suballocation behaves identically to getMappedSubrange(). |
| BindBufferInfo getSubrange(size_t count, size_t stride, size_t reservedCount = 0) { |
| SkASSERT(!fMappedPtr); // Should not be used when data is intended to be written by CPU |
| return this->reserve(count, stride, reservedCount); |
| } |
| |
| // Returns the underlying buffer object to the pool and invalidates this allocator. |
| // Depending on the GPU buffer's Shareable value, either: |
| // - kNo: The remaining space that hasn't been written to can be used by another allocator, |
| // but it will assume that use will involve a new buffer binding command. |
| // - kScratch: The entire buffer can be overwritten by another allocator. |
| void reset(); |
| |
| // Ensures the next suballocation is aligned for a new buffer binding: it will be aligned to |
| // the LCM of its stride, the buffer type's required binding alignment, and `alignment`. |
| void resetForNewBinding(size_t alignment=1); |
| |
| private: |
| friend class DrawBufferManager; |
| |
| BufferSubAllocator(DrawBufferManager* owner, |
| int stateIndex, |
| sk_sp<Buffer> buffer, |
| BindBufferInfo transferBuffer, // optional (when direct mapping unavailable) |
| void* mappedPtr, // `buffer` or `transferBuffer`'s ptr, or null if GPU-only |
| uint32_t xtraAlignment); |
| |
| BindBufferInfo reserve(size_t count, size_t stride, size_t reservedCount); |
| |
| BindBufferInfo binding(uint32_t offset, uint32_t size) const { |
| return {fBuffer.get(), offset, size}; |
| } |
| |
| BufferWriter getWriter(BindBufferInfo binding) const { |
| // Should only be called for a mapped BufferSubAllocator with a binding that has already |
| // been sub-allocated. |
| SkASSERT(fMappedPtr); |
| SkASSERT(binding.fBuffer == fBuffer.get()); |
| SkASSERT(binding.fOffset + binding.fSize <= fOffset); |
| return BufferWriter(SkTAddOffset<void>(fMappedPtr, binding.fOffset), binding.fSize); |
| } |
| |
| // Non-null when valid and not already returned to the pool |
| DrawBufferManager* fOwner = nullptr; |
| int fStateIndex = 0; |
| |
| sk_sp<Buffer> fBuffer; |
| BindBufferInfo fTransferBuffer; |
| |
| // If mapped for writing, this is the CPU address of offset 0 of the buffer. When a mapped |
| // buffer is returned to the DrawBufferManager, only the bytes after fOffset can be reused. |
| // If there is no mapped buffer pointer, it's assumed the GPU buffer is reusable for another |
| // BufferSubAllocator instance (this default reuse policy can be revisited if needed). |
| void* fMappedPtr = nullptr; |
| uint32_t fAlignment = 1; // Default alignment |
| uint32_t fOffset = 0; // Next suballocation can start at fOffset at the earliest |
| }; |
| |
| /** |
| * DrawBufferManager controls writing to buffer data ranges within larger, cacheable Buffers and |
| * automatically handles either mapping or copying via transfer buffer depending on what the GPU |
| * hardware supports for the requested buffer type and use case. It is intended for repeatedly |
| * uploading dynamic data to the GPU. |
| */ |
| class DrawBufferManager { |
| public: |
| struct Options { |
| Options() = default; |
| |
| uint32_t fVertexBufferMinSize = 16 << 10; // 16 KB |
| uint32_t fVertexBufferMaxSize = 1 << 20; // 1 MB |
| uint32_t fIndexBufferSize = 2 << 10; // 2 KB |
| uint32_t fStorageBufferMinSize = 2 << 10; // 2 KB |
| uint32_t fStorageBufferMaxSize = 1 << 20; // 1 MB |
| |
| #if defined(GPU_TEST_UTILS) |
| bool fUseExactBuffSizes = false; // Disables automatic buffer growth |
| bool fAllowCopyingGpuOnly = false; // Adds kCopySrc to GPU-only buffer usage |
| #endif |
| }; |
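| |
| // A sketch of overriding the defaults (hypothetical values and variable names), e.g. for a |
| // workload with many small vertex uploads: |
| // |
| //   DrawBufferManager::Options opts; |
| //   opts.fVertexBufferMinSize = 4 << 10; // start vertex buffers at 4 KB |
| //   opts.fVertexBufferMaxSize = 512 << 10; // cap automatic growth at 512 KB |
| //   DrawBufferManager dbm(resourceProvider, caps, uploadManager, opts); |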
| |
| DrawBufferManager(ResourceProvider* resourceProvider, const Caps* caps, |
| UploadBufferManager* uploadManager, |
| Options dbmOpts); |
| ~DrawBufferManager(); |
| |
| // Lets callers check whether the manager is already in a failed mapping state, so they can skip |
| // any extra work that would be wasted because the next Recording snap will fail. |
| bool hasMappingFailed() const { return fMappingFailed; } |
| |
| // Return a BufferWriter to write to the count*dataStride bytes of the GPU buffer subrange |
| // represented by the returned BindBufferInfo. The returned BufferSubAllocator represents the |
| // entire GPU buffer that the mapped subrange belongs to; it can be used to get additional |
| // mapped suballocations, which when successful are guaranteed to be in the same buffer. This |
| // allows callers to more easily manage when buffers must be bound. |
| // |
| // The returned {BufferWriter, BindBufferInfo} are effectively the result of an automatic call to |
| // BufferSubAllocator::getMappedSubrange(count, stride, reservedCount). The offset of this first |
| // allocation will be aligned to the LCM of `stride` and the minimum required alignment for the |
| // buffer type. For function variants that take an extra `alignment`, the initial suballocation |
| // will also be aligned to that, as if resetForNewBinding(alignment) had been called first. |
| // Subsequent suballocations from the returned allocator will only be aligned to their requested |
| // stride unless resetForNewBinding() is called. |
| // |
| // When the returned BufferSubAllocator goes out of scope, any remaining bytes that were never |
| // returned from either this function or later calls to getMappedSubrange() can be used to |
| // satisfy a future call to getMapped[X]Buffer. |
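| // |
| // A minimal sketch (hypothetical caller code) of consuming the returned tuple: |
| // |
| //   auto [writer, binding, alloc] = bufMgr->getMappedUniformBuffer(count, stride); |
| //   if (writer) { |
| //       // ... fill exactly count*stride bytes through `writer` ... |
| //       // `binding` identifies the written range for a later uniform buffer binding. |
| //   } |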
| using MappedAllocationInfo = std::tuple<BufferWriter, BindBufferInfo, BufferSubAllocator>; |
| |
| MappedAllocationInfo getMappedVertexBuffer(size_t count, size_t stride, |
| size_t reservedCount=0, size_t alignment=1) { |
| return this->getMappedBuffer(kVertexBufferIndex, count, stride, reservedCount, alignment); |
| } |
| MappedAllocationInfo getMappedIndexBuffer(size_t count) { |
| return this->getMappedBuffer(kIndexBufferIndex, count, sizeof(uint16_t)); |
| } |
| MappedAllocationInfo getMappedUniformBuffer(size_t count, size_t stride) { |
| return this->getMappedBuffer(kUniformBufferIndex, count, stride); |
| } |
| MappedAllocationInfo getMappedStorageBuffer(size_t count, size_t stride) { |
| return this->getMappedBuffer(kStorageBufferIndex, count, stride); |
| } |
| |
| // The remaining writers and buffer allocator functions assume that byte counts are safely |
| // calculated by the caller (e.g. Vello). |
| |
| // Utilities that return an unmapped buffer suballocation for a particular usage. These buffers |
| // are intended to be accessed only by the GPU and are not meant for CPU data uploads. |
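| // |
| // E.g. (hypothetical size), an indirect draw buffer zeroed at allocation time: |
| //   BindBufferInfo indirect = bufMgr->getIndirectStorage(kCmdBytes, ClearBuffer::kYes); |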
| BindBufferInfo getStorage(size_t requiredBytes, ClearBuffer cleared = ClearBuffer::kNo) { |
| return this->getBinding(kGpuOnlyStorageBufferIndex, requiredBytes, cleared); |
| } |
| BindBufferInfo getVertexStorage(size_t requiredBytes) { |
| return this->getBinding(kVertexStorageBufferIndex, requiredBytes, ClearBuffer::kNo); |
| } |
| BindBufferInfo getIndexStorage(size_t requiredBytes) { |
| return this->getBinding(kIndexStorageBufferIndex, requiredBytes, ClearBuffer::kNo); |
| } |
| BindBufferInfo getIndirectStorage(size_t requiredBytes, ClearBuffer cleared=ClearBuffer::kNo) { |
| return this->getBinding(kIndirectStorageBufferIndex, requiredBytes, cleared); |
| } |
| |
| // Returns an entire storage buffer object that is large enough to fit `requiredBytes`. The |
| // returned BufferSubAllocator can be used to sub-allocate one or more storage buffer bindings |
| // that reference the same buffer object. |
| // |
| // When the BufferSubAllocator goes out of scope, the buffer object is added to an internal |
| // pool and becomes available for immediate reuse; getScratchStorage() returns buffers from this |
| // pool when possible. A BufferSubAllocator can also be returned to the pool explicitly by |
| // calling `reset()`. |
| // |
| // Returning a BufferSubAllocator to the pool too early can result in validation failures and/or |
| // data races. It is the caller's responsibility to manage reuse within a Recording and to |
| // guarantee synchronized access to buffer bindings. |
| // |
| // This type of usage is currently limited to GPU-only storage buffers. |
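| // |
| // A sketch of scratch reuse (hypothetical sizes; the contents are written and read by GPU |
| // tasks recorded by the caller): |
| // |
| //   BufferSubAllocator scratch = bufMgr->getScratchStorage(totalBytes); |
| //   BindBufferInfo sliceA = scratch.getSubrange(countA, stride); |
| //   BindBufferInfo sliceB = scratch.getSubrange(countB, stride); |
| //   // ... record GPU tasks that write and then read sliceA/sliceB ... |
| //   scratch.reset(); // returns the entire buffer to the pool for immediate reuse |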
| BufferSubAllocator getScratchStorage(size_t requiredBytes) { |
| return this->getBuffer(kGpuOnlyStorageBufferIndex, requiredBytes, |
| /*stride=*/1, /*xtraAlignment=*/1, |
| ClearBuffer::kNo, Shareable::kScratch); |
| } |
| |
| // Finalizes all buffers and transfers ownership of them to a Recording. Returns true on success |
| // and false if a mapping had previously failed. |
| // |
| // Regardless of success or failure, the DrawBufferManager is reset to a valid initial state |
| // for recording buffer data for the next Recording. |
| [[nodiscard]] bool transferToRecording(Recording*); |
| |
| private: |
| friend class BufferSubAllocator; |
| |
| struct BufferState { |
| const BufferType fType; |
| const AccessPattern fAccessPattern; |
| const bool fUseTransferBuffer; |
| const char* fLabel; |
| |
| const uint32_t fMinAlignment; // guaranteed power of two, required for binding |
| const uint32_t fMinBlockSize; |
| const uint32_t fMaxBlockSize; |
| |
| BufferSubAllocator fAvailableBuffer; |
| |
| // Buffers held in this set are owned by still-alive BufferSubAllocators that were created |
| // with Shareable::kScratch. The set is compatible with ResourceCache::ScratchResourceSet. |
| skia_private::THashSet<const Resource*> fUnavailableScratchBuffers; |
| |
| // The size of the last allocated Buffer, pinned to min/max block size, for amortizing the |
| // number of buffer allocations for large Recordings. |
| uint32_t fLastBufferSize = 0; |
| |
| BufferState(BufferType, const char* label, bool isGpuOnly, |
| const Options&, const Caps* caps); |
| |
| sk_sp<Buffer> findOrCreateBuffer(ResourceProvider*, Shareable, uint32_t byteCount); |
| }; |
| |
| BufferSubAllocator getBuffer(int stateIndex, |
| size_t count, |
| size_t stride, |
| size_t xtraAlignment, |
| ClearBuffer cleared, |
| Shareable shareable); |
| |
| MappedAllocationInfo getMappedBuffer(int stateIndex, size_t count, size_t stride, |
| size_t reservedCount=0, size_t xtraAlignment=1) { |
| BufferSubAllocator buffer = this->getBuffer(stateIndex, |
| std::max(count, reservedCount), |
| stride, |
| xtraAlignment, |
| ClearBuffer::kNo, |
| Shareable::kNo); |
| if (buffer) { |
| // This is a shortcut since we know that buffer has enough space for `count*stride` |
| // bytes at the right alignment if getBuffer() succeeded. |
| const uint32_t byteCount = SkTo<uint32_t>(count * stride); |
| |
| SkASSERT(buffer.fOffset % xtraAlignment == 0); |
| SkASSERT(buffer.fOffset + byteCount <= buffer.fBuffer->size()); |
| |
| BindBufferInfo binding = buffer.binding(buffer.fOffset, byteCount); |
| buffer.fOffset += byteCount; |
| buffer.fAlignment = 1; |
| return {buffer.getWriter(binding), binding, std::move(buffer)}; |
| } else { |
| // Failed to allocate a new buffer |
| return {BufferWriter(), BindBufferInfo(), std::move(buffer)}; |
| } |
| } |
| |
| // Helper method for the public GPU-only BindBufferInfo methods |
| BindBufferInfo getBinding(int stateIndex, size_t requiredBytes, ClearBuffer cleared) { |
| auto alloc = this->getBuffer(stateIndex, requiredBytes, |
| /*stride=*/1, /*xtraAlignment=*/1, |
| cleared, Shareable::kNo); |
| // `alloc` goes out of scope when this returns, but that is okay because it is only used |
| // for GPU-only, non-shareable buffers. The returned BindBufferInfo will still be unique. |
| return alloc.getSubrange(requiredBytes, /*stride=*/1); |
| } |
| |
| // Marks the manager as being in a failed state and unmaps any previously collected buffers. |
| void onFailedBuffer(); |
| |
| ResourceProvider* const fResourceProvider; |
| const Caps* const fCaps; |
| UploadBufferManager* fUploadManager; |
| |
| static constexpr size_t kVertexBufferIndex = 0; |
| static constexpr size_t kIndexBufferIndex = 1; |
| static constexpr size_t kUniformBufferIndex = 2; |
| static constexpr size_t kStorageBufferIndex = 3; |
| static constexpr size_t kGpuOnlyStorageBufferIndex = 4; |
| static constexpr size_t kVertexStorageBufferIndex = 5; |
| static constexpr size_t kIndexStorageBufferIndex = 6; |
| static constexpr size_t kIndirectStorageBufferIndex = 7; |
| std::array<BufferState, 8> fCurrentBuffers; |
| |
| // Vector of buffer and transfer buffer pairs. |
| skia_private::TArray<std::pair<sk_sp<Buffer>, BindBufferInfo>> fUsedBuffers; |
| |
| // List of buffer regions that were requested to be cleared at the time of allocation. |
| skia_private::TArray<BindBufferInfo> fClearList; |
| |
| // If mapping failed on Buffers created/managed by this DrawBufferManager or by the mapped |
| // transfer buffers from the UploadManager, remember so that the next Recording will fail. |
| bool fMappingFailed = false; |
| }; |
| |
| /** |
| * The StaticBufferManager is the one-time-only analog to DrawBufferManager and provides "static" |
| * Buffers to RenderSteps and other Context-lifetime-tied objects, where the Buffers' contents will |
| * not change and can benefit from prioritizing GPU reads. The assumed use case is that they remain |
| * read-only on the GPU as well, so a single static buffer can be shared by all Recorders. |
| * |
| * Unlike DrawBufferManager's getMapped[X]Buffer() functions, which return both a Writer and a |
| * BindBufferInfo, StaticBufferManager returns only a Writer and accepts a BindBufferInfo* as an |
| * argument. The pointed-to BindBufferInfo is re-written with the final binding info for the |
| * GPU-private data once that can be determined, after *all* static buffers have been requested. |
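| * |
| * A minimal sketch of the intended flow (hypothetical caller code; `Vertex`, the counts, and |
| * the written values are assumptions): |
| * |
| *   BindBufferInfo binding; // must remain alive until finalize() returns |
| *   VertexWriter writer = staticBufMgr->getVertexWriter(kCount, sizeof(Vertex), &binding); |
| *   if (writer) { |
| *       writer << v0 << v1; // write all kCount*sizeof(Vertex) bytes |
| *   } |
| *   // After *all* static buffers have been requested: |
| *   auto result = staticBufMgr->finalize(context, queueManager, globalCache); |
| *   // On FinishResult::kSuccess, `binding` now points into the packed GPU-private buffer. |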
| */ |
| class StaticBufferManager { |
| public: |
| StaticBufferManager(ResourceProvider*, const Caps*); |
| ~StaticBufferManager(); |
| |
| // The passed-in BindBufferInfos are updated when finalize() is later called, to point to the |
| // packed, GPU-private buffer at the appropriate offset. The data written to the returned Writer |
| // is copied to the private buffer at that offset. 'binding' must live until finalize() returns. |
| |
| // For the vertex writer, the count and stride of the data are passed to allow alignment of |
| // future vertices. |
| VertexWriter getVertexWriter(size_t count, size_t stride, BindBufferInfo* binding); |
| // TODO: Update the tessellation index buffer generation functions to use an IndexWriter so this |
| // can return an IndexWriter vs. a VertexWriter that happens to just write uint16s... |
| VertexWriter getIndexWriter(size_t size, BindBufferInfo* binding); |
| |
| enum class FinishResult : int { |
| kFailure, // Unable to create or copy static buffers |
| kSuccess, // Successfully created static buffers and added GPU tasks to the queue |
| kNoWork // No static buffers required, no GPU tasks added to the queue |
| }; |
| |
| // Finalizes all buffers and records a copy task to compact and privatize static data. The |
| // final static buffers will become owned by the Context's GlobalCache. |
| FinishResult finalize(Context*, QueueManager*, GlobalCache*); |
| |
| private: |
| struct CopyRange { |
| BindBufferInfo fSource; // The CPU-to-GPU buffer and offset for the source of the copy |
| BindBufferInfo* fTarget; // The late-assigned destination of the copy |
| uint32_t fRequiredAlignment; // The required alignment of the copy, i.e. the data's stride. |
| #if defined(GPU_TEST_UTILS) |
| uint32_t fUnalignedSize; // The requested size before being padded to 4-byte alignment |
| #endif |
| }; |
| struct BufferState { |
| BufferState(BufferType type, const Caps* caps); |
| |
| bool createAndUpdateBindings(ResourceProvider*, Context*, QueueManager*, GlobalCache*, |
| std::string_view label) const; |
| void reset() { |
| fData.clear(); |
| fTotalRequiredBytes = 0; |
| } |
| |
| const BufferType fBufferType; |
| // This is the LCM of the alignment requirement of the buffer type and the transfer |
| // buffer's alignment requirement. |
| const uint32_t fMinimumAlignment; |
| |
| skia_private::TArray<CopyRange> fData; |
| uint32_t fTotalRequiredBytes; |
| }; |
| |
| void* prepareStaticData(BufferState* info, |
| size_t requiredBytes, |
| size_t requiredAlignment, |
| BindBufferInfo* target); |
| |
| ResourceProvider* const fResourceProvider; |
| UploadBufferManager fUploadManager; |
| const uint32_t fRequiredTransferAlignment; |
| |
| // The source data that's copied into a final GPU-private buffer |
| BufferState fVertexBufferState; |
| BufferState fIndexBufferState; |
| |
| // If mapping failed on Buffers created/managed by this StaticBufferManager or by the mapped |
| // transfer buffers from the UploadManager, remember so that finalize() will fail. |
| bool fMappingFailed = false; |
| }; |
| |
| } // namespace skgpu::graphite |
| |
| #endif // skgpu_graphite_BufferManager_DEFINED |