/*
* Copyright 2021 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef skgpu_graphite_BufferManager_DEFINED
#define skgpu_graphite_BufferManager_DEFINED
#include "include/core/SkRefCnt.h"
#include "include/private/base/SkTArray.h"
#include "src/core/SkTHash.h"
#include "src/gpu/BufferWriter.h"
#include "src/gpu/graphite/Buffer.h"
#include "src/gpu/graphite/ResourceTypes.h"
#include "src/gpu/graphite/UploadBufferManager.h"
#include <array>
#include <cstddef>
#include <cstdint>
#include <string_view>
#include <utility>
namespace skgpu::graphite {
class Caps;
class Context;
class DrawBufferManager;
class GlobalCache;
class QueueManager;
class Recording;
class ResourceProvider;
/**
* BufferSubAllocator provides an entire GPU buffer to the caller so that the caller can
* sub-allocate intervals within the buffer. Each buffer type has a minimum required alignment for
* binding. This alignment is automatically used for the *first* suballocation from an allocator
* instance. Scoping the lifetime of an allocator to when its contents are bound lets these binding
* requirements be met automatically, while additional suballocations that can be accessed without
* requiring a new binding use a tighter alignment.
*
* When a BufferSubAllocator goes out of scope, its underlying Buffer is returned to the manager. By
* default, any remaining space can be handed out by subsequent allocation requests, but bytes that
* were already written cannot be overwritten by later BufferSubAllocators. The exception is the
* BufferSubAllocator instances returned by DrawBufferManager::getScratchStorage(), whose Buffers
* will be Shareable::kScratch resources and can be fully reused by other Recorders or once the
* BufferSubAllocator goes out of scope.
*
* Buffers created by the DrawBufferManager for an allocator are automatically transferred to the
* Recording and CommandBuffers when snapped or inserted.
*/
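/*
 * A minimal caller-side sketch (variable names and counts are illustrative assumptions, not part
 * of this API):
 *
 *   auto [writer, binding, alloc] = drawBufferManager->getMappedVertexBuffer(vertexCount, stride);
 *   if (writer) {
 *       // Write vertexCount*stride bytes through `writer`; use `binding` for the draw.
 *       auto [writer2, binding2] = alloc.getMappedSubrange(moreVertexCount, stride);
 *       // If valid, `binding2` refers to the same Buffer as `binding`, so no new buffer binding
 *       // command is needed between the two draws.
 *   }
 *   // When `alloc` goes out of scope, its unwritten space is returned to the DrawBufferManager.
 */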
class BufferSubAllocator final {
public:
BufferSubAllocator() = default;
// Disallow copy
BufferSubAllocator(const BufferSubAllocator&) = delete;
BufferSubAllocator& operator=(const BufferSubAllocator&) = delete;
// Allow move
BufferSubAllocator(BufferSubAllocator&& b) { *this = std::move(b); }
BufferSubAllocator& operator=(BufferSubAllocator&&);
~BufferSubAllocator() { this->reset(); }
// Returns false if the underlying buffer has been returned to the reuse pool or moved.
bool isValid() const { return SkToBool(fBuffer); }
explicit operator bool() const { return this->isValid(); }
// Returns the number of remaining bytes in the GPU buffer, assuming an alignment of 1.
uint32_t remainingBytes() const {
return fBuffer ? SkTo<uint32_t>(fBuffer->size()) - fOffset : 0;
}
/**
* Suballocates `count*stride` bytes, returning a pointer (wrapped in a BufferWriter) to the mapped
* range and the BindBufferInfo defining that range in a GPU-backed Buffer. The returned
* subrange will be aligned according to the following rules:
* - The first suballocation, or the first after resetForNewBinding(), will be aligned to the
* lowest common multiple of `stride`, the binding's required alignment, and any extra base
* alignment set in resetForNewBinding() or when the BufferSubAllocator was created.
* - Subsequent suballocations will be aligned to just `stride`.
*
* It is assumed the caller will write all `count*stride` bytes to the returned address. If
* `reservedCount` is greater than `count`, the suballocation will only succeed if the buffer
* has room for an aligned `reservedCount*stride` bytes. The returned writer can still only
* write `count*stride` bytes; the remaining `(reservedCount-count)*stride` bytes are available
* for future suballocations, which are then guaranteed to fit within the same buffer (assuming
* the same or a lower alignment).
*
* An invalid BufferWriter and an empty BindBufferInfo are returned if the buffer does not have
* enough room remaining to fulfill the suballocation.
*/
std::pair<BufferWriter, BindBufferInfo> getMappedSubrange(
size_t count,
size_t stride,
size_t reservedCount = 0) {
SkASSERT(fMappedPtr || !fBuffer); // Writing should have checked validity of allocator first
BindBufferInfo binding = this->reserve(count, stride, reservedCount);
if (binding) {
return {this->getWriter(binding), binding};
} else {
return {nullptr, BindBufferInfo{}};
}
}
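// Example of `reservedCount` (a hypothetical sketch; counts are illustrative): reserving room up
// front guarantees that the later suballocation of the remainder lands in the same buffer:
//
//   auto [w1, b1] = alloc.getMappedSubrange(/*count=*/10, stride, /*reservedCount=*/100);
//   ...
//   auto [w2, b2] = alloc.getMappedSubrange(/*count=*/90, stride); // same Buffer as `b1`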
// Sub-allocates a slice within the underlying buffer object. This variation should be used when
// the returned range will be written to by the GPU as part of executing a command buffer.
//
// Other than returning just a buffer slice to be written to later by a GPU task, the
// suballocation behaves identically to getMappedSubrange().
BindBufferInfo getSubrange(size_t count, size_t stride, size_t reservedCount = 0) {
SkASSERT(!fMappedPtr); // Should not be used when data is intended to be written by CPU
return this->reserve(count, stride, reservedCount);
}
// Returns the underlying buffer object back to the pool and invalidates this allocator.
// Depending on the GPU buffer's Shareable value, either:
// - kNo: The remaining space that hasn't been written to can be used by another allocator,
// but it will assume that use will involve a new buffer binding command.
// - kScratch: The entire buffer can be overwritten by another allocator.
void reset();
void resetForNewBinding(size_t alignment=1);
private:
friend class DrawBufferManager;
BufferSubAllocator(DrawBufferManager* owner,
int stateIndex,
sk_sp<Buffer> buffer,
BindBufferInfo transferBuffer, // optional (when direct mapping unavailable)
void* mappedPtr, // `buffer` or `transferBuffer`'s ptr, or null if GPU-only
uint32_t xtraAlignment);
BindBufferInfo reserve(size_t count, size_t stride, size_t reservedCount);
BindBufferInfo binding(uint32_t offset, uint32_t size) const {
return {fBuffer.get(), offset, size};
}
BufferWriter getWriter(BindBufferInfo binding) const {
// Should only be called for a mapped BufferSubAllocator with a binding that has already
// been sub-allocated.
SkASSERT(fMappedPtr);
SkASSERT(binding.fBuffer == fBuffer.get());
SkASSERT(binding.fOffset + binding.fSize <= fOffset);
return BufferWriter(SkTAddOffset<void>(fMappedPtr, binding.fOffset), binding.fSize);
}
// Non-null when valid and not already returned to the pool
DrawBufferManager* fOwner = nullptr;
int fStateIndex = 0;
sk_sp<Buffer> fBuffer;
BindBufferInfo fTransferBuffer;
// If mapped for writing, this is the CPU address of offset 0 of the buffer. When a mapped
// buffer is returned to the DrawBufferManager, only the bytes after fOffset can be reused.
// If there is no mapped buffer pointer, it's assumed the GPU buffer is reusable for another
// BufferSubAllocator instance (this default reuse policy can be revisited if needed).
void* fMappedPtr = nullptr;
uint32_t fAlignment = 1; // Default alignment
uint32_t fOffset = 0; // Next suballocation can start at fOffset at the earliest
};
/**
* DrawBufferManager controls writing to buffer data ranges within larger, cacheable Buffers and
* automatically handles either mapping or copying via a transfer buffer, depending on what the GPU
* hardware supports for the requested buffer type and use case. It is intended for repeatedly
* uploading dynamic data to the GPU.
*/
class DrawBufferManager {
public:
struct Options {
Options() = default;
uint32_t fVertexBufferMinSize = 16 << 10; // 16 KB
uint32_t fVertexBufferMaxSize = 1 << 20; // 1 MB
uint32_t fIndexBufferSize = 2 << 10; // 2 KB
uint32_t fStorageBufferMinSize = 2 << 10; // 2 KB
uint32_t fStorageBufferMaxSize = 1 << 20; // 1 MB
#if defined(GPU_TEST_UTILS)
bool fUseExactBuffSizes = false; // Disables automatic buffer growth
bool fAllowCopyingGpuOnly = false; // Adds kCopySrc to GPU-only buffer usage
#endif
};
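// A hypothetical override of the defaults (values are illustrative, e.g. for a test Recorder):
//
//   DrawBufferManager::Options opts;
//   opts.fVertexBufferMinSize = 32 << 10;  // start vertex buffers at 32 KB
//   opts.fStorageBufferMaxSize = 4 << 20;  // let storage buffers grow to 4 MB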
DrawBufferManager(ResourceProvider* resourceProvider, const Caps* caps,
UploadBufferManager* uploadManager,
Options dbmOpts);
~DrawBufferManager();
// Lets callers check whether the manager is already in a failed mapping state so they can skip
// any extra work that would be wasted because the next Recording snap will fail.
bool hasMappingFailed() const { return fMappingFailed; }
// Returns a BufferWriter for writing to the count*stride bytes of the GPU buffer subrange
// represented by the returned BindBufferInfo. The returned BufferSubAllocator represents the
// entire GPU buffer that the mapped subrange belongs to; it can be used to get additional
// mapped suballocations, which when successful are guaranteed to be in the same buffer. This
// allows callers to more easily manage when buffers must be bound.
//
// The returned {BufferWriter, BindBufferInfo} are effectively an automatic call to
// BufferSubAllocator.getMappedSubrange(count, stride, reservedCount). The offset of this first
// allocation will be aligned to the LCM of `stride` and the minimum required alignment for the
// buffer type. For function variants that take an extra `alignment`, the initial suballocation
// will also be aligned to that, equivalent to if resetForNewBinding(alignment) had been called
// before. Subsequent suballocations from the returned allocator will only be aligned to their
// requested stride unless resetForNewBinding() was called.
//
// When the returned BufferSubAllocator goes out of scope, any remaining bytes that were never
// returned from either this function or later calls to getMappedSubrange() can be used to
// satisfy a future call to getMapped[X]Buffer.
using MappedAllocationInfo = std::tuple<BufferWriter, BindBufferInfo, BufferSubAllocator>;
MappedAllocationInfo getMappedVertexBuffer(size_t count, size_t stride,
size_t reservedCount=0, size_t alignment=1) {
return this->getMappedBuffer(kVertexBufferIndex, count, stride, reservedCount, alignment);
}
MappedAllocationInfo getMappedIndexBuffer(size_t count) {
return this->getMappedBuffer(kIndexBufferIndex, count, sizeof(uint16_t));
}
MappedAllocationInfo getMappedUniformBuffer(size_t count, size_t stride) {
return this->getMappedBuffer(kUniformBufferIndex, count, stride);
}
MappedAllocationInfo getMappedStorageBuffer(size_t count, size_t stride) {
return this->getMappedBuffer(kStorageBufferIndex, count, stride);
}
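// Sketch of packing several uniform blocks into one bound buffer (caller code and struct names
// are illustrative assumptions):
//
//   auto [w0, b0, alloc] = bufferMgr->getMappedUniformBuffer(1, sizeof(BlockA));
//   auto [w1, b1] = alloc.getMappedSubrange(1, sizeof(BlockB)); // same Buffer, stride-aligned
//   alloc.resetForNewBinding(); // the next range must satisfy the binding's min alignment again
//   auto [w2, b2] = alloc.getMappedSubrange(1, sizeof(BlockC));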
// The remaining writers and buffer allocator functions assume that byte counts are safely
// calculated by the caller (e.g. Vello).
// Utilities that return an unmapped buffer suballocation for a particular usage. These buffers
// are intended to be only accessed by the GPU and are not intended for CPU data uploads.
BindBufferInfo getStorage(size_t requiredBytes, ClearBuffer cleared = ClearBuffer::kNo) {
return this->getBinding(kGpuOnlyStorageBufferIndex, requiredBytes, cleared);
}
BindBufferInfo getVertexStorage(size_t requiredBytes) {
return this->getBinding(kVertexStorageBufferIndex, requiredBytes, ClearBuffer::kNo);
}
BindBufferInfo getIndexStorage(size_t requiredBytes) {
return this->getBinding(kIndexStorageBufferIndex, requiredBytes, ClearBuffer::kNo);
}
BindBufferInfo getIndirectStorage(size_t requiredBytes, ClearBuffer cleared=ClearBuffer::kNo) {
return this->getBinding(kIndirectStorageBufferIndex, requiredBytes, cleared);
}
// Returns an entire storage buffer object that is large enough to fit `requiredBytes`. The
// returned BufferSubAllocator can be used to sub-allocate one or more storage buffer bindings
// that reference the same buffer object.
//
// When the BufferSubAllocator goes out of scope, the buffer object gets added to an internal
// pool and is available for immediate reuse. getScratchStorage() returns buffers from this pool
// if possible. A BufferSubAllocator can be explicitly returned to the pool by calling
// `reset()`.
//
// Returning a BufferSubAllocator to the pool too early can result in validation failures
// and/or data races. It is the caller's responsibility to manage reuse within a Recording and
// guarantee synchronized access to buffer bindings.
//
// This type of usage is currently limited to GPU-only storage buffers.
BufferSubAllocator getScratchStorage(size_t requiredBytes) {
return this->getBuffer(kGpuOnlyStorageBufferIndex, requiredBytes,
/*stride=*/1, /*xtraAlignment=*/1,
ClearBuffer::kNo, Shareable::kScratch);
}
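// A caller-side sketch of scratch reuse (byte counts and recorded tasks are illustrative
// assumptions):
//
//   BufferSubAllocator scratch = bufferMgr->getScratchStorage(totalScratchBytes);
//   BindBufferInfo pass1 = scratch.getSubrange(count1, sizeof(uint32_t));
//   BindBufferInfo pass2 = scratch.getSubrange(count2, sizeof(uint32_t));
//   // Record the GPU tasks that bind pass1/pass2; both ranges share one Buffer. Only let
//   // `scratch` go out of scope once nothing recorded later could race with those tasks.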
// Finalizes all buffers and transfers ownership of them to a Recording. Returns true on success
// and false if a mapping had previously failed.
//
// Regardless of success or failure, the DrawBufferManager is reset to a valid initial state
// for recording buffer data for the next Recording.
[[nodiscard]] bool transferToRecording(Recording*);
private:
friend class BufferSubAllocator;
struct BufferState {
const BufferType fType;
const AccessPattern fAccessPattern;
const bool fUseTransferBuffer;
const char* fLabel;
const uint32_t fMinAlignment; // guaranteed power of two, required for binding
const uint32_t fMinBlockSize;
const uint32_t fMaxBlockSize;
BufferSubAllocator fAvailableBuffer;
// Buffers held in this set are owned by still-alive BufferSubAllocators that were created
// with Shareable::kScratch. This is compatible with ResourceCache::ScratchResourceSet.
skia_private::THashSet<const Resource*> fUnavailableScratchBuffers;
// The size of the last allocated Buffer, clamped to the min/max block size, used to amortize
// the number of buffer allocations for large Recordings.
uint32_t fLastBufferSize = 0;
BufferState(BufferType, const char* label, bool isGpuOnly,
const Options&, const Caps* caps);
sk_sp<Buffer> findOrCreateBuffer(ResourceProvider*, Shareable, uint32_t byteCount);
};
BufferSubAllocator getBuffer(int stateIndex,
size_t count,
size_t stride,
size_t xtraAlignment,
ClearBuffer cleared,
Shareable shareable);
MappedAllocationInfo getMappedBuffer(int stateIndex, size_t count, size_t stride,
size_t reservedCount=0, size_t xtraAlignment=1) {
BufferSubAllocator buffer = this->getBuffer(stateIndex,
std::max(count, reservedCount),
stride,
xtraAlignment,
ClearBuffer::kNo,
Shareable::kNo);
if (buffer) {
// This is a shortcut since we know that buffer has enough space for `count*stride`
// bytes at the right alignment if getBuffer() succeeded.
const uint32_t byteCount = SkTo<uint32_t>(count * stride);
SkASSERT(buffer.fOffset % xtraAlignment == 0);
SkASSERT(buffer.fOffset + byteCount <= buffer.fBuffer->size());
BindBufferInfo binding = buffer.binding(buffer.fOffset, byteCount);
buffer.fOffset += byteCount;
buffer.fAlignment = 1;
return {buffer.getWriter(binding), binding, std::move(buffer)};
} else {
// Failed to allocate a new buffer
return {BufferWriter(), BindBufferInfo(), std::move(buffer)};
}
}
// Helper method for the public GPU-only BindBufferInfo methods
BindBufferInfo getBinding(int stateIndex, size_t requiredBytes, ClearBuffer cleared) {
auto alloc = this->getBuffer(stateIndex, requiredBytes,
/*stride=*/1, /*xtraAlignment=*/1,
cleared, Shareable::kNo);
// `alloc` goes out of scope when this returns, but that is okay because it is only used
// for GPU-only, non-shareable buffers. The returned BindBufferInfo will still be unique.
return alloc.getSubrange(requiredBytes, /*stride=*/1);
}
// Marks the manager as being in a failed state and unmaps any previously collected buffers.
void onFailedBuffer();
ResourceProvider* const fResourceProvider;
const Caps* const fCaps;
UploadBufferManager* fUploadManager;
static constexpr size_t kVertexBufferIndex = 0;
static constexpr size_t kIndexBufferIndex = 1;
static constexpr size_t kUniformBufferIndex = 2;
static constexpr size_t kStorageBufferIndex = 3;
static constexpr size_t kGpuOnlyStorageBufferIndex = 4;
static constexpr size_t kVertexStorageBufferIndex = 5;
static constexpr size_t kIndexStorageBufferIndex = 6;
static constexpr size_t kIndirectStorageBufferIndex = 7;
std::array<BufferState, 8> fCurrentBuffers;
// Vector of buffer and transfer buffer pairs.
skia_private::TArray<std::pair<sk_sp<Buffer>, BindBufferInfo>> fUsedBuffers;
// List of buffer regions that were requested to be cleared at the time of allocation.
skia_private::TArray<BindBufferInfo> fClearList;
// If mapping failed on Buffers created/managed by this DrawBufferManager or by the mapped
// transfer buffers from the UploadManager, remember so that the next Recording will fail.
bool fMappingFailed = false;
};
/**
* The StaticBufferManager is the one-time-only analog to DrawBufferManager and provides "static"
* Buffers to RenderSteps and other Context-lifetime-tied objects, where the Buffers' contents will
* not change and can benefit from prioritizing GPU reads. The assumed use case is that they remain
* read-only on the GPU as well, so a single static buffer can be shared by all Recorders.
*
* Unlike DrawBufferManager's getMapped[X]Buffer() functions that return both a Writer and a
* BindBufferInfo, StaticBufferManager returns only a Writer and accepts a BindBufferInfo* as an
* argument. This will be re-written with the final binding info for the GPU-private data once that
* can be determined after *all* static buffers have been requested.
*/
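/*
 * A minimal caller-side sketch (the vertex type, count, and the member that holds the binding are
 * illustrative assumptions):
 *
 *   VertexWriter writer = staticBufferManager->getVertexWriter(kVertexCount, sizeof(MyVertex),
 *                                                              &fStaticVertexBinding);
 *   // Write kVertexCount vertices through `writer`. fStaticVertexBinding is re-written to point
 *   // at the packed, GPU-private buffer when StaticBufferManager::finalize() runs, after *all*
 *   // static buffers have been requested.
 */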
class StaticBufferManager {
public:
StaticBufferManager(ResourceProvider*, const Caps*);
~StaticBufferManager();
// The passed in BindBufferInfos are updated when finalize() is later called, to point to the
// packed, GPU-private buffer at the appropriate offset. The data written to the returned Writer
// is copied to the private buffer at that offset. 'binding' must live until finalize() returns.
// For the vertex writer, the count and stride of the buffer are passed to allow alignment of
// future vertices.
VertexWriter getVertexWriter(size_t count, size_t stride, BindBufferInfo* binding);
// TODO: Update the tessellation index buffer generation functions to use an IndexWriter so this
// can return an IndexWriter vs. a VertexWriter that happens to just write uint16s...
VertexWriter getIndexWriter(size_t size, BindBufferInfo* binding);
enum class FinishResult : int {
kFailure, // Unable to create or copy static buffers
kSuccess, // Successfully created static buffers and added GPU tasks to the queue
kNoWork // No static buffers required, no GPU tasks added to the queue
};
// Finalizes all buffers and records a copy task to compact and privatize static data. The
// final static buffers will become owned by the Context's GlobalCache.
FinishResult finalize(Context*, QueueManager*, GlobalCache*);
private:
struct CopyRange {
BindBufferInfo fSource; // The CPU-to-GPU buffer and offset for the source of the copy
BindBufferInfo* fTarget; // The late-assigned destination of the copy
uint32_t fRequiredAlignment; // The requested stride of the data.
#if defined(GPU_TEST_UTILS)
uint32_t fUnalignedSize; // The requested size without count-4 alignment
#endif
};
struct BufferState {
BufferState(BufferType type, const Caps* caps);
bool createAndUpdateBindings(ResourceProvider*, Context*, QueueManager*, GlobalCache*,
std::string_view label) const;
void reset() {
fData.clear();
fTotalRequiredBytes = 0;
}
const BufferType fBufferType;
// This is the LCM of the alignment requirement of the buffer type and the transfer buffer
// alignment requirement.
const uint32_t fMinimumAlignment;
skia_private::TArray<CopyRange> fData;
uint32_t fTotalRequiredBytes;
};
void* prepareStaticData(BufferState* info,
size_t requiredBytes,
size_t requiredAlignment,
BindBufferInfo* target);
ResourceProvider* const fResourceProvider;
UploadBufferManager fUploadManager;
const uint32_t fRequiredTransferAlignment;
// The source data that's copied into a final GPU-private buffer
BufferState fVertexBufferState;
BufferState fIndexBufferState;
// If mapping failed on Buffers created/managed by this StaticBufferManager or by the mapped
// transfer buffers from the UploadManager, remember so that finalize() will fail.
bool fMappingFailed = false;
};
} // namespace skgpu::graphite
#endif // skgpu_graphite_BufferManager_DEFINED