/*
* Copyright 2021 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "src/gpu/graphite/BufferManager.h"
#include "include/gpu/GpuTypes.h"
#include "include/gpu/graphite/Recording.h"
#include "include/private/base/SkAlign.h"
#include "include/private/base/SkAssert.h"
#include "include/private/base/SkMath.h"
#include "include/private/base/SkTo.h"
#include "src/gpu/graphite/Caps.h"
#include "src/gpu/graphite/GlobalCache.h"
#include "src/gpu/graphite/Log.h"
#include "src/gpu/graphite/QueueManager.h"
#include "src/gpu/graphite/RecordingPriv.h"
#include "src/gpu/graphite/Resource.h"
#include "src/gpu/graphite/ResourceProvider.h"
#include "src/gpu/graphite/UploadBufferManager.h"
#include "src/gpu/graphite/task/ClearBuffersTask.h"
#include "src/gpu/graphite/task/CopyTask.h"
#include "src/gpu/graphite/task/Task.h"
#include "src/gpu/graphite/task/TaskList.h"
#include <algorithm>
#include <cstddef>
#include <cstring>
#include <limits>
#include <numeric>
#include <tuple>
namespace skgpu::graphite {
namespace {
// The limit for all data created by the StaticBufferManager. This data remains alive for
// the entire SharedContext so we want to keep it small and give a concrete upper bound to
// clients for our steady-state memory usage.
// FIXME The current usage is 4732 bytes across static vertex and index buffers, but that includes
// multiple copies of tessellation data, and an unoptimized AnalyticRRect mesh. Once those issues
// are addressed, we can tighten this and decide on the transfer buffer sizing as well.
[[maybe_unused]] static constexpr uint32_t kMaxStaticDataSize = 6 << 10;
uint32_t validate_count_and_stride(size_t count, size_t stride, uint32_t alignment) {
// size_t may just be uint32_t, so this ensures we have enough bits to
// compute the required byte product.
uint64_t count64 = SkTo<uint64_t>(count);
uint64_t stride64 = SkTo<uint64_t>(stride);
uint64_t bytes64 = count64*stride64;
if (count64 > std::numeric_limits<uint32_t>::max() ||
stride64 > std::numeric_limits<uint32_t>::max() ||
bytes64 > std::numeric_limits<uint32_t>::max() - (alignment + 1)) {
// Return 0 to skip further allocation attempts.
return 0;
}
// Since count64 and stride64 fit into 32-bits, their product won't overflow a 64-bit multiply,
// and we've confirmed the product fits into 32-bits with headroom to be aligned w/o overflow.
return SkTo<uint32_t>(bytes64);
}
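// For illustration, two hypothetical calls (not taken from real call sites):
// validate_count_and_stride(100, 12, 4) returns 1200, while a request whose byte product
// exceeds the 32-bit range (e.g. count = 1 << 20 with stride = 1 << 13) returns 0 and the
// caller skips the allocation attempt.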
// Calculates the LCM of `alignMaybePow2` and `alignProbNonPow2`. Neither value needs to be a
// power of 2, but this is optimized to check for whether or not `alignMaybePow2` is a power of 2.
// It assumes the probability of the 2nd alignment value being a power of 2 is low enough to not
// be worth checking.
uint32_t lcm_alignment(uint32_t alignMaybePow2, uint32_t alignProbNonPow2) {
SkASSERT(alignMaybePow2 != 0 && alignProbNonPow2 != 0);
if (alignMaybePow2 == 1 ||
alignMaybePow2 == alignProbNonPow2 ||
(SkIsPow2(alignMaybePow2) &&
alignProbNonPow2 > alignMaybePow2 &&
(alignProbNonPow2 & (alignMaybePow2 - 1)) == 0)) {
// Trivial LCM since alignProbNonPow2 is the same or a larger multiple of alignMaybePow2
return alignProbNonPow2;
} else {
return std::lcm(alignMaybePow2, alignProbNonPow2);
}
}
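// For illustration with hypothetical inputs: lcm_alignment(4, 12) takes the fast path above
// (12 is a multiple of the power-of-two 4) and returns 12, while lcm_alignment(16, 12) falls
// through to std::lcm and returns 48.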
// Helpers for creating a BufferState based on type, options, and caps
AccessPattern get_gpu_access_pattern(bool isGpuOnlyAccess, const DrawBufferManager::Options& opts) {
if (isGpuOnlyAccess) {
#if defined(GPU_TEST_UTILS)
if (opts.fAllowCopyingGpuOnly) {
return AccessPattern::kGpuOnlyCopySrc;
}
#endif
return AccessPattern::kGpuOnly;
} else {
return AccessPattern::kHostVisible;
}
}
// This returns the minimum required alignment depending on the type of buffer. This is guaranteed
// to be a power of two.
uint32_t minimum_alignment(BufferType type, bool useTransferBuffers, const Caps* caps) {
uint32_t alignment = 4;
if (type == BufferType::kUniform) {
alignment = SkTo<uint32_t>(caps->requiredUniformBufferAlignment());
} else if (type == BufferType::kStorage || type == BufferType::kVertexStorage ||
type == BufferType::kIndexStorage || type == BufferType::kIndirect) {
alignment = SkTo<uint32_t>(caps->requiredStorageBufferAlignment());
}
if (useTransferBuffers) {
// Both alignment and the requiredTransferBufferAlignment must be powers of two, so max
// provides the correct alignment semantics
alignment = std::max(alignment, SkTo<uint32_t>(caps->requiredTransferBufferAlignment()));
}
return alignment;
}
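// For illustration with hypothetical caps values: a uniform buffer on a device reporting a
// 256-byte requiredUniformBufferAlignment() and a 4-byte requiredTransferBufferAlignment()
// gets a minimum alignment of 256 whether or not transfer buffers are in use.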
uint32_t min_block_size(BufferType type,
uint32_t minAlignment,
const DrawBufferManager::Options& opts) {
uint32_t size;
if (type == BufferType::kIndex || type == BufferType::kIndexStorage) {
size = opts.fIndexBufferSize;
} else if (type == BufferType::kVertex || type == BufferType::kVertexStorage) {
size = opts.fVertexBufferMinSize;
} else {
size = opts.fStorageBufferMinSize;
}
#if defined(GPU_TEST_UTILS)
if (opts.fUseExactBuffSizes) {
return size; // No extra alignment
}
#endif
return SkAlignTo(size, minAlignment);
}
uint32_t max_block_size(BufferType type,
uint32_t minAlignment,
const DrawBufferManager::Options& opts) {
#if defined(GPU_TEST_UTILS)
if (opts.fUseExactBuffSizes) {
// Clamp to the minimum size
return min_block_size(type, minAlignment, opts);
}
#endif
uint32_t size;
if (type == BufferType::kIndex || type == BufferType::kIndexStorage) {
size = opts.fIndexBufferSize;
} else if (type == BufferType::kVertex || type == BufferType::kVertexStorage) {
size = opts.fVertexBufferMaxSize;
} else {
size = opts.fStorageBufferMaxSize;
}
return SkAlignTo(size, minAlignment);
}
} // anonymous namespace
// ------------------------------------------------------------------------------------------------
// BufferSubAllocator
BufferSubAllocator::BufferSubAllocator(DrawBufferManager* owner,
int stateIndex,
sk_sp<Buffer> buffer,
BindBufferInfo transferBuffer,
void* mappedPtr,
uint32_t xtraAlignment)
: fOwner(owner)
, fStateIndex(stateIndex)
, fBuffer(std::move(buffer))
, fTransferBuffer(transferBuffer)
, fMappedPtr(mappedPtr) {
this->resetForNewBinding(xtraAlignment);
}
BufferSubAllocator& BufferSubAllocator::operator=(BufferSubAllocator&& other) {
if (this == &other) {
return *this; // no-op moving into itself
}
// Reset the destination allocator first, since other's contents will overwrite whatever this
// instance held, and that prior buffer must be returned to the manager.
this->reset();
// Copy fields
fOwner = other.fOwner;
fStateIndex = other.fStateIndex;
fTransferBuffer = other.fTransferBuffer;
fMappedPtr = other.fMappedPtr;
fAlignment = other.fAlignment;
fOffset = other.fOffset;
// Move buffer (leaving other in an invalid state)
fBuffer = std::move(other.fBuffer);
SkASSERT(!other);
return *this;
}
BindBufferInfo BufferSubAllocator::reserve(size_t count, size_t stride, size_t reservedCount) {
// fAlignment starts as the LCM of the binding alignment and the requested extra alignment.
// It is reset to 1 after the first reservation so that subsequent suballocations are aligned
// to just `stride` until resetForNewBinding() is called.
// NOTE: We do not use SkTo<uint32_t> on stride because we don't want to crash if stride would
// overflow. If it does overflow, align32 will be incorrect, but validate_count_and_stride will
// still correctly detect stride's overflow so we won't use it.
const uint32_t align32 = lcm_alignment(fAlignment, (uint32_t) stride);
reservedCount = std::max(count, reservedCount);
uint32_t requiredBytes32 = validate_count_and_stride(reservedCount, stride, align32);
if (!requiredBytes32 || !fBuffer) {
return {}; // Size overflowed
}
const uint32_t bufferSize = SkTo<uint32_t>(fBuffer->size());
uint32_t offset = SkAlignNonPow2(fOffset, align32);
if (bufferSize < offset || requiredBytes32 > bufferSize - offset) {
// Not enough space left
return {};
}
// count*stride is safe since validate_count_and_stride succeeded with reservedCount. For the
// actual reservation, we only use count*stride bytes.
requiredBytes32 = SkTo<uint32_t>(count) * SkTo<uint32_t>(stride);
fOffset = offset + requiredBytes32;
fAlignment = 1; // Next reservation will only be affected by its stride
return {fBuffer.get(), offset, requiredBytes32};
}
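// Rough usage sketch with hypothetical values (not an actual call site): after
// resetForNewBinding(256), the first reserve() aligns its offset to the LCM of the minimum
// alignment, 256, and its stride; every subsequent reserve() before the next
// resetForNewBinding() aligns only to its own stride because fAlignment was reset to 1.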
void BufferSubAllocator::reset() {
if (fBuffer) {
SkASSERT(fOwner);
DrawBufferManager::BufferState& state = fOwner->fCurrentBuffers[fStateIndex];
if (fBuffer->shareable() == Shareable::kScratch) {
// TODO: Merge this reuse of scratch resources with the ScratchResourceManager, but
// currently this is resolved outside of Task::prepareResources().
// The scratch buffer's availability for reuse (scoped to the owning DrawBufferManager)
// was tied to this BufferSubAllocator, so when that is reset, we just remove the buffer
// from the set of unavailable buffers.
SkASSERT((fOwner->fMappingFailed && state.fUnavailableScratchBuffers.empty()) ||
state.fUnavailableScratchBuffers.contains(fBuffer.get()));
if (!fOwner->fMappingFailed) {
state.fUnavailableScratchBuffers.remove(fBuffer.get());
}
SkASSERT(!fTransferBuffer); // Scratch buffers shouldn't be using transfer buffers
fOwner->fUsedBuffers.emplace_back(std::move(fBuffer), BindBufferInfo{});
} else if (state.fAvailableBuffer.fBuffer.get() == fBuffer.get() || // can't stash itself
this->remainingBytes() < state.fAvailableBuffer.remainingBytes() || // too small
this->remainingBytes() < state.fMinAlignment) { // basically empty
// Transfer ownership of the buffer (and any transfer buffer) back to the manager, using
// the current offset as a more restricted limit for copying.
if (fTransferBuffer) {
// This alignment ensures we are copying a subset that still respects xfer alignment
fTransferBuffer.fSize = SkAlignTo(fOffset, state.fMinAlignment);
}
fOwner->fUsedBuffers.emplace_back(std::move(fBuffer), fTransferBuffer);
} else {
// Save this buffer for later. This leaves this instance empty and resets the prior value of
// fAvailableBuffer, which then takes the branch above and is returned to fUsedBuffers since
// a buffer cannot stash itself.
state.fAvailableBuffer = std::move(*this);
}
SkASSERT(!fBuffer);
} // else nothing to reset
}
void BufferSubAllocator::resetForNewBinding(size_t alignment) {
if (fOwner) {
const uint32_t minAlignment = fOwner->fCurrentBuffers[fStateIndex].fMinAlignment;
fAlignment = lcm_alignment(minAlignment, SkTo<uint32_t>(alignment));
} // else an empty BufferSubAllocator so ignore this, all allocations will fail
}
// ------------------------------------------------------------------------------------------------
// DrawBufferManager::BufferState
DrawBufferManager::BufferState::BufferState(BufferType type,
const char* label,
bool isGpuOnly,
const Options& opts,
const Caps* caps)
: fType(type)
// The buffer can be GPU-only if
// a) the caller does not intend to ever upload CPU data to the buffer; or
// b) CPU data will get uploaded to fBuffer only via a transfer buffer
, fAccessPattern(get_gpu_access_pattern(isGpuOnly || !caps->drawBufferCanBeMapped(), opts))
, fUseTransferBuffer(!isGpuOnly && !caps->drawBufferCanBeMapped())
, fLabel(label)
, fMinAlignment(minimum_alignment(type, fUseTransferBuffer, caps))
, fMinBlockSize(min_block_size(type, fMinAlignment, opts))
, fMaxBlockSize(max_block_size(type, fMinAlignment, opts)) {
SkASSERT(SkIsPow2(fMinAlignment));
SkASSERT(fMinBlockSize <= fMaxBlockSize);
}
sk_sp<Buffer> DrawBufferManager::BufferState::findOrCreateBuffer(ResourceProvider* provider,
Shareable shareable,
uint32_t byteCount) {
if (shareable == Shareable::kScratch) {
sk_sp<Buffer> scratchBuffer = provider->findOrCreateScratchBuffer(
byteCount, fType, fAccessPattern, fLabel, fUnavailableScratchBuffers);
if (scratchBuffer) {
fUnavailableScratchBuffers.add(scratchBuffer.get());
}
return scratchBuffer;
} else {
return provider->findOrCreateNonShareableBuffer(byteCount, fType, fAccessPattern, fLabel);
}
}
// ------------------------------------------------------------------------------------------------
// DrawBufferManager
DrawBufferManager::DrawBufferManager(ResourceProvider* resourceProvider,
const Caps* caps,
UploadBufferManager* uploadManager,
Options dbmOpts)
: fResourceProvider(resourceProvider)
, fCaps(caps)
, fUploadManager(uploadManager)
, fCurrentBuffers{{
// Mappable buffers
{BufferType::kVertex, "VertexBuffer", /*isGpuOnly=*/false, dbmOpts, caps},
{BufferType::kIndex, "IndexBuffer", /*isGpuOnly=*/false, dbmOpts, caps},
{BufferType::kUniform, "UniformBuffer", /*isGpuOnly=*/false, dbmOpts, caps},
{BufferType::kStorage, "StorageBuffer", /*isGpuOnly=*/false, dbmOpts, caps},
// GPU-only buffers
{BufferType::kStorage, "GPUOnlyStorageBuffer", /*isGpuOnly=*/true, dbmOpts, caps},
{BufferType::kVertexStorage, "VertexStorageBuffer", /*isGpuOnly=*/true, dbmOpts, caps},
{BufferType::kIndexStorage, "IndexStorageBuffer", /*isGpuOnly=*/true, dbmOpts, caps},
{BufferType::kIndirect, "IndirectStorageBuffer", /*isGpuOnly=*/true, dbmOpts, caps}}} {}
DrawBufferManager::~DrawBufferManager() {
// Must reset these *before* we are deleted
for (auto& b : fCurrentBuffers) {
b.fAvailableBuffer.reset();
}
}
void DrawBufferManager::onFailedBuffer() {
fMappingFailed = true;
// Clean up and unmap everything now
fClearList.clear();
for (auto& state : fCurrentBuffers) {
state.fAvailableBuffer.reset();
// We aren't allocating anything anymore so don't maintain this list. Any outstanding
// BufferSubAllocators will simply no-op when they get reset.
state.fUnavailableScratchBuffers.reset();
state.fLastBufferSize = 0;
}
for (auto& [buffer, _] : fUsedBuffers) {
if (buffer->isMapped()) {
buffer->unmap();
}
}
fUsedBuffers.clear();
}
bool DrawBufferManager::transferToRecording(Recording* recording) {
if (fMappingFailed) {
// All state should have been reset by onFailedBuffer() except for this error flag.
SkASSERT(fUsedBuffers.empty() && fClearList.empty());
#if defined(SK_DEBUG)
for (const auto& state : fCurrentBuffers) {
SkASSERT(!SkToBool(state.fAvailableBuffer));
SkASSERT(state.fUnavailableScratchBuffers.empty());
}
#endif
fMappingFailed = false;
return false;
}
for (auto& state : fCurrentBuffers) {
// Reset all available buffer sub allocators since they won't be allocatable anymore.
// This pushes the underlying resource and transfer range to fUsedBuffers
state.fAvailableBuffer.reset();
// BufferSubAllocators should have gone out of scope well before Recorder::snap() is called.
SkASSERT(state.fUnavailableScratchBuffers.empty());
// We reset the last buffer size back to 0 to keep the buffer growth behavior the same
// across calls to snap(). If we knew every snap() would be approximately the same workload,
// we could choose to keep the last alloc size as-is so that subsequent frames create
// fewer buffers. We choose *not* to do this because:
// - Chrome often snaps Recordings with disparate workloads within a frame (e.g. tile vs
// canvas2d) and we don't want to overallocate on a small recording.
// - It obfuscates the performance cost of the first frame if we reach a steady state that
// requires no additional buffer allocations.
// We could choose to reduce fLastBufferSize (e.g. halve it) to get a head start and reduce
// the potential for over-allocation, but in performance measurements on buffer-heavy scenes
// this did not lead to measurable improvements. Thus, we reset so every frame is the same.
state.fLastBufferSize = 0;
}
if (!fClearList.empty()) {
recording->priv().taskList()->add(ClearBuffersTask::Make(std::move(fClearList)));
}
for (auto& [buffer, transferBuffer] : fUsedBuffers) {
if (transferBuffer) {
SkASSERT(buffer);
SkASSERT(!fCaps->drawBufferCanBeMapped());
// Since the transfer buffer is managed by the UploadManager, we don't manually unmap
// it here or need to pass a ref into CopyBufferToBufferTask.
size_t copySize = buffer->size();
recording->priv().taskList()->add(
CopyBufferToBufferTask::Make(transferBuffer.fBuffer,
transferBuffer.fOffset,
std::move(buffer),
/*dstOffset=*/0,
copySize));
} else {
if (buffer->isMapped()) {
buffer->unmap();
}
recording->priv().addResourceRef(std::move(buffer));
}
}
fUsedBuffers.clear();
return true;
}
BufferSubAllocator DrawBufferManager::getBuffer(
int stateIndex,
size_t count,
size_t stride,
size_t xtraAlignment,
ClearBuffer cleared,
Shareable shareable) {
BufferState& state = fCurrentBuffers[stateIndex];
// The size for a buffer is aligned to the minimum block size for better resource reuse, which
// is more conservative than fMinAlignment.
uint32_t requiredBytes32 = validate_count_and_stride(count, stride, state.fMinBlockSize);
if (fMappingFailed || !requiredBytes32) {
return {};
}
const bool supportCpuUpload = state.fAccessPattern == AccessPattern::kHostVisible ||
state.fUseTransferBuffer;
// Shareable buffers must be GPU-only to actually share effectively.
SkASSERT(shareable == Shareable::kNo || !supportCpuUpload);
// For non-shareable buffers, we keep the largest relinquished non-shareable buffer in case it
// has room leftover to be used by future allocations. Scratch buffer ownership is entirely
// managed by the caller, so always create a new BufferSubAllocator.
if (shareable == Shareable::kNo) {
state.fAvailableBuffer.resetForNewBinding(xtraAlignment);
BindBufferInfo nextBinding = state.fAvailableBuffer.reserve(count, stride, count);
if (nextBinding) {
// The available buffer has enough room so reuse it. Subtracting the size of the binding
// ensures the caller's next request for count*stride bytes succeeds, and fOffset will
// be aligned to xtraAlignment.
state.fAvailableBuffer.fOffset -= nextBinding.fSize;
SkASSERT(state.fAvailableBuffer.fOffset % xtraAlignment == 0);
SkASSERT(state.fAvailableBuffer.fBuffer);
SkASSERT(state.fAvailableBuffer.fBuffer->shareable() == shareable);
SkASSERT(SkToBool(state.fAvailableBuffer.fMappedPtr) == supportCpuUpload);
return std::move(state.fAvailableBuffer);
}
// Not enough room in the available buffer so release it and create a new buffer.
state.fAvailableBuffer.reset();
}
// Create the next buffer by doubling the size of the previous buffer and clamping to be within
// the min and max block sizes if `requiredBytes32` is less than the max. Otherwise, create a
// buffer large enough to satisfy `requiredBytes32` but align it to fMinBlockSize.
uint32_t bufferSize = SkAlignTo(requiredBytes32, state.fMinBlockSize);
if (bufferSize < state.fMaxBlockSize) {
// fMaxBlockSize should be sufficiently small that there's no risk of overflowing here.
SkASSERT(std::numeric_limits<uint32_t>::max() /2 > state.fLastBufferSize);
bufferSize = std::max(bufferSize, std::min(state.fLastBufferSize * 2, state.fMaxBlockSize));
state.fLastBufferSize = bufferSize;
SkASSERT(bufferSize <= state.fMaxBlockSize);
} else {
// Jump to the max block size for subsequent amortized allocations if we get a really big
// buffer request.
state.fLastBufferSize = state.fMaxBlockSize;
}
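// For illustration with hypothetical option values: with fMinBlockSize = 64KB and
// fMaxBlockSize = 1MB, a series of small requests produces buffers of 64KB, 128KB, 256KB,
// 512KB, then 1MB thereafter, while a single 3MB request produces a 3MB buffer (rounded up to
// a multiple of fMinBlockSize) and pins fLastBufferSize at the 1MB maximum.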
SkASSERT(bufferSize >= requiredBytes32 && bufferSize >= state.fMinBlockSize);
sk_sp<Buffer> buffer = state.findOrCreateBuffer(fResourceProvider, shareable, bufferSize);
if (!buffer) {
this->onFailedBuffer();
return {};
}
BindBufferInfo transferBuffer;
void* mappedPtr = nullptr;
if (supportCpuUpload) {
if (state.fUseTransferBuffer) {
std::tie(mappedPtr, transferBuffer) = fUploadManager->makeBindInfo(buffer->size(),
fCaps->requiredTransferBufferAlignment(), "TransferForDataBuffer");
} else {
mappedPtr = buffer->map();
}
if (!mappedPtr) {
this->onFailedBuffer(); // Either transfer buffer failed or direct mapping failed
return {};
}
}
if (cleared == ClearBuffer::kYes) {
fClearList.push_back(BindBufferInfo{buffer.get(), 0, bufferSize});
}
// The returned buffer is not set to fAvailableBuffer because it is going to be passed up to
// the caller for their use first.
return BufferSubAllocator(this, stateIndex, std::move(buffer),
transferBuffer, mappedPtr, xtraAlignment);
}
// ------------------------------------------------------------------------------------------------
// StaticBufferManager
StaticBufferManager::StaticBufferManager(ResourceProvider* resourceProvider,
const Caps* caps)
: fResourceProvider(resourceProvider)
, fUploadManager(resourceProvider, caps)
, fRequiredTransferAlignment(SkTo<uint32_t>(caps->requiredTransferBufferAlignment()))
, fVertexBufferState(BufferType::kVertex, caps)
, fIndexBufferState(BufferType::kIndex, caps) {}
StaticBufferManager::~StaticBufferManager() = default;
StaticBufferManager::BufferState::BufferState(BufferType type, const Caps* caps)
: fBufferType(type)
, fMinimumAlignment(minimum_alignment(type, /*useTransferBuffers=*/true, caps))
, fTotalRequiredBytes(0) {}
// An ARM hardware bug (b/399631317) also means that static vertex data must be padded and zeroed
// out, so we always request a count-4-aligned offset, a count-4-aligned amount of space, and
// zero the padding.
VertexWriter StaticBufferManager::getVertexWriter(size_t count,
size_t stride,
BindBufferInfo* binding) {
const size_t size = count * stride;
const size_t alignedCount = SkAlign4(count);
void* data = this->prepareStaticData(&fVertexBufferState, size, stride * 4, binding);
if (data && alignedCount > count) { // skip the zeroing if the transfer allocation failed
const uint32_t byteDiff = (alignedCount - count) * stride;
void* zPtr = SkTAddOffset<void>(data, count * stride);
memset(zPtr, 0, byteDiff);
}
return VertexWriter{data, size};
}
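// For illustration, assuming a 4-byte minimum vertex alignment (hypothetical): getVertexWriter()
// with count = 10 and stride = 12 requests 120 bytes at a 48-byte alignment, so the transfer
// allocation is rounded up to 144 bytes and the final 24 bytes (the two padding vertices) are
// zeroed above.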
VertexWriter StaticBufferManager::getIndexWriter(size_t size, BindBufferInfo* binding) {
// The index writer does not have the same alignment requirements as vertex data, so we simply
// pass in the minimum alignment as the required alignment.
void* data = this->prepareStaticData(&fIndexBufferState,
size,
fIndexBufferState.fMinimumAlignment,
binding);
return VertexWriter{data, size};
}
void* StaticBufferManager::prepareStaticData(BufferState* state,
size_t requiredBytes,
size_t requiredAlignment,
BindBufferInfo* target) {
// Zero-out the target binding in the event of any failure in actually transferring data later.
// Unlike in BufferSubAllocator::reserve(), we do use SkTo<uint32_t> to check
// `requiredAlignment`. This is not dynamic data and is fully controlled by Graphite, so if it
// asserts, then there is a bug in the static data for a Renderer that must be fixed.
const uint32_t align32 = lcm_alignment(state->fMinimumAlignment,
SkTo<uint32_t>(requiredAlignment));
SkASSERT(target);
*target = {nullptr, 0};
uint32_t size32 = validate_count_and_stride(requiredBytes, /*stride=*/1, align32);
if (!size32 || fMappingFailed) {
return nullptr;
}
// Copied data must respect the transfer alignment, so align the reserved size to the LCM of
// the minimum alignment (which already accounts for both the buffer and transfer alignment)
// and the caller's required alignment.
size32 = SkAlignNonPow2(size32, align32);
auto [transferMapPtr, transferBindInfo] =
fUploadManager.makeBindInfo(size32,
fRequiredTransferAlignment,
"TransferForStaticBuffer");
if (!transferMapPtr) {
SKGPU_LOG_E("Failed to create or map transfer buffer that initializes static GPU data.");
fMappingFailed = true;
return nullptr;
}
state->fData.push_back(
{transferBindInfo,
target,
SkTo<uint32_t>(requiredAlignment),
#if defined(GPU_TEST_UTILS)
SkTo<uint32_t>(requiredBytes)
#endif
});
state->fTotalRequiredBytes = SkAlignNonPow2(state->fTotalRequiredBytes, align32) + size32;
return transferMapPtr;
}
bool StaticBufferManager::BufferState::createAndUpdateBindings(
ResourceProvider* resourceProvider,
Context* context,
QueueManager* queueManager,
GlobalCache* globalCache,
std::string_view label) const {
if (!fTotalRequiredBytes) {
return true; // No buffer needed
}
// The static buffer is always copyable when testing.
constexpr AccessPattern gpuAccessPattern =
#if defined(GPU_TEST_UTILS)
AccessPattern::kGpuOnlyCopySrc;
#else
AccessPattern::kGpuOnly;
#endif
sk_sp<Buffer> staticBuffer = resourceProvider->findOrCreateNonShareableBuffer(
fTotalRequiredBytes,
fBufferType,
gpuAccessPattern,
std::move(label));
if (!staticBuffer) {
SKGPU_LOG_E("Failed to create static buffer for type %d of size %u bytes.\n",
(int) fBufferType, fTotalRequiredBytes);
return false;
}
uint32_t offset = 0;
for (const CopyRange& data : fData) {
// Each copy range's size should be aligned to the lcm of the required alignment and minimum
// alignment so we can increment the offset in the static buffer.
const uint32_t alignment = lcm_alignment(fMinimumAlignment, data.fRequiredAlignment);
offset = SkAlignNonPow2(offset, alignment);
SkASSERT(!(offset % fMinimumAlignment) && !(offset % data.fRequiredAlignment));
uint32_t size = data.fSource.fSize;
data.fTarget->fBuffer = staticBuffer.get();
data.fTarget->fOffset = offset;
data.fTarget->fSize = size;
auto copyTask = CopyBufferToBufferTask::Make(
data.fSource.fBuffer, data.fSource.fOffset,
sk_ref_sp(data.fTarget->fBuffer), data.fTarget->fOffset,
size);
// For static buffers, we want them all to be optimized as GPU only buffers. If we are in
// a protected context, this means the buffers must be non-protected since they will be
// read in the vertex shader which doesn't allow protected memory access. Thus all the
// uploads to these buffers must be done as non-protected commands.
if (!queueManager->addTask(copyTask.get(), context, Protected::kNo)) {
SKGPU_LOG_E("Failed to copy data to static buffer.\n");
return false;
}
offset += size;
}
SkASSERT(offset == fTotalRequiredBytes);
globalCache->addStaticResource(std::move(staticBuffer));
return true;
}
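// For illustration with hypothetical sizes and fMinimumAlignment == 4: two ranges reserved via
// prepareStaticData(), 100 bytes at required alignment 48 followed by 64 bytes at required
// alignment 4, are copied to [0, 144) and [144, 208) of the static buffer (the first size was
// rounded up to its 48-byte LCM when reserved), matching the accumulated fTotalRequiredBytes
// of 208.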
StaticBufferManager::FinishResult StaticBufferManager::finalize(Context* context,
QueueManager* queueManager,
GlobalCache* globalCache) {
if (fMappingFailed) {
return FinishResult::kFailure;
}
const size_t totalRequiredBytes = fVertexBufferState.fTotalRequiredBytes +
fIndexBufferState.fTotalRequiredBytes;
SkASSERT(totalRequiredBytes <= kMaxStaticDataSize);
if (!totalRequiredBytes) {
return FinishResult::kNoWork;
}
if (!fVertexBufferState.createAndUpdateBindings(fResourceProvider,
context,
queueManager,
globalCache,
"StaticVertexBuffer")) {
return FinishResult::kFailure;
}
#if defined(GPU_TEST_UTILS)
skia_private::TArray<GlobalCache::StaticVertexCopyRanges> statVertCopy;
for (const CopyRange& data : fVertexBufferState.fData) {
statVertCopy.push_back({data.fTarget->fOffset,
data.fUnalignedSize,
data.fTarget->fSize,
data.fRequiredAlignment});
}
globalCache->testingOnly_SetStaticVertexInfo(
statVertCopy,
fVertexBufferState.fData[0].fTarget->fBuffer);
#endif
if (!fIndexBufferState.createAndUpdateBindings(fResourceProvider,
context,
queueManager,
globalCache,
"StaticIndexBuffer")) {
return FinishResult::kFailure;
}
queueManager->addUploadBufferManagerRefs(&fUploadManager);
// Reset the static buffer manager since the Recording's copy tasks now manage ownership of
// the transfer buffers and the GlobalCache owns the final static buffers.
fVertexBufferState.reset();
fIndexBufferState.reset();
return FinishResult::kSuccess;
}
} // namespace skgpu::graphite