blob: baeb5389f75e26498c26f6ed625250813ee51146 [file] [log] [blame]
/*
* Copyright 2021 Google LLC
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef skgpu_graphite_DrawWriter_DEFINED
#define skgpu_graphite_DrawWriter_DEFINED
#include "src/base/SkAutoMalloc.h"
#include "src/gpu/BufferWriter.h"
#include "src/gpu/graphite/BufferManager.h"
#include "src/gpu/graphite/DrawTypes.h"
#include <optional>
namespace skgpu::graphite {
namespace DrawPassCommands {
class List;
}
/**
* DrawWriter is a helper around recording draws (to a temporary buffer or directly to a
* CommandBuffer), particularly when the number of draws is not known ahead of time, or the vertex
* and instance data is computed at record time and does not have a known size.
*
* To use, construct the DrawWriter with the current pipeline layout or call newPipelineState() on
* an existing DrawWriter and then bind that matching pipeline. When other dynamic state needs to
* change between draw calls, notify the DrawWriter using newDynamicState() before recording the
* modifications. See the listing below for how to append dynamic data or draw with existing buffers:
*
* CommandBuffer::draw(vertices)
* - dynamic vertex data -> DrawWriter::Vertices verts(writer);
* verts.append(n) << ...;
* - fixed vertex data -> writer.draw(vertices, vertexCount)
*
* CommandBuffer::drawIndexed(vertices, indices)
* - dynamic vertex data -> unsupported
* - fixed vertex,index data -> writer.drawIndexed(vertices, indices, indexCount)
*
* CommandBuffer::drawInstanced(vertices, instances)
* - dynamic instance data + fixed vertex data ->
* DrawWriter::Instances instances(writer, vertices, {}, vertexCount);
* instances.append(n) << ...;
* - fixed vertex and instance data ->
* writer.drawInstanced(vertices, vertexCount, instances, instanceCount)
*
* CommandBuffer::drawIndexedInstanced(vertices, indices, instances)
* - dynamic instance data + fixed vertex, index data ->
* DrawWriter::Instances instances(writer, vertices, indices, indexCount);
* instances.append(n) << ...;
* - fixed vertex, index, and instance data ->
* writer.drawIndexedInstanced(vertices, indices, indexCount, instances, instanceCount)
*
* NOTE: DrawWriter automatically handles failures to find or create a GPU buffer or map it to
* be writable. All returned VertexWriters will have a non-null pointer to write to, even if it will
* be discarded due to GPU failure at Recorder::snap() time.
*/
class DrawWriter {
public:
    // NOTE: This constructor creates a writer whose static and append strides default to 0, so
    // 'newPipelineState()' must be called once the pipeline properties are known before it's used.
    DrawWriter(DrawPassCommands::List*, DrawBufferManager*);

    // Cannot move or copy
    DrawWriter(const DrawWriter&) = delete;
    DrawWriter(DrawWriter&&) = delete;

    // flush() should be called before the writer is destroyed
    ~DrawWriter() { SkASSERT(fPendingCount == 0); }

    DrawBufferManager* bufferManager() { return fManager; }

    // Issue draw calls for any pending vertex and instance data collected by the writer.
    // Use either flush() or newDynamicState() based on context and readability.
    void flush() {
        // Padding must be requested before flushInternal() issues the draw for the pending data.
        this->padAndZero();
        this->flushInternal();
    }
    void newDynamicState() { this->flush(); }

    // Notify the DrawWriter that a new pipeline needs to be bound, providing the primitive type,
    // attribute strides, and render state of the new pipeline. This issues draw calls for pending
    // data that relied on the old pipeline, so this must be called *before* binding new pipeline.
    //
    // 'staticStride' and 'appendStride' are stored as 32-bit values; both must fit in a uint32_t
    // (asserted in debug builds).
    void newPipelineState(PrimitiveType type,
                          size_t staticStride,
                          size_t appendStride,
                          SkEnumBitMask<RenderStateFlags> newRenderState,
                          std::optional<BarrierType> barrierType) {
        this->flush();
        // Once flushed, any pending data must have been drawn.
        SkASSERT(fPendingCount == 0);

        // The stride members are uint32_t, so catch a silently truncated stride instead of
        // recording draws with a wrapped value.
        SkASSERT(static_cast<uint32_t>(staticStride) == staticStride);
        SkASSERT(static_cast<uint32_t>(appendStride) == appendStride);

        fPrimitiveType = type;
        fStaticStride = static_cast<uint32_t>(staticStride);
        fAppendStride = static_cast<uint32_t>(appendStride);
        fRenderState = newRenderState;

        // ARM hardware b/399631317: The initial offset when appending vertices must be 4-count
        // aligned, regardless of the previous render state.
        fShouldAlign4 = SkToBool(newRenderState & RenderStateFlags::kAppendVertices);

        // Assign the (optional) barrier type. If a valid value, then the DrawWriter will append
        // AddBarrier commands of the indicated type prior to appending any draw commands used with
        // this pipeline.
        fBarrierToIssueBeforeDraws = barrierType;
    }

#ifdef SK_DEBUG
    // Query current pipeline state for validation
    size_t appendStride() const { return fAppendStride; }
    size_t staticStride() const { return fStaticStride; }
    PrimitiveType primitiveType() const { return fPrimitiveType; }
#endif

    // Collects new vertex data for a call to CommandBuffer::draw(). Automatically accumulates
    // vertex data into a buffer, issuing draw and bind calls as needed when a new buffer is
    // required, so that it is seamless to the caller. The draws do not use instances or indices.
    //
    // Usage (assuming writer has already had 'newPipelineState()' called with correct strides):
    //    DrawWriter::Vertices verts{writer};
    //    verts.append(n) << x << y << ...;
    //
    // This should not be used when the vertex stride is 0.
    class Vertices;

    // Collects new instance data for a call to CommandBuffer::drawInstanced() or
    // drawIndexedInstanced(). The specific draw call that's issued depends on if a non-null index
    // buffer is provided for the template. Like DrawWriter::Vertices, this automatically merges
    // the appended data into as few buffer binds and draw calls as possible, while remaining
    // seamless to the caller.
    //
    // Usage for drawInstanced (assuming writer has correct strides):
    //    DrawWriter::Instances instances{writer, fixedVerts, {}, fixedVertexCount};
    //    instances.append(n) << foo << bar << ...;
    //
    // Usage for drawIndexedInstanced:
    //    DrawWriter::Instances instances{writer, fixedVerts, fixedIndices, fixedIndexCount};
    //    instances.append(n) << foo << bar << ...;
    //
    // This should not be used when the instance stride is 0. However, the fixed vertex buffer can
    // be null (or have a stride of 0) if the vertex shader only relies on the vertex ID and no
    // other per-vertex data.
    class Instances;

    // Collects new instance data for a call to CommandBuffer::drawInstanced() or
    // drawIndexedInstanced() (depending on presence of index data in the template). Unlike the
    // Instances mode, the template's index or vertex count is not provided at the time of creation.
    // Instead, DynamicInstances can be used with pipeline programs that can have a flexible number
    // of vertices per instance. Appended instances specify a proxy object that can be converted
    // to the minimum index/vertex count they must be drawn with; but if they are later batched with
    // instances that would use more, the pipeline's vertex shader knows how to handle it.
    //
    // The proxy object serves as a useful point of indirection when the actual index count is
    // expensive to compute, but can be derived from correlated geometric properties. The proxy
    // can store those properties and accumulate a "worst-case" and then calculate the index count
    // when DrawWriter has to flush.
    //
    // The VertexCountProxy type must provide:
    //  - a default constructor and copy assignment, where the initial value represents the minimum
    //    supported vertex count.
    //  - a conversion operator to 'unsigned int' that converts the proxy to the actual index count
    //    that is needed in order to dispatch a draw call.
    //  - operator <<(const V&) where V is any type the caller wants to pass to append() that
    //    represents the proxy for the about-to-be-written instances. This operator then updates its
    //    internal state to represent the worst case between what had previously been recorded and
    //    the latest V value.
    //
    // Usage for drawInstanced (fixedIndices == {}) or drawIndexedInstanced:
    //    DrawWriter::DynamicInstances<ProxyType> instances(writer, fixedVerts, fixedIndices);
    //    instances.append(minIndexProxy1, n1) << ...;
    //    instances.append(minIndexProxy2, n2) << ...;
    //
    // In this example, if the two sets of instances were contiguous, a single draw call with
    // (n1 + n2) instances would still be made using max(minIndexCount1, minIndexCount2) as the
    // index/vertex count, where 'minIndexCountX' is derived from 'minIndexProxyX'. If the available
    // vertex data from the DrawBufferManager forced a flush after the first, then the second would
    // use minIndexCount2 unless a subsequent compatible DynamicInstances template appended more
    // contiguous data.
    template <typename VertexCountProxy>
    class DynamicInstances;

    // Issues draws with fully specified data. This can be used when all instance data has already
    // been written to known buffers, or when the vertex shader only depends on the vertex or
    // instance IDs.
    //
    // This will not merge with any already appended instance or vertex data, pending data is issued
    // in its own draw call first. These are currently unused.
    void draw(BindBufferInfo vertices, unsigned int vertexCount) {
        // The vertex buffer is passed in the append-data slot, with no static or index buffer.
        this->bindAndFlush({}, {}, vertices, 0, vertexCount);
    }
    void drawIndexed(BindBufferInfo vertices, BindBufferInfo indices, unsigned int indexCount) {
        // Here the vertex buffer is passed in the static-data slot alongside the index buffer.
        this->bindAndFlush(vertices, indices, {}, 0, indexCount);
    }
    void drawInstanced(BindBufferInfo vertices, unsigned int vertexCount,
                       BindBufferInfo instances, unsigned int instanceCount) {
        SkASSERT(vertexCount > 0);
        this->bindAndFlush(vertices, {}, instances, vertexCount, instanceCount);
    }
    void drawIndexedInstanced(BindBufferInfo vertices, BindBufferInfo indices,
                              unsigned int indexCount, BindBufferInfo instances,
                              unsigned int instanceCount) {
        SkASSERT(indexCount > 0);
        this->bindAndFlush(vertices, indices, instances, indexCount, instanceCount);
    }

private:
    // Both of these pointers must outlive the DrawWriter.
    DrawPassCommands::List* fCommandList;
    DrawBufferManager* fManager;

    SkAutoMalloc fFailureStorage; // storage address for VertexWriter when GPU buffer mapping fails

    // Current operating mode of the DrawWriter, dictating how draw data is provided and
    // interpreted. Determines whether fPendingCount refers to vertices or instances, i.e. what
    // kind of data the append target (fAppend) holds. Set via newPipelineState().
    SkEnumBitMask<RenderStateFlags> fRenderState;
    PrimitiveType fPrimitiveType;
    uint32_t fStaticStride;
    uint32_t fAppendStride;

    // - fAppend: Holds buffer information for data that is generated and appended during the
    //            drawPass. The data can be either vertex (kAppendVertices) or instance
    //            (kAppendInstances/kAppendDynamicInstances) data.
    // - fStatic: Holds buffer information that does not change between invocations of a renderstep.
    //            Currently this only holds vertex data, but this could change in the future.
    // - fIndices: Defines the (for now static) buffer used for any kind of index drawing. A
    //             renderstep with a valid index buffer implies that it will be performing indexed
    //             drawing.
    BindBufferInfo fAppend;
    BindBufferInfo fStatic;
    BindBufferInfo fIndices;

    // These track the buffers *last bound* by the command list. Used to ensure minimal binding.
    BindBufferInfo fBoundAppend;
    BindBufferInfo fBoundStatic;
    BindBufferInfo fBoundIndices;

    // Per-instance count for instanced draws (vertex count if no index buffer, index count
    // otherwise).
    // - For fixed instancing (kAppendInstances): Represents the constant vertex/index count per
    //   instance.
    // - For dynamic instancing (kAppendDynamicInstances): Represents the *maximum* vertex/index
    //   count required across the currently accumulated batch of instances (updated via max()).
    // - Not used (remains 0) for non-instanced draws (kAppendVertices) or direct draw calls.
    uint32_t fTemplateCount;

    // Number of items (vertices or instances, depending on fRenderState) that have been appended
    // via an Appender (Vertices, Instances, DynamicInstances) but not yet issued in a draw call.
    // Reset to 0 after a flush().
    uint32_t fPendingCount;

    // ARM hardware b/399631317: Track whenever a newPipelineState occurs with appending vertices,
    // to let the next reserve() call know that we need a 4 count aligned offset.
    bool fShouldAlign4;

    std::optional<BarrierType> fBarrierToIssueBeforeDraws = std::nullopt;

    void flushInternal();

    // ARM hardware b/399631317: Unreferenced vertices in sequential indexes of 4 will be
    // speculatively executed. To work around this, we pad the buffer by requesting additional
    // space, and then ensure valid, minimally deleterious data by memsetting the padding to zero.
    //
    // Only acts when there is pending data and the render state is kAppendVertices.
    void padAndZero() {
        if (fPendingCount && (fRenderState & RenderStateFlags::kAppendVertices)) {
            const uint32_t alignedCount = SkAlign4(fPendingCount);
            if (alignedCount > fPendingCount) {
                // At most 3 elements of padding are needed to reach the next multiple of 4.
                const uint32_t byteDiff = (alignedCount - fPendingCount) * fAppendStride;
                SkASSERT(!fManager->willVertexOverflow(byteDiff, 1, 1));
                auto[zWriter, zBuff] = fManager->getVertexWriter(byteDiff, 1, 1);
                // A null writer means the manager could not provide the bytes; nothing to zero.
                if (zWriter) {
                    zWriter.zeroBytes(byteDiff);
                }
            }
        }
    }

    void setTemplate(BindBufferInfo staticData, BindBufferInfo indices, BindBufferInfo appendData,
                     uint32_t templateCount);

    // Installs the given buffers as the current template, marks 'drawCount' items as pending and
    // immediately flushes them. Shared implementation of the public draw*() entry points.
    void bindAndFlush(BindBufferInfo staticData, BindBufferInfo indices, BindBufferInfo appendData,
                      uint32_t templateCount, unsigned int drawCount) {
        SkASSERT(drawCount > 0);
        SkASSERT(!fAppender); // Shouldn't be appending and manually drawing at the same time.
        SkASSERT(fPendingCount == 0); // Any prior appends must have been flushed by now.
        // CAUTION: If appending vertices, we make NO checks here to ensure that the initial offset
        // is four count aligned or that the data is padded. Caller MUST ensure any unaligned data
        // is safe.
        this->setTemplate(staticData, indices, appendData, templateCount);
        fPendingCount = drawCount;
        // NOTE(review): flush() also runs padAndZero(), which in kAppendVertices mode with a
        // 'drawCount' that is not a multiple of 4 requests padding bytes from fManager rather than
        // from the caller-supplied buffer -- confirm this is intended.
        this->flush();
    }

    // RAII - Sets the DrawWriter's template and marks the writer in append mode (disabling direct
    // draws until the Appender is destructed).
    class Appender;
    SkDEBUGCODE(const Appender* fAppender = nullptr;)
};
// Appender implementations for DrawWriter that set the template on creation and provide a
// template-specific API to accumulate vertex/instance data.
class DrawWriter::Appender {
public:
    // Registers this appender with 'w' (debug builds only) and starts with no reserved space.
    // 'renderState' is only used to validate that the writer's current pipeline state matches the
    // kind of appender being created.
    Appender(DrawWriter& w, SkEnumBitMask<RenderStateFlags> renderState)
            : fDrawer(w)
            , fReservedCount(0)
            , fNextWriter() {
        SkASSERT(w.fAppendStride > 0);
        SkASSERT(!w.fAppender);
        SkASSERT(w.fRenderState == renderState);
        SkDEBUGCODE(w.fAppender = this;)
    }

    // Hands any reserved-but-unused elements back to the buffer manager and unregisters the
    // appender from the writer.
    virtual ~Appender() {
        if (fReservedCount > 0) {
            // NOTE(review): unlike reserve(), this does not shrink fDrawer.fAppend.fSize by the
            // returned bytes -- confirm that is intentional.
            fDrawer.fManager->returnVertexBytes(fReservedCount * fDrawer.fAppendStride);
        }

        SkASSERT(fDrawer.fAppender == this);
        SkDEBUGCODE(fDrawer.fAppender = nullptr;)
    }

protected:
    DrawWriter&  fDrawer;
    uint32_t     fReservedCount; // in target stride units
    VertexWriter fNextWriter;    // writing to the target buffer binding

    // Hook invoked right before reserve() flushes pending draws because the newly reserved chunk
    // is not contiguous with the current append buffer. Default is a no-op.
    virtual void prepareFlush() {}

    // Reserves 'count' elements, managing potential re-allocation and buffer contiguity.
    // For vertex appends (AppendVerts), addresses ARM hardware issue (b/399631317) by:
    //  1. Requesting 4-count aligned space (for necessary padding).
    //  2. Ensuring the initial buffer offset is 4-count stride aligned after a newPipelineState().
    //  3. Checking whether a new reservation will overflow to a new buffer, and if it will, safely
    //     padding the current buffer.
    // If current reservation is insufficient, any existing reserved (but unused) contiguous
    // bytes are returned to the manager before attempting a new allocation.
    // If the newly allocated chunk isn't contiguous with the current target buffer,
    // pending draws are flushed, and the target is updated to this new chunk.
    //
    // If the manager cannot provide a writer, fNextWriter is left null and no state besides the
    // returned bytes and the alignment flag is changed; append() handles that case.
    template<bool AppendVerts>
    void reserve(unsigned int count) {
        SkASSERT(AppendVerts == SkToBool(fDrawer.fRenderState & RenderStateFlags::kAppendVertices));

        uint32_t alignedCount = count;
        if constexpr (AppendVerts) {
            // Round the total (pending + new) up to a multiple of 4 and request the difference.
            alignedCount = SkAlign4(fDrawer.fPendingCount + count) - fDrawer.fPendingCount;
        }

        if (fReservedCount >= alignedCount) {
            // The existing reservation suffices, so no allocation happens. fShouldAlign4 is
            // deliberately left untouched here: it must only be consumed by a reservation that
            // actually requests space, otherwise a reserve(0) issued right after
            // newPipelineState() would drop the 4-count offset alignment request.
            return;
        } else if (fReservedCount > 0) {
            // Have contiguous elements that weren't used, so return them to the manager.
            SkASSERT(SkTFitsIn<uint32_t>(static_cast<uint64_t>(fReservedCount) *
                                         static_cast<uint64_t>(fDrawer.fAppendStride)));
            const uint32_t returnedBytes = fReservedCount * fDrawer.fAppendStride;
            SkASSERT(fDrawer.fAppend.fSize >= returnedBytes);
            fDrawer.fManager->returnVertexBytes(returnedBytes);
            fDrawer.fAppend.fSize -= returnedBytes;
            fReservedCount = 0;
        }

        // Passed as the last argument of willVertexOverflow()/getVertexWriter(). It is widened to
        // four strides for the first vertex reservation after a newPipelineState().
        uint32_t alignedStride = fDrawer.fAppendStride;
        if constexpr (AppendVerts) {
            if (fDrawer.fShouldAlign4) {
                alignedStride *= 4;
                fDrawer.fShouldAlign4 = false;
            }

            // If we are appending verts, we need to check if our requested allocation will
            // overflow the current buffer. If it does, we need to get a vertex writer to pad and
            // zero out the old buffer *before* we get our new one and lose bufferWriter's mapping
            // to the transfer buffer.
            if (fDrawer.fManager->willVertexOverflow(
                        alignedCount, fDrawer.fAppendStride, alignedStride)) {
                fDrawer.padAndZero();
                // When we overflow the buffer, the previous alignedCount will be incorrect, since
                // it accounted for the about-to-be-flushed fPendingCount, which will not apply to
                // writing to the new buffer.
                alignedCount = SkAlign4(count);
            }
        }

        // NOTE: Cannot bind tuple directly to fNextWriter, compilers don't produce the right
        // move assignment.
        auto [writer, reservedChunk] =
                fDrawer.fManager->getVertexWriter(alignedCount, fDrawer.fAppendStride, alignedStride);
        if (writer) {
            fReservedCount = alignedCount;

            if (reservedChunk.fBuffer != fDrawer.fAppend.fBuffer ||
                reservedChunk.fOffset !=
                        fDrawer.fAppend.fOffset + fDrawer.fPendingCount * fDrawer.fAppendStride) {
                // Not contiguous, so flush and update binding to 'reservedChunk'
                this->prepareFlush();
                fDrawer.flushInternal();

                fDrawer.fAppend = reservedChunk;
            } else {
                // Contiguous with the pending data, so just grow the current binding.
                fDrawer.fAppend.fSize += reservedChunk.fSize;
            }
        }
        fNextWriter = std::move(writer);
    }

    // Returns a writer positioned at the next 'count' elements of the append buffer and records
    // them as pending. 'count' must be > 0. The returned writer always has storage to write to:
    // if no GPU-backed writer is available, CPU-side scratch storage is returned instead and the
    // pending count is left unchanged.
    template<bool AppendVerts>
    VertexWriter append(unsigned int count) {
        SkASSERT(count > 0);
        this->reserve<AppendVerts>(count);
        if (!fNextWriter) SK_UNLIKELY {
            // If the GPU mapped buffer failed, ensure we have a sufficiently large CPU address to
            // write to so that RenderSteps don't have to worry about error handling. The Recording
            // will fail since the map failure is tracked by BufferManager.
            // Since one of the reasons for GPU mapping failure is that count*stride does not fit
            // in 32-bits, we calculate the CPU-side size carefully.
            uint64_t size = static_cast<uint64_t>(count) *
                            static_cast<uint64_t>(fDrawer.fAppendStride);
            if (!SkTFitsIn<size_t>(size)) {
                sk_report_container_overflow_and_die();
            }
            return VertexWriter(fDrawer.fFailureStorage.reset(size, SkAutoMalloc::kReuse_OnShrink),
                                SkTo<size_t>(size));
        }

        SkASSERT(fReservedCount >= count);
        fReservedCount -= count;
        fDrawer.fPendingCount += count;
        // Since we have a writer, we know count*stride is valid. Hand back the current writer and
        // advance fNextWriter past the elements just given out.
        return std::exchange(fNextWriter, fNextWriter.makeOffset(count * fDrawer.fAppendStride));
    }
};
class DrawWriter::Vertices : private DrawWriter::Appender {
public:
    // Puts 'w' into vertex-append mode: the writer's current append buffer is the only binding,
    // with no static buffer, no index buffer, and a template count of 0.
    Vertices(DrawWriter& w) : Appender(w, RenderStateFlags::kAppendVertices) {
        w.setTemplate(/*staticData=*/{}, /*indices=*/{}, /*appendData=*/w.fAppend,
                      /*templateCount=*/0);
    }

    // Ensures space for at least 'count' vertices is reserved ahead of appending.
    void reserve(unsigned int count) { Appender::reserve<true>(count); }

    // Returns a writer for the next 'count' vertices.
    VertexWriter append(unsigned int count) { return Appender::append<true>(count); }
};
class DrawWriter::Instances : private DrawWriter::Appender {
public:
    // Puts 'w' into instance-append mode using 'vertices' as the static buffer, 'indices' as the
    // index buffer and 'vertexCount' (which must be > 0) as the per-instance template count. The
    // writer's current append buffer receives the instance data.
    Instances(DrawWriter& w,
              BindBufferInfo vertices,
              BindBufferInfo indices,
              unsigned int vertexCount)
            : Appender(w, RenderStateFlags::kAppendInstances) {
        SkASSERT(vertexCount > 0);
        w.setTemplate(/*staticData=*/vertices, indices, /*appendData=*/w.fAppend, vertexCount);
    }

    // Ensures space for at least 'count' instances is reserved ahead of appending.
    void reserve(unsigned int count) { Appender::reserve<false>(count); }

    // Returns a writer for the next 'count' instances.
    VertexWriter append(unsigned int count) { return Appender::append<false>(count); }
};
template <typename VertexCountProxy>
class DrawWriter::DynamicInstances : private DrawWriter::Appender {
public:
    // Puts 'w' into dynamic-instance-append mode. The template count starts at 0 and is raised
    // from the proxy whenever updateTemplateCount() runs.
    DynamicInstances(DrawWriter& w, BindBufferInfo vertices, BindBufferInfo indices)
            : Appender(w, RenderStateFlags::kAppendDynamicInstances) {
        w.setTemplate(vertices, indices, w.fAppend, 0);
    }

    // Publishes the accumulated count one final time; see the "destructor" case described on
    // updateTemplateCount().
    ~DynamicInstances() override {
        this->updateTemplateCount();
    }

    // Returns a writer for 'instanceCount' instances and folds 'vertexCount' into the proxy.
    template <typename V>
    VertexWriter append(const V& vertexCount, unsigned int instanceCount) {
        VertexWriter instanceWriter = this->Appender::append<false>(instanceCount);
        // The proxy is updated only after the append: the append may have triggered a flush that
        // reset the proxy, and the instances written through 'instanceWriter' are not part of
        // that flush, so 'vertexCount' must be accounted for when they are eventually drawn.
        fProxy << vertexCount;
        return instanceWriter;
    }

    // Ensures space for at least 'count' instances is reserved ahead of appending.
    void reserve(unsigned int count) { this->Appender::reserve<false>(count); }

private:
    // The "reserve" case of updateTemplateCount(): invoked by Appender::reserve() just before it
    // flushes because the new chunk is not contiguous with the current buffer.
    void prepareFlush() override {
        this->updateTemplateCount();
    }

    // Raises the writer's template count to the value accumulated in the proxy (never lowering
    // it), then resets the proxy. Called from two places:
    //  1. prepareFlush(), when reserve() acquires a non-contiguous buffer: the data already
    //     written to the previous buffer is about to be flushed, so its count must be visible.
    //  2. The destructor, once all appends through this object are finished: the count becomes
    //     visible to the next flush, or is combined with a following DynamicInstances object if
    //     there was no pipeline change in between.
    // Taking the maximum lets several dynamic-instance appends share one batch, drawn with the
    // largest count any of them requires.
    void updateTemplateCount() {
        const uint32_t proxyCount = static_cast<uint32_t>(fProxy);
        if (fDrawer.fTemplateCount < proxyCount) {
            fDrawer.fTemplateCount = proxyCount;
        }
        // Start the next batch over from the proxy's default (minimum) value.
        fProxy = {};
    }

    VertexCountProxy fProxy = {};
};
} // namespace skgpu::graphite
#endif // skgpu_graphite_DrawWriter_DEFINED