/*
* Copyright 2021 Google LLC
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "src/gpu/graphite/DrawList.h"
#include "include/core/SkTypes.h"
#include "include/gpu/graphite/Recorder.h"
#include "src/core/SkTraceEvent.h"
#include "src/gpu/graphite/DrawPass.h"
#include "src/gpu/graphite/DrawWriter.h"
#include "src/gpu/graphite/KeyContext.h"
#include "src/gpu/graphite/RecorderPriv.h"
#include "src/gpu/graphite/Renderer.h"
#include "src/gpu/graphite/geom/Geometry.h"
namespace skgpu::graphite {
namespace {
// Writes uniform data either to uniform buffers or to shared storage buffers, and tracks when
// bindings need to change between draws.
class UniformTracker {
public:
UniformTracker(bool useStorageBuffers) : fUseStorageBuffers(useStorageBuffers) {}
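    // Writes the uniform data cached at 'index' to a GPU buffer if it hasn't been uploaded yet
    // (or re-uploads it to the current storage buffer to avoid a rebind). Returns true when the
    // resulting binding differs from the last-used binding, meaning bindUniforms() must be
    // called before the next draw; returns false for an invalid or already-current index.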
bool writeUniforms(UniformDataCache& uniformCache,
DrawBufferManager* bufferMgr,
UniformDataCache::Index index) {
if (index >= UniformDataCache::kInvalidIndex) {
return false;
}
if (index == fLastIndex) {
return false;
}
fLastIndex = index;
UniformDataCache::Entry& uniformData = uniformCache.lookup(index);
const size_t uniformDataSize = uniformData.fCpuData.size();
// Upload the uniform data if we haven't already.
// Alternatively, re-upload the uniform data to avoid a rebind if we're using storage
        // buffers. This results in more data being uploaded, but the tradeoff seems worthwhile.
if (!uniformData.fBufferBinding.fBuffer ||
(fUseStorageBuffers && uniformData.fBufferBinding.fBuffer != fLastBinding.fBuffer)) {
BufferWriter writer;
std::tie(writer, uniformData.fBufferBinding) =
fUseStorageBuffers ? bufferMgr->getAlignedSsboWriter(1, uniformDataSize)
: bufferMgr->getUniformWriter(1, uniformDataSize);
// Early out if buffer mapping failed.
if (!writer) {
                return false;
}
writer.write(uniformData.fCpuData.data(), uniformDataSize);
if (fUseStorageBuffers) {
// When using storage buffers, store the SSBO index in the binding's offset field
// and always use the entire buffer's size in the size field.
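                // For example, 64-byte uniforms written at buffer offset 256 become fOffset = 4;
                // the shader then indexes element 4 of the SSBO, while the buffer itself is
                // bound once with offset 0 (see bindUniforms()).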
SkASSERT(uniformData.fBufferBinding.fOffset % uniformDataSize == 0);
uniformData.fBufferBinding.fOffset /= uniformDataSize;
uniformData.fBufferBinding.fSize = uniformData.fBufferBinding.fBuffer->size();
}
}
const bool needsRebind =
uniformData.fBufferBinding.fBuffer != fLastBinding.fBuffer ||
(!fUseStorageBuffers && uniformData.fBufferBinding.fOffset != fLastBinding.fOffset);
fLastBinding = uniformData.fBufferBinding;
return needsRebind;
}
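    // Appends the bind command for the binding captured by the last successful writeUniforms()
    // call. For storage buffers the stored SSBO index is not part of the GPU binding itself, so
    // the offset field is zeroed out before binding.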
void bindUniforms(UniformSlot slot, DrawPassCommands::List* commandList) {
BindBufferInfo binding = fLastBinding;
if (fUseStorageBuffers) {
// Track the SSBO index in fLastBinding, but set offset = 0 in the actual used binding.
binding.fOffset = 0;
}
commandList->bindUniformBuffer(binding, slot);
}
uint32_t ssboIndex() const {
// The SSBO index for the last-bound storage buffer is stored in the binding's offset field.
return fLastBinding.fOffset;
}
private:
// Internally track the last binding returned, so that we know whether new uploads or rebindings
// are necessary. If we're using SSBOs, this is treated specially -- the fOffset field holds the
// index in the storage buffer of the last-written uniforms, and the offsets used for actual
// bindings are always zero.
BindBufferInfo fLastBinding;
// This keeps track of the last index used for writing uniforms from a provided uniform cache.
// If a provided index matches the last index, the uniforms are assumed to already be written
// and no additional uploading is performed. This assumes a UniformTracker will always be
// provided with the same uniform cache.
UniformDataCache::Index fLastIndex = UniformDataCache::kInvalidIndex;
const bool fUseStorageBuffers;
};
// Tracks when to issue BindTexturesAndSamplers commands to a command list and converts
// TextureDataBlocks into the command list's texture/sampler arrays as needed.
class TextureTracker {
public:
TextureTracker(TextureDataCache* textureCache)
: fTextureCache(textureCache) {}
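    // Returns true if 'bindingIndex' is valid and differs from the last-seen index, in which
    // case a new BindTexturesAndSamplers command must be recorded via bindTextures().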
bool setCurrentTextureBindings(TextureDataCache::Index bindingIndex) {
if (bindingIndex < TextureDataCache::kInvalidIndex && fLastIndex != bindingIndex) {
fLastIndex = bindingIndex;
return true;
}
// No binding change
return false;
}
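    // Resolves the last-set TextureDataBlock into the raw texture and sampler arrays that the
    // command list stores for the BindTexturesAndSamplers command.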
void bindTextures(DrawPassCommands::List* commandList) {
SkASSERT(fLastIndex < TextureDataCache::kInvalidIndex);
TextureDataBlock binding = fTextureCache->lookup(fLastIndex);
auto [textures, samplers] =
commandList->bindDeferredTexturesAndSamplers(binding.numTextures());
for (int i = 0; i < binding.numTextures(); ++i) {
auto [t, s] = binding.texture(i);
textures[i] = t.get();
samplers[i] = s;
}
}
private:
TextureDataCache::Index fLastIndex = TextureDataCache::kInvalidIndex;
TextureDataCache* const fTextureCache;
};
} // anonymous namespace
const Transform& DrawList::deduplicateTransform(const Transform& localToDevice) {
// TODO: This is a pretty simple deduplication strategy and doesn't take advantage of the stack
// knowledge that Device has.
if (fTransforms.empty() || fTransforms.back() != localToDevice) {
fTransforms.push_back(localToDevice);
}
return fTransforms.back();
}
void DrawList::recordDraw(const Renderer* renderer,
const Transform& localToDevice,
const Geometry& geometry,
const Clip& clip,
DrawOrder ordering,
UniquePaintParamsID paintID,
SkEnumBitMask<DstUsage> dstUsage,
BarrierType barrierBeforeDraws,
PipelineDataGatherer* gatherer,
const StrokeStyle* stroke) {
SkASSERT(localToDevice.valid());
SkASSERT(!geometry.isEmpty() && !clip.drawBounds().isEmptyNegativeOrNaN());
SkASSERT(!(renderer->depthStencilFlags() & DepthStencilFlags::kStencil) ||
ordering.stencilIndex() != DrawOrder::kUnassigned);
    // TODO: Add validation that the renderer's expected shape type and stroke params match
    // those provided.
// Create a sort key for every render step in this draw, extracting out any
// RenderStep-specific data.
UniformDataCache::Index shadingUniformIndex = UniformDataCache::kInvalidIndex;
if (paintID.isValid()) {
UniformDataBlock paintUniforms = gatherer->endPaintData();
if (paintUniforms) {
shadingUniformIndex = fShadingUniformDataCache.insert(paintUniforms);
}
}
const Draw& draw = fDraws.emplace_back(renderer,
this->deduplicateTransform(localToDevice),
geometry,
clip,
ordering,
barrierBeforeDraws,
stroke);
fRenderStepCount += renderer->numRenderSteps();
gatherer->setRenderStepManagerActive();
for (int stepIndex = 0; stepIndex < draw.renderer()->numRenderSteps(); ++stepIndex) {
gatherer->rewindForRenderStep();
const RenderStep* const step = draw.renderer()->steps()[stepIndex];
const bool performsShading = step->performsShading();
GraphicsPipelineCache::Index pipelineIndex = fPipelineCache.insert(
{ step->renderStepID(),
performsShading ? paintID : UniquePaintParamsID::Invalid() });
step->writeUniformsAndTextures(draw.drawParams(), gatherer);
auto [stepUniforms, combinedTextures] = gatherer->endRenderStepData(performsShading);
UniformDataCache::Index geomUniformIndex = stepUniforms ?
fGeometryUniformDataCache.insert(stepUniforms) : UniformDataCache::kInvalidIndex;
TextureDataCache::Index textureBindingIndex = combinedTextures ?
fTextureDataCache.insert(combinedTextures) : TextureDataCache::kInvalidIndex;
fSortKeys.push_back({&draw,
stepIndex,
pipelineIndex,
geomUniformIndex,
performsShading ? shadingUniformIndex : UniformDataCache::kInvalidIndex,
textureBindingIndex});
}
fPassBounds.join(clip.drawBounds());
fRequiresMSAA |= renderer->requiresMSAA();
fDepthStencilFlags |= renderer->depthStencilFlags();
if (dstUsage & DstUsage::kDstReadRequired) {
// For paints that read from the dst, update the bounds. It may later be determined that the
// DstReadStrategy does not require them, but they are inexpensive to track.
fDstReadBounds.join(clip.drawBounds());
}
#if defined(SK_DEBUG)
if (geometry.isCoverageMaskShape()) {
fCoverageMaskShapeDrawCount++;
}
#endif
}
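// A rough usage sketch (hypothetical caller, shown for illustration only):
//     DrawList list;
//     list.recordDraw(renderer, localToDevice, geometry, clip, order, paintID, ...);
//     std::unique_ptr<DrawPass> pass = list.snapDrawPass(recorder, target, targetInfo, strategy);
// Snapping sorts the accumulated SortKeys, converts them into DrawPass commands, and then
// resets this DrawList for reuse.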
std::unique_ptr<DrawPass> DrawList::snapDrawPass(Recorder* recorder,
sk_sp<TextureProxy> target,
const SkImageInfo& targetInfo,
const DstReadStrategy dstReadStrategy) {
// NOTE: This assert is here to ensure SortKey is as tightly packed as possible. Any change to
// its size should be done with care and good reason. The performance of sorting the keys is
// heavily tied to the total size.
//
// At 24 bytes (current), sorting is about 30% slower than if SortKey could be packed into just
// 16 bytes. There are several ways this could be done if necessary:
// - Restricting the max draw count to 16k (14-bits) and only using a single index to refer to
// the uniform data => 8 bytes of key, 8 bytes of pointer.
// - Restrict the max draw count to 32k (15-bits), use a single uniform index, and steal the
// 4 low bits from the Draw* pointer since it's 16 byte aligned.
// - Compact the Draw* to an index into the original collection, although that has extra
// indirection and does not work as well with SkTBlockList.
// In pseudo tests, manipulating the pointer or having to mask out indices was about 15% slower
// than an 8 byte key and unmodified pointer.
static_assert(sizeof(SortKey) == SkAlignTo(16 + sizeof(void*), alignof(SortKey)));
// TODO: Explore sorting algorithms; in all likelihood this will be mostly sorted already, so
// algorithms that approach O(n) in that condition may be favorable. Alternatively, could
// explore radix sort that is always O(n). Brief testing suggested std::sort was faster than
    // std::stable_sort and SkTQSort on [ml]'s Windows desktop. Also worth considering in-place
// vs. algorithms that require an extra O(n) storage.
// TODO: It's not strictly necessary, but would a stable sort be useful or just end up hiding
// bugs in the DrawOrder determination code?
std::sort(fSortKeys.begin(), fSortKeys.end());
TRACE_EVENT1("skia.gpu", TRACE_FUNC, "draw count", fDraws.count());
// The DrawList is converted directly into the DrawPass' data structures, but once the DrawPass
// is returned from Make(), it is considered immutable.
std::unique_ptr<DrawPass> drawPass(new DrawPass(target, {fLoadOp, StoreOp::kStore}, fClearColor,
recorder->priv().refFloatStorageManager()));
DrawBufferManager* bufferMgr = recorder->priv().drawBufferManager();
DrawWriter drawWriter(&drawPass->fCommandList, bufferMgr);
GraphicsPipelineCache::Index lastPipeline = GraphicsPipelineCache::kInvalidIndex;
const SkIRect targetBounds = SkIRect::MakeSize(targetInfo.dimensions());
SkIRect lastScissor = targetBounds;
SkASSERT(drawPass->fTarget->isFullyLazy() ||
SkIRect::MakeSize(drawPass->fTarget->dimensions()).contains(lastScissor));
drawPass->fCommandList.setScissor(lastScissor);
const Caps* caps = recorder->priv().caps();
const bool useStorageBuffers = caps->storageBufferSupport();
UniformTracker geometryUniformTracker(useStorageBuffers);
UniformTracker shadingUniformTracker(useStorageBuffers);
// TODO(b/372953722): Remove this forced binding command behavior once dst copies are always
// bound separately from the rest of the textures.
const bool rebindTexturesOnPipelineChange = dstReadStrategy == DstReadStrategy::kTextureCopy;
    // Keep track of the prior draw's PaintOrder. If the current draw requires barriers and there
    // is no pipeline or state change, then we must compare the current and prior draws'
    // PaintOrders to determine if the draws overlap. If they do, we must inject a flush between
    // them such that the barrier-addition and draw commands are ordered correctly.
CompressedPaintersOrder priorDrawPaintOrder {};
#if defined(SK_TRACE_GRAPHITE_PIPELINE_USE)
// Accumulate rough pixel area touched by each pipeline as we iterate the SortKeys
drawPass->fPipelineDrawAreas.push_back_n(fPipelineCache.count(), 0.f);
#endif
TextureTracker textureBindingTracker(&fTextureDataCache);
for (const DrawList::SortKey& key : fSortKeys) {
const DrawList::Draw& draw = key.draw();
const RenderStep& renderStep = key.renderStep();
const bool pipelineChange = key.pipelineIndex() != lastPipeline;
#if defined(SK_TRACE_GRAPHITE_PIPELINE_USE)
drawPass->fPipelineDrawAreas[key.pipelineIndex()] +=
draw.drawParams().clip().drawBounds().area();
#endif
const bool geomBindingChange = geometryUniformTracker.writeUniforms(
fGeometryUniformDataCache, bufferMgr, key.geometryUniformIndex());
const bool shadingBindingChange = shadingUniformTracker.writeUniforms(
fShadingUniformDataCache, bufferMgr, key.shadingUniformIndex());
// TODO(b/372953722): The Dawn and Vulkan CommandBuffer implementations currently append any
// dst copy to the texture bind group/descriptor set automatically when processing a
// BindTexturesAndSamplers call because they use a single group to contain all textures.
// However, from the DrawPass POV, we can run into the scenario where two pipelines have the
// same textures+samplers except one requires a dst-copy and the other does not. In this
        // case we wouldn't necessarily insert a new command when the pipeline changed, and
        // would then end up with layout validation errors.
const bool textureBindingsChange = textureBindingTracker.setCurrentTextureBindings(
key.textureBindingIndex()) ||
(rebindTexturesOnPipelineChange && pipelineChange &&
key.textureBindingIndex() != TextureDataCache::kInvalidIndex);
std::optional<SkIRect> newScissor =
renderStep.getScissor(draw.drawParams(), lastScissor, targetBounds);
const bool stateChange = geomBindingChange ||
shadingBindingChange ||
textureBindingsChange ||
newScissor.has_value();
// Update DrawWriter *before* we actually change any state so that accumulated draws from
// the previous state use the proper state.
if (pipelineChange) {
drawWriter.newPipelineState(renderStep.primitiveType(),
renderStep.staticDataStride(),
renderStep.appendDataStride(),
renderStep.getRenderStateFlags(),
draw.barrierBeforeDraws());
} else if (stateChange) {
drawWriter.newDynamicState();
} else if (draw.barrierBeforeDraws() != BarrierType::kNone &&
priorDrawPaintOrder != draw.drawParams().order().paintOrder()) {
// Even if there is no pipeline or state change, we must consider whether a
// DrawPassCommand to add barriers must be inserted before any draw commands. If so,
            // then determine if the current and prior draws overlap (i.e., their PaintOrders are
            // unequal). If so, perform a flush() to make sure the draw and add-barrier commands are
// appended to the command list in the proper order.
drawWriter.flush();
}
// Make state changes before accumulating new draw data
if (pipelineChange) {
drawPass->fCommandList.bindGraphicsPipeline(key.pipelineIndex());
lastPipeline = key.pipelineIndex();
}
if (stateChange) {
if (geomBindingChange) {
geometryUniformTracker.bindUniforms(UniformSlot::kRenderStep,
&drawPass->fCommandList);
}
if (shadingBindingChange) {
shadingUniformTracker.bindUniforms(UniformSlot::kPaint, &drawPass->fCommandList);
}
if (textureBindingsChange) {
textureBindingTracker.bindTextures(&drawPass->fCommandList);
}
if (newScissor.has_value()) {
drawPass->fCommandList.setScissor(*newScissor);
lastScissor = *newScissor;
}
}
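        // Capture the SSBO element indices for this draw's geometry and shading uniforms so that
        // the vertex data can reference them; both are unused (and set to 0) when uniform buffers
        // are bound at explicit offsets instead.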
uint32_t geometrySsboIndex = useStorageBuffers ? geometryUniformTracker.ssboIndex() : 0;
uint32_t shadingSsboIndex = useStorageBuffers ? shadingUniformTracker.ssboIndex() : 0;
skvx::uint2 ssboIndices = {geometrySsboIndex, shadingSsboIndex};
renderStep.writeVertices(&drawWriter, draw.drawParams(), ssboIndices);
if (bufferMgr->hasMappingFailed()) {
SKGPU_LOG_W("Failed to write necessary vertex/instance data for DrawPass, dropping!");
return nullptr;
}
// Update priorDrawPaintOrder value before iterating to analyze the next draw.
priorDrawPaintOrder = draw.drawParams().order().paintOrder();
}
    // Finish recording draw calls for any collected data still pending at the end of the loop
drawWriter.flush();
drawPass->fBounds = fPassBounds.roundOut().asSkIRect();
drawPass->fPipelineDescs = fPipelineCache.detach();
drawPass->fSampledTextures = fTextureDataCache.detachTextures();
TRACE_COUNTER1("skia.gpu", "# pipelines", drawPass->fPipelineDescs.size());
TRACE_COUNTER1("skia.gpu", "# textures", drawPass->fSampledTextures.size());
TRACE_COUNTER1("skia.gpu", "# commands", drawPass->fCommandList.count());
this->reset(LoadOp::kLoad);
return drawPass;
}
void DrawList::reset(LoadOp loadOp, SkColor4f color) {
fLoadOp = loadOp;
fClearColor = color.premul().array();
fSortKeys.clear();
fDraws.reset();
fTransforms.reset();
    // Reset the renderer information that was accumulated for the draws added to this list
fRenderStepCount = 0;
fRequiresMSAA = false;
fDepthStencilFlags = DepthStencilFlags::kNone;
SkDEBUGCODE(fCoverageMaskShapeDrawCount = 0);
fDstReadBounds = Rect::InfiniteInverted();
fPassBounds = Rect::InfiniteInverted();
fGeometryUniformDataCache.reset();
fShadingUniformDataCache.reset();
fTextureDataCache.reset();
fPipelineCache.reset();
}
} // namespace skgpu::graphite