// blob: e5247dfafdddab97a33011a6e5fa0041c9091561
/*
* Copyright 2021 Google LLC
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef skgpu_graphite_DrawList_DEFINED
#define skgpu_graphite_DrawList_DEFINED
#include "include/gpu/graphite/GraphiteTypes.h"
#include "include/private/base/SkDebug.h"
#include "src/base/SkBlockAllocator.h"
#include "src/base/SkEnumBitMask.h"
#include "src/base/SkTBlockList.h"
#include "src/gpu/graphite/ContextUtils.h"
#include "src/gpu/graphite/DrawCommands.h"
#include "src/gpu/graphite/DrawOrder.h"
#include "src/gpu/graphite/DrawParams.h"
#include "src/gpu/graphite/PaintParams.h"
#include "src/gpu/graphite/PipelineData.h"
#include "src/gpu/graphite/geom/Rect.h"
#include "src/gpu/graphite/geom/Transform.h"
#include <cstdint>
#include <limits>
#include <optional>
namespace skgpu::graphite {
class DrawPass;
class Geometry;
class Renderer;
/**
* A DrawList represents a collection of drawing commands (and related clip/shading state) in
* a form that closely mirrors what can be rendered efficiently and directly by the GPU backend
* (while balancing how much pre-processing to do for draws that might get eliminated later due to
* occlusion culling).
*
* A draw command combines:
* - a shape
* - a transform
* - a primitive clip (not affected by the transform)
* - optional shading description (shader, color filter, blend mode, etc)
* - a draw ordering (compressed painters index, stencil set, and write/test depth)
*
* Commands are accumulated in an arbitrary order and then sorted by increasing sort z when the list
* is prepared into an actual command buffer. The result of a draw command is the rasterization of
* the transformed shape, restricted by its primitive clip (e.g. a scissor rect) and a depth test
* of "GREATER" vs. its write/test z. (A test of GREATER, as opposed to GEQUAL, avoids double hits
* for draws that may have overlapping geometry, e.g. stroking.) If the command has a shading
* description, the color buffer will be modified; if not, it will be a depth-only draw.
*
* In addition to sorting the collected commands, the command list can be optimized during
* preparation. Commands that are fully occluded by later operations can be skipped entirely without
* affecting the final results. Adjacent commands (post sort) that would use equivalent GPU
* pipelines are merged to produce fewer (but larger) operations on the GPU.
*
* Other than flush-time optimizations (sort, cull, and merge), the command list does what you tell
* it to. Draw-specific simplification, style application, and advanced clipping should be handled
* at a higher layer.
*/
class DrawList {
public:
    // The maximum number of render steps that can be recorded into a DrawList before it must be
    // converted to a DrawPass. The true fundamental limit is imposed by the limits of the depth
    // attachment and precision of CompressedPaintersOrder and PaintDepth. These values can be
    // shared by multiple draw calls so it's more difficult to reason about how much room is left
    // in a DrawList. Limiting it to this keeps tracking simple and ensures that the sequences in
    // DrawOrder cannot overflow since they are always less than or equal to the number of draws.
    // TODO(b/322840221): The theoretic max for this value is 16-bit, but we see markedly better
    // performance with smaller values. This should be understood and fixed directly rather than as
    // a magic side-effect, but for now, let it go fast.
    static constexpr int kMaxRenderSteps = 4096;
    static_assert(kMaxRenderSteps <= std::numeric_limits<uint16_t>::max());

    // Add a constructor to prevent default zero initialization of SkTBlockList members' storage.
    DrawList() {}

    // DrawList requires that all Transforms be valid and asserts as much; invalid transforms should
    // be detected at the Device level or similar. The provided Renderer must be compatible with the
    // 'shape' and 'stroke' parameters. If the renderer uses coverage AA, 'ordering' must have a
    // compressed painters order that reflects that. If the renderer uses stencil, the 'ordering'
    // must have a valid stencil index as well.
    void recordDraw(const Renderer* renderer,
                    const Transform& localToDevice,
                    const Geometry& geometry,
                    const Clip& clip,
                    DrawOrder ordering,
                    UniquePaintParamsID paintID,
                    SkEnumBitMask<DstUsage> dstUsage,
                    BarrierType barrierBeforeDraws,
                    PipelineDataGatherer* gatherer,
                    const StrokeStyle* stroke);

    // Produces a DrawPass from the draws recorded so far. Defined out of line.
    // NOTE(review): whether the list is left empty/reusable afterwards is not visible from this
    // header; confirm against the implementation before relying on it.
    std::unique_ptr<DrawPass> snapDrawPass(Recorder* recorder,
                                           sk_sp<TextureProxy> target,
                                           const SkImageInfo& targetInfo,
                                           const DstReadStrategy dstReadStrategy);

    // Running total of RenderSteps for all recorded draws, assuming nothing is culled.
    int renderStepCount() const { return fRenderStepCount; }

    // True if at least one render step has been recorded or the load op is a clear.
    bool modifiesTarget() const {
        return this->renderStepCount() > 0 || fLoadOp == LoadOp::kClear;
    }

    // Discard all previously recorded draws and set to the requested load op (with optional clear
    // color).
    void reset(LoadOp op, SkColor4f clearColor = {0.f, 0.f, 0.f, 0.f});

    // Bounds for a dst read required by this DrawList. These bounds are only valid if drawsReadDst
    // returns true.
    const Rect& dstReadBounds() const { return fDstReadBounds; }

    // Accumulated pass bounds; starts out as Rect::InfiniteInverted().
    const Rect& passBounds() const { return fPassBounds; }

    // True once fDstReadBounds is no longer empty/negative/NaN (it starts out inverted).
    bool drawsReadDst() const { return !fDstReadBounds.isEmptyNegativeOrNaN(); }

    bool drawsRequireMSAA() const { return fRequiresMSAA; }

    SkEnumBitMask<DepthStencilFlags> depthStencilFlags() const { return fDepthStencilFlags; }

    SkDEBUGCODE(bool hasCoverageMaskDraws() const { return fCoverageMaskShapeDrawCount > 0; })

private:
    friend class DrawPass;

    // A single recorded draw: the renderer to use, its draw parameters, and the barrier type
    // that was supplied when it was recorded.
    struct Draw {
    public:
        Draw(const Renderer* renderer, const Transform& transform, const Geometry& geometry,
             const Clip& clip, DrawOrder order, BarrierType barrierBeforeDraws,
             const StrokeStyle* stroke)
                : fRenderer(renderer)
                , fDrawParams(transform, geometry, clip, order, stroke)
                , fBarrierBeforeDraws(barrierBeforeDraws) {}

        const Renderer* renderer() const { return fRenderer; }
        const DrawParams& drawParams() const { return fDrawParams; }
        const BarrierType& barrierBeforeDraws() const { return fBarrierBeforeDraws; }

    private:
        const Renderer* fRenderer; // Owned by SharedContext of Recorder that recorded the draw
        DrawParams fDrawParams; // The DrawParam's transform is owned by fTransforms of the DrawList
        BarrierType fBarrierBeforeDraws;
    };

    // Describes a field of 'Bits' bits located 'Offset' bits above the least-significant bit of a
    // 64-bit key. set() masks the value to the field width and shifts it into position; get()
    // extracts it again.
    template <uint64_t Bits, uint64_t Offset>
    struct Bitfield {
        // get()/set() traffic in uint32_t, so wider fields would silently truncate; a 64-bit
        // wide field would additionally make the mask computation an undefined shift.
        static_assert(Bits > 0 && Bits <= 32, "Bitfield values are exchanged as uint32_t");
        static_assert(Offset + Bits <= 64, "Bitfield must fit inside a 64-bit key");

        static constexpr uint64_t kMask = (uint64_t{1} << Bits) - 1;
        static constexpr uint64_t kOffset = Offset;
        static constexpr uint64_t kBits = Bits;

        static uint32_t get(uint64_t v) { return static_cast<uint32_t>((v >> kOffset) & kMask); }
        static uint64_t set(uint32_t v) { return (v & kMask) << kOffset; }
    };

    /**
     * Each Draw in a DrawList might be processed by multiple RenderSteps (determined by the Draw's
     * Renderer), which can be sorted independently. Each (step, draw) pair produces its own
     * SortKey.
     *
     * The goal of sorting draws for the DrawPass is to minimize pipeline transitions and dynamic
     * binds within a pipeline, while still respecting the overall painter's order. This decreases
     * the number of low-level draw commands in a command buffer and increases the size of those,
     * allowing the GPU to operate more efficiently and have fewer bubbles within its own
     * instruction stream.
     *
     * The Draw's CompressedPaintersOrder and DisjointStencilIndex represent the most significant
     * bits of the key, and are shared by all SortKeys produced by the same draw. Next, the pipeline
     * description is encoded in two steps:
     *  1. The index of the RenderStep packed in the high bits to ensure each step for a draw is
     *     ordered correctly.
     *  2. An index into a cache of pipeline descriptions is used to encode the identity of the
     *     pipeline (SortKeys that differ in the bits from #1 necessarily would have different
     *     descriptions, but then the specific ordering of the RenderSteps isn't enforced). Last,
     *     the SortKey encodes an index into the set of uniform bindings accumulated for a DrawPass.
     *     This allows the SortKey to cluster draw steps that have both a compatible pipeline and do
     *     not require rebinding uniform data or other state (e.g. scissor). Since the uniform data
     *     index and the pipeline description index are packed into indices and not actual pointers,
     *     a given SortKey is only valid for a specific DrawList->DrawPass conversion.
     */
    class SortKey {
    public:
        SortKey(const DrawList::Draw* draw,
                int renderStep,
                GraphicsPipelineCache::Index pipelineIndex,
                UniformDataCache::Index geomUniformIndex,
                UniformDataCache::Index shadingUniformIndex,
                TextureDataCache::Index textureBindingIndex)
                : fPipelineKey(
                          ColorDepthOrderField::set(draw->drawParams().order().paintOrder().bits())
                        | StencilIndexField::set(draw->drawParams().order().stencilIndex().bits())
                        | RenderStepField::set(static_cast<uint32_t>(renderStep))
                        | PipelineField::set(pipelineIndex))
                , fUniformKey(GeometryUniformField::set(geomUniformIndex) |
                              ShadingUniformField::set(shadingUniformIndex) |
                              TextureBindingsField::set(textureBindingIndex))
                , fDraw(draw) {
            SkASSERT(pipelineIndex < GraphicsPipelineCache::kInvalidIndex);
            // 'renderStep' is later used as an index into the renderer's steps (see renderStep()),
            // so it must be strictly less than the step count.
            SkASSERT(renderStep >= 0 && renderStep < draw->renderer()->numRenderSteps());
        }

        // Orders by the pipeline key first and falls back to the uniform key on ties.
        bool operator<(const SortKey& k) const {
            return fPipelineKey < k.fPipelineKey ||
                   (fPipelineKey == k.fPipelineKey && fUniformKey < k.fUniformKey);
        }

        // Looks up the RenderStep on the draw's renderer using the step index stored in the key.
        const RenderStep& renderStep() const {
            return fDraw->renderer()->step(RenderStepField::get(fPipelineKey));
        }

        const DrawList::Draw& draw() const { return *fDraw; }

        GraphicsPipelineCache::Index pipelineIndex() const {
            return PipelineField::get(fPipelineKey);
        }
        UniformDataCache::Index geometryUniformIndex() const {
            return GeometryUniformField::get(fUniformKey);
        }
        UniformDataCache::Index shadingUniformIndex() const {
            return ShadingUniformField::get(fUniformKey);
        }
        TextureDataCache::Index textureBindingIndex() const {
            return TextureBindingsField::get(fUniformKey);
        }

    private:
        // Fields are ordered from most-significant to least when sorting by 128-bit value.
        // NOTE: We don't use C++ bit fields because field ordering is implementation defined and we
        // need to sort consistently.
        using ColorDepthOrderField = Bitfield<16, 48>; // 8*sizeof(CompressedPaintersOrder)
        using StencilIndexField    = Bitfield<16, 32>; // 8*sizeof(DisjointStencilIndex)
        using RenderStepField      = Bitfield<2,  30>; // bits >= log2(Renderer::kMaxRenderSteps)
        using PipelineField        = Bitfield<30, 0>;  // bits >= log2(max total steps in draw list)
        uint64_t fPipelineKey;

        // The uniform/texture index fields need 1 extra bit to encode "no-data". Values that are
        // greater than or equal to 2^(bits-1) represent "no-data", while values between
        // [0, 2^(bits-1)-1] can access data arrays without extra logic.
        using GeometryUniformField = Bitfield<17, 47>; // bits >= 1+log2(max total steps)
        using ShadingUniformField  = Bitfield<17, 30>; // bits >= 1+log2(max total steps)
        using TextureBindingsField = Bitfield<30, 0>;  // bits >= 1+log2(max total steps)
        uint64_t fUniformKey;

        // Backpointer to the draw that produced the sort key
        const DrawList::Draw* fDraw;

        // sizeof() reports bytes while kBits counts bits, so scale by 8 to compare like with like.
        static_assert(ColorDepthOrderField::kBits >= 8 * sizeof(CompressedPaintersOrder));
        static_assert(StencilIndexField::kBits >= 8 * sizeof(DisjointStencilIndex));
        static_assert(RenderStepField::kBits >= SkNextLog2_portable(Renderer::kMaxRenderSteps));
        static_assert(PipelineField::kBits >= SkNextLog2_portable(DrawList::kMaxRenderSteps));
        static_assert(GeometryUniformField::kBits >= 1+SkNextLog2_portable(DrawList::kMaxRenderSteps));
        static_assert(ShadingUniformField::kBits >= 1+SkNextLog2_portable(DrawList::kMaxRenderSteps));
        static_assert(TextureBindingsField::kBits >= 1+SkNextLog2_portable(DrawList::kMaxRenderSteps));
    };

    // The returned Transform reference remains valid for the lifetime of the DrawList.
    const Transform& deduplicateTransform(const Transform&);

    SkTBlockList<Transform, 16> fTransforms{SkBlockAllocator::GrowthPolicy::kFibonacci};
    SkTBlockList<Draw, 16> fDraws{SkBlockAllocator::GrowthPolicy::kFibonacci};

    // Running total of RenderSteps for all draws, assuming nothing is culled
    int fRenderStepCount = 0;

#if defined(SK_DEBUG)
    // The number of CoverageMask draws that have been recorded. Used in debugging.
    int fCoverageMaskShapeDrawCount = 0;
#endif

    // Tracked for all paints that read from the dst. If it is later determined that the
    // DstReadStrategy is not kTextureCopy, this value can simply be ignored.
    Rect fDstReadBounds = Rect::InfiniteInverted();
    Rect fPassBounds = Rect::InfiniteInverted();

    // Other properties of draws contained within this DrawList
    bool fRequiresMSAA = false;
    SkEnumBitMask<DepthStencilFlags> fDepthStencilFlags = DepthStencilFlags::kNone;

    // Sort keys plus the caches whose indices the keys encode (see SortKey).
    std::vector<SortKey> fSortKeys;
    UniformDataCache fGeometryUniformDataCache;
    UniformDataCache fShadingUniformDataCache;
    TextureDataCache fTextureDataCache;
    GraphicsPipelineCache fPipelineCache;

    // Load op (and clear color) as requested through reset(); defaults to kLoad.
    LoadOp fLoadOp = LoadOp::kLoad;
    std::array<float, 4> fClearColor = {0.f, 0.f, 0.f, 0.f};
};
} // namespace skgpu::graphite
#endif // skgpu_graphite_DrawList_DEFINED