/*
* Copyright 2022 Rive
*/
#include "rive/renderer/render_context.hpp"
#include "gr_inner_fan_triangulator.hpp"
#include "intersection_board.hpp"
#include "gradient.hpp"
#include "rive_render_paint.hpp"
#include "rive/renderer/draw.hpp"
#include "rive/renderer/rive_render_image.hpp"
#include "rive/renderer/render_context_impl.hpp"
#include "rive/profiler/profiler_macros.h"
#include "shaders/constants.glsl"
#include <string_view>
#ifdef RIVE_DECODERS
#include "rive/decoders/bitmap_decoder.hpp"
#endif
namespace rive::gpu
{
constexpr size_t kDefaultSimpleGradientCapacity = 512;
constexpr size_t kDefaultComplexGradientCapacity = 1024;
constexpr size_t kDefaultDrawCapacity = 2048;
// TODO: Move this variable to PlatformFeatures.
constexpr uint32_t kMaxTextureHeight = 2048;
constexpr size_t kMaxTessellationVertexCount =
kMaxTextureHeight * kTessTextureWidth;
constexpr size_t kMaxTessellationPaddingVertexCount =
gpu::kMidpointFanPatchSegmentSpan + // Padding at the beginning of the tess
// texture
(gpu::kOuterCurvePatchSegmentSpan -
1) + // Max padding between patch types in the tess texture
1; // Padding at the end of the tessellation texture
constexpr size_t kMaxTessellationVertexCountBeforePadding =
kMaxTessellationVertexCount - kMaxTessellationPaddingVertexCount;
// Metal requires vertex buffers to be 256-byte aligned.
constexpr size_t kMaxTessellationAlignmentVertices =
gpu::kTessVertexBufferAlignmentInElements - 1;
// We can only reorder 32767 draws at a time since the one-based groupIndex
// returned by IntersectionBoard is a signed 16-bit integer.
constexpr size_t kMaxReorderedDrawPassCount =
std::numeric_limits<int16_t>::max();
// How tall to make a resource texture in order to support the given number of
// items.
template <size_t WidthInItems>
constexpr static size_t resource_texture_height(size_t itemCount)
{
return (itemCount + WidthInItems - 1) / WidthInItems;
}
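// Illustrative sanity check (not load-bearing): a texture 4 items wide needs
// ceil(9 / 4) == 3 rows to hold 9 items.
static_assert(resource_texture_height<4>(9) == 3);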
constexpr static size_t gradient_data_height(size_t simpleRampCount,
size_t complexRampCount)
{
return resource_texture_height<gpu::kGradTextureWidthInSimpleRamps>(
simpleRampCount) +
complexRampCount;
}
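// For illustration, assuming kGradTextureWidthInSimpleRamps were 256: 300
// simple ramps and 2 complex ramps would need
// gradient_data_height(300, 2) == ceil(300 / 256) + 2 == 4 rows, since simple
// ramps pack many to a row while each complex ramp takes a full row.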
inline GradientContentKey::GradientContentKey(rcp<const Gradient> gradient) :
m_gradient(std::move(gradient))
{}
inline GradientContentKey::GradientContentKey(GradientContentKey&& other) :
m_gradient(std::move(other.m_gradient))
{}
bool GradientContentKey::operator==(const GradientContentKey& other) const
{
if (m_gradient.get() == other.m_gradient.get())
{
return true;
}
else
{
return m_gradient->count() == other.m_gradient->count() &&
!memcmp(m_gradient->stops(),
other.m_gradient->stops(),
m_gradient->count() * sizeof(float)) &&
!memcmp(m_gradient->colors(),
other.m_gradient->colors(),
m_gradient->count() * sizeof(ColorInt));
}
}
size_t DeepHashGradient::operator()(const GradientContentKey& key) const
{
const Gradient* grad = key.gradient();
std::hash<std::string_view> hash;
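// Hash the raw stop and color arrays as byte strings and combine them, so two
// keys that GradientContentKey::operator== considers equal also hash equally.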
size_t x =
hash(std::string_view(reinterpret_cast<const char*>(grad->stops()),
grad->count() * sizeof(float)));
size_t y =
hash(std::string_view(reinterpret_cast<const char*>(grad->colors()),
grad->count() * sizeof(ColorInt)));
return x ^ y;
}
RenderContext::RenderContext(std::unique_ptr<RenderContextImpl> impl) :
m_impl(std::move(impl)),
// -1 from m_maxPathID so we reserve a path record for the clearColor paint
// (for atomic mode). This also allows us to index the storage buffers
// directly by pathID.
m_maxPathID(MaxPathID(m_impl->platformFeatures().pathIDGranularity) - 1)
{
setResourceSizes(ResourceAllocationCounts(), /*forceRealloc =*/true);
releaseResources();
}
RenderContext::~RenderContext()
{
// Always call flush() to avoid deadlock.
assert(!m_didBeginFrame);
// Delete the logical flushes before the block allocators let go of their
// allocations.
m_logicalFlushes.clear();
}
const gpu::PlatformFeatures& RenderContext::platformFeatures() const
{
return m_impl->platformFeatures();
}
rcp<RenderBuffer> RenderContext::makeRenderBuffer(RenderBufferType type,
RenderBufferFlags flags,
size_t sizeInBytes)
{
return m_impl->makeRenderBuffer(type, flags, sizeInBytes);
}
rcp<RenderImage> RenderContext::decodeImage(Span<const uint8_t> encodedBytes)
{
RIVE_PROF_SCOPE()
rcp<Texture> texture = m_impl->platformDecodeImageTexture(encodedBytes);
#ifdef RIVE_DECODERS
if (texture == nullptr)
{
auto bitmap = Bitmap::decode(encodedBytes.data(), encodedBytes.size());
if (bitmap)
{
// For now, RenderContextImpl::makeImageTexture() only accepts RGBA.
if (bitmap->pixelFormat() != Bitmap::PixelFormat::RGBAPremul)
{
bitmap->pixelFormat(Bitmap::PixelFormat::RGBAPremul);
}
uint32_t width = bitmap->width();
uint32_t height = bitmap->height();
uint32_t mipLevelCount = math::msb(height | width);
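// Build a full mip chain: the most significant bit of the larger dimension
// determines how many mip levels are needed.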
texture = m_impl->makeImageTexture(width,
height,
mipLevelCount,
bitmap->bytes());
}
}
#endif
return texture != nullptr ? make_rcp<RiveRenderImage>(std::move(texture))
: nullptr;
}
void RenderContext::releaseResources()
{
assert(!m_didBeginFrame);
resetContainers();
setResourceSizes(ResourceAllocationCounts());
m_maxRecentResourceRequirements = ResourceAllocationCounts();
m_lastResourceTrimTimeInSeconds = m_impl->secondsNow();
}
void RenderContext::resetContainers()
{
assert(!m_didBeginFrame);
if (!m_logicalFlushes.empty())
{
// Should get reset to 1 after flush().
assert(m_logicalFlushes.size() == 1);
m_logicalFlushes.resize(1);
m_logicalFlushes.front()->resetContainers();
}
m_indirectDrawList.clear();
m_indirectDrawList.shrink_to_fit();
m_intersectionBoard = nullptr;
}
RenderContext::LogicalFlush::LogicalFlush(RenderContext* parent) : m_ctx(parent)
{
rewind();
}
void RenderContext::LogicalFlush::rewind()
{
RIVE_PROF_SCOPE()
m_resourceCounts = Draw::ResourceCounters();
m_drawPassCount = 0;
m_simpleGradients.clear();
m_pendingSimpleGradDraws.clear();
m_complexGradients.clear();
m_pendingComplexGradDraws.clear();
m_pendingGradSpanCount = 0;
m_clips.clear();
m_draws.clear();
m_combinedDrawBounds = {std::numeric_limits<int32_t>::max(),
std::numeric_limits<int32_t>::max(),
std::numeric_limits<int32_t>::min(),
std::numeric_limits<int32_t>::min()};
m_pathPaddingCount = 0;
m_paintPaddingCount = 0;
m_paintAuxPaddingCount = 0;
m_contourPaddingCount = 0;
m_gradSpanPaddingCount = 0;
m_midpointFanTessEndLocation = 0;
m_outerCubicTessEndLocation = 0;
m_outerCubicTessVertexIdx = 0;
m_midpointFanTessVertexIdx = 0;
m_flushDesc = FlushDescriptor();
m_drawList.reset();
m_combinedShaderFeatures = gpu::ShaderFeatures::NONE;
m_currentPathID = 0;
m_currentContourID = 0;
if (m_atlasRectanizer != nullptr)
{
m_atlasRectanizer->reset();
}
m_atlasMaxX = 0;
m_atlasMaxY = 0;
m_pendingAtlasDraws.clear();
m_coverageBufferLength = 0;
m_pendingBarriers = BarrierFlags::none;
m_currentZIndex = 0;
RIVE_DEBUG_CODE(m_hasDoneLayout = false;)
}
void RenderContext::LogicalFlush::resetContainers()
{
m_clips.clear();
m_clips.shrink_to_fit();
m_draws.clear();
m_draws.shrink_to_fit();
m_draws.reserve(kDefaultDrawCapacity);
m_simpleGradients.rehash(0);
m_simpleGradients.reserve(kDefaultSimpleGradientCapacity);
m_pendingSimpleGradDraws.clear();
m_pendingSimpleGradDraws.shrink_to_fit();
m_pendingSimpleGradDraws.reserve(kDefaultSimpleGradientCapacity);
m_complexGradients.rehash(0);
m_complexGradients.reserve(kDefaultComplexGradientCapacity);
m_pendingComplexGradDraws.clear();
m_pendingComplexGradDraws.shrink_to_fit();
m_pendingComplexGradDraws.reserve(kDefaultComplexGradientCapacity);
m_pendingAtlasDraws.clear();
m_pendingAtlasDraws.shrink_to_fit();
// Don't reserve any space in m_pendingAtlasDraws since there are many use
// cases where it isn't used at all.
}
void RenderContext::beginFrame(const FrameDescriptor& frameDescriptor)
{
RIVE_PROF_SCOPE()
m_impl->preBeginFrame(this);
assert(!m_didBeginFrame);
assert(frameDescriptor.renderTargetWidth > 0);
assert(frameDescriptor.renderTargetHeight > 0);
m_frameDescriptor = frameDescriptor;
if (!platformFeatures().supportsRasterOrdering &&
!platformFeatures().supportsFragmentShaderAtomics)
{
// We don't have pixel local storage in any form. Use 4x MSAA if
// msaaSampleCount wasn't already specified.
m_frameDescriptor.msaaSampleCount =
m_frameDescriptor.msaaSampleCount > 0
? m_frameDescriptor.msaaSampleCount
: 4;
}
if (m_frameDescriptor.msaaSampleCount > 0)
{
m_frameInterlockMode = gpu::InterlockMode::msaa;
}
else if (platformFeatures().supportsRasterOrdering &&
(!m_frameDescriptor.disableRasterOrdering ||
!platformFeatures().supportsFragmentShaderAtomics))
{
m_frameInterlockMode = gpu::InterlockMode::rasterOrdering;
}
else if (frameDescriptor.clockwiseFillOverride &&
platformFeatures().supportsClockwiseAtomicRendering)
{
assert(platformFeatures().supportsFragmentShaderAtomics);
m_frameInterlockMode = gpu::InterlockMode::clockwiseAtomic;
}
else
{
assert(platformFeatures().supportsFragmentShaderAtomics);
m_frameInterlockMode = gpu::InterlockMode::atomics;
}
m_frameShaderFeaturesMask =
gpu::ShaderFeaturesMaskFor(m_frameInterlockMode);
if (m_logicalFlushes.empty())
{
m_logicalFlushes.emplace_back(new LogicalFlush(this));
}
RIVE_DEBUG_CODE(m_didBeginFrame = true);
}
bool RenderContext::isOutsideCurrentFrame(const IAABB& pixelBounds)
{
assert(m_didBeginFrame);
int4 bounds = simd::load4i(&pixelBounds);
auto renderTargetSize =
simd::cast<int32_t>(uint2{m_frameDescriptor.renderTargetWidth,
m_frameDescriptor.renderTargetHeight});
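// bounds is ordered {left, top, right, bottom}: the draw is outside the frame
// if it begins beyond the right/bottom edge, ends at or before the left/top
// edge, or is empty.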
return simd::any(bounds.xy >= renderTargetSize || bounds.zw <= 0 ||
bounds.xy >= bounds.zw);
}
bool RenderContext::frameSupportsClipRects() const
{
assert(m_didBeginFrame);
return m_frameInterlockMode != gpu::InterlockMode::msaa ||
platformFeatures().supportsClipPlanes;
}
bool RenderContext::frameSupportsImagePaintForPaths() const
{
assert(m_didBeginFrame);
return m_frameInterlockMode != gpu::InterlockMode::atomics;
}
uint32_t RenderContext::generateClipID(const IAABB& contentBounds)
{
assert(m_didBeginFrame);
assert(!m_logicalFlushes.empty());
return m_logicalFlushes.back()->generateClipID(contentBounds);
}
uint32_t RenderContext::LogicalFlush::generateClipID(const IAABB& contentBounds)
{
if (m_clips.size() < m_ctx->m_maxPathID) // maxClipID == maxPathID.
{
m_clips.emplace_back(contentBounds);
assert(m_ctx->m_clipContentID != m_clips.size());
return math::lossless_numeric_cast<uint32_t>(m_clips.size());
}
return 0; // There are no available clip IDs. The caller should flush and
// try again.
}
RenderContext::LogicalFlush::ClipInfo& RenderContext::LogicalFlush::
getWritableClipInfo(uint32_t clipID)
{
assert(clipID > 0);
assert(clipID <= m_clips.size());
return m_clips[clipID - 1];
}
void RenderContext::LogicalFlush::addClipReadBounds(uint32_t clipID,
const IAABB& bounds)
{
assert(clipID > 0);
assert(clipID <= m_clips.size());
ClipInfo& clipInfo = getWritableClipInfo(clipID);
clipInfo.readBounds = clipInfo.readBounds.join(bounds);
}
bool RenderContext::pushDraws(DrawUniquePtr draws[], size_t drawCount)
{
assert(m_didBeginFrame);
assert(!m_logicalFlushes.empty());
return m_logicalFlushes.back()->pushDraws(draws, drawCount);
}
bool RenderContext::LogicalFlush::pushDraws(DrawUniquePtr draws[],
size_t drawCount)
{
RIVE_PROF_SCOPE()
assert(!m_hasDoneLayout);
auto countsVector = m_resourceCounts.toVec();
for (size_t i = 0; i < drawCount; ++i)
{
assert(!draws[i]->pixelBounds().empty());
assert(m_ctx->frameSupportsClipRects() ||
draws[i]->clipRectInverseMatrix() == nullptr);
countsVector += draws[i]->resourceCounts().toVec();
}
Draw::ResourceCounters countsWithNewBatch = countsVector;
// Textures and buffers have hard size limits. If the new batch doesn't fit
// within our constraints, the caller needs to flush and try again.
if (countsWithNewBatch.pathCount > m_ctx->m_maxPathID ||
countsWithNewBatch.contourCount > kMaxContourID ||
countsWithNewBatch.midpointFanTessVertexCount +
countsWithNewBatch.outerCubicTessVertexCount >
kMaxTessellationVertexCountBeforePadding)
{
return false;
}
// Allocate subpasses.
int passCountInBatch = 0;
for (size_t i = 0; i < drawCount; ++i)
{
draws[i]->countSubpasses();
assert(draws[i]->prepassCount() >= 0);
assert(draws[i]->subpassCount() >= 0);
assert(draws[i]->prepassCount() + draws[i]->subpassCount() >= 1);
passCountInBatch += draws[i]->prepassCount() + draws[i]->subpassCount();
}
// We can only reorder 32k draws at a time in atomic and msaa modes since
// the sort key addresses them with a signed 16-bit index. Make sure we
// don't exceed that limit.
if (m_ctx->frameInterlockMode() != gpu::InterlockMode::rasterOrdering &&
m_drawPassCount + passCountInBatch > kMaxReorderedDrawPassCount)
{
return false;
}
// Allocate final resources.
for (size_t i = 0; i < drawCount; ++i)
{
if (!draws[i]->allocateResources(this))
{
// The draw failed to allocate resources. Give up and let the caller
// flush and try again.
//
// FIXME: This works today, but the surrounding code could be
// modified to inadvertently leave a stale dangling reference to one
// of these draws in m_pendingAtlasDraws. This needs to be
// revisited.
return false;
}
}
for (size_t i = 0; i < drawCount; ++i)
{
m_draws.push_back(std::move(draws[i]));
m_combinedDrawBounds =
m_combinedDrawBounds.join(m_draws.back()->pixelBounds());
}
m_resourceCounts = countsWithNewBatch;
m_drawPassCount += passCountInBatch;
return true;
}
bool RenderContext::LogicalFlush::allocateGradient(
const Gradient* gradient,
gpu::ColorRampLocation* colorRampLocation)
{
RIVE_PROF_SCOPE()
assert(!m_hasDoneLayout);
const float* stops = gradient->stops();
size_t stopCount = gradient->count();
assert(stopCount > 0); // RiveRenderFactory guarantees this.
if (stopCount == 1 || (stopCount == 2 && stops[0] == 0 && stops[1] == 1))
{
// This is a simple gradient that can be implemented by a two-texel
// color ramp.
const ColorInt* colors = gradient->colors();
TwoTexelRamp colorRamp = {colors[0],
// Handle ramps with a single stop.
colors[std::min<size_t>(1, stopCount - 1)]};
uint64_t simpleKey;
static_assert(sizeof(simpleKey) == sizeof(ColorInt) * 2);
RIVE_INLINE_MEMCPY(&simpleKey, &colorRamp, sizeof(ColorInt) * 2);
uint32_t rampTexelsIdx;
auto iter = m_simpleGradients.find(simpleKey);
if (iter != m_simpleGradients.end())
{
// This gradient is already in the texture.
rampTexelsIdx = iter->second;
}
else
{
if (gradient_data_height(m_simpleGradients.size() + 1,
m_complexGradients.size()) >
kMaxTextureHeight)
{
// We ran out of rows in the gradient texture. Caller has to
// flush and try again.
return false;
}
rampTexelsIdx = math::lossless_numeric_cast<uint32_t>(
m_simpleGradients.size() * 2);
m_simpleGradients.insert({simpleKey, rampTexelsIdx});
m_pendingSimpleGradDraws.push_back(colorRamp);
// Simple gradients get uploaded to the GPU as a single GradientSpan
// instance.
++m_pendingGradSpanCount;
}
colorRampLocation->row = rampTexelsIdx / kGradTextureWidth;
colorRampLocation->col = rampTexelsIdx % kGradTextureWidth;
}
else
{
// This is a complex gradient. Render it to an entire row of the
// gradient texture.
GradientContentKey key(ref_rcp(gradient));
auto iter = m_complexGradients.find(key);
uint16_t row;
if (iter != m_complexGradients.end())
{
row = iter->second; // This gradient is already in the texture.
}
else
{
if (gradient_data_height(m_simpleGradients.size(),
m_complexGradients.size() + 1) >
kMaxTextureHeight)
{
// We ran out of rows in the gradient texture. Caller has to
// flush and try again.
return false;
}
row = static_cast<uint32_t>(m_complexGradients.size());
m_complexGradients.emplace(std::move(key), row);
m_pendingComplexGradDraws.push_back(gradient);
size_t spanCount = stopCount - 1;
m_pendingGradSpanCount += spanCount;
}
// Store the row relative to the first complex gradient for now.
// PaintData::set() will offset this value by the number of simple
// gradient rows once its final value is known.
colorRampLocation->row = row;
colorRampLocation->col = ColorRampLocation::kComplexGradientMarker;
}
return true;
}
bool RenderContext::LogicalFlush::allocateAtlasDraw(
PathDraw* pathDraw,
uint16_t drawWidth,
uint16_t drawHeight,
uint16_t desiredPadding,
uint16_t* x,
uint16_t* y,
TAABB<uint16_t>* paddedRegion)
{
RIVE_PROF_SCOPE()
if (m_atlasRectanizer == nullptr)
{
uint16_t atlasMaxSize = m_ctx->atlasMaxSize();
// Use an atlas larger than atlasMaxSize if it's too small for the
// request (meaning the render target is larger than atlasMaxSize).
m_atlasRectanizer = std::make_unique<skgpu::RectanizerSkyline>(
std::max(atlasMaxSize, drawWidth),
std::max(atlasMaxSize, drawHeight));
}
const uint16_t atlasMaxWidth = m_atlasRectanizer->width();
const uint16_t atlasMaxHeight = m_atlasRectanizer->height();
uint16_t paddedWidth =
std::min<uint16_t>(drawWidth + desiredPadding * 2, atlasMaxWidth);
uint16_t paddedHeight =
std::min<uint16_t>(drawHeight + desiredPadding * 2, atlasMaxHeight);
int16_t ix, iy;
if (!m_atlasRectanizer->addRect(paddedWidth, paddedHeight, &ix, &iy))
{
// Delete the rectanizer if it wasn't big enough for this path. It will
// be reallocated to a large enough size on the next call.
if (drawWidth > atlasMaxWidth || drawHeight > atlasMaxHeight)
{
m_atlasRectanizer = nullptr;
}
return false;
}
assert(ix >= 0);
assert(iy >= 0);
assert(ix + paddedWidth <= atlasMaxWidth);
assert(iy + paddedHeight <= atlasMaxHeight);
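// Center the draw within its padded slot in the atlas; *x/*y address the
// unpadded draw area.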
*x = ix + (paddedWidth - drawWidth) / 2;
*y = iy + (paddedHeight - drawHeight) / 2;
*paddedRegion = {ix, iy, ix + paddedWidth, iy + paddedHeight};
assert((TAABB<uint16_t>{0, 0, atlasMaxWidth, atlasMaxHeight})
.contains(*paddedRegion));
m_atlasMaxX = std::max<uint32_t>(m_atlasMaxX, paddedRegion->right);
m_atlasMaxY = std::max<uint32_t>(m_atlasMaxY, paddedRegion->bottom);
assert(m_atlasMaxX <= atlasMaxWidth);
assert(m_atlasMaxY <= atlasMaxHeight);
m_pendingAtlasDraws.push_back(pathDraw);
return true;
}
size_t RenderContext::LogicalFlush::allocateCoverageBufferRange(size_t length)
{
RIVE_PROF_SCOPE()
assert(m_ctx->frameInterlockMode() == gpu::InterlockMode::clockwiseAtomic);
assert(length % (32 * 32) == 0u); // Allocations must support 32x32 tiles.
uint32_t offset = m_coverageBufferLength;
if (offset + length > m_ctx->platformFeatures().maxCoverageBufferLength)
{
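// Returning -1 from a size_t function yields SIZE_MAX, signaling that the
// coverage buffer is out of space.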
return -1;
}
m_coverageBufferLength += length;
return offset;
}
void RenderContext::logicalFlush()
{
assert(m_didBeginFrame);
// Reset clipping state after every logical flush because the clip buffer is
// not preserved between render passes.
m_clipContentID = 0;
// Don't issue any GPU commands between logical flushes. Instead, build up a
// list of flushes that we will submit all at once at the end of the frame.
m_logicalFlushes.emplace_back(new LogicalFlush(this));
}
void RenderContext::flush(const FlushResources& flushResources)
{
RIVE_PROF_SCOPE()
assert(m_didBeginFrame);
assert(flushResources.renderTarget->width() ==
m_frameDescriptor.renderTargetWidth);
assert(flushResources.renderTarget->height() ==
m_frameDescriptor.renderTargetHeight);
m_clipContentID = 0;
// Layout this frame's resource buffers and textures.
LogicalFlush::ResourceCounters totalFrameResourceCounts;
LogicalFlush::LayoutCounters layoutCounts;
for (size_t i = 0; i < m_logicalFlushes.size(); ++i)
{
m_logicalFlushes[i]->layoutResources(flushResources,
i,
&totalFrameResourceCounts,
&layoutCounts);
}
// Determine the minimum required resource allocation sizes to service this
// flush.
ResourceAllocationCounts resourceRequirements;
resourceRequirements.flushUniformBufferCount = m_logicalFlushes.size();
resourceRequirements.imageDrawUniformBufferCount =
totalFrameResourceCounts.imageDrawCount;
resourceRequirements.pathBufferCount =
totalFrameResourceCounts.pathCount + layoutCounts.pathPaddingCount;
resourceRequirements.paintBufferCount =
totalFrameResourceCounts.pathCount + layoutCounts.paintPaddingCount;
resourceRequirements.paintAuxBufferCount =
totalFrameResourceCounts.pathCount + layoutCounts.paintAuxPaddingCount;
resourceRequirements.contourBufferCount =
totalFrameResourceCounts.contourCount +
layoutCounts.contourPaddingCount;
resourceRequirements.gradSpanBufferCount =
layoutCounts.gradSpanCount + layoutCounts.gradSpanPaddingCount;
resourceRequirements.tessSpanBufferCount =
totalFrameResourceCounts.maxTessellatedSegmentCount;
resourceRequirements.triangleVertexBufferCount =
totalFrameResourceCounts.maxTriangleVertexCount;
resourceRequirements.gradTextureHeight = layoutCounts.maxGradTextureHeight;
resourceRequirements.tessTextureHeight = layoutCounts.maxTessTextureHeight;
resourceRequirements.atlasTextureWidth = layoutCounts.maxAtlasWidth;
resourceRequirements.atlasTextureHeight = layoutCounts.maxAtlasHeight;
resourceRequirements.coverageBufferLength =
layoutCounts.maxCoverageBufferLength;
// Ensure we're within hardware limits.
assert(resourceRequirements.gradTextureHeight <= kMaxTextureHeight);
assert(resourceRequirements.tessTextureHeight <= kMaxTextureHeight);
assert(resourceRequirements.atlasTextureWidth <= atlasMaxSize() ||
resourceRequirements.atlasTextureWidth <=
frameDescriptor().renderTargetWidth);
assert(resourceRequirements.atlasTextureHeight <= atlasMaxSize() ||
resourceRequirements.atlasTextureHeight <=
frameDescriptor().renderTargetHeight);
assert(resourceRequirements.coverageBufferLength <=
platformFeatures().maxCoverageBufferLength);
// Track m_maxRecentResourceRequirements so we can trim GPU allocations when
// steady-state usage goes down.
m_maxRecentResourceRequirements =
simd::max(resourceRequirements.toVec(),
m_maxRecentResourceRequirements.toVec());
// Grow resources enough to handle this flush.
// If "allocs" already fits in our current allocations, then don't change
// them. If they don't fit, overallocate by 25% in order to create some
// slack for growth.
ResourceAllocationCounts allocs = simd::if_then_else(
resourceRequirements.toVec() <= m_currentResourceAllocations.toVec(),
m_currentResourceAllocations.toVec(),
resourceRequirements.toVec() * size_t(5) / size_t(4));
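// For illustration: if a buffer currently holds 1000 elements and this flush
// needs 1200, the new allocation would be 1200 * 5/4 == 1500 elements.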
// In case the 25% growth pushed us above limits.
allocs.gradTextureHeight =
std::min<size_t>(allocs.gradTextureHeight, kMaxTextureHeight);
allocs.tessTextureHeight =
std::min<size_t>(allocs.tessTextureHeight, kMaxTextureHeight);
allocs.atlasTextureWidth = std::min<size_t>(
allocs.atlasTextureWidth,
std::max(atlasMaxSize(), frameDescriptor().renderTargetWidth));
allocs.atlasTextureHeight = std::min<size_t>(
allocs.atlasTextureHeight,
std::max(atlasMaxSize(), frameDescriptor().renderTargetHeight));
allocs.coverageBufferLength =
std::min(allocs.coverageBufferLength,
platformFeatures().maxCoverageBufferLength);
// Additionally, every 5 seconds, trim resources down to the most recent
// steady-state usage.
double flushTime = m_impl->secondsNow();
bool needsResourceTrim = flushTime - m_lastResourceTrimTimeInSeconds >= 5;
if (needsResourceTrim)
{
// Trim GPU resource allocations to 125% of their maximum recent usage,
// and only if the recent usage is 2/3 or less of the current
// allocation.
allocs = simd::if_then_else(m_maxRecentResourceRequirements.toVec() <=
allocs.toVec() * size_t(2) / size_t(3),
m_maxRecentResourceRequirements.toVec() *
size_t(5) / size_t(4),
allocs.toVec());
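// For illustration: an allocation of 1500 elements whose recent peak usage
// was only 800 (at most 2/3 of 1500) would be trimmed to 800 * 5/4 == 1000.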
// Ensure we stayed within limits.
assert(allocs.gradTextureHeight <= kMaxTextureHeight);
assert(allocs.tessTextureHeight <= kMaxTextureHeight);
assert(allocs.atlasTextureWidth <= atlasMaxSize() ||
allocs.atlasTextureWidth <= frameDescriptor().renderTargetWidth);
assert(allocs.atlasTextureHeight <= atlasMaxSize() ||
allocs.atlasTextureHeight <=
frameDescriptor().renderTargetHeight);
assert(allocs.coverageBufferLength <=
platformFeatures().maxCoverageBufferLength);
// Zero out m_maxRecentResourceRequirements for the next interval.
m_maxRecentResourceRequirements = ResourceAllocationCounts();
m_lastResourceTrimTimeInSeconds = flushTime;
}
setResourceSizes(allocs);
m_impl->prepareToFlush(flushResources.currentFrameNumber,
flushResources.safeFrameNumber);
mapResourceBuffers(resourceRequirements);
for (const auto& flush : m_logicalFlushes)
{
flush->writeResources();
}
assert(m_flushUniformData.elementsWritten() == m_logicalFlushes.size());
assert(m_imageDrawUniformData.elementsWritten() ==
totalFrameResourceCounts.imageDrawCount);
assert(m_pathData.elementsWritten() ==
totalFrameResourceCounts.pathCount + layoutCounts.pathPaddingCount);
assert(m_paintData.elementsWritten() ==
totalFrameResourceCounts.pathCount + layoutCounts.paintPaddingCount);
assert(m_paintAuxData.elementsWritten() ==
totalFrameResourceCounts.pathCount +
layoutCounts.paintAuxPaddingCount);
assert(m_contourData.elementsWritten() ==
totalFrameResourceCounts.contourCount +
layoutCounts.contourPaddingCount);
assert(m_gradSpanData.elementsWritten() ==
layoutCounts.gradSpanCount + layoutCounts.gradSpanPaddingCount);
assert(m_tessSpanData.elementsWritten() <=
totalFrameResourceCounts.maxTessellatedSegmentCount);
assert(m_triangleVertexData.elementsWritten() <=
totalFrameResourceCounts.maxTriangleVertexCount);
unmapResourceBuffers(resourceRequirements);
// Issue logical flushes to the backend.
for (const auto& flush : m_logicalFlushes)
{
m_impl->flush(flush->desc());
}
m_impl->postFlush(flushResources);
if (!m_logicalFlushes.empty())
{
m_logicalFlushes.resize(1);
m_logicalFlushes.front()->rewind();
}
// Drop all memory that was allocated for this frame using
// TrivialBlockAllocator.
m_perFrameAllocator.reset();
m_numChopsAllocator.reset();
m_chopVerticesAllocator.reset();
m_tangentPairsAllocator.reset();
m_polarSegmentCountsAllocator.reset();
m_parametricSegmentCountsAllocator.reset();
m_frameDescriptor = FrameDescriptor();
RIVE_DEBUG_CODE(m_didBeginFrame = false;)
// Wait to reset CPU-side containers until after the flush has finished.
if (needsResourceTrim)
{
resetContainers();
}
}
void RenderContext::LogicalFlush::layoutResources(
const FlushResources& flushResources,
size_t logicalFlushIdx,
ResourceCounters* runningFrameResourceCounts,
LayoutCounters* runningFrameLayoutCounts)
{
RIVE_PROF_SCOPE()
assert(!m_hasDoneLayout);
const FrameDescriptor& frameDescriptor = m_ctx->frameDescriptor();
// Reserve a path record for the clearColor paint (used by atomic mode).
// This also allows us to index the storage buffers directly by pathID.
++m_resourceCounts.pathCount;
// Storage buffer offsets are required to be aligned on multiples of 256.
m_pathPaddingCount =
math::padding_to_align_up<gpu::kPathBufferAlignmentInElements>(
m_resourceCounts.pathCount);
m_paintPaddingCount =
math::padding_to_align_up<gpu::kPaintBufferAlignmentInElements>(
m_resourceCounts.pathCount);
m_paintAuxPaddingCount =
math::padding_to_align_up<gpu::kPaintAuxBufferAlignmentInElements>(
m_resourceCounts.pathCount);
m_contourPaddingCount =
math::padding_to_align_up<gpu::kContourBufferAlignmentInElements>(
m_resourceCounts.contourCount);
// Metal requires vertex buffers to be 256-byte aligned.
m_gradSpanPaddingCount =
math::padding_to_align_up<gpu::kGradSpanBufferAlignmentInElements>(
m_pendingGradSpanCount);
size_t totalTessVertexCountWithPadding = 0;
if ((m_resourceCounts.midpointFanTessVertexCount |
m_resourceCounts.outerCubicTessVertexCount) != 0)
{
// midpointFan tessellation vertices reside at the beginning of the
// tessellation texture, after 1 patch of padding vertices.
constexpr uint32_t kPrePadding = gpu::kMidpointFanPatchSegmentSpan;
m_midpointFanTessVertexIdx = kPrePadding;
m_midpointFanTessEndLocation =
m_midpointFanTessVertexIdx +
math::lossless_numeric_cast<uint32_t>(
m_resourceCounts.midpointFanTessVertexCount);
// outerCubic tessellation vertices reside after the midpointFan
// vertices, aligned on a multiple of the outerCubic patch size.
uint32_t interiorPadding =
math::padding_to_align_up<gpu::kOuterCurvePatchSegmentSpan>(
m_midpointFanTessEndLocation);
m_outerCubicTessVertexIdx =
m_midpointFanTessEndLocation + interiorPadding;
m_outerCubicTessEndLocation =
m_outerCubicTessVertexIdx +
math::lossless_numeric_cast<uint32_t>(
m_resourceCounts.outerCubicTessVertexCount);
// We need one more padding vertex after all the tessellation vertices.
constexpr uint32_t kPostPadding = 1;
totalTessVertexCountWithPadding =
m_outerCubicTessEndLocation + kPostPadding;
assert(kPrePadding + interiorPadding + kPostPadding <=
kMaxTessellationPaddingVertexCount);
assert(totalTessVertexCountWithPadding <= kMaxTessellationVertexCount);
}
uint32_t tessDataHeight = math::lossless_numeric_cast<uint32_t>(
resource_texture_height<kTessTextureWidth>(
totalTessVertexCountWithPadding));
if (m_resourceCounts.maxTessellatedSegmentCount != 0)
{
// Conservatively account for line breaks and padding in the
// tessellation span count. Line breaks potentially introduce a new
// span. Count the maximum number of line breaks we might encounter,
// which is at most TWO for every line in the tessellation texture (one
// for a forward span, and one for its reflection.)
size_t maxSpanBreakCount = tessDataHeight * 2;
// The tessellation texture requires 3 separate spans of padding
// vertices (see above and below).
constexpr size_t kPaddingSpanCount = 3;
m_resourceCounts.maxTessellatedSegmentCount +=
maxSpanBreakCount + kPaddingSpanCount +
kMaxTessellationAlignmentVertices;
}
// Complex gradients begin on the first row immediately after the simple
// gradients.
m_gradTextureLayout.complexOffsetY = math::lossless_numeric_cast<uint32_t>(
resource_texture_height<gpu::kGradTextureWidthInSimpleRamps>(
m_simpleGradients.size()));
m_flushDesc.renderTarget = flushResources.renderTarget;
m_flushDesc.interlockMode = m_ctx->frameInterlockMode();
m_flushDesc.msaaSampleCount = frameDescriptor.msaaSampleCount;
// In atomic mode, we may be able to skip the explicit clear of the color
// buffer and fold it into the atomic "resolve" operation instead.
bool doClearDuringAtomicResolve = false;
if (logicalFlushIdx != 0)
{
// We always have to preserve the renderTarget between logical flushes.
m_flushDesc.colorLoadAction = gpu::LoadAction::preserveRenderTarget;
}
else if (frameDescriptor.loadAction == gpu::LoadAction::clear)
{
// In atomic mode, we can clear during the resolve operation if the
// clearColor is opaque (because we don't want or have a "source only"
// blend mode).
doClearDuringAtomicResolve =
m_ctx->frameInterlockMode() == gpu::InterlockMode::atomics &&
colorAlpha(frameDescriptor.clearColor) == 255;
m_flushDesc.colorLoadAction = doClearDuringAtomicResolve
? gpu::LoadAction::dontCare
: gpu::LoadAction::clear;
}
else
{
m_flushDesc.colorLoadAction = frameDescriptor.loadAction;
}
m_flushDesc.colorClearValue = frameDescriptor.clearColor;
if (doClearDuringAtomicResolve)
{
// In atomic mode we can accomplish a clear of the color buffer while
// the shader resolves coverage, instead of actually clearing.
// writeResources() will configure the fill for pathID=0 to be a solid
// fill matching the clearColor, so if we just initialize coverage
// buffer to solid coverage with pathID=0, the resolve step will write
// out the correct clear color.
assert(m_flushDesc.interlockMode == gpu::InterlockMode::atomics);
m_flushDesc.coverageClearValue =
static_cast<uint32_t>(FIXED_COVERAGE_ONE);
}
else if (m_flushDesc.interlockMode == gpu::InterlockMode::atomics)
{
// When we don't skip the initial clear in atomic mode, clear the
// coverage buffer to pathID=0 and a transparent coverage value.
// pathID=0 meets the requirement that pathID is always monotonically
// increasing. Transparent coverage makes sure the clearColor doesn't
// get written out while resolving.
m_flushDesc.coverageClearValue =
static_cast<uint32_t>(FIXED_COVERAGE_ZERO);
}
else
{
// In non-atomic mode, the coverage buffer just needs to be initialized
// with "pathID=0" to avoid collisions with any pathIDs being rendered.
m_flushDesc.coverageClearValue = 0;
}
if (doClearDuringAtomicResolve ||
m_flushDesc.colorLoadAction == gpu::LoadAction::clear)
{
// If we're clearing then we always update the entire render target.
m_flushDesc.renderTargetUpdateBounds =
m_flushDesc.renderTarget->bounds();
}
else
{
// When we don't clear, we only update the draw bounds.
m_flushDesc.renderTargetUpdateBounds =
m_flushDesc.renderTarget->bounds().intersect(m_combinedDrawBounds);
}
if (m_flushDesc.renderTargetUpdateBounds.empty())
{
// If this is empty it means there are no draws and no clear.
m_flushDesc.renderTargetUpdateBounds = {0, 0, 0, 0};
}
m_flushDesc.atlasContentWidth = m_atlasMaxX;
m_flushDesc.atlasContentHeight = m_atlasMaxY;
m_flushDesc.flushUniformDataOffsetInBytes =
logicalFlushIdx * sizeof(gpu::FlushUniforms);
m_flushDesc.pathCount =
math::lossless_numeric_cast<uint32_t>(m_resourceCounts.pathCount);
m_flushDesc.firstPath = runningFrameResourceCounts->pathCount +
runningFrameLayoutCounts->pathPaddingCount;
m_flushDesc.firstPaint = runningFrameResourceCounts->pathCount +
runningFrameLayoutCounts->paintPaddingCount;
m_flushDesc.firstPaintAux = runningFrameResourceCounts->pathCount +
runningFrameLayoutCounts->paintAuxPaddingCount;
m_flushDesc.contourCount =
math::lossless_numeric_cast<uint32_t>(m_resourceCounts.contourCount);
m_flushDesc.firstContour = runningFrameResourceCounts->contourCount +
runningFrameLayoutCounts->contourPaddingCount;
m_flushDesc.gradSpanCount =
math::lossless_numeric_cast<uint32_t>(m_pendingGradSpanCount);
m_flushDesc.firstGradSpan = runningFrameLayoutCounts->gradSpanCount +
runningFrameLayoutCounts->gradSpanPaddingCount;
m_flushDesc.gradDataHeight = math::lossless_numeric_cast<uint32_t>(
m_gradTextureLayout.complexOffsetY + m_complexGradients.size());
m_flushDesc.tessDataHeight = tessDataHeight;
m_flushDesc.clockwiseFillOverride = frameDescriptor.clockwiseFillOverride;
m_flushDesc.wireframe = frameDescriptor.wireframe;
#ifdef WITH_RIVE_TOOLS
m_flushDesc.synthesizedFailureType = frameDescriptor.synthesizedFailureType;
#endif
m_flushDesc.externalCommandBuffer = flushResources.externalCommandBuffer;
*runningFrameResourceCounts =
runningFrameResourceCounts->toVec() + m_resourceCounts.toVec();
runningFrameLayoutCounts->pathPaddingCount += m_pathPaddingCount;
runningFrameLayoutCounts->paintPaddingCount += m_paintPaddingCount;
runningFrameLayoutCounts->paintAuxPaddingCount += m_paintAuxPaddingCount;
runningFrameLayoutCounts->contourPaddingCount += m_contourPaddingCount;
runningFrameLayoutCounts->gradSpanCount += m_pendingGradSpanCount;
runningFrameLayoutCounts->gradSpanPaddingCount += m_gradSpanPaddingCount;
runningFrameLayoutCounts->maxGradTextureHeight =
std::max(m_flushDesc.gradDataHeight,
runningFrameLayoutCounts->maxGradTextureHeight);
runningFrameLayoutCounts->maxTessTextureHeight =
std::max(m_flushDesc.tessDataHeight,
runningFrameLayoutCounts->maxTessTextureHeight);
runningFrameLayoutCounts->maxAtlasWidth =
std::max(m_atlasMaxX, runningFrameLayoutCounts->maxAtlasWidth);
runningFrameLayoutCounts->maxAtlasHeight =
std::max(m_atlasMaxY, runningFrameLayoutCounts->maxAtlasHeight);
runningFrameLayoutCounts->maxCoverageBufferLength =
std::max<size_t>(m_coverageBufferLength,
runningFrameLayoutCounts->maxCoverageBufferLength);
assert(m_flushDesc.firstPath % gpu::kPathBufferAlignmentInElements == 0);
assert(m_flushDesc.firstPaint % gpu::kPaintBufferAlignmentInElements == 0);
assert(m_flushDesc.firstPaintAux %
gpu::kPaintAuxBufferAlignmentInElements ==
0);
assert(m_flushDesc.firstContour % gpu::kContourBufferAlignmentInElements ==
0);
assert(m_flushDesc.firstGradSpan %
gpu::kGradSpanBufferAlignmentInElements ==
0);
RIVE_DEBUG_CODE(m_hasDoneLayout = true;)
}
void RenderContext::LogicalFlush::writeResources()
{
RIVE_PROF_SCOPE()
const gpu::PlatformFeatures& platformFeatures = m_ctx->platformFeatures();
assert(m_hasDoneLayout);
assert(m_flushDesc.firstPath == m_ctx->m_pathData.elementsWritten());
assert(m_flushDesc.firstPaint == m_ctx->m_paintData.elementsWritten());
assert(m_flushDesc.firstPaintAux ==
m_ctx->m_paintAuxData.elementsWritten());
// Wait until here before we record these texture sizes; they aren't decided
// until after all LogicalFlushes have run layoutResources().
m_flushDesc.atlasTextureWidth = math::lossless_numeric_cast<uint32_t>(
m_ctx->m_currentResourceAllocations.atlasTextureWidth);
m_flushDesc.atlasTextureHeight = math::lossless_numeric_cast<uint32_t>(
m_ctx->m_currentResourceAllocations.atlasTextureHeight);
m_gradTextureLayout.inverseHeight =
1.f / m_ctx->m_currentResourceAllocations.gradTextureHeight;
// Exact tessSpan/triangleVertex counts aren't known until after their data
// is written out.
size_t firstTessVertexSpan = m_ctx->m_tessSpanData.elementsWritten();
size_t initialTriangleVertexDataSize =
m_ctx->m_triangleVertexData.bytesWritten();
// Metal requires vertex buffers to be 256-byte aligned.
size_t tessAlignmentPadding =
math::padding_to_align_up<gpu::kTessVertexBufferAlignmentInElements>(
firstTessVertexSpan);
assert(tessAlignmentPadding <= kMaxTessellationAlignmentVertices);
m_ctx->m_tessSpanData.push_back_n(nullptr, tessAlignmentPadding);
m_flushDesc.firstTessVertexSpan =
firstTessVertexSpan + tessAlignmentPadding;
assert(m_flushDesc.firstTessVertexSpan ==
m_ctx->m_tessSpanData.elementsWritten());
// Write out the simple gradient data.
constexpr static uint32_t ONE_TEXEL_FIXED = 65536 / gpu::kGradTextureWidth;
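// GradientSpan x coordinates are fixed point where 65536 spans the full
// gradient texture width, so ONE_TEXEL_FIXED is the width of one texel in
// that space.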
assert(m_simpleGradients.size() == m_pendingSimpleGradDraws.size());
if (!m_pendingSimpleGradDraws.empty())
{
for (size_t i = 0; i < m_pendingSimpleGradDraws.size(); ++i)
{
// Render each simple gradient as a single, empty GradientSpan with
// 1px borders to the left and right.
auto [color0, color1] = m_pendingSimpleGradDraws[i];
uint32_t y = math::lossless_numeric_cast<uint32_t>(
i / gpu::kGradTextureWidthInSimpleRamps);
size_t centerX = (i % gpu::kGradTextureWidthInSimpleRamps) * 2 + 1;
uint32_t centerXFixed = math::lossless_numeric_cast<uint32_t>(
centerX * ONE_TEXEL_FIXED);
m_ctx->m_gradSpanData.set_back(centerXFixed,
centerXFixed,
y,
GRAD_SPAN_FLAG_LEFT_BORDER |
GRAD_SPAN_FLAG_RIGHT_BORDER,
color0,
color1);
}
}
// Write out the vertex data for rendering complex gradients.
assert(m_complexGradients.size() == m_pendingComplexGradDraws.size());
if (!m_pendingComplexGradDraws.empty())
{
// The viewport will start at simpleGradDataHeight when rendering color
// ramps.
for (uint32_t i = 0; i < m_pendingComplexGradDraws.size(); ++i)
{
// Push "GradientSpan" instances that will render each section of
// this color ramp's gradient.
const Gradient* gradient = m_pendingComplexGradDraws[i];
const float* stops = gradient->stops();
const ColorInt* colors = gradient->colors();
size_t stopCount = gradient->count();
uint32_t y = i + m_gradTextureLayout.complexOffsetY;
// "stop * m + a" converts a stop position to a fixed-point x
// coordinate in the gradient texture. (In an ideal world, stops
// would all be aligned on pixel centers for the texture sampling to
// be identical to the gradient, but here we just stretch it across
// kGradTextureWidth pixels and hope everything looks ok.)
float m = (kGradTextureWidth - 1.f) * ONE_TEXEL_FIXED;
float a = .5f * ONE_TEXEL_FIXED;
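// For illustration, if kGradTextureWidth were 512: a stop at 0 maps to the
// center of texel 0 (x = .5 * ONE_TEXEL_FIXED) and a stop at 1 maps to the
// center of texel 511 (x = 511.5 * ONE_TEXEL_FIXED).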
uint32_t lastXFixed = static_cast<uint32_t>(stops[0] * m + a);
ColorInt lastColor = colors[0];
assert(stopCount >= 2);
for (size_t i = 1; i < stopCount; ++i)
{
uint32_t xFixed = static_cast<uint32_t>(stops[i] * m + a);
// stops[] must be ordered.
assert(lastXFixed <= xFixed && xFixed < 65536);
uint32_t flags = GRAD_SPAN_FLAG_COMPLEX_BORDER;
if (i == 1)
flags |= GRAD_SPAN_FLAG_LEFT_BORDER;
if (i == stopCount - 1)
flags |= GRAD_SPAN_FLAG_RIGHT_BORDER;
m_ctx->m_gradSpanData.set_back(lastXFixed,
xFixed,
y,
flags,
lastColor,
colors[i]);
lastColor = colors[i];
lastXFixed = xFixed;
}
}
}
// Write a path record for the clearColor paint (used by atomic mode).
// This also allows us to index the storage buffers directly by pathID.
gpu::SimplePaintValue clearColorValue;
clearColorValue.color = m_ctx->frameDescriptor().clearColor;
m_ctx->m_pathData.skip_back();
m_ctx->m_paintData.set_back(gpu::DrawContents::none,
PaintType::solidColor,
clearColorValue,
GradTextureLayout(),
/*clipID =*/0,
/*hasClipRect =*/false,
BlendMode::srcOver);
m_ctx->m_paintAuxData.skip_back();
// Render padding vertices in the tessellation texture.
if (m_flushDesc.tessDataHeight > 0)
{
// Padding at the beginning of the tessellation texture.
pushPaddingVertices(gpu::kMidpointFanPatchSegmentSpan, 0);
// Padding between patch types in the tessellation texture.
if (m_outerCubicTessVertexIdx > m_midpointFanTessEndLocation)
{
pushPaddingVertices(m_outerCubicTessVertexIdx -
m_midpointFanTessEndLocation,
m_midpointFanTessEndLocation);
}
// The final vertex of the final patch of each contour crosses over into
// the next contour. (This is how we wrap around back to the beginning.)
// Therefore, the final contour of the flush needs an out-of-contour
// vertex to cross into as well, so we emit a padding vertex here at the
// end.
pushPaddingVertices(1, m_outerCubicTessEndLocation);
}
// Write out all the data for our high level draws, and build up a low-level
// draw list.
if (m_ctx->frameInterlockMode() == gpu::InterlockMode::rasterOrdering)
{
for (const DrawUniquePtr& draw : m_draws)
{
// TODO: We don't currently support a front-to-back prepass in
// rasterOrdering mode. If we decide to support this, we will either
// need to walk the draws backwards here, or, more likely, start
// sorting and re-ordering in rasterOrdering mode as well.
assert(draw->prepassCount() == 0);
assert(draw->subpassCount() > 0);
for (int i = 0; i < draw->subpassCount(); ++i)
{
draw->pushToRenderContext(this, i);
}
}
}
else
{
assert(m_drawPassCount <= kMaxReorderedDrawPassCount);
// Sort the draw list to optimize batching, since we can only batch
// non-overlapping draws.
std::vector<int64_t>& indirectDrawList = m_ctx->m_indirectDrawList;
indirectDrawList.clear();
indirectDrawList.reserve(m_drawPassCount);
if (m_ctx->m_intersectionBoard == nullptr)
{
m_ctx->m_intersectionBoard = std::make_unique<IntersectionBoard>();
}
IntersectionBoard* intersectionBoard = m_ctx->m_intersectionBoard.get();
intersectionBoard->resizeAndReset(m_flushDesc.renderTarget->width(),
m_flushDesc.renderTarget->height());
// Build a list of sort keys that determine the final draw order.
constexpr static int kDrawGroupShift =
48; // Where in the key does the draw group begin?
constexpr static int64_t kDrawGroupMask = 0x7fffllu << kDrawGroupShift;
constexpr static int kDrawTypeShift = 45;
constexpr static int64_t kDrawTypeMask RIVE_MAYBE_UNUSED =
7llu << kDrawTypeShift;
constexpr static int kTextureHashShift = 31;
constexpr static int64_t kTextureHashMask = 0x3fffllu
<< kTextureHashShift;
constexpr static int kBlendModeShift = 27;
constexpr static int kBlendModeMask = 0xf << kBlendModeShift;
constexpr static int kDrawContentsShift = 18;
constexpr static int64_t kDrawContentsMask = 0x1ffllu
<< kDrawContentsShift;
constexpr static int kDrawIndexShift = 2;
constexpr static int64_t kDrawIndexMask = 0x7fff << kDrawIndexShift;
constexpr static int64_t kSubpassIndexMask = 0x3;
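// Resulting sort key layout (a negated key marks a prepass so prepasses sort
// front-to-back, before the subpasses):
//   bit  63     : sign (prepass flag)
//   bits 48..62 : draw group index
//   bits 45..47 : draw type
//   bits 31..44 : texture hash
//   bits 27..30 : blend mode
//   bits 18..26 : draw contents
//   bits  2..16 : draw index (bit 17 unused)
//   bits  0..1  : subpass index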
for (size_t i = 0; i < m_draws.size(); ++i)
{
Draw* draw = m_draws[i].get();
int4 drawBounds = simd::load4i(&m_draws[i]->pixelBounds());
// Add one extra pixel of padding to the draw bounds to make
// absolutely certain we get no overlapping pixels, which destroy
// the atomic shader.
constexpr int32_t kMax32i = std::numeric_limits<int32_t>::max();
constexpr int32_t kMin32i = std::numeric_limits<int32_t>::min();
drawBounds = simd::if_then_else(
drawBounds != int4{kMin32i, kMin32i, kMax32i, kMax32i},
drawBounds + int4{-1, -1, 1, 1},
drawBounds);
// Our top priority in re-ordering is to group non-overlapping draws
// together, in order to maximize batching while preserving
// correctness.
int maxPasses =
std::max(draw->prepassCount(), draw->subpassCount());
int16_t drawGroupIdx =
intersectionBoard->addRectangle(drawBounds, maxPasses);
assert(drawGroupIdx > 0);
int64_t key = static_cast<int64_t>(drawGroupIdx) << kDrawGroupShift;
// Within sub-groups of non-overlapping draws, sort similar draw
// types together.
int64_t drawType = static_cast<int64_t>(draw->type());
assert(drawType <= kDrawTypeMask >> kDrawTypeShift);
key |= drawType << kDrawTypeShift;
// Within sub-groups of matching draw type, sort by texture binding.
int64_t textureHash =
draw->imageTexture() != nullptr
? draw->imageTexture()->textureResourceHash() &
(kTextureHashMask >> kTextureHashShift)
: 0;
key |= textureHash << kTextureHashShift;
// If using KHR_blend_equation_advanced, we need a batching barrier
// between draws with different blend modes. If not using
// KHR_blend_equation_advanced, sorting by blend mode may still give
// us better branching on the GPU.
int64_t blendMode =
gpu::ConvertBlendModeToPLSBlendMode(draw->blendMode());
assert(blendMode <= kBlendModeMask >> kBlendModeShift);
key |= blendMode << kBlendModeShift;
// msaa mode draws strokes, fills, and even/odd with different
// stencil settings.
int64_t drawContents = static_cast<int64_t>(draw->drawContents());
assert(drawContents <= kDrawContentsMask >> kDrawContentsShift);
key |= drawContents << kDrawContentsShift;
// Draw and subpass indices go at the bottom of the key so we can
// reference them again after sorting without affecting the order.
assert(i <= kDrawIndexMask >> kDrawIndexShift);
key |= i << kDrawIndexShift;
assert((key & kDrawGroupMask) >> kDrawGroupShift == drawGroupIdx);
assert((key & kDrawTypeMask) >> kDrawTypeShift == drawType);
assert((key & kTextureHashMask) >> kTextureHashShift ==
textureHash);
assert((key & kBlendModeMask) >> kBlendModeShift == blendMode);
assert((key & kDrawContentsMask) >> kDrawContentsShift ==
drawContents);
assert((key & kDrawIndexMask) >> kDrawIndexShift == i);
// Add the first prepass and subpass, if any.
if (draw->prepassCount() > 0)
{
// Negating the key is an easy way to sort the prepasses
// front-to-back, and before the subpasses.
indirectDrawList.push_back(-key);
}
if (draw->subpassCount() > 0)
{
indirectDrawList.push_back(key);
}
// Add any additional passes.
for (int i = 1; i < maxPasses; ++i)
{
// Increment the drawGroupIdx and i both at once. (The
// intersectionBoard already reserved "maxPasses" layers of
// drawGroupIndices for us.)
key += (1ll << kDrawGroupShift) + 1;
assert((key & kDrawGroupMask) >> kDrawGroupShift ==
drawGroupIdx + i);
assert((key & kSubpassIndexMask) == i);
if (i < draw->prepassCount())
{
// Negating the key is an easy way to sort the prepasses
// front-to-back, and before the subpasses.
indirectDrawList.push_back(-key);
}
if (i < draw->subpassCount())
{
indirectDrawList.push_back(key);
}
}
}
assert(indirectDrawList.size() == m_drawPassCount);
// Re-order the draws!!
std::sort(indirectDrawList.begin(), indirectDrawList.end());
assert(m_pendingBarriers == BarrierFlags::none);
if (m_ctx->frameInterlockMode() == gpu::InterlockMode::atomics &&
platformFeatures.atomicPLSInitNeedsDraw)
{
// Atomic mode sometimes needs to initialize PLS with a draw when
// the backend can't do it with typical clear/load APIs.
// So far only Metal needs this, and its implementation doesn't
// require a barrier before or after.
m_drawList.emplace_back(m_ctx->perFrameAllocator(),
DrawType::renderPassInitialize,
gpu::ShaderMiscFlags::none,
1,
0,
BlendMode::srcOver,
ImageSampler::LinearClamp(),
BarrierFlags::none);
}
else if (m_ctx->frameInterlockMode() == gpu::InterlockMode::msaa &&
m_flushDesc.colorLoadAction ==
gpu::LoadAction::preserveRenderTarget &&
platformFeatures.msaaColorPreserveNeedsDraw)
{
// When implemented with a transient attachment, MSAA needs us to
// draw the old renderTarget contents into the framebuffer at the
// beginning of the render pass when
// LoadAction::preserveRenderTarget is specified.
m_drawList.emplace_back(m_ctx->perFrameAllocator(),
DrawType::renderPassInitialize,
gpu::ShaderMiscFlags::none,
1,
0,
BlendMode::srcOver,
ImageSampler::LinearClamp(),
// The MSAA init reads the framebuffer, so
// it needs the equivalent of a "dstBlend"
// barrier.
BarrierFlags::dstBlend);
m_drawList.tail().drawContents = gpu::DrawContents::opaquePaint;
// The draw that follows this init will need a special
// "msaaPostInit" barrier.
m_pendingBarriers |= BarrierFlags::msaaPostInit;
}
// Find a mask that tells us when to insert barriers, and which barriers
// are needed. When the keys of two adjacent draws differ within this
// bitmask, we insert a barrier between them.
int64_t needsBarrierMask = 0;
BarrierFlags neededBarriers = BarrierFlags::none;
switch (m_flushDesc.interlockMode)
{
case gpu::InterlockMode::rasterOrdering:
// rasterOrdering mode doesn't reorder draws.
RIVE_UNREACHABLE();
case gpu::InterlockMode::atomics:
// In atomic mode, we need barriers any time draws overlap.
// Insert a barrier every time the drawGroupIdx changes.
needsBarrierMask = kDrawGroupMask;
neededBarriers = BarrierFlags::plsAtomic;
break;
case gpu::InterlockMode::clockwiseAtomic:
// In clockwiseAtomic mode, we only need a barrier between the
// borrowedCoverage prepasses and the main rendering. Prepasses
// have a negative key, so just insert a barrier when the sign
// changes.
needsBarrierMask = 1ll << 63;
neededBarriers = BarrierFlags::clockwiseBorrowedCoverage;
break;
case gpu::InterlockMode::msaa:
// MSAA mode can't batch draws that overlap because they both
// rely on the stencil buffer across subpasses. Stop batching
// every time the drawGroupIdx changes.
needsBarrierMask = kDrawGroupMask;
// MSAA mode draws clips, strokes, fills, and even/odd with
// different stencil settings, so these can't be batched.
needsBarrierMask |= kDrawContentsMask;
if (platformFeatures.supportsBlendAdvancedKHR)
{
// If using KHR_blend_equation_advanced, we also need to
// stop batching between blend modes in order to change the
// blend equation.
needsBarrierMask |= kBlendModeMask;
}
// MSAA barriers only need to prevent batching of draws for now.
// If we also need a dstBlend barrier, that will be decided
// later.
neededBarriers = BarrierFlags::drawBatchBreak;
break;
}
// Write out the draw data from the sorted draw list, and build up a
// condensed/batched list of low-level draws.
constexpr int64_t BEGIN_KEY = std::numeric_limits<int64_t>::min();
int64_t priorSignedKey = BEGIN_KEY;
for (const int64_t signedKey : indirectDrawList)
{
assert(signedKey >= priorSignedKey);
// The first draw always gets barriers because we need the barriers
// after the initial clears, loads, etc.
if (priorSignedKey == BEGIN_KEY ||
(priorSignedKey & needsBarrierMask) !=
(signedKey & needsBarrierMask))
{
m_pendingBarriers |= neededBarriers;
}
int64_t key = abs(signedKey);
uint32_t drawIndex = (key & kDrawIndexMask) >> kDrawIndexShift;
int subpassIndex = key & kSubpassIndexMask;
if (signedKey < 0)
{
// Negative keys are a prepass. Update the subpassIndex to be
// negative.
subpassIndex = -1 - subpassIndex;
}
// FIXME: m_currentZIndex shouldn't be a stateful variable; it
// should be passed to pushToRenderContext() instead.
m_currentZIndex = math::lossless_numeric_cast<uint32_t>(
abs(key >> static_cast<int64_t>(kDrawGroupShift)));
m_draws[drawIndex]->pushToRenderContext(this, subpassIndex);
priorSignedKey = signedKey;
}
// Atomic mode needs one more draw to resolve all the pixels.
if (m_ctx->frameInterlockMode() == gpu::InterlockMode::atomics)
{
m_drawList
.emplace_back(m_ctx->perFrameAllocator(),
DrawType::renderPassResolve,
gpu::ShaderMiscFlags::none,
1,
0,
BlendMode::srcOver,
ImageSampler::LinearClamp(),
BarrierFlags::plsAtomicPreResolve)
.shaderFeatures = m_combinedShaderFeatures;
}
}
// Write out the draws to the feather atlas. Do this after the main draws
// (even though the atlas ones execute first) so that our path info and Z
// index are decided and available to pushAtlasTessellation().
if (!m_pendingAtlasDraws.empty())
{
TAABB<uint16_t> fullAtlasViewport = {0,
0,
m_flushDesc.atlasContentWidth,
m_flushDesc.atlasContentHeight};
gpu::AtlasDrawBatch* currentBatch =
m_ctx->m_perFrameAllocator.makePODArray<gpu::AtlasDrawBatch>(
m_pendingAtlasDraws.size());
// Iterate the atlas draws 4 times so we can sort by fill / stroke /
// scissored / not, and batch together the draws that don't have
// scissor.
for (bool stroked : {false, true})
{
if (stroked)
{
m_flushDesc.atlasStrokeBatches = currentBatch;
}
else
{
m_flushDesc.atlasFillBatches = currentBatch;
}
for (bool scissored : {false, true})
{
gpu::AtlasDrawBatch* lastBatch = nullptr;
for (PathDraw* draw : m_pendingAtlasDraws)
{
if (draw->isStroke() != stroked ||
draw->atlasScissorEnabled() != scissored)
{
continue;
}
uint32_t tessVertexCount, tessBaseVertex;
draw->pushAtlasTessellation(this,
&tessVertexCount,
&tessBaseVertex);
if (tessVertexCount == 0)
{
continue;
}
uint32_t patchCount =
tessVertexCount / gpu::kMidpointFanPatchSegmentSpan;
uint32_t basePatch =
tessBaseVertex / gpu::kMidpointFanPatchSegmentSpan;
assert(patchCount * gpu::kMidpointFanPatchSegmentSpan ==
tessVertexCount);
assert(basePatch * gpu::kMidpointFanPatchSegmentSpan ==
tessBaseVertex);
if (lastBatch == nullptr || scissored)
{
lastBatch = currentBatch++;
*lastBatch = {
.scissor = scissored ? draw->atlasScissor() : fullAtlasViewport,
.patchCount = patchCount,
.basePatch = basePatch,
};
}
else
{
assert(lastBatch->basePatch + lastBatch->patchCount ==
basePatch);
lastBatch->patchCount += patchCount;
}
}
}
if (stroked)
{
m_flushDesc.atlasStrokeBatchCount =
currentBatch - m_flushDesc.atlasStrokeBatches;
}
else
{
m_flushDesc.atlasFillBatchCount =
currentBatch - m_flushDesc.atlasFillBatches;
}
}
assert(m_flushDesc.atlasFillBatchCount +
m_flushDesc.atlasStrokeBatchCount ==
currentBatch - m_flushDesc.atlasFillBatches);
assert(m_flushDesc.atlasFillBatchCount +
m_flushDesc.atlasStrokeBatchCount <=
m_pendingAtlasDraws.size());
}
// Pad our buffers to 256-byte alignment.
m_ctx->m_pathData.push_back_n(nullptr, m_pathPaddingCount);
m_ctx->m_paintData.push_back_n(nullptr, m_paintPaddingCount);
m_ctx->m_paintAuxData.push_back_n(nullptr, m_paintAuxPaddingCount);
m_ctx->m_contourData.push_back_n(nullptr, m_contourPaddingCount);
m_ctx->m_gradSpanData.push_back_n(nullptr, m_gradSpanPaddingCount);
assert(m_ctx->m_pathData.elementsWritten() ==
m_flushDesc.firstPath + m_resourceCounts.pathCount +
m_pathPaddingCount);
assert(m_ctx->m_paintData.elementsWritten() ==
m_flushDesc.firstPaint + m_resourceCounts.pathCount +
m_paintPaddingCount);
assert(m_ctx->m_paintAuxData.elementsWritten() ==
m_flushDesc.firstPaintAux + m_resourceCounts.pathCount +
m_paintAuxPaddingCount);
assert(m_ctx->m_contourData.elementsWritten() ==
m_flushDesc.firstContour + m_resourceCounts.contourCount +
m_contourPaddingCount);
assert(m_ctx->m_gradSpanData.elementsWritten() ==
m_flushDesc.firstGradSpan + m_pendingGradSpanCount +
m_gradSpanPaddingCount);
assert(m_midpointFanTessVertexIdx == m_midpointFanTessEndLocation);
assert(m_outerCubicTessVertexIdx == m_outerCubicTessEndLocation);
// Some of the flushDescriptor's data isn't known until after
// writeResources(). Update it now that it's known.
m_flushDesc.combinedShaderFeatures = m_combinedShaderFeatures;
m_flushDesc.atomicFixedFunctionColorOutput =
m_ctx->frameInterlockMode() == InterlockMode::atomics &&
!(m_combinedShaderFeatures & ShaderFeatures::ENABLE_ADVANCED_BLEND);
if (m_coverageBufferLength > 0)
{
assert(m_flushDesc.interlockMode ==
gpu::InterlockMode::clockwiseAtomic);
// The coverage buffer prefix gets reset to zero when the buffer is
// reallocated, so wait until here to get the prefix.
m_flushDesc.coverageBufferPrefix = m_ctx->incrementCoverageBufferPrefix(
&m_flushDesc.needsCoverageBufferClear);
}
m_flushDesc.tessVertexSpanCount = math::lossless_numeric_cast<uint32_t>(
m_ctx->m_tessSpanData.elementsWritten() -
m_flushDesc.firstTessVertexSpan);
m_flushDesc.hasTriangleVertices =
m_ctx->m_triangleVertexData.bytesWritten() !=
initialTriangleVertexDataSize;
m_flushDesc.drawList = &m_drawList;
// Write out the uniforms for this flush now that the flushDescriptor is
// complete.
m_ctx->m_flushUniformData.emplace_back(m_flushDesc, platformFeatures);
}
void RenderContext::setResourceSizes(ResourceAllocationCounts allocs,
bool forceRealloc)
{
RIVE_PROF_SCOPE()
#if 0
class Logger
{
public:
void logSize(const char* name,
size_t oldSize,
size_t newSize,
size_t newSizeInBytes)
{
m_totalSizeInBytes += newSizeInBytes;
if (oldSize == newSize)
{
return;
}
if (!m_hasChanged)
{
printf("RenderContext::setResourceSizes():\n");
m_hasChanged = true;
}
printf(" resize %s: %zu -> %zu (%zu KiB)\n",
name,
oldSize,
newSize,
newSizeInBytes >> 10);
}
void logTextureSize(const char* widthName,
const char* heightName,
size_t oldWidth,
size_t oldHeight,
size_t newWidth,
size_t newHeight,
size_t bytesPerPixel)
{
m_totalSizeInBytes += newHeight * newWidth * bytesPerPixel;
if (oldWidth == newWidth && oldHeight == newHeight)
{
return;
}
if (!m_hasChanged)
{
printf("RenderContext::setResourceSizes():\n");
m_hasChanged = true;
}
printf(" resize %s x %s: %zu x %zu -> %zu x %zu (%zu KiB)\n",
widthName,
heightName,
oldWidth,
oldHeight,
newWidth,
newHeight,
(newHeight * newWidth * bytesPerPixel) >> 10);
}
~Logger()
{
if (!m_hasChanged)
{
return;
}
printf(" TOTAL GPU resource usage: %zu KiB\n",
m_totalSizeInBytes >> 10);
}
private:
size_t m_totalSizeInBytes = 0;
bool m_hasChanged = false;
} logger;
#define LOG_BUFFER_RING_SIZE(NAME, ITEM_SIZE_IN_BYTES) \
logger.logSize(#NAME, \
m_currentResourceAllocations.NAME, \
allocs.NAME, \
allocs.NAME* ITEM_SIZE_IN_BYTES* gpu::kBufferRingSize)
#define LOG_TEXTURE_HEIGHT(NAME, BYTES_PER_ROW) \
logger.logSize(#NAME, \
m_currentResourceAllocations.NAME, \
allocs.NAME, \
allocs.NAME* BYTES_PER_ROW)
#define LOG_TEXTURE_SIZE(WIDTH_NAME, HEIGHT_NAME, BYTES_PER_PIXEL) \
logger.logTextureSize(#WIDTH_NAME, \
#HEIGHT_NAME, \
m_currentResourceAllocations.WIDTH_NAME, \
m_currentResourceAllocations.HEIGHT_NAME, \
allocs.WIDTH_NAME, \
allocs.HEIGHT_NAME, \
BYTES_PER_PIXEL)
#define LOG_BUFFER_SIZE(NAME, BYTES_PER_ELEMENT) \
logger.logSize(#NAME, \
m_currentResourceAllocations.NAME, \
allocs.NAME, \
allocs.NAME* BYTES_PER_ELEMENT)
#else
#define LOG_BUFFER_RING_SIZE(NAME, ITEM_SIZE_IN_BYTES)
#define LOG_TEXTURE_HEIGHT(NAME, BYTES_PER_ROW)
#define LOG_TEXTURE_SIZE(WIDTH_NAME, HEIGHT_NAME, BYTES_PER_PIXEL)
#define LOG_BUFFER_SIZE(NAME, BYTES_PER_ELEMENT)
#endif
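// NOTE: The Logger above is compiled out by default. Flipping the "#if 0"
// to "#if 1" compiles it in, and the LOG_* macros below then print a line
// for every resource whose size changed, e.g. (illustrative, made-up
// numbers):
//
//   RenderContext::setResourceSizes():
//     resize tessSpanBufferCount: 1024 -> 2048 (96 KiB)
//     TOTAL GPU resource usage: 4096 KiB
//
// The running total includes every resource passed through the macros,
// whether or not its size changed.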
LOG_BUFFER_RING_SIZE(flushUniformBufferCount, sizeof(gpu::FlushUniforms));
if (allocs.flushUniformBufferCount !=
m_currentResourceAllocations.flushUniformBufferCount ||
forceRealloc)
{
m_impl->resizeFlushUniformBuffer(allocs.flushUniformBufferCount *
sizeof(gpu::FlushUniforms));
}
LOG_BUFFER_RING_SIZE(imageDrawUniformBufferCount,
sizeof(gpu::ImageDrawUniforms));
if (allocs.imageDrawUniformBufferCount !=
m_currentResourceAllocations.imageDrawUniformBufferCount ||
forceRealloc)
{
m_impl->resizeImageDrawUniformBuffer(
allocs.imageDrawUniformBufferCount *
sizeof(gpu::ImageDrawUniforms));
}
LOG_BUFFER_RING_SIZE(pathBufferCount, sizeof(gpu::PathData));
if (allocs.pathBufferCount !=
m_currentResourceAllocations.pathBufferCount ||
forceRealloc)
{
m_impl->resizePathBuffer(allocs.pathBufferCount * sizeof(gpu::PathData),
gpu::PathData::kBufferStructure);
}
LOG_BUFFER_RING_SIZE(paintBufferCount, sizeof(gpu::PaintData));
if (allocs.paintBufferCount !=
m_currentResourceAllocations.paintBufferCount ||
forceRealloc)
{
m_impl->resizePaintBuffer(allocs.paintBufferCount *
sizeof(gpu::PaintData),
gpu::PaintData::kBufferStructure);
}
LOG_BUFFER_RING_SIZE(paintAuxBufferCount, sizeof(gpu::PaintAuxData));
if (allocs.paintAuxBufferCount !=
m_currentResourceAllocations.paintAuxBufferCount ||
forceRealloc)
{
m_impl->resizePaintAuxBuffer(allocs.paintAuxBufferCount *
sizeof(gpu::PaintAuxData),
gpu::PaintAuxData::kBufferStructure);
}
LOG_BUFFER_RING_SIZE(contourBufferCount, sizeof(gpu::ContourData));
if (allocs.contourBufferCount !=
m_currentResourceAllocations.contourBufferCount ||
forceRealloc)
{
m_impl->resizeContourBuffer(allocs.contourBufferCount *
sizeof(gpu::ContourData),
gpu::ContourData::kBufferStructure);
}
LOG_BUFFER_RING_SIZE(gradSpanBufferCount, sizeof(gpu::GradientSpan));
if (allocs.gradSpanBufferCount !=
m_currentResourceAllocations.gradSpanBufferCount ||
forceRealloc)
{
m_impl->resizeGradSpanBuffer(allocs.gradSpanBufferCount *
sizeof(gpu::GradientSpan));
}
LOG_BUFFER_RING_SIZE(tessSpanBufferCount, sizeof(gpu::TessVertexSpan));
if (allocs.tessSpanBufferCount !=
m_currentResourceAllocations.tessSpanBufferCount ||
forceRealloc)
{
m_impl->resizeTessVertexSpanBuffer(allocs.tessSpanBufferCount *
sizeof(gpu::TessVertexSpan));
}
LOG_BUFFER_RING_SIZE(triangleVertexBufferCount,
sizeof(gpu::TriangleVertex));
if (allocs.triangleVertexBufferCount !=
m_currentResourceAllocations.triangleVertexBufferCount ||
forceRealloc)
{
m_impl->resizeTriangleVertexBuffer(allocs.triangleVertexBufferCount *
sizeof(gpu::TriangleVertex));
}
assert(allocs.gradTextureHeight <= kMaxTextureHeight);
LOG_TEXTURE_HEIGHT(gradTextureHeight, gpu::kGradTextureWidth * 4);
if (allocs.gradTextureHeight !=
m_currentResourceAllocations.gradTextureHeight ||
forceRealloc)
{
m_impl->resizeGradientTexture(
gpu::kGradTextureWidth,
math::lossless_numeric_cast<uint32_t>(allocs.gradTextureHeight));
}
assert(allocs.tessTextureHeight <= kMaxTextureHeight);
LOG_TEXTURE_HEIGHT(tessTextureHeight, gpu::kTessTextureWidth * 4 * 4);
if (allocs.tessTextureHeight !=
m_currentResourceAllocations.tessTextureHeight ||
forceRealloc)
{
m_impl->resizeTessellationTexture(
gpu::kTessTextureWidth,
math::lossless_numeric_cast<uint32_t>(allocs.tessTextureHeight));
}
assert(allocs.atlasTextureWidth <= atlasMaxSize() ||
allocs.atlasTextureWidth <= frameDescriptor().renderTargetWidth);
assert(allocs.atlasTextureHeight <= atlasMaxSize() ||
allocs.atlasTextureHeight <= frameDescriptor().renderTargetHeight);
LOG_TEXTURE_SIZE(atlasTextureWidth, atlasTextureHeight, sizeof(uint16_t));
if (allocs.atlasTextureWidth !=
m_currentResourceAllocations.atlasTextureWidth ||
allocs.atlasTextureHeight !=
m_currentResourceAllocations.atlasTextureHeight ||
forceRealloc)
{
m_impl->resizeAtlasTexture(
math::lossless_numeric_cast<uint32_t>(allocs.atlasTextureWidth),
math::lossless_numeric_cast<uint32_t>(allocs.atlasTextureHeight));
}
assert(allocs.coverageBufferLength <=
platformFeatures().maxCoverageBufferLength);
LOG_BUFFER_SIZE(coverageBufferLength, sizeof(uint32_t));
if (allocs.coverageBufferLength !=
m_currentResourceAllocations.coverageBufferLength ||
forceRealloc)
{
m_impl->resizeCoverageBuffer(allocs.coverageBufferLength *
sizeof(uint32_t));
// Start the coverageBufferPrefix over at zero. This ensures the new
// buffer gets cleared, because the only criterion for clearing it is
// the prefix wrapping around to 0.
m_coverageBufferPrefix = 0;
}
m_currentResourceAllocations = allocs;
}
void RenderContext::mapResourceBuffers(
const ResourceAllocationCounts& mapCounts)
{
RIVE_PROF_SCOPE()
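// Each CPU-side writer below gets bound to its GPU buffer through the
// impl's corresponding map* entry point, sized for exactly the number of
// elements this flush cycle plans to write. Writers with a zero count are
// left unmapped, which is why unmapResourceBuffers() checks each one
// before unmapping it.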
if (mapCounts.flushUniformBufferCount > 0)
{
m_flushUniformData.mapElements(
m_impl.get(),
&RenderContextImpl::mapFlushUniformBuffer,
mapCounts.flushUniformBufferCount);
}
assert(m_flushUniformData.hasRoomFor(mapCounts.flushUniformBufferCount));
if (mapCounts.imageDrawUniformBufferCount > 0)
{
m_imageDrawUniformData.mapElements(
m_impl.get(),
&RenderContextImpl::mapImageDrawUniformBuffer,
mapCounts.imageDrawUniformBufferCount);
}
assert(m_imageDrawUniformData.hasRoomFor(
mapCounts.imageDrawUniformBufferCount));
if (mapCounts.pathBufferCount > 0)
{
m_pathData.mapElements(m_impl.get(),
&RenderContextImpl::mapPathBuffer,
mapCounts.pathBufferCount);
}
assert(m_pathData.hasRoomFor(mapCounts.pathBufferCount));
if (mapCounts.paintBufferCount > 0)
{
m_paintData.mapElements(m_impl.get(),
&RenderContextImpl::mapPaintBuffer,
mapCounts.paintBufferCount);
}
assert(m_paintData.hasRoomFor(mapCounts.paintBufferCount));
if (mapCounts.paintAuxBufferCount > 0)
{
m_paintAuxData.mapElements(m_impl.get(),
&RenderContextImpl::mapPaintAuxBuffer,
mapCounts.paintAuxBufferCount);
}
assert(m_paintAuxData.hasRoomFor(mapCounts.paintAuxBufferCount));
if (mapCounts.contourBufferCount > 0)
{
m_contourData.mapElements(m_impl.get(),
&RenderContextImpl::mapContourBuffer,
mapCounts.contourBufferCount);
}
assert(m_contourData.hasRoomFor(mapCounts.contourBufferCount));
if (mapCounts.gradSpanBufferCount > 0)
{
m_gradSpanData.mapElements(m_impl.get(),
&RenderContextImpl::mapGradSpanBuffer,
mapCounts.gradSpanBufferCount);
}
assert(m_gradSpanData.hasRoomFor(mapCounts.gradSpanBufferCount));
if (mapCounts.tessSpanBufferCount > 0)
{
m_tessSpanData.mapElements(m_impl.get(),
&RenderContextImpl::mapTessVertexSpanBuffer,
mapCounts.tessSpanBufferCount);
}
assert(m_tessSpanData.hasRoomFor(mapCounts.tessSpanBufferCount));
if (mapCounts.triangleVertexBufferCount > 0)
{
m_triangleVertexData.mapElements(
m_impl.get(),
&RenderContextImpl::mapTriangleVertexBuffer,
mapCounts.triangleVertexBufferCount);
}
assert(
m_triangleVertexData.hasRoomFor(mapCounts.triangleVertexBufferCount));
}
void RenderContext::unmapResourceBuffers(
const ResourceAllocationCounts& mapCounts)
{
RIVE_PROF_SCOPE()
if (m_flushUniformData)
{
m_flushUniformData.unmapElements(
m_impl.get(),
&RenderContextImpl::unmapFlushUniformBuffer,
mapCounts.flushUniformBufferCount);
}
if (m_imageDrawUniformData)
{
m_imageDrawUniformData.unmapElements(
m_impl.get(),
&RenderContextImpl::unmapImageDrawUniformBuffer,
mapCounts.imageDrawUniformBufferCount);
}
if (m_pathData)
{
m_pathData.unmapElements(m_impl.get(),
&RenderContextImpl::unmapPathBuffer,
mapCounts.pathBufferCount);
}
if (m_paintData)
{
m_paintData.unmapElements(m_impl.get(),
&RenderContextImpl::unmapPaintBuffer,
mapCounts.paintBufferCount);
}
if (m_paintAuxData)
{
m_paintAuxData.unmapElements(m_impl.get(),
&RenderContextImpl::unmapPaintAuxBuffer,
mapCounts.paintAuxBufferCount);
}
if (m_contourData)
{
m_contourData.unmapElements(m_impl.get(),
&RenderContextImpl::unmapContourBuffer,
mapCounts.contourBufferCount);
}
if (m_gradSpanData)
{
m_gradSpanData.unmapElements(m_impl.get(),
&RenderContextImpl::unmapGradSpanBuffer,
mapCounts.gradSpanBufferCount);
}
if (m_tessSpanData)
{
m_tessSpanData.unmapElements(
m_impl.get(),
&RenderContextImpl::unmapTessVertexSpanBuffer,
mapCounts.tessSpanBufferCount);
}
if (m_triangleVertexData)
{
m_triangleVertexData.unmapElements(
m_impl.get(),
&RenderContextImpl::unmapTriangleVertexBuffer,
mapCounts.triangleVertexBufferCount);
}
}
uint32_t RenderContext::incrementCoverageBufferPrefix(
bool* needsCoverageBufferClear)
{
RIVE_PROF_SCOPE()
assert(m_didBeginFrame);
assert(frameInterlockMode() == gpu::InterlockMode::clockwiseAtomic);
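// The prefix advances by one increment of the high bits per call,
// presumably leaving the low CLOCKWISE_COVERAGE_BIT_COUNT bits for
// per-pixel coverage counts. Illustrative sketch (assuming the prefix
// starts at zero and, hypothetically, a 16-bit coverage count):
//
//   flush 0 returns 0x00010000, flush 1 returns 0x00020000, ...
//   ...until the 32-bit prefix eventually wraps to 0, at which point the
//   do/while below bumps it once more (so 0 is never returned) and
//   requests a coverage buffer clear, restoring the "monotonically
//   increasing" invariant the shaders rely on.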
do
{
if (m_coverageBufferPrefix == 0)
{
// When the prefix wraps around to 0, we need to clear the coverage
// buffer because our shaders require coverageBufferPrefix to be
// monotonically increasing.
*needsCoverageBufferClear = true;
}
m_coverageBufferPrefix += 1 << CLOCKWISE_COVERAGE_BIT_COUNT;
} while (m_coverageBufferPrefix == 0);
return m_coverageBufferPrefix;
}
uint32_t RenderContext::LogicalFlush::allocateMidpointFanTessVertices(
uint32_t count)
{
uint32_t location = m_midpointFanTessVertexIdx;
m_midpointFanTessVertexIdx += count;
assert(m_midpointFanTessVertexIdx <= m_midpointFanTessEndLocation);
return location;
}
uint32_t RenderContext::LogicalFlush::allocateOuterCubicTessVertices(
uint32_t count)
{
uint32_t location = m_outerCubicTessVertexIdx;
m_outerCubicTessVertexIdx += count;
assert(m_outerCubicTessVertexIdx <= m_outerCubicTessEndLocation);
return location;
}
uint32_t RenderContext::LogicalFlush::pushPath(const PathDraw* draw)
{
RIVE_PROF_SCOPE()
assert(m_hasDoneLayout);
++m_currentPathID;
assert(0 < m_currentPathID && m_currentPathID <= m_ctx->m_maxPathID);
m_ctx->m_pathData.set_back(draw->matrix(),
draw->strokeRadius(),
draw->featherRadius(),
m_currentZIndex,
draw->atlasTransform(),
draw->coverageBufferRange());
m_ctx->m_paintData.set_back(draw->drawContents(),
draw->paintType(),
draw->simplePaintValue(),
m_gradTextureLayout,
draw->clipID(),
draw->hasClipRect(),
draw->blendMode());
m_ctx->m_paintAuxData.set_back(draw->matrix(),
draw->paintType(),
draw->simplePaintValue(),
draw->gradient(),
draw->imageTexture(),
draw->clipRectInverseMatrix(),
m_flushDesc.renderTarget,
m_ctx->platformFeatures());
assert(m_flushDesc.firstPath + m_currentPathID + 1 ==
m_ctx->m_pathData.elementsWritten());
assert(m_flushDesc.firstPaint + m_currentPathID + 1 ==
m_ctx->m_paintData.elementsWritten());
assert(m_flushDesc.firstPaintAux + m_currentPathID + 1 ==
m_ctx->m_paintAuxData.elementsWritten());
return m_currentPathID;
}
RenderContext::TessellationWriter::TessellationWriter(
LogicalFlush* flush,
uint32_t pathID,
gpu::ContourDirections contourDirections,
uint32_t forwardTessVertexCount,
uint32_t forwardTessLocation,
uint32_t mirroredTessVertexCount,
uint32_t mirroredTessLocation) :
m_flush(flush),
m_tessSpanData(m_flush->m_ctx->m_tessSpanData),
m_pathID(pathID),
m_contourDirections(contourDirections),
m_pathTessLocation(forwardTessLocation),
m_pathMirroredTessLocation(mirroredTessLocation)
{
RIVE_PROF_SCOPE()
RIVE_DEBUG_CODE(m_expectedPathTessEndLocation =
m_pathTessLocation + forwardTessVertexCount;)
RIVE_DEBUG_CODE(m_expectedPathMirroredTessEndLocation =
m_pathMirroredTessLocation - mirroredTessVertexCount;)
assert(m_flush->m_hasDoneLayout);
assert(m_flush->m_ctx->m_pathData.elementsWritten() > 0);
assert(forwardTessVertexCount == 0 || mirroredTessVertexCount == 0 ||
forwardTessVertexCount == mirroredTessVertexCount);
assert(!gpu::ContourDirectionsAreDoubleSided(m_contourDirections) ||
forwardTessVertexCount == mirroredTessVertexCount);
assert(m_pathTessLocation >= 0);
assert(m_pathMirroredTessLocation <= kMaxTessellationVertexCount);
assert(m_expectedPathTessEndLocation <= kMaxTessellationVertexCount);
assert(m_expectedPathMirroredTessEndLocation >= 0);
}
RenderContext::TessellationWriter::~TessellationWriter()
{
assert(m_pathTessLocation == m_expectedPathTessEndLocation);
assert(m_pathMirroredTessLocation == m_expectedPathMirroredTessEndLocation);
}
uint32_t RenderContext::LogicalFlush::pushContour(uint32_t pathID,
Vec2D midpoint,
bool isStroke,
bool closed,
uint32_t vertexIndex0)
{
RIVE_PROF_SCOPE()
assert(pathID != 0);
assert(isStroke || closed);
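// Strokes have no use for a fan midpoint here, so its x component is
// repurposed below as a closed/open flag for the contour.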
if (isStroke)
{
midpoint.x = closed ? 1 : 0;
}
m_ctx->m_contourData.emplace_back(midpoint, pathID, vertexIndex0);
++m_currentContourID;
assert(0 < m_currentContourID && m_currentContourID <= gpu::kMaxContourID);
assert(m_flushDesc.firstContour + m_currentContourID ==
m_ctx->m_contourData.elementsWritten());
return m_currentContourID;
}
uint32_t RenderContext::TessellationWriter::pushContour(
Vec2D midpoint,
bool isStroke,
bool closed,
uint32_t paddingVertexCount)
{
RIVE_PROF_SCOPE()
// The first curve of the contour will be pre-padded with
// 'paddingVertexCount' tessellation vertices, colocated at T=0. The caller
// must use this argument to align the end of the contour on a boundary of
// the patch size. (See math::padding_to_align_up().)
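// (Illustrative example with made-up numbers: if the patch segment span
// were 8 and the contour's curves would otherwise end 3 vertices short of
// a patch boundary, the caller would pass paddingVertexCount = 3.)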
m_nextCubicPaddingVertexCount = paddingVertexCount;
return m_flush->pushContour(m_pathID,
midpoint,
isStroke,
closed,
nextVertexIndex());
}
void RenderContext::TessellationWriter::pushCubic(
const Vec2D pts[4],
gpu::ContourDirections contourDirections,
Vec2D joinTangent,
uint32_t parametricSegmentCount,
uint32_t polarSegmentCount,
uint32_t joinSegmentCount,
uint32_t contourIDWithFlags)
{
RIVE_PROF_SCOPE()
assert(0 <= parametricSegmentCount &&
parametricSegmentCount <= kMaxParametricSegments);
assert(0 <= polarSegmentCount && polarSegmentCount <= kMaxPolarSegments);
assert(joinSegmentCount > 0);
assert((contourIDWithFlags & CONTOUR_ID_MASK) ==
(m_flush->m_currentContourID & CONTOUR_ID_MASK));
// contourID can't be zero.
assert((contourIDWithFlags & CONTOUR_ID_MASK) != 0);
// contourID can't be out of range in the contour buffer. (Contour buffer
// indices are 1-based.)
assert((contourIDWithFlags & CONTOUR_ID_MASK) <=
m_flush->desc().contourCount);
// Polar and parametric segments share the same beginning and ending
// vertices, so the merged *vertex* count is equal to the sum of polar and
// parametric *segment* counts.
uint32_t curveMergedVertexCount =
parametricSegmentCount + polarSegmentCount;
// -1 because the curve and join share an ending/beginning vertex.
uint32_t totalVertexCount = m_nextCubicPaddingVertexCount +
curveMergedVertexCount + joinSegmentCount - 1;
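// (Illustrative example: 4 parametric + 5 polar segments merge into 9
// curve vertices; with a 3-segment join and no padding, totalVertexCount
// = 0 + 9 + 3 - 1 = 11.)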
// Only the first curve of a contour gets padding vertices.
m_nextCubicPaddingVertexCount = 0;
switch (contourDirections)
{
case gpu::ContourDirections::forward:
pushTessellationSpans(pts,
joinTangent,
totalVertexCount,
parametricSegmentCount,
polarSegmentCount,
joinSegmentCount,
contourIDWithFlags);
break;
case gpu::ContourDirections::reverse:
pushMirroredTessellationSpans(pts,
joinTangent,
totalVertexCount,
parametricSegmentCount,
polarSegmentCount,
joinSegmentCount,
contourIDWithFlags);
break;
case gpu::ContourDirections::reverseThenForward:
case gpu::ContourDirections::forwardThenReverse:
// m_pathTessLocation and m_pathMirroredTessLocation are already
// configured, so at this point we don't need to handle
// reverseThenForward or forwardThenReverse differently.
pushDoubleSidedTessellationSpans(pts,
joinTangent,
totalVertexCount,
parametricSegmentCount,
polarSegmentCount,
joinSegmentCount,
contourIDWithFlags);
break;
}
}
RIVE_ALWAYS_INLINE void RenderContext::TessellationWriter::
pushTessellationSpans(const Vec2D pts[4],
Vec2D joinTangent,
uint32_t totalVertexCount,
uint32_t parametricSegmentCount,
uint32_t polarSegmentCount,
uint32_t joinSegmentCount,
uint32_t contourIDWithFlags)
{
RIVE_PROF_SCOPE()
assert(totalVertexCount > 0);
uint32_t y = m_pathTessLocation / kTessTextureWidth;
int32_t x0 = m_pathTessLocation % kTessTextureWidth;
int32_t x1 = x0 + totalVertexCount;
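// Worked example (with a hypothetical width of 2048): a span starting at
// x0 = 2040 with totalVertexCount = 20 first gets emitted on row y
// covering x in [2040, 2060), which runs past the right edge; the loop
// then re-emits it on row y + 1 covering x in [-8, 12), so the vertices
// that were clipped off land at the start of the next row.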
for (;;)
{
m_tessSpanData.set_back(pts,
joinTangent,
static_cast<float>(y),
x0,
x1,
parametricSegmentCount,
polarSegmentCount,
joinSegmentCount,
contourIDWithFlags);
if (x1 > static_cast<int32_t>(kTessTextureWidth))
{
// The span was too long to fit on the current line. Wrap and draw
// it again, this time behind the left edge of the texture so we
// capture what got clipped off last time.
++y;
x0 -= kTessTextureWidth;
x1 -= kTessTextureWidth;
continue;
}
break;
}
assert(y ==
(m_pathTessLocation + totalVertexCount - 1) / kTessTextureWidth);
m_pathTessLocation += totalVertexCount;
assert(m_pathTessLocation <= m_expectedPathTessEndLocation);
}
RIVE_ALWAYS_INLINE void RenderContext::TessellationWriter::
pushMirroredTessellationSpans(const Vec2D pts[4],
Vec2D joinTangent,
uint32_t totalVertexCount,
uint32_t parametricSegmentCount,
uint32_t polarSegmentCount,
uint32_t joinSegmentCount,
uint32_t contourIDWithFlags)
{
assert(totalVertexCount > 0);
uint32_t reflectionY = (m_pathMirroredTessLocation - 1) / kTessTextureWidth;
int32_t reflectionX0 =
(m_pathMirroredTessLocation - 1) % kTessTextureWidth + 1;
int32_t reflectionX1 = reflectionX0 - totalVertexCount;
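// Mirrored spans are written right-to-left: the x coordinates run
// backwards from reflectionX0 down toward reflectionX1. When reflectionX1
// goes negative, the loop below re-emits the span on the previous row,
// shifted right by kTessTextureWidth, mirroring the forward wrapping in
// pushTessellationSpans().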
for (;;)
{
m_tessSpanData.set_back(pts,
joinTangent,
static_cast<float>(reflectionY),
reflectionX0,
reflectionX1,
parametricSegmentCount,
polarSegmentCount,
joinSegmentCount,
contourIDWithFlags);
if (reflectionX1 < 0)
{
--reflectionY;
reflectionX0 += kTessTextureWidth;
reflectionX1 += kTessTextureWidth;
continue;
}
break;
}
m_pathMirroredTessLocation -= totalVertexCount;
assert(m_pathMirroredTessLocation >= m_expectedPathMirroredTessEndLocation);
}
RIVE_ALWAYS_INLINE void RenderContext::TessellationWriter::
pushDoubleSidedTessellationSpans(const Vec2D pts[4],
Vec2D joinTangent,
uint32_t totalVertexCount,
uint32_t parametricSegmentCount,
uint32_t polarSegmentCount,
uint32_t joinSegmentCount,
uint32_t contourIDWithFlags)
{
assert(totalVertexCount > 0);
int32_t y = m_pathTessLocation / kTessTextureWidth;
int32_t x0 = m_pathTessLocation % kTessTextureWidth;
int32_t x1 = x0 + totalVertexCount;
uint32_t reflectionY = (m_pathMirroredTessLocation - 1) / kTessTextureWidth;
int32_t reflectionX0 =
(m_pathMirroredTessLocation - 1) % kTessTextureWidth + 1;
int32_t reflectionX1 = reflectionX0 - totalVertexCount;
for (;;)
{
m_tessSpanData.set_back(pts,
joinTangent,
static_cast<float>(y),
x0,
x1,
static_cast<float>(reflectionY),
reflectionX0,
reflectionX1,
parametricSegmentCount,
polarSegmentCount,
joinSegmentCount,
contourIDWithFlags);
if (x1 > static_cast<int32_t>(kTessTextureWidth) || reflectionX1 < 0)
{
// Either the span or its reflection was too long to fit on the
// current line. Wrap and draw both of them again, this time beyond
// the opposite edge of the texture so we capture what got clipped
// off last time.
++y;
x0 -= kTessTextureWidth;
x1 -= kTessTextureWidth;
--reflectionY;
reflectionX0 += kTessTextureWidth;
reflectionX1 += kTessTextureWidth;
continue;
}
break;
}
m_pathTessLocation += totalVertexCount;
assert(m_pathTessLocation <= m_expectedPathTessEndLocation);
m_pathMirroredTessLocation -= totalVertexCount;
assert(m_pathMirroredTessLocation >= m_expectedPathMirroredTessEndLocation);
}
void RenderContext::LogicalFlush::pushPaddingVertices(uint32_t count,
uint32_t tessLocation)
{
RIVE_PROF_SCOPE()
assert(m_hasDoneLayout);
assert(count > 0);
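// Padding vertices are emitted as a degenerate cubic (all zeros) with
// pathID 0 and an invalid contour ID; they exist purely to keep the real
// tessellation data aligned to patch boundaries in the tess texture.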
constexpr static Vec2D kEmptyCubic[4]{};
TessellationWriter(this,
/*pathID=*/0,
gpu::ContourDirections::forward,
count,
tessLocation)
.pushTessellationSpans(kEmptyCubic,
{0, 0},
count,
0,
0,
1,
INVALID_CONTOUR_ID_WITH_FLAGS);
}
void RenderContext::LogicalFlush::pushMidpointFanDraw(
const PathDraw* draw,
gpu::DrawType drawType,
uint32_t tessVertexCount,
uint32_t tessLocation,
gpu::ShaderMiscFlags shaderMiscFlags)
{
RIVE_PROF_SCOPE()
assert(m_hasDoneLayout);
uint32_t baseInstance = math::lossless_numeric_cast<uint32_t>(
tessLocation / kMidpointFanPatchSegmentSpan);
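// Each patch instance consumes kMidpointFanPatchSegmentSpan tess vertices,
// so tess locations and counts convert to instances by exact division
// (verified by the asserts below).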
// flush() is responsible for alignment.
assert(baseInstance * kMidpointFanPatchSegmentSpan == tessLocation);
uint32_t instanceCount = tessVertexCount / kMidpointFanPatchSegmentSpan;
// flush() is responsible for alignment.
assert(instanceCount * kMidpointFanPatchSegmentSpan == tessVertexCount);
pushPathDraw(draw, drawType, shaderMiscFlags, instanceCount, baseInstance);
}
void RenderContext::LogicalFlush::pushOuterCubicsDraw(
const PathDraw* draw,
gpu::DrawType drawType,
uint32_t tessVertexCount,
uint32_t tessLocation,
gpu::ShaderMiscFlags shaderMiscFlags)
{
RIVE_PROF_SCOPE()
assert(m_hasDoneLayout);
uint32_t baseInstance = math::lossless_numeric_cast<uint32_t>(
tessLocation / kOuterCurvePatchSegmentSpan);
// flush() is responsible for alignment.
assert(baseInstance * kOuterCurvePatchSegmentSpan == tessLocation);
uint32_t instanceCount = tessVertexCount / kOuterCurvePatchSegmentSpan;
// flush() is responsible for alignment.
assert(instanceCount * kOuterCurvePatchSegmentSpan == tessVertexCount);
pushPathDraw(draw, drawType, shaderMiscFlags, instanceCount, baseInstance);
}
size_t RenderContext::LogicalFlush::pushInteriorTriangulationDraw(
const PathDraw* draw,
uint32_t pathID,
gpu::WindingFaces windingFaces,
gpu::ShaderMiscFlags shaderMiscFlags)
{
RIVE_PROF_SCOPE()
assert(m_hasDoneLayout);
assert(pathID != 0);
uint32_t baseVertex = math::lossless_numeric_cast<uint32_t>(
m_ctx->m_triangleVertexData.elementsWritten());
size_t actualVertexCount =
draw->triangulator()->polysToTriangles(pathID,
windingFaces,
&m_ctx->m_triangleVertexData);
assert(baseVertex + actualVertexCount ==
m_ctx->m_triangleVertexData.elementsWritten());
if (actualVertexCount > 0)
{
pushPathDraw(draw,
DrawType::interiorTriangulation,
shaderMiscFlags,
math::lossless_numeric_cast<uint32_t>(actualVertexCount),
baseVertex);
}
return actualVertexCount;
}
void RenderContext::LogicalFlush::pushAtlasBlit(PathDraw* draw, uint32_t pathID)
{
RIVE_PROF_SCOPE()
auto baseVertex = math::lossless_numeric_cast<uint32_t>(
m_ctx->m_triangleVertexData.elementsWritten());
auto [l, t, r, b] = AABB(draw->pixelBounds());
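// Emit the path's pixel bounds as a triangle-list quad (two triangles:
// l/b, l/t, r/b and r/b, l/t, r/t), with each vertex tagged with the
// pathID of the path whose atlas coverage is being blitted.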
m_ctx->m_triangleVertexData.emplace_back(Vec2D{l, b}, 1, pathID);
m_ctx->m_triangleVertexData.emplace_back(Vec2D{l, t}, 1, pathID);
m_ctx->m_triangleVertexData.emplace_back(Vec2D{r, b}, 1, pathID);
m_ctx->m_triangleVertexData.emplace_back(Vec2D{r, b}, 1, pathID);
m_ctx->m_triangleVertexData.emplace_back(Vec2D{l, t}, 1, pathID);
m_ctx->m_triangleVertexData.emplace_back(Vec2D{r, t}, 1, pathID);
pushPathDraw(draw,
DrawType::atlasBlit,
gpu::ShaderMiscFlags::none,
6,
baseVertex);
}
void RenderContext::LogicalFlush::pushImageRectDraw(ImageRectDraw* draw)
{
RIVE_PROF_SCOPE()
assert(m_hasDoneLayout);
// If the frame supports image paints for paths, the client should use
// pushPath() with an image paint instead of calling this method.
assert(!m_ctx->frameSupportsImagePaintForPaths());
size_t imageDrawDataOffset = m_ctx->m_imageDrawUniformData.bytesWritten();
m_ctx->m_imageDrawUniformData.emplace_back(draw->matrix(),
draw->opacity(),
draw->clipRectInverseMatrix(),
draw->clipID(),
draw->blendMode(),
m_currentZIndex);
DrawBatch& batch = pushDraw(draw,
DrawType::imageRect,
gpu::ShaderMiscFlags::none,
PaintType::image,
1,
0);
batch.imageDrawDataOffset =
math::lossless_numeric_cast<uint32_t>(imageDrawDataOffset);
}
void RenderContext::LogicalFlush::pushImageMeshDraw(ImageMeshDraw* draw)
{
RIVE_PROF_SCOPE()
assert(m_hasDoneLayout);
size_t imageDrawDataOffset = m_ctx->m_imageDrawUniformData.bytesWritten();
m_ctx->m_imageDrawUniformData.emplace_back(draw->matrix(),
draw->opacity(),
draw->clipRectInverseMatrix(),
draw->clipID(),
draw->blendMode(),
m_currentZIndex);
DrawBatch& batch = pushDraw(draw,
DrawType::imageMesh,
gpu::ShaderMiscFlags::none,
PaintType::image,
draw->indexCount(),
0);
batch.vertexBuffer = draw->vertexBuffer();
batch.uvBuffer = draw->uvBuffer();
batch.indexBuffer = draw->indexBuffer();
batch.imageDrawDataOffset =
math::lossless_numeric_cast<uint32_t>(imageDrawDataOffset);
}
void RenderContext::LogicalFlush::pushStencilClipResetDraw(
StencilClipReset* draw)
{
RIVE_PROF_SCOPE()
assert(m_hasDoneLayout);
uint32_t baseVertex = math::lossless_numeric_cast<uint32_t>(
m_ctx->m_triangleVertexData.elementsWritten());
auto [l, t, r, b] = AABB(getClipInfo(draw->previousClipID()).contentBounds);
uint32_t z = m_currentZIndex;
assert(AABB(l, t, r, b).round() == draw->pixelBounds());
assert(draw->resourceCounts().maxTriangleVertexCount == 6);
assert(m_ctx->m_triangleVertexData.hasRoomFor(6));
m_ctx->m_triangleVertexData.emplace_back(Vec2D{l, b}, 0, z);
m_ctx->m_triangleVertexData.emplace_back(Vec2D{l, t}, 0, z);
m_ctx->m_triangleVertexData.emplace_back(Vec2D{r, b}, 0, z);
m_ctx->m_triangleVertexData.emplace_back(Vec2D{r, b}, 0, z);
m_ctx->m_triangleVertexData.emplace_back(Vec2D{l, t}, 0, z);
m_ctx->m_triangleVertexData.emplace_back(Vec2D{r, t}, 0, z);
pushDraw(draw,
DrawType::msaaStencilClipReset,
gpu::ShaderMiscFlags::none,
PaintType::clipUpdate,
6,
baseVertex);
}
gpu::DrawBatch& RenderContext::LogicalFlush::pushPathDraw(
const PathDraw* draw,
DrawType drawType,
gpu::ShaderMiscFlags shaderMiscFlags,
uint32_t vertexCount,
uint32_t baseVertex)
{
RIVE_PROF_SCOPE()
assert(m_hasDoneLayout);
DrawBatch& batch = pushDraw(draw,
drawType,
shaderMiscFlags,
draw->paintType(),
vertexCount,
baseVertex);
auto pathShaderFeatures = gpu::ShaderFeatures::NONE;
if (draw->featherRadius() != 0 &&
drawType != gpu::DrawType::interiorTriangulation &&
drawType != gpu::DrawType::atlasBlit)
{
pathShaderFeatures |= ShaderFeatures::ENABLE_FEATHER;
}
if (draw->drawContents() & gpu::DrawContents::evenOddFill)
{
assert(!(shaderMiscFlags & gpu::ShaderMiscFlags::clockwiseFill));
pathShaderFeatures |= ShaderFeatures::ENABLE_EVEN_ODD;
}
constexpr static gpu::DrawContents NESTED_CLIP_FLAGS =
gpu::DrawContents::clipUpdate | gpu::DrawContents::activeClip;
if ((draw->drawContents() & NESTED_CLIP_FLAGS) == NESTED_CLIP_FLAGS)
{
pathShaderFeatures |= ShaderFeatures::ENABLE_NESTED_CLIPPING;
}
batch.shaderFeatures |=
pathShaderFeatures & m_ctx->m_frameShaderFeaturesMask;
m_combinedShaderFeatures |= batch.shaderFeatures;
assert(
(batch.shaderFeatures &
gpu::ShaderFeaturesMaskFor(drawType, m_ctx->frameInterlockMode())) ==
batch.shaderFeatures);
return batch;
}
RIVE_ALWAYS_INLINE static bool can_combine_draw_contents(
gpu::InterlockMode interlockMode,
gpu::DrawContents batchContents,
const Draw* draw)
{
// Feathered fills should never attempt to combine with fills, strokes, or
// feathered strokes because they use a different DrawType.
assert((batchContents & gpu::DrawContents::featheredFill).bits() ==
(draw->drawContents() & gpu::DrawContents::featheredFill).bits());
constexpr static auto ANY_FILL = gpu::DrawContents::clockwiseFill |
gpu::DrawContents::evenOddFill |
gpu::DrawContents::nonZeroFill;
// Raster ordering uses a different shader for clockwise fills, so we
// can't combine both legacy and clockwise fills into the same draw.
if (interlockMode == gpu::InterlockMode::rasterOrdering &&
// Anything can be combined if either the existing batch or the new draw
// doesn't have fills yet.
(batchContents & ANY_FILL) && (draw->drawContents() & ANY_FILL))
{
assert(!(draw->drawContents() & gpu::DrawContents::stroke));
return (batchContents & gpu::DrawContents::clockwiseFill).bits() ==
(draw->drawContents() & gpu::DrawContents::clockwiseFill).bits();
}
return true;
}
RIVE_ALWAYS_INLINE static bool can_combine_draw_images(
const Texture* currentDrawTexture,
const Texture* nextDrawTexture,
const ImageSampler currentImageSamplerKey,
const ImageSampler nextImageSamplerKey)
{
if (currentDrawTexture == nullptr || nextDrawTexture == nullptr)
{
// We can always combine two draws if one or both do not use an image
// paint.
return true;
}
// Since the image paint's texture must be bound to a specific slot, we
// can't combine draws that use different textures.
return (currentDrawTexture == nextDrawTexture) &&
(currentImageSamplerKey == nextImageSamplerKey);
}
gpu::DrawBatch& RenderContext::LogicalFlush::pushDraw(
const Draw* draw,
DrawType drawType,
gpu::ShaderMiscFlags shaderMiscFlags,
gpu::PaintType paintType,
uint32_t elementCount,
uint32_t baseElement)
{
RIVE_PROF_SCOPE()
assert(m_hasDoneLayout);
assert(elementCount > 0);
bool canMergeWithPreviousBatch;
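// A draw merges into the previous batch only when no barriers are
// pending, the previous batch matches on drawType and shaderMiscFlags,
// the draw contents and image textures/samplers are combinable, and the
// new elements are contiguous with the previous batch's (the
// non-contiguous case only arises in MSAA, where it forces a new batch).
// Image and render-pass draws never merge.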
switch (drawType)
{
case DrawType::midpointFanPatches:
case DrawType::midpointFanCenterAAPatches:
case DrawType::outerCurvePatches:
case DrawType::interiorTriangulation:
case DrawType::atlasBlit:
case DrawType::msaaStrokes:
case DrawType::msaaMidpointFanBorrowedCoverage:
case DrawType::msaaMidpointFans:
case DrawType::msaaMidpointFanStencilReset:
case DrawType::msaaMidpointFanPathsStencil:
case DrawType::msaaMidpointFanPathsCover:
case DrawType::msaaOuterCubics:
case DrawType::msaaStencilClipReset:
if (!m_drawList.empty() && m_pendingBarriers == BarrierFlags::none)
{
const DrawBatch& currentBatch = m_drawList.tail();
canMergeWithPreviousBatch =
currentBatch.drawType == drawType &&
currentBatch.shaderMiscFlags == shaderMiscFlags &&
can_combine_draw_contents(m_ctx->frameInterlockMode(),
currentBatch.drawContents,
draw) &&
can_combine_draw_images(currentBatch.imageTexture,
draw->imageTexture(),
currentBatch.imageSampler,
draw->imageSampler());
if (canMergeWithPreviousBatch &&
currentBatch.baseElement + currentBatch.elementCount !=
baseElement)
{
// In MSAA mode, multiple subpasses reference the same
// tessellation data. Although rare, this breaks the
// guarantee we have in other modes that mergeable batches
// will always have contiguous patches.
assert(m_ctx->frameInterlockMode() ==
gpu::InterlockMode::msaa);
canMergeWithPreviousBatch = false;
}
break;
}
[[fallthrough]];
// Image draws can't be combined for now because they each have their
// own unique uniforms.
case DrawType::imageRect:
case DrawType::imageMesh:
case DrawType::renderPassInitialize:
case DrawType::renderPassResolve:
canMergeWithPreviousBatch = false;
break;
}
DrawBatch* batch;
if (canMergeWithPreviousBatch)
{
batch = &m_drawList.tail();
assert(m_pendingBarriers == BarrierFlags::none);
assert(batch->drawType == drawType);
assert(batch->shaderMiscFlags == shaderMiscFlags);
assert(batch->baseElement + batch->elementCount == baseElement);
batch->elementCount += elementCount;
}
else
{
batch = &m_drawList.emplace_back(
m_ctx->perFrameAllocator(),
drawType,
shaderMiscFlags,
elementCount,
baseElement,
draw->blendMode(),
draw->imageSampler(),
std::exchange(m_pendingBarriers, BarrierFlags::none));
}
// If the batch was merged into a previous one, this ensures it was a valid
// merge.
assert(batch->drawType == drawType);
assert(can_combine_draw_images(batch->imageTexture,
draw->imageTexture(),
batch->imageSampler,
draw->imageSampler()));
assert(m_pendingBarriers == BarrierFlags::none);
auto shaderFeatures = ShaderFeatures::NONE;
if (draw->clipID() != 0)
{
shaderFeatures |= ShaderFeatures::ENABLE_CLIPPING;
}
if (draw->hasClipRect() && paintType != PaintType::clipUpdate)
{
shaderFeatures |= ShaderFeatures::ENABLE_CLIP_RECT;
}
if (paintType != PaintType::clipUpdate &&
!(shaderMiscFlags & gpu::ShaderMiscFlags::borrowedCoveragePrepass))
{
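// HSL blend modes (hue, saturation, color, luminosity) require both
// ENABLE_HSL_BLEND_MODES and ENABLE_ADVANCED_BLEND, hence the
// fallthrough; every other non-srcOver mode requires only
// ENABLE_ADVANCED_BLEND, and srcOver requires neither.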
switch (draw->blendMode())
{
case BlendMode::hue:
case BlendMode::saturation:
case BlendMode::color:
case BlendMode::luminosity:
shaderFeatures |= ShaderFeatures::ENABLE_HSL_BLEND_MODES;
[[fallthrough]];
case BlendMode::screen:
case BlendMode::overlay:
case BlendMode::darken:
case BlendMode::lighten:
case BlendMode::colorDodge:
case BlendMode::colorBurn:
case BlendMode::hardLight:
case BlendMode::softLight:
case BlendMode::difference:
case BlendMode::exclusion:
case BlendMode::multiply:
shaderFeatures |= ShaderFeatures::ENABLE_ADVANCED_BLEND;
break;
case BlendMode::srcOver:
break;
}
}
batch->shaderFeatures |= shaderFeatures & m_ctx->m_frameShaderFeaturesMask;
m_combinedShaderFeatures |= batch->shaderFeatures;
assert(
(batch->shaderFeatures &
gpu::ShaderFeaturesMaskFor(drawType, m_ctx->frameInterlockMode())) ==
batch->shaderFeatures);
batch->drawContents |= draw->drawContents();
if (paintType == PaintType::image)
{
assert(draw->imageTexture() != nullptr);
if (batch->imageTexture == nullptr)
{
batch->imageTexture = draw->imageTexture();
}
assert(batch->imageTexture == draw->imageTexture());
}
if (m_ctx->frameInterlockMode() == gpu::InterlockMode::msaa)
{
// msaa can't mix drawContents in a batch.
assert(batch->drawContents == draw->drawContents());
// msaa doesn't mix src-over draws with advanced blend draws.
assert((batch->shaderFeatures &
gpu::ShaderFeatures::ENABLE_ADVANCED_BLEND) ==
(draw->blendMode() != BlendMode::srcOver));
// If using KHR_blend_equation_advanced, we can't mix blend modes in a
// batch.
assert(!m_ctx->platformFeatures().supportsBlendAdvancedKHR ||
batch->firstBlendMode == draw->blendMode());
if (draw->blendMode() != BlendMode::srcOver &&
!m_ctx->platformFeatures().supportsBlendAdvancedCoherentKHR)
{
// An implementation-dependent barrier is required between
// overlapping draws. Add a "dstBlend" barrier and build up a list
// of "dstReads" for the batch. The dstRead list will be required in
// the event that the implementation has to handle dstReads by
// copying out a texture.
//
// (But if the draw already has a "nextDstRead" neighbor, do
// nothing. It means an earlier subpass will already issue the
// barrier and sync this region of the framebuffer. Since nothing
// that overlaps will be ordered between that first subpass and us,
// that barrier for the first subpass is all we need.)
if (draw->nextDstRead() == nullptr)
{
batch->barriers |= BarrierFlags::dstBlend;
batch->dstReadList = draw->addToDstReadList(batch->dstReadList);
}
}
}
return *batch;
}
} // namespace rive::gpu