| /* |
| * Copyright 2022 Rive |
| */ |
| |
| #include "rive/renderer/render_context.hpp" |
| |
| #include "gr_inner_fan_triangulator.hpp" |
| #include "intersection_board.hpp" |
| #include "gradient.hpp" |
| #include "rive_render_paint.hpp" |
| #include "rive/renderer/draw.hpp" |
| #include "rive/renderer/rive_render_image.hpp" |
| #include "rive/renderer/render_context_impl.hpp" |
| #include "rive/profiler/profiler_macros.h" |
| |
| #include "shaders/constants.glsl" |
| |
| #include <string_view> |
| |
| #ifdef RIVE_DECODERS |
| #include "rive/decoders/bitmap_decoder.hpp" |
| #endif |
| |
| namespace rive::gpu |
| { |
| constexpr size_t kDefaultSimpleGradientCapacity = 512; |
| constexpr size_t kDefaultComplexGradientCapacity = 1024; |
| constexpr size_t kDefaultDrawCapacity = 2048; |
| |
| // TODO: Move this variable to PlatformFeatures. |
| constexpr uint32_t kMaxTextureHeight = 2048; |
| constexpr size_t kMaxTessellationVertexCount = |
| kMaxTextureHeight * kTessTextureWidth; |
| constexpr size_t kMaxTessellationPaddingVertexCount = |
| gpu::kMidpointFanPatchSegmentSpan + // Padding at the beginning of the tess |
| // texture |
| (gpu::kOuterCurvePatchSegmentSpan - |
| 1) + // Max padding between patch types in the tess texture |
| 1; // Padding at the end of the tessellation texture |
| constexpr size_t kMaxTessellationVertexCountBeforePadding = |
| kMaxTessellationVertexCount - kMaxTessellationPaddingVertexCount; |
| |
| // Metal requires vertex buffers to be 256-byte aligned. |
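| // Worst case, we need (alignment - 1) extra padding vertices to reach the |
| // next aligned element boundary. |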
| constexpr size_t kMaxTessellationAlignmentVertices = |
| gpu::kTessVertexBufferAlignmentInElements - 1; |
| |
| // We can only reorder 32767 draws at a time since the one-based groupIndex |
| // returned by IntersectionBoard is a signed 16-bit integer. |
| constexpr size_t kMaxReorderedDrawPassCount = |
| std::numeric_limits<int16_t>::max(); |
| |
| // How tall to make a resource texture in order to support the given number of |
| // items. |
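| // (Ceiling division: e.g. resource_texture_height<4>(9) == 3 rows.) |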
| template <size_t WidthInItems> |
| constexpr static size_t resource_texture_height(size_t itemCount) |
| { |
| return (itemCount + WidthInItems - 1) / WidthInItems; |
| } |
| |
| constexpr static size_t gradient_data_height(size_t simpleRampCount, |
| size_t complexRampCount) |
| { |
| return resource_texture_height<gpu::kGradTextureWidthInSimpleRamps>( |
| simpleRampCount) + |
| complexRampCount; |
| } |
| |
| inline GradientContentKey::GradientContentKey(rcp<const Gradient> gradient) : |
| m_gradient(std::move(gradient)) |
| {} |
| |
| inline GradientContentKey::GradientContentKey(GradientContentKey&& other) : |
| m_gradient(std::move(other.m_gradient)) |
| {} |
| |
| bool GradientContentKey::operator==(const GradientContentKey& other) const |
| { |
| if (m_gradient.get() == other.m_gradient.get()) |
| { |
| return true; |
| } |
| else |
| { |
| return m_gradient->count() == other.m_gradient->count() && |
| !memcmp(m_gradient->stops(), |
| other.m_gradient->stops(), |
| m_gradient->count() * sizeof(float)) && |
| !memcmp(m_gradient->colors(), |
| other.m_gradient->colors(), |
| m_gradient->count() * sizeof(ColorInt)); |
| } |
| } |
| |
| size_t DeepHashGradient::operator()(const GradientContentKey& key) const |
| { |
| const Gradient* grad = key.gradient(); |
| std::hash<std::string_view> hash; |
| size_t x = |
| hash(std::string_view(reinterpret_cast<const char*>(grad->stops()), |
| grad->count() * sizeof(float))); |
| size_t y = |
| hash(std::string_view(reinterpret_cast<const char*>(grad->colors()), |
| grad->count() * sizeof(ColorInt))); |
| return x ^ y; |
| } |
| |
| RenderContext::RenderContext(std::unique_ptr<RenderContextImpl> impl) : |
| m_impl(std::move(impl)), |
| // -1 from m_maxPathID so we reserve a path record for the clearColor paint |
| // (for atomic mode). This also allows us to index the storage buffers |
| // directly by pathID. |
| m_maxPathID(MaxPathID(m_impl->platformFeatures().pathIDGranularity) - 1) |
| { |
| setResourceSizes(ResourceAllocationCounts(), /*forceRealloc =*/true); |
| releaseResources(); |
| } |
| |
| RenderContext::~RenderContext() |
| { |
| // Always call flush() to avoid deadlock. |
| assert(!m_didBeginFrame); |
| // Delete the logical flushes before the block allocators let go of their |
| // allocations. |
| m_logicalFlushes.clear(); |
| } |
| |
| const gpu::PlatformFeatures& RenderContext::platformFeatures() const |
| { |
| return m_impl->platformFeatures(); |
| } |
| |
| rcp<RenderBuffer> RenderContext::makeRenderBuffer(RenderBufferType type, |
| RenderBufferFlags flags, |
| size_t sizeInBytes) |
| { |
| return m_impl->makeRenderBuffer(type, flags, sizeInBytes); |
| } |
| |
| rcp<RenderImage> RenderContext::decodeImage(Span<const uint8_t> encodedBytes) |
| { |
| RIVE_PROF_SCOPE() |
| rcp<Texture> texture = m_impl->platformDecodeImageTexture(encodedBytes); |
| #ifdef RIVE_DECODERS |
| if (texture == nullptr) |
| { |
| auto bitmap = Bitmap::decode(encodedBytes.data(), encodedBytes.size()); |
| if (bitmap) |
| { |
| // For now, RenderContextImpl::makeImageTexture() only accepts |
| // premultiplied RGBA. |
| if (bitmap->pixelFormat() != Bitmap::PixelFormat::RGBAPremul) |
| { |
| bitmap->pixelFormat(Bitmap::PixelFormat::RGBAPremul); |
| } |
| uint32_t width = bitmap->width(); |
| uint32_t height = bitmap->height(); |
| uint32_t mipLevelCount = math::msb(height | width); |
| texture = m_impl->makeImageTexture(width, |
| height, |
| mipLevelCount, |
| bitmap->bytes()); |
| } |
| } |
| #endif |
| return texture != nullptr ? make_rcp<RiveRenderImage>(std::move(texture)) |
| : nullptr; |
| } |
| |
| void RenderContext::releaseResources() |
| { |
| assert(!m_didBeginFrame); |
| resetContainers(); |
| setResourceSizes(ResourceAllocationCounts()); |
| m_maxRecentResourceRequirements = ResourceAllocationCounts(); |
| m_lastResourceTrimTimeInSeconds = m_impl->secondsNow(); |
| } |
| |
| void RenderContext::resetContainers() |
| { |
| assert(!m_didBeginFrame); |
| |
| if (!m_logicalFlushes.empty()) |
| { |
| // Should get reset to 1 after flush(). |
| assert(m_logicalFlushes.size() == 1); |
| m_logicalFlushes.resize(1); |
| m_logicalFlushes.front()->resetContainers(); |
| } |
| |
| m_indirectDrawList.clear(); |
| m_indirectDrawList.shrink_to_fit(); |
| |
| m_intersectionBoard = nullptr; |
| } |
| |
| RenderContext::LogicalFlush::LogicalFlush(RenderContext* parent) : m_ctx(parent) |
| { |
| rewind(); |
| } |
| |
| void RenderContext::LogicalFlush::rewind() |
| { |
| RIVE_PROF_SCOPE() |
| m_resourceCounts = Draw::ResourceCounters(); |
| m_drawPassCount = 0; |
| m_simpleGradients.clear(); |
| m_pendingSimpleGradDraws.clear(); |
| m_complexGradients.clear(); |
| m_pendingComplexGradDraws.clear(); |
| m_pendingGradSpanCount = 0; |
| m_clips.clear(); |
| m_draws.clear(); |
| m_combinedDrawBounds = {std::numeric_limits<int32_t>::max(), |
| std::numeric_limits<int32_t>::max(), |
| std::numeric_limits<int32_t>::min(), |
| std::numeric_limits<int32_t>::min()}; |
| |
| m_pathPaddingCount = 0; |
| m_paintPaddingCount = 0; |
| m_paintAuxPaddingCount = 0; |
| m_contourPaddingCount = 0; |
| m_gradSpanPaddingCount = 0; |
| m_midpointFanTessEndLocation = 0; |
| m_outerCubicTessEndLocation = 0; |
| m_outerCubicTessVertexIdx = 0; |
| m_midpointFanTessVertexIdx = 0; |
| |
| m_flushDesc = FlushDescriptor(); |
| |
| m_drawList.reset(); |
| m_combinedShaderFeatures = gpu::ShaderFeatures::NONE; |
| |
| m_currentPathID = 0; |
| m_currentContourID = 0; |
| |
| if (m_atlasRectanizer != nullptr) |
| { |
| m_atlasRectanizer->reset(); |
| } |
| m_atlasMaxX = 0; |
| m_atlasMaxY = 0; |
| m_pendingAtlasDraws.clear(); |
| |
| m_coverageBufferLength = 0; |
| |
| m_pendingBarriers = BarrierFlags::none; |
| |
| m_currentZIndex = 0; |
| |
| RIVE_DEBUG_CODE(m_hasDoneLayout = false;) |
| } |
| |
| void RenderContext::LogicalFlush::resetContainers() |
| { |
| m_clips.clear(); |
| m_clips.shrink_to_fit(); |
| m_draws.clear(); |
| m_draws.shrink_to_fit(); |
| m_draws.reserve(kDefaultDrawCapacity); |
| |
| m_simpleGradients.rehash(0); |
| m_simpleGradients.reserve(kDefaultSimpleGradientCapacity); |
| |
| m_pendingSimpleGradDraws.clear(); |
| m_pendingSimpleGradDraws.shrink_to_fit(); |
| m_pendingSimpleGradDraws.reserve(kDefaultSimpleGradientCapacity); |
| |
| m_complexGradients.rehash(0); |
| m_complexGradients.reserve(kDefaultComplexGradientCapacity); |
| |
| m_pendingComplexGradDraws.clear(); |
| m_pendingComplexGradDraws.shrink_to_fit(); |
| m_pendingComplexGradDraws.reserve(kDefaultComplexGradientCapacity); |
| |
| m_pendingAtlasDraws.clear(); |
| m_pendingAtlasDraws.shrink_to_fit(); |
| // Don't reserve any space in m_pendingAtlasDraws since there are many |
| // use cases where it isn't used at all. |
| } |
| |
| void RenderContext::beginFrame(const FrameDescriptor& frameDescriptor) |
| { |
| RIVE_PROF_SCOPE() |
| |
| m_impl->preBeginFrame(this); |
| assert(!m_didBeginFrame); |
| assert(frameDescriptor.renderTargetWidth > 0); |
| assert(frameDescriptor.renderTargetHeight > 0); |
| m_frameDescriptor = frameDescriptor; |
| if (!platformFeatures().supportsRasterOrdering && |
| !platformFeatures().supportsFragmentShaderAtomics) |
| { |
| // We don't have pixel local storage in any form. Use 4x MSAA if |
| // msaaSampleCount wasn't already specified. |
| m_frameDescriptor.msaaSampleCount = |
| m_frameDescriptor.msaaSampleCount > 0 |
| ? m_frameDescriptor.msaaSampleCount |
| : 4; |
| } |
| if (m_frameDescriptor.msaaSampleCount > 0) |
| { |
| m_frameInterlockMode = gpu::InterlockMode::msaa; |
| } |
| else if (platformFeatures().supportsRasterOrdering && |
| (!m_frameDescriptor.disableRasterOrdering || |
| !platformFeatures().supportsFragmentShaderAtomics)) |
| { |
| m_frameInterlockMode = gpu::InterlockMode::rasterOrdering; |
| } |
| else if (frameDescriptor.clockwiseFillOverride && |
| platformFeatures().supportsClockwiseAtomicRendering) |
| { |
| assert(platformFeatures().supportsFragmentShaderAtomics); |
| m_frameInterlockMode = gpu::InterlockMode::clockwiseAtomic; |
| } |
| else |
| { |
| assert(platformFeatures().supportsFragmentShaderAtomics); |
| m_frameInterlockMode = gpu::InterlockMode::atomics; |
| } |
| m_frameShaderFeaturesMask = |
| gpu::ShaderFeaturesMaskFor(m_frameInterlockMode); |
| if (m_logicalFlushes.empty()) |
| { |
| m_logicalFlushes.emplace_back(new LogicalFlush(this)); |
| } |
| RIVE_DEBUG_CODE(m_didBeginFrame = true); |
| } |
| |
| bool RenderContext::isOutsideCurrentFrame(const IAABB& pixelBounds) |
| { |
| assert(m_didBeginFrame); |
| int4 bounds = simd::load4i(&pixelBounds); |
| auto renderTargetSize = |
| simd::cast<int32_t>(uint2{m_frameDescriptor.renderTargetWidth, |
| m_frameDescriptor.renderTargetHeight}); |
| return simd::any(bounds.xy >= renderTargetSize || bounds.zw <= 0 || |
| bounds.xy >= bounds.zw); |
| } |
| |
| bool RenderContext::frameSupportsClipRects() const |
| { |
| assert(m_didBeginFrame); |
| return m_frameInterlockMode != gpu::InterlockMode::msaa || |
| platformFeatures().supportsClipPlanes; |
| } |
| |
| bool RenderContext::frameSupportsImagePaintForPaths() const |
| { |
| assert(m_didBeginFrame); |
| return m_frameInterlockMode != gpu::InterlockMode::atomics; |
| } |
| |
| uint32_t RenderContext::generateClipID(const IAABB& contentBounds) |
| { |
| assert(m_didBeginFrame); |
| assert(!m_logicalFlushes.empty()); |
| return m_logicalFlushes.back()->generateClipID(contentBounds); |
| } |
| |
| uint32_t RenderContext::LogicalFlush::generateClipID(const IAABB& contentBounds) |
| { |
| if (m_clips.size() < m_ctx->m_maxPathID) // maxClipID == maxPathID. |
| { |
| m_clips.emplace_back(contentBounds); |
| assert(m_ctx->m_clipContentID != m_clips.size()); |
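| // Clip IDs are one-based: m_clips[clipID - 1] holds the info for clipID. |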
| return math::lossless_numeric_cast<uint32_t>(m_clips.size()); |
| } |
| return 0; // There are no available clip IDs. The caller should flush and |
| // try again. |
| } |
| |
| RenderContext::LogicalFlush::ClipInfo& RenderContext::LogicalFlush:: |
| getWritableClipInfo(uint32_t clipID) |
| { |
| assert(clipID > 0); |
| assert(clipID <= m_clips.size()); |
| return m_clips[clipID - 1]; |
| } |
| |
| void RenderContext::LogicalFlush::addClipReadBounds(uint32_t clipID, |
| const IAABB& bounds) |
| { |
| assert(clipID > 0); |
| assert(clipID <= m_clips.size()); |
| ClipInfo& clipInfo = getWritableClipInfo(clipID); |
| clipInfo.readBounds = clipInfo.readBounds.join(bounds); |
| } |
| |
| bool RenderContext::pushDraws(DrawUniquePtr draws[], size_t drawCount) |
| { |
| assert(m_didBeginFrame); |
| assert(!m_logicalFlushes.empty()); |
| return m_logicalFlushes.back()->pushDraws(draws, drawCount); |
| } |
| |
| bool RenderContext::LogicalFlush::pushDraws(DrawUniquePtr draws[], |
| size_t drawCount) |
| { |
| RIVE_PROF_SCOPE() |
| assert(!m_hasDoneLayout); |
| |
| auto countsVector = m_resourceCounts.toVec(); |
| for (size_t i = 0; i < drawCount; ++i) |
| { |
| assert(!draws[i]->pixelBounds().empty()); |
| assert(m_ctx->frameSupportsClipRects() || |
| draws[i]->clipRectInverseMatrix() == nullptr); |
| countsVector += draws[i]->resourceCounts().toVec(); |
| } |
| Draw::ResourceCounters countsWithNewBatch = countsVector; |
| |
| // Textures and buffers have hard size limits. If the new batch doesn't fit |
| // within our constraints, the caller needs to flush and try again. |
| if (countsWithNewBatch.pathCount > m_ctx->m_maxPathID || |
| countsWithNewBatch.contourCount > kMaxContourID || |
| countsWithNewBatch.midpointFanTessVertexCount + |
| countsWithNewBatch.outerCubicTessVertexCount > |
| kMaxTessellationVertexCountBeforePadding) |
| { |
| return false; |
| } |
| |
| // Allocate subpasses. |
| int passCountInBatch = 0; |
| for (size_t i = 0; i < drawCount; ++i) |
| { |
| draws[i]->countSubpasses(); |
| assert(draws[i]->prepassCount() >= 0); |
| assert(draws[i]->subpassCount() >= 0); |
| assert(draws[i]->prepassCount() + draws[i]->subpassCount() >= 1); |
| passCountInBatch += draws[i]->prepassCount() + draws[i]->subpassCount(); |
| } |
| |
| // We can only reorder 32k draws at a time in atomic and msaa modes since |
| // the sort key addresses them with a signed 16-bit index. Make sure we |
| // don't exceed that limit. |
| if (m_ctx->frameInterlockMode() != gpu::InterlockMode::rasterOrdering && |
| m_drawPassCount + passCountInBatch > kMaxReorderedDrawPassCount) |
| { |
| return false; |
| } |
| |
| // Allocate final resources. |
| for (size_t i = 0; i < drawCount; ++i) |
| { |
| if (!draws[i]->allocateResources(this)) |
| { |
| // The draw failed to allocate resources. Give up and let the caller |
| // flush and try again. |
| // |
| // FIXME: This works today, but the surrounding code could be |
| // modified to inadvertently leave a stale dangling reference to one |
| // of these draws in m_pendingAtlasDraws. This needs to be |
| // revisited. |
| return false; |
| } |
| } |
| |
| for (size_t i = 0; i < drawCount; ++i) |
| { |
| m_draws.push_back(std::move(draws[i])); |
| m_combinedDrawBounds = |
| m_combinedDrawBounds.join(m_draws.back()->pixelBounds()); |
| } |
| |
| m_resourceCounts = countsWithNewBatch; |
| m_drawPassCount += passCountInBatch; |
| return true; |
| } |
| |
| bool RenderContext::LogicalFlush::allocateGradient( |
| const Gradient* gradient, |
| gpu::ColorRampLocation* colorRampLocation) |
| { |
| RIVE_PROF_SCOPE() |
| assert(!m_hasDoneLayout); |
| |
| const float* stops = gradient->stops(); |
| size_t stopCount = gradient->count(); |
| assert(stopCount > 0); // RiveRenderFactory guarantees this. |
| |
| if (stopCount == 1 || (stopCount == 2 && stops[0] == 0 && stops[1] == 1)) |
| { |
| // This is a simple gradient that can be implemented by a two-texel |
| // color ramp. |
| const ColorInt* colors = gradient->colors(); |
| TwoTexelRamp colorRamp = {colors[0], |
| // Handle ramps with a single stop. |
| colors[std::min<size_t>(1, stopCount - 1)]}; |
| uint64_t simpleKey; |
| static_assert(sizeof(simpleKey) == sizeof(ColorInt) * 2); |
| RIVE_INLINE_MEMCPY(&simpleKey, &colorRamp, sizeof(ColorInt) * 2); |
| uint32_t rampTexelsIdx; |
| auto iter = m_simpleGradients.find(simpleKey); |
| if (iter != m_simpleGradients.end()) |
| { |
| // This gradient is already in the texture. |
| rampTexelsIdx = iter->second; |
| } |
| else |
| { |
| if (gradient_data_height(m_simpleGradients.size() + 1, |
| m_complexGradients.size()) > |
| kMaxTextureHeight) |
| { |
| // We ran out of rows in the gradient texture. Caller has to |
| // flush and try again. |
| return false; |
| } |
| rampTexelsIdx = math::lossless_numeric_cast<uint32_t>( |
| m_simpleGradients.size() * 2); |
| m_simpleGradients.insert({simpleKey, rampTexelsIdx}); |
| m_pendingSimpleGradDraws.push_back(colorRamp); |
| // Simple gradients get uploaded to the GPU as a single GradientSpan |
| // instance. |
| ++m_pendingGradSpanCount; |
| } |
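| // Each simple ramp occupies two adjacent texels; convert the flat texel |
| // index into a row/column within the gradient texture. |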
| colorRampLocation->row = rampTexelsIdx / kGradTextureWidth; |
| colorRampLocation->col = rampTexelsIdx % kGradTextureWidth; |
| } |
| else |
| { |
| // This is a complex gradient. Render it to an entire row of the |
| // gradient texture. |
| GradientContentKey key(ref_rcp(gradient)); |
| auto iter = m_complexGradients.find(key); |
| uint16_t row; |
| if (iter != m_complexGradients.end()) |
| { |
| row = iter->second; // This gradient is already in the texture. |
| } |
| else |
| { |
| if (gradient_data_height(m_simpleGradients.size(), |
| m_complexGradients.size() + 1) > |
| kMaxTextureHeight) |
| { |
| // We ran out of rows in the gradient texture. Caller has to |
| // flush and try again. |
| return false; |
| } |
| |
| row = static_cast<uint16_t>(m_complexGradients.size()); |
| m_complexGradients.emplace(std::move(key), row); |
| m_pendingComplexGradDraws.push_back(gradient); |
| |
| size_t spanCount = stopCount - 1; |
| m_pendingGradSpanCount += spanCount; |
| } |
| // Store the row relative to the first complex gradient for now. |
| // PaintData::set() will offset this value by the number of simple |
| // gradient rows once its final value is known. |
| colorRampLocation->row = row; |
| colorRampLocation->col = ColorRampLocation::kComplexGradientMarker; |
| } |
| return true; |
| } |
| |
| bool RenderContext::LogicalFlush::allocateAtlasDraw( |
| PathDraw* pathDraw, |
| uint16_t drawWidth, |
| uint16_t drawHeight, |
| uint16_t desiredPadding, |
| uint16_t* x, |
| uint16_t* y, |
| TAABB<uint16_t>* paddedRegion) |
| { |
| RIVE_PROF_SCOPE() |
| |
| if (m_atlasRectanizer == nullptr) |
| { |
| uint16_t atlasMaxSize = m_ctx->atlasMaxSize(); |
| // Use an atlas larger than atlasMaxSize if it's too small for the |
| // request (meaning the render target is larger than atlasMaxSize). |
| m_atlasRectanizer = std::make_unique<skgpu::RectanizerSkyline>( |
| std::max(atlasMaxSize, drawWidth), |
| std::max(atlasMaxSize, drawHeight)); |
| } |
| |
| const uint16_t atlasMaxWidth = m_atlasRectanizer->width(); |
| const uint16_t atlasMaxHeight = m_atlasRectanizer->height(); |
| uint16_t paddedWidth = |
| std::min<uint16_t>(drawWidth + desiredPadding * 2, atlasMaxWidth); |
| uint16_t paddedHeight = |
| std::min<uint16_t>(drawHeight + desiredPadding * 2, atlasMaxHeight); |
| int16_t ix, iy; |
| if (!m_atlasRectanizer->addRect(paddedWidth, paddedHeight, &ix, &iy)) |
| { |
| // Delete the rectanizer if it wasn't big enough for this path. It will |
| // be reallocated to a large enough size on the next call. |
| if (drawWidth > atlasMaxWidth || drawHeight > atlasMaxHeight) |
| { |
| m_atlasRectanizer = nullptr; |
| } |
| return false; |
| } |
| |
| assert(ix >= 0); |
| assert(iy >= 0); |
| assert(ix + paddedWidth <= atlasMaxWidth); |
| assert(iy + paddedHeight <= atlasMaxHeight); |
| |
| *x = ix + (paddedWidth - drawWidth) / 2; |
| *y = iy + (paddedHeight - drawHeight) / 2; |
| *paddedRegion = {ix, iy, ix + paddedWidth, iy + paddedHeight}; |
| assert((TAABB<uint16_t>{0, 0, atlasMaxWidth, atlasMaxHeight}) |
| .contains(*paddedRegion)); |
| |
| m_atlasMaxX = std::max<uint32_t>(m_atlasMaxX, paddedRegion->right); |
| m_atlasMaxY = std::max<uint32_t>(m_atlasMaxY, paddedRegion->bottom); |
| assert(m_atlasMaxX <= atlasMaxWidth); |
| assert(m_atlasMaxY <= atlasMaxHeight); |
| |
| m_pendingAtlasDraws.push_back(pathDraw); |
| return true; |
| } |
| |
| size_t RenderContext::LogicalFlush::allocateCoverageBufferRange(size_t length) |
| { |
| RIVE_PROF_SCOPE() |
| assert(m_ctx->frameInterlockMode() == gpu::InterlockMode::clockwiseAtomic); |
| assert(length % (32 * 32) == 0u); // Allocations must support 32x32 tiles. |
| uint32_t offset = m_coverageBufferLength; |
| if (offset + length > m_ctx->platformFeatures().maxCoverageBufferLength) |
| { |
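| // Not enough room in the coverage buffer. Returning -1 (wrapped to |
| // SIZE_MAX) signals the failure to the caller. |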
| return -1; |
| } |
| m_coverageBufferLength += length; |
| return offset; |
| } |
| |
| void RenderContext::logicalFlush() |
| { |
| assert(m_didBeginFrame); |
| |
| // Reset clipping state after every logical flush because the clip buffer is |
| // not preserved between render passes. |
| m_clipContentID = 0; |
| |
| // Don't issue any GPU commands between logical flushes. Instead, build up a |
| // list of flushes that we will submit all at once at the end of the frame. |
| m_logicalFlushes.emplace_back(new LogicalFlush(this)); |
| } |
| |
| void RenderContext::flush(const FlushResources& flushResources) |
| { |
| RIVE_PROF_SCOPE() |
| assert(m_didBeginFrame); |
| assert(flushResources.renderTarget->width() == |
| m_frameDescriptor.renderTargetWidth); |
| assert(flushResources.renderTarget->height() == |
| m_frameDescriptor.renderTargetHeight); |
| |
| m_clipContentID = 0; |
| |
| // Layout this frame's resource buffers and textures. |
| LogicalFlush::ResourceCounters totalFrameResourceCounts; |
| LogicalFlush::LayoutCounters layoutCounts; |
| for (size_t i = 0; i < m_logicalFlushes.size(); ++i) |
| { |
| m_logicalFlushes[i]->layoutResources(flushResources, |
| i, |
| &totalFrameResourceCounts, |
| &layoutCounts); |
| } |
| |
| // Determine the minimum required resource allocation sizes to service this |
| // flush. |
| ResourceAllocationCounts resourceRequirements; |
| resourceRequirements.flushUniformBufferCount = m_logicalFlushes.size(); |
| resourceRequirements.imageDrawUniformBufferCount = |
| totalFrameResourceCounts.imageDrawCount; |
| resourceRequirements.pathBufferCount = |
| totalFrameResourceCounts.pathCount + layoutCounts.pathPaddingCount; |
| resourceRequirements.paintBufferCount = |
| totalFrameResourceCounts.pathCount + layoutCounts.paintPaddingCount; |
| resourceRequirements.paintAuxBufferCount = |
| totalFrameResourceCounts.pathCount + layoutCounts.paintAuxPaddingCount; |
| resourceRequirements.contourBufferCount = |
| totalFrameResourceCounts.contourCount + |
| layoutCounts.contourPaddingCount; |
| resourceRequirements.gradSpanBufferCount = |
| layoutCounts.gradSpanCount + layoutCounts.gradSpanPaddingCount; |
| resourceRequirements.tessSpanBufferCount = |
| totalFrameResourceCounts.maxTessellatedSegmentCount; |
| resourceRequirements.triangleVertexBufferCount = |
| totalFrameResourceCounts.maxTriangleVertexCount; |
| resourceRequirements.gradTextureHeight = layoutCounts.maxGradTextureHeight; |
| resourceRequirements.tessTextureHeight = layoutCounts.maxTessTextureHeight; |
| resourceRequirements.atlasTextureWidth = layoutCounts.maxAtlasWidth; |
| resourceRequirements.atlasTextureHeight = layoutCounts.maxAtlasHeight; |
| resourceRequirements.coverageBufferLength = |
| layoutCounts.maxCoverageBufferLength; |
| |
| // Ensure we're within hardware limits. |
| assert(resourceRequirements.gradTextureHeight <= kMaxTextureHeight); |
| assert(resourceRequirements.tessTextureHeight <= kMaxTextureHeight); |
| assert(resourceRequirements.atlasTextureWidth <= atlasMaxSize() || |
| resourceRequirements.atlasTextureWidth <= |
| frameDescriptor().renderTargetWidth); |
| assert(resourceRequirements.atlasTextureHeight <= atlasMaxSize() || |
| resourceRequirements.atlasTextureHeight <= |
| frameDescriptor().renderTargetHeight); |
| assert(resourceRequirements.coverageBufferLength <= |
| platformFeatures().maxCoverageBufferLength); |
| |
| // Track m_maxRecentResourceRequirements so we can trim GPU allocations when |
| // steady-state usage goes down. |
| m_maxRecentResourceRequirements = |
| simd::max(resourceRequirements.toVec(), |
| m_maxRecentResourceRequirements.toVec()); |
| |
| // Grow resources enough to handle this flush. |
| // If "allocs" already fits in our current allocations, then don't change |
| // them. If they don't fit, overallocate by 25% in order to create some |
| // slack for growth. |
| ResourceAllocationCounts allocs = simd::if_then_else( |
| resourceRequirements.toVec() <= m_currentResourceAllocations.toVec(), |
| m_currentResourceAllocations.toVec(), |
| resourceRequirements.toVec() * size_t(5) / size_t(4)); |
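| // e.g., if this flush needs 1000 gradSpans but only 800 are currently |
| // allocated, grow the gradSpan allocation to 1250 (1000 * 5/4). |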
| |
| // In case the 25% growth pushed us above limits. |
| allocs.gradTextureHeight = |
| std::min<size_t>(allocs.gradTextureHeight, kMaxTextureHeight); |
| allocs.tessTextureHeight = |
| std::min<size_t>(allocs.tessTextureHeight, kMaxTextureHeight); |
| allocs.atlasTextureWidth = std::min<size_t>( |
| allocs.atlasTextureWidth, |
| std::max(atlasMaxSize(), frameDescriptor().renderTargetWidth)); |
| allocs.atlasTextureHeight = std::min<size_t>( |
| allocs.atlasTextureHeight, |
| std::max(atlasMaxSize(), frameDescriptor().renderTargetHeight)); |
| allocs.coverageBufferLength = |
| std::min(allocs.coverageBufferLength, |
| platformFeatures().maxCoverageBufferLength); |
| |
| // Additionally, every 5 seconds, trim resources down to the most recent |
| // steady-state usage. |
| double flushTime = m_impl->secondsNow(); |
| bool needsResourceTrim = flushTime - m_lastResourceTrimTimeInSeconds >= 5; |
| if (needsResourceTrim) |
| { |
| // Trim GPU resource allocations to 125% of their maximum recent usage, |
| // and only if the recent usage is 2/3 or less of the current |
| // allocation. |
| allocs = simd::if_then_else(m_maxRecentResourceRequirements.toVec() <= |
| allocs.toVec() * size_t(2) / size_t(3), |
| m_maxRecentResourceRequirements.toVec() * |
| size_t(5) / size_t(4), |
| allocs.toVec()); |
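| // e.g., if 1200 elements are currently allocated but the recent peak |
| // usage was only 700 (<= 800, i.e. 2/3 of 1200), trim the allocation |
| // down to 875 (700 * 5/4). |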
| |
| // Ensure we stayed within limits. |
| assert(allocs.gradTextureHeight <= kMaxTextureHeight); |
| assert(allocs.tessTextureHeight <= kMaxTextureHeight); |
| assert(allocs.atlasTextureWidth <= atlasMaxSize() || |
| allocs.atlasTextureWidth <= frameDescriptor().renderTargetWidth); |
| assert(allocs.atlasTextureHeight <= atlasMaxSize() || |
| allocs.atlasTextureHeight <= |
| frameDescriptor().renderTargetHeight); |
| assert(allocs.coverageBufferLength <= |
| platformFeatures().maxCoverageBufferLength); |
| |
| // Zero out m_maxRecentResourceRequirements for the next interval. |
| m_maxRecentResourceRequirements = ResourceAllocationCounts(); |
| m_lastResourceTrimTimeInSeconds = flushTime; |
| } |
| |
| setResourceSizes(allocs); |
| |
| m_impl->prepareToFlush(flushResources.currentFrameNumber, |
| flushResources.safeFrameNumber); |
| |
| mapResourceBuffers(resourceRequirements); |
| |
| for (const auto& flush : m_logicalFlushes) |
| { |
| flush->writeResources(); |
| } |
| |
| assert(m_flushUniformData.elementsWritten() == m_logicalFlushes.size()); |
| assert(m_imageDrawUniformData.elementsWritten() == |
| totalFrameResourceCounts.imageDrawCount); |
| assert(m_pathData.elementsWritten() == |
| totalFrameResourceCounts.pathCount + layoutCounts.pathPaddingCount); |
| assert(m_paintData.elementsWritten() == |
| totalFrameResourceCounts.pathCount + layoutCounts.paintPaddingCount); |
| assert(m_paintAuxData.elementsWritten() == |
| totalFrameResourceCounts.pathCount + |
| layoutCounts.paintAuxPaddingCount); |
| assert(m_contourData.elementsWritten() == |
| totalFrameResourceCounts.contourCount + |
| layoutCounts.contourPaddingCount); |
| assert(m_gradSpanData.elementsWritten() == |
| layoutCounts.gradSpanCount + layoutCounts.gradSpanPaddingCount); |
| assert(m_tessSpanData.elementsWritten() <= |
| totalFrameResourceCounts.maxTessellatedSegmentCount); |
| assert(m_triangleVertexData.elementsWritten() <= |
| totalFrameResourceCounts.maxTriangleVertexCount); |
| |
| unmapResourceBuffers(resourceRequirements); |
| |
| // Issue logical flushes to the backend. |
| for (const auto& flush : m_logicalFlushes) |
| { |
| m_impl->flush(flush->desc()); |
| } |
| |
| m_impl->postFlush(flushResources); |
| |
| if (!m_logicalFlushes.empty()) |
| { |
| m_logicalFlushes.resize(1); |
| m_logicalFlushes.front()->rewind(); |
| } |
| |
| // Drop all memory that was allocated for this frame using |
| // TrivialBlockAllocator. |
| m_perFrameAllocator.reset(); |
| m_numChopsAllocator.reset(); |
| m_chopVerticesAllocator.reset(); |
| m_tangentPairsAllocator.reset(); |
| m_polarSegmentCountsAllocator.reset(); |
| m_parametricSegmentCountsAllocator.reset(); |
| |
| m_frameDescriptor = FrameDescriptor(); |
| |
| RIVE_DEBUG_CODE(m_didBeginFrame = false;) |
| |
| // Wait to reset CPU-side containers until after the flush has finished. |
| if (needsResourceTrim) |
| { |
| resetContainers(); |
| } |
| } |
| |
| void RenderContext::LogicalFlush::layoutResources( |
| const FlushResources& flushResources, |
| size_t logicalFlushIdx, |
| ResourceCounters* runningFrameResourceCounts, |
| LayoutCounters* runningFrameLayoutCounts) |
| { |
| RIVE_PROF_SCOPE() |
| assert(!m_hasDoneLayout); |
| |
| const FrameDescriptor& frameDescriptor = m_ctx->frameDescriptor(); |
| |
| // Reserve a path record for the clearColor paint (used by atomic mode). |
| // This also allows us to index the storage buffers directly by pathID. |
| ++m_resourceCounts.pathCount; |
| |
| // Storage buffer offsets are required to be aligned on multiples of 256. |
| m_pathPaddingCount = |
| math::padding_to_align_up<gpu::kPathBufferAlignmentInElements>( |
| m_resourceCounts.pathCount); |
| m_paintPaddingCount = |
| math::padding_to_align_up<gpu::kPaintBufferAlignmentInElements>( |
| m_resourceCounts.pathCount); |
| m_paintAuxPaddingCount = |
| math::padding_to_align_up<gpu::kPaintAuxBufferAlignmentInElements>( |
| m_resourceCounts.pathCount); |
| m_contourPaddingCount = |
| math::padding_to_align_up<gpu::kContourBufferAlignmentInElements>( |
| m_resourceCounts.contourCount); |
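| // e.g., if the alignment is 8 elements and 13 paths were written, 3 |
| // padding elements round the count up to 16. |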
| |
| // Metal requires vertex buffers to be 256-byte aligned. |
| m_gradSpanPaddingCount = |
| math::padding_to_align_up<gpu::kGradSpanBufferAlignmentInElements>( |
| m_pendingGradSpanCount); |
| |
| size_t totalTessVertexCountWithPadding = 0; |
| if ((m_resourceCounts.midpointFanTessVertexCount | |
| m_resourceCounts.outerCubicTessVertexCount) != 0) |
| { |
| // midpointFan tessellation vertices reside at the beginning of the |
| // tessellation texture, after 1 patch of padding vertices. |
| constexpr uint32_t kPrePadding = gpu::kMidpointFanPatchSegmentSpan; |
| m_midpointFanTessVertexIdx = kPrePadding; |
| m_midpointFanTessEndLocation = |
| m_midpointFanTessVertexIdx + |
| math::lossless_numeric_cast<uint32_t>( |
| m_resourceCounts.midpointFanTessVertexCount); |
| |
| // outerCubic tessellation vertices reside after the midpointFan |
| // vertices, aligned on a multiple of the outerCubic patch size. |
| uint32_t interiorPadding = |
| math::padding_to_align_up<gpu::kOuterCurvePatchSegmentSpan>( |
| m_midpointFanTessEndLocation); |
| m_outerCubicTessVertexIdx = |
| m_midpointFanTessEndLocation + interiorPadding; |
| m_outerCubicTessEndLocation = |
| m_outerCubicTessVertexIdx + |
| math::lossless_numeric_cast<uint32_t>( |
| m_resourceCounts.outerCubicTessVertexCount); |
| |
| // We need one more padding vertex after all the tessellation vertices. |
| constexpr uint32_t kPostPadding = 1; |
| totalTessVertexCountWithPadding = |
| m_outerCubicTessEndLocation + kPostPadding; |
| |
| assert(kPrePadding + interiorPadding + kPostPadding <= |
| kMaxTessellationPaddingVertexCount); |
| assert(totalTessVertexCountWithPadding <= kMaxTessellationVertexCount); |
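| |
| // Resulting layout of the tessellation vertices: |
| //   [kMidpointFanPatchSegmentSpan padding][midpointFan vertices] |
| //   [align up to kOuterCurvePatchSegmentSpan][outerCubic vertices] |
| //   [1 trailing padding vertex] |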
| } |
| |
| uint32_t tessDataHeight = math::lossless_numeric_cast<uint32_t>( |
| resource_texture_height<kTessTextureWidth>( |
| totalTessVertexCountWithPadding)); |
| if (m_resourceCounts.maxTessellatedSegmentCount != 0) |
| { |
| // Conservatively account for line breaks and padding in the |
| // tessellation span count. Line breaks potentially introduce a new |
| // span. Count the maximum number of line breaks we might encounter, |
| // which is at most TWO for every line in the tessellation texture (one |
| // for a forward span, and one for its reflection.) |
| size_t maxSpanBreakCount = tessDataHeight * 2; |
| // The tessellation texture requires 3 separate spans of padding |
| // vertices (see above and below). |
| constexpr size_t kPaddingSpanCount = 3; |
| m_resourceCounts.maxTessellatedSegmentCount += |
| maxSpanBreakCount + kPaddingSpanCount + |
| kMaxTessellationAlignmentVertices; |
| } |
| |
| // Complex gradients begin on the first row immediately after the simple |
| // gradients. |
| m_gradTextureLayout.complexOffsetY = math::lossless_numeric_cast<uint32_t>( |
| resource_texture_height<gpu::kGradTextureWidthInSimpleRamps>( |
| m_simpleGradients.size())); |
| |
| m_flushDesc.renderTarget = flushResources.renderTarget; |
| m_flushDesc.interlockMode = m_ctx->frameInterlockMode(); |
| m_flushDesc.msaaSampleCount = frameDescriptor.msaaSampleCount; |
| |
| // In atomic mode, we may be able to skip the explicit clear of the color |
| // buffer and fold it into the atomic "resolve" operation instead. |
| bool doClearDuringAtomicResolve = false; |
| |
| if (logicalFlushIdx != 0) |
| { |
| // We always have to preserve the renderTarget between logical flushes. |
| m_flushDesc.colorLoadAction = gpu::LoadAction::preserveRenderTarget; |
| } |
| else if (frameDescriptor.loadAction == gpu::LoadAction::clear) |
| { |
| // In atomic mode, we can clear during the resolve operation if the |
| // clearColor is opaque (because we don't want or have a "source only" |
| // blend mode). |
| doClearDuringAtomicResolve = |
| m_ctx->frameInterlockMode() == gpu::InterlockMode::atomics && |
| colorAlpha(frameDescriptor.clearColor) == 255; |
| m_flushDesc.colorLoadAction = doClearDuringAtomicResolve |
| ? gpu::LoadAction::dontCare |
| : gpu::LoadAction::clear; |
| } |
| else |
| { |
| m_flushDesc.colorLoadAction = frameDescriptor.loadAction; |
| } |
| m_flushDesc.colorClearValue = frameDescriptor.clearColor; |
| |
| if (doClearDuringAtomicResolve) |
| { |
| // In atomic mode we can accomplish a clear of the color buffer while |
| // the shader resolves coverage, instead of actually clearing. |
| // writeResources() will configure the fill for pathID=0 to be a solid |
| // fill matching the clearColor, so if we just initialize coverage |
| // buffer to solid coverage with pathID=0, the resolve step will write |
| // out the correct clear color. |
| assert(m_flushDesc.interlockMode == gpu::InterlockMode::atomics); |
| m_flushDesc.coverageClearValue = |
| static_cast<uint32_t>(FIXED_COVERAGE_ONE); |
| } |
| else if (m_flushDesc.interlockMode == gpu::InterlockMode::atomics) |
| { |
| // When we don't skip the initial clear in atomic mode, clear the |
| // coverage buffer to pathID=0 and a transparent coverage value. |
| // pathID=0 meets the requirement that pathID is always monotonically |
| // increasing. Transparent coverage makes sure the clearColor doesn't |
| // get written out while resolving. |
| m_flushDesc.coverageClearValue = |
| static_cast<uint32_t>(FIXED_COVERAGE_ZERO); |
| } |
| else |
| { |
| // In non-atomic mode, the coverage buffer just needs to be initialized |
| // with "pathID=0" to avoid collisions with any pathIDs being rendered. |
| m_flushDesc.coverageClearValue = 0; |
| } |
| |
| if (doClearDuringAtomicResolve || |
| m_flushDesc.colorLoadAction == gpu::LoadAction::clear) |
| { |
| // If we're clearing then we always update the entire render target. |
| m_flushDesc.renderTargetUpdateBounds = |
| m_flushDesc.renderTarget->bounds(); |
| } |
| else |
| { |
| // When we don't clear, we only update the draw bounds. |
| m_flushDesc.renderTargetUpdateBounds = |
| m_flushDesc.renderTarget->bounds().intersect(m_combinedDrawBounds); |
| } |
| if (m_flushDesc.renderTargetUpdateBounds.empty()) |
| { |
| // If this is empty it means there are no draws and no clear. |
| m_flushDesc.renderTargetUpdateBounds = {0, 0, 0, 0}; |
| } |
| |
| m_flushDesc.atlasContentWidth = m_atlasMaxX; |
| m_flushDesc.atlasContentHeight = m_atlasMaxY; |
| |
| m_flushDesc.flushUniformDataOffsetInBytes = |
| logicalFlushIdx * sizeof(gpu::FlushUniforms); |
| m_flushDesc.pathCount = |
| math::lossless_numeric_cast<uint32_t>(m_resourceCounts.pathCount); |
| m_flushDesc.firstPath = runningFrameResourceCounts->pathCount + |
| runningFrameLayoutCounts->pathPaddingCount; |
| m_flushDesc.firstPaint = runningFrameResourceCounts->pathCount + |
| runningFrameLayoutCounts->paintPaddingCount; |
| m_flushDesc.firstPaintAux = runningFrameResourceCounts->pathCount + |
| runningFrameLayoutCounts->paintAuxPaddingCount; |
| m_flushDesc.contourCount = |
| math::lossless_numeric_cast<uint32_t>(m_resourceCounts.contourCount); |
| m_flushDesc.firstContour = runningFrameResourceCounts->contourCount + |
| runningFrameLayoutCounts->contourPaddingCount; |
| m_flushDesc.gradSpanCount = |
| math::lossless_numeric_cast<uint32_t>(m_pendingGradSpanCount); |
| m_flushDesc.firstGradSpan = runningFrameLayoutCounts->gradSpanCount + |
| runningFrameLayoutCounts->gradSpanPaddingCount; |
| m_flushDesc.gradDataHeight = math::lossless_numeric_cast<uint32_t>( |
| m_gradTextureLayout.complexOffsetY + m_complexGradients.size()); |
| m_flushDesc.tessDataHeight = tessDataHeight; |
| m_flushDesc.clockwiseFillOverride = frameDescriptor.clockwiseFillOverride; |
| m_flushDesc.wireframe = frameDescriptor.wireframe; |
| #ifdef WITH_RIVE_TOOLS |
| m_flushDesc.synthesizedFailureType = frameDescriptor.synthesizedFailureType; |
| #endif |
| |
| m_flushDesc.externalCommandBuffer = flushResources.externalCommandBuffer; |
| |
| *runningFrameResourceCounts = |
| runningFrameResourceCounts->toVec() + m_resourceCounts.toVec(); |
| runningFrameLayoutCounts->pathPaddingCount += m_pathPaddingCount; |
| runningFrameLayoutCounts->paintPaddingCount += m_paintPaddingCount; |
| runningFrameLayoutCounts->paintAuxPaddingCount += m_paintAuxPaddingCount; |
| runningFrameLayoutCounts->contourPaddingCount += m_contourPaddingCount; |
| runningFrameLayoutCounts->gradSpanCount += m_pendingGradSpanCount; |
| runningFrameLayoutCounts->gradSpanPaddingCount += m_gradSpanPaddingCount; |
| runningFrameLayoutCounts->maxGradTextureHeight = |
| std::max(m_flushDesc.gradDataHeight, |
| runningFrameLayoutCounts->maxGradTextureHeight); |
| runningFrameLayoutCounts->maxTessTextureHeight = |
| std::max(m_flushDesc.tessDataHeight, |
| runningFrameLayoutCounts->maxTessTextureHeight); |
| runningFrameLayoutCounts->maxAtlasWidth = |
| std::max(m_atlasMaxX, runningFrameLayoutCounts->maxAtlasWidth); |
| runningFrameLayoutCounts->maxAtlasHeight = |
| std::max(m_atlasMaxY, runningFrameLayoutCounts->maxAtlasHeight); |
| runningFrameLayoutCounts->maxCoverageBufferLength = |
| std::max<size_t>(m_coverageBufferLength, |
| runningFrameLayoutCounts->maxCoverageBufferLength); |
| |
| assert(m_flushDesc.firstPath % gpu::kPathBufferAlignmentInElements == 0); |
| assert(m_flushDesc.firstPaint % gpu::kPaintBufferAlignmentInElements == 0); |
| assert(m_flushDesc.firstPaintAux % |
| gpu::kPaintAuxBufferAlignmentInElements == |
| 0); |
| assert(m_flushDesc.firstContour % gpu::kContourBufferAlignmentInElements == |
| 0); |
| assert(m_flushDesc.firstGradSpan % |
| gpu::kGradSpanBufferAlignmentInElements == |
| 0); |
| RIVE_DEBUG_CODE(m_hasDoneLayout = true;) |
| } |
| |
| void RenderContext::LogicalFlush::writeResources() |
| { |
| RIVE_PROF_SCOPE() |
| const gpu::PlatformFeatures& platformFeatures = m_ctx->platformFeatures(); |
| assert(m_hasDoneLayout); |
| assert(m_flushDesc.firstPath == m_ctx->m_pathData.elementsWritten()); |
| assert(m_flushDesc.firstPaint == m_ctx->m_paintData.elementsWritten()); |
| assert(m_flushDesc.firstPaintAux == |
| m_ctx->m_paintAuxData.elementsWritten()); |
| |
| // Wait until here before we record these texture sizes; they aren't decided |
| // until after all LogicalFlushes have run layoutResources(). |
| m_flushDesc.atlasTextureWidth = math::lossless_numeric_cast<uint32_t>( |
| m_ctx->m_currentResourceAllocations.atlasTextureWidth); |
| m_flushDesc.atlasTextureHeight = math::lossless_numeric_cast<uint32_t>( |
| m_ctx->m_currentResourceAllocations.atlasTextureHeight); |
| m_gradTextureLayout.inverseHeight = |
| 1.f / m_ctx->m_currentResourceAllocations.gradTextureHeight; |
| |
| // Exact tessSpan/triangleVertex counts aren't known until after their data |
| // is written out. |
| size_t firstTessVertexSpan = m_ctx->m_tessSpanData.elementsWritten(); |
| size_t initialTriangleVertexDataSize = |
| m_ctx->m_triangleVertexData.bytesWritten(); |
| |
| // Metal requires vertex buffers to be 256-byte aligned. |
| size_t tessAlignmentPadding = |
| math::padding_to_align_up<gpu::kTessVertexBufferAlignmentInElements>( |
| firstTessVertexSpan); |
| assert(tessAlignmentPadding <= kMaxTessellationAlignmentVertices); |
| m_ctx->m_tessSpanData.push_back_n(nullptr, tessAlignmentPadding); |
| m_flushDesc.firstTessVertexSpan = |
| firstTessVertexSpan + tessAlignmentPadding; |
| assert(m_flushDesc.firstTessVertexSpan == |
| m_ctx->m_tessSpanData.elementsWritten()); |
| |
| // Write out the simple gradient data. |
| constexpr static uint32_t ONE_TEXEL_FIXED = 65536 / gpu::kGradTextureWidth; |
| assert(m_simpleGradients.size() == m_pendingSimpleGradDraws.size()); |
| if (!m_pendingSimpleGradDraws.empty()) |
| { |
| for (size_t i = 0; i < m_pendingSimpleGradDraws.size(); ++i) |
| { |
| // Render each simple gradient as a single, empty GradientSpan with |
| // 1px borders to the left and right. |
| auto [color0, color1] = m_pendingSimpleGradDraws[i]; |
| uint32_t y = math::lossless_numeric_cast<uint32_t>( |
| i / gpu::kGradTextureWidthInSimpleRamps); |
| size_t centerX = (i % gpu::kGradTextureWidthInSimpleRamps) * 2 + 1; |
| uint32_t centerXFixed = math::lossless_numeric_cast<uint32_t>( |
| centerX * ONE_TEXEL_FIXED); |
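| // centerXFixed lands on the boundary between the ramp's two texels, so |
| // the left/right border flags paint one texel of color0 and color1 on |
| // either side of it. |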
| m_ctx->m_gradSpanData.set_back(centerXFixed, |
| centerXFixed, |
| y, |
| GRAD_SPAN_FLAG_LEFT_BORDER | |
| GRAD_SPAN_FLAG_RIGHT_BORDER, |
| color0, |
| color1); |
| } |
| } |
| |
| // Write out the vertex data for rendering complex gradients. |
| assert(m_complexGradients.size() == m_pendingComplexGradDraws.size()); |
| if (!m_pendingComplexGradDraws.empty()) |
| { |
| // The viewport will start at simpleGradDataHeight when rendering color |
| // ramps. |
| for (uint32_t i = 0; i < m_pendingComplexGradDraws.size(); ++i) |
| { |
| // Push "GradientSpan" instances that will render each section of |
| // this color ramp's gradient. |
| const Gradient* gradient = m_pendingComplexGradDraws[i]; |
| const float* stops = gradient->stops(); |
| const ColorInt* colors = gradient->colors(); |
| size_t stopCount = gradient->count(); |
| uint32_t y = i + m_gradTextureLayout.complexOffsetY; |
| |
| // "stop * m + a" converts a stop position to a fixed-point x |
| // coordinate in the gradient texture. (In an ideal world, stops |
| // would all be aligned on pixel centers for the texture sampling to |
| // be identical to the gradient, but here we just stretch it across |
| // kGradTextureWidth pixels and hope everything looks ok.) |
| float m = (kGradTextureWidth - 1.f) * ONE_TEXEL_FIXED; |
| float a = .5f * ONE_TEXEL_FIXED; |
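| // i.e. fixed point where 65536 == the full texture width: stop == 0 maps |
| // to the center of the first texel and stop == 1 to the center of the |
| // last texel. |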
| uint32_t lastXFixed = static_cast<uint32_t>(stops[0] * m + a); |
| ColorInt lastColor = colors[0]; |
| assert(stopCount >= 2); |
| for (size_t i = 1; i < stopCount; ++i) |
| { |
| uint32_t xFixed = static_cast<uint32_t>(stops[i] * m + a); |
| // stops[] must be ordered. |
| assert(lastXFixed <= xFixed && xFixed < 65536); |
| uint32_t flags = GRAD_SPAN_FLAG_COMPLEX_BORDER; |
| if (i == 1) |
| flags |= GRAD_SPAN_FLAG_LEFT_BORDER; |
| if (i == stopCount - 1) |
| flags |= GRAD_SPAN_FLAG_RIGHT_BORDER; |
| m_ctx->m_gradSpanData.set_back(lastXFixed, |
| xFixed, |
| y, |
| flags, |
| lastColor, |
| colors[i]); |
| lastColor = colors[i]; |
| lastXFixed = xFixed; |
| } |
| } |
| } |
| |
| // Write a path record for the clearColor paint (used by atomic mode). |
| // This also allows us to index the storage buffers directly by pathID. |
| gpu::SimplePaintValue clearColorValue; |
| clearColorValue.color = m_ctx->frameDescriptor().clearColor; |
| m_ctx->m_pathData.skip_back(); |
| m_ctx->m_paintData.set_back(gpu::DrawContents::none, |
| PaintType::solidColor, |
| clearColorValue, |
| GradTextureLayout(), |
| /*clipID =*/0, |
| /*hasClipRect =*/false, |
| BlendMode::srcOver); |
| m_ctx->m_paintAuxData.skip_back(); |
| |
| // Render padding vertices in the tessellation texture. |
| if (m_flushDesc.tessDataHeight > 0) |
| { |
| // Padding at the beginning of the tessellation texture. |
| pushPaddingVertices(gpu::kMidpointFanPatchSegmentSpan, 0); |
| // Padding between patch types in the tessellation texture. |
| if (m_outerCubicTessVertexIdx > m_midpointFanTessEndLocation) |
| { |
| pushPaddingVertices(m_outerCubicTessVertexIdx - |
| m_midpointFanTessEndLocation, |
| m_midpointFanTessEndLocation); |
| } |
| // The final vertex of the final patch of each contour crosses over into |
| // the next contour. (This is how we wrap around back to the beginning.) |
| // Therefore, the final contour of the flush needs an out-of-contour |
| // vertex to cross into as well, so we emit a padding vertex here at the |
| // end. |
| pushPaddingVertices(1, m_outerCubicTessEndLocation); |
| } |
| |
| // Write out all the data for our high level draws, and build up a low-level |
| // draw list. |
| if (m_ctx->frameInterlockMode() == gpu::InterlockMode::rasterOrdering) |
| { |
| for (const DrawUniquePtr& draw : m_draws) |
| { |
| // TODO: We don't currently support a front-to-back prepass in |
| // rasterOrdering mode. If we decide to support this, we will either |
| // need to walk the draws backwards here, or, more likely, start |
| // sorting and re-ordering in rasterOrdering mode as well. |
| assert(draw->prepassCount() == 0); |
| assert(draw->subpassCount() > 0); |
| for (int i = 0; i < draw->subpassCount(); ++i) |
| { |
| draw->pushToRenderContext(this, i); |
| } |
| } |
| } |
| else |
| { |
| assert(m_drawPassCount <= kMaxReorderedDrawPassCount); |
| |
| // Sort the draw list to optimize batching, since we can only batch |
| // non-overlapping draws. |
| std::vector<int64_t>& indirectDrawList = m_ctx->m_indirectDrawList; |
| indirectDrawList.clear(); |
| indirectDrawList.reserve(m_drawPassCount); |
| |
| if (m_ctx->m_intersectionBoard == nullptr) |
| { |
| m_ctx->m_intersectionBoard = std::make_unique<IntersectionBoard>(); |
| } |
| IntersectionBoard* intersectionBoard = m_ctx->m_intersectionBoard.get(); |
| intersectionBoard->resizeAndReset(m_flushDesc.renderTarget->width(), |
| m_flushDesc.renderTarget->height()); |
| |
| // Build a list of sort keys that determine the final draw order. |
| constexpr static int kDrawGroupShift = |
| 48; // Where in the key does the draw group begin? |
| constexpr static int64_t kDrawGroupMask = 0x7fffllu << kDrawGroupShift; |
| constexpr static int kDrawTypeShift = 45; |
| constexpr static int64_t kDrawTypeMask RIVE_MAYBE_UNUSED = |
| 7llu << kDrawTypeShift; |
| constexpr static int kTextureHashShift = 31; |
| constexpr static int64_t kTextureHashMask = 0x3fffllu |
| << kTextureHashShift; |
| constexpr static int kBlendModeShift = 27; |
| constexpr static int kBlendModeMask = 0xf << kBlendModeShift; |
| constexpr static int kDrawContentsShift = 18; |
| constexpr static int64_t kDrawContentsMask = 0x1ffllu |
| << kDrawContentsShift; |
| constexpr static int kDrawIndexShift = 2; |
| constexpr static int64_t kDrawIndexMask = 0x7fff << kDrawIndexShift; |
| constexpr static int64_t kSubpassIndexMask = 0x3; |
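| // Resulting 64-bit key layout, from high bit to low: |
| //   [63]    sign (negated keys sort prepasses first, front-to-back) |
| //   [62:48] draw group index |
| //   [47:45] draw type |
| //   [44:31] texture hash |
| //   [30:27] blend mode |
| //   [26:18] draw contents |
| //   [16:2]  draw index |
| //   [1:0]   subpass index |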
| |
| for (size_t i = 0; i < m_draws.size(); ++i) |
| { |
| Draw* draw = m_draws[i].get(); |
| int4 drawBounds = simd::load4i(&m_draws[i]->pixelBounds()); |
| |
| // Add one extra pixel of padding to the draw bounds to make |
| // absolutely certain we get no overlapping pixels, which destroy |
| // the atomic shader. |
| constexpr int32_t kMax32i = std::numeric_limits<int32_t>::max(); |
| constexpr int32_t kMin32i = std::numeric_limits<int32_t>::min(); |
| drawBounds = simd::if_then_else( |
| drawBounds != int4{kMin32i, kMin32i, kMax32i, kMax32i}, |
| drawBounds + int4{-1, -1, 1, 1}, |
| drawBounds); |
| |
| // Our top priority in re-ordering is to group non-overlapping draws |
| // together, in order to maximize batching while preserving |
| // correctness. |
| int maxPasses = |
| std::max(draw->prepassCount(), draw->subpassCount()); |
| int16_t drawGroupIdx = |
| intersectionBoard->addRectangle(drawBounds, maxPasses); |
| assert(drawGroupIdx > 0); |
| int64_t key = static_cast<int64_t>(drawGroupIdx) << kDrawGroupShift; |
| |
| // Within sub-groups of non-overlapping draws, sort similar draw |
| // types together. |
| int64_t drawType = static_cast<int64_t>(draw->type()); |
| assert(drawType <= kDrawTypeMask >> kDrawTypeShift); |
| key |= drawType << kDrawTypeShift; |
| |
| // Within sub-groups of matching draw type, sort by texture binding. |
| int64_t textureHash = |
| draw->imageTexture() != nullptr |
| ? draw->imageTexture()->textureResourceHash() & |
| (kTextureHashMask >> kTextureHashShift) |
| : 0; |
| key |= textureHash << kTextureHashShift; |
| |
| // If using KHR_blend_equation_advanced, we need a batching barrier |
| // between draws with different blend modes. If not using |
| // KHR_blend_equation_advanced, sorting by blend mode may still give |
| // us better branching on the GPU. |
| int64_t blendMode = |
| gpu::ConvertBlendModeToPLSBlendMode(draw->blendMode()); |
| assert(blendMode <= kBlendModeMask >> kBlendModeShift); |
| key |= blendMode << kBlendModeShift; |
| |
| // msaa mode draws strokes, fills, and even/odd with different |
| // stencil settings. |
| int64_t drawContents = static_cast<int64_t>(draw->drawContents()); |
| assert(drawContents <= kDrawContentsMask >> kDrawContentsShift); |
| key |= drawContents << kDrawContentsShift; |
| |
| // Draw and subpass indices go at the bottom of the key so we can |
| // reference them again after sorting without affecting the order. |
| assert(i <= kDrawIndexMask >> kDrawIndexShift); |
| key |= i << kDrawIndexShift; |
| |
| assert((key & kDrawGroupMask) >> kDrawGroupShift == drawGroupIdx); |
| assert((key & kDrawTypeMask) >> kDrawTypeShift == drawType); |
| assert((key & kTextureHashMask) >> kTextureHashShift == |
| textureHash); |
| assert((key & kBlendModeMask) >> kBlendModeShift == blendMode); |
| assert((key & kDrawContentsMask) >> kDrawContentsShift == |
| drawContents); |
| assert((key & kDrawIndexMask) >> kDrawIndexShift == i); |
| |
| // Add the first prepass and subpass, if any. |
| if (draw->prepassCount() > 0) |
| { |
| // Negating the key is an easy way to sort the prepasses |
| // front-to-back, and before the subpasses. |
| indirectDrawList.push_back(-key); |
| } |
| if (draw->subpassCount() > 0) |
| { |
| indirectDrawList.push_back(key); |
| } |
| |
| // Add any additional passes. |
| for (int i = 1; i < maxPasses; ++i) |
| { |
| // Increment the drawGroupIdx and i both at once. (The |
| // intersectionBoard already reserved "maxPasses" layers of |
| // drawGroupIndices for us.) |
| key += (1ll << kDrawGroupShift) + 1; |
| assert((key & kDrawGroupMask) >> kDrawGroupShift == |
| drawGroupIdx + i); |
| assert((key & kSubpassIndexMask) == i); |
| |
| if (i < draw->prepassCount()) |
| { |
| // Negating the key is an easy way to sort the prepasses |
| // front-to-back, and before the subpasses. |
| indirectDrawList.push_back(-key); |
| } |
| if (i < draw->subpassCount()) |
| { |
| indirectDrawList.push_back(key); |
| } |
| } |
| } |
| assert(indirectDrawList.size() == m_drawPassCount); |
| |
| // Re-order the draws!! |
| std::sort(indirectDrawList.begin(), indirectDrawList.end()); |
| |
| assert(m_pendingBarriers == BarrierFlags::none); |
| if (m_ctx->frameInterlockMode() == gpu::InterlockMode::atomics && |
| platformFeatures.atomicPLSInitNeedsDraw) |
| { |
| // Atomic mode sometimes needs to initialize PLS with a draw when |
| // the backend can't do it with typical clear/load APIs. |
| // So far only Metal needs this, and its implementation doesn't |
| // require a barrier before or after. |
| m_drawList.emplace_back(m_ctx->perFrameAllocator(), |
| DrawType::renderPassInitialize, |
| gpu::ShaderMiscFlags::none, |
| 1, |
| 0, |
| BlendMode::srcOver, |
| ImageSampler::LinearClamp(), |
| BarrierFlags::none); |
| } |
| else if (m_ctx->frameInterlockMode() == gpu::InterlockMode::msaa && |
| m_flushDesc.colorLoadAction == |
| gpu::LoadAction::preserveRenderTarget && |
| platformFeatures.msaaColorPreserveNeedsDraw) |
| { |
| // When implemented with a transient attachment, MSAA needs us to |
| // draw the old renderTarget contents into the framebuffer at the |
| // beginning of the render pass when |
| // LoadAction::preserveRenderTarget is specified. |
| m_drawList.emplace_back(m_ctx->perFrameAllocator(), |
| DrawType::renderPassInitialize, |
| gpu::ShaderMiscFlags::none, |
| 1, |
| 0, |
| BlendMode::srcOver, |
| ImageSampler::LinearClamp(), |
| // The MSAA init reads the framebuffer, so |
| // it needs the equivalent of a "dstBlend" |
| // barrier. |
| BarrierFlags::dstBlend); |
| m_drawList.tail().drawContents = gpu::DrawContents::opaquePaint; |
| // The draw that follows this init will need a special |
| // "msaaPostInit" barrier. |
| m_pendingBarriers |= BarrierFlags::msaaPostInit; |
| } |
| |
| // Find a mask that tells us when to insert barriers, and which barriers |
| // are needed. When the keys of two adjacent draws differ within this |
| // bitmask, we insert a barrier between them. |
| int64_t needsBarrierMask = 0; |
| BarrierFlags neededBarriers = BarrierFlags::none; |
| switch (m_flushDesc.interlockMode) |
| { |
| case gpu::InterlockMode::rasterOrdering: |
| // rasterOrdering mode doesn't reorder draws. |
| RIVE_UNREACHABLE(); |
| |
| case gpu::InterlockMode::atomics: |
| // In atomic mode, we need barriers any time draws overlap. |
| // Insert a barrier every time the drawGroupIdx changes. |
| needsBarrierMask = kDrawGroupMask; |
| neededBarriers = BarrierFlags::plsAtomic; |
| break; |
| |
| case gpu::InterlockMode::clockwiseAtomic: |
| // In clockwiseAtomic mode, we only need a barrier between the |
| // borrowedCoverage prepasses and the main rendering. Prepasses |
| // have a negative key, so just insert a barrier when the sign |
| // changes. |
| needsBarrierMask = 1ll << 63; |
| neededBarriers = BarrierFlags::clockwiseBorrowedCoverage; |
| break; |
| |
| case gpu::InterlockMode::msaa: |
| // MSAA mode can't batch draws that overlap because they both |
| // rely on the stencil buffer across subpasses. Stop batching |
| // every time the drawGroupIdx changes. |
| needsBarrierMask = kDrawGroupMask; |
| // MSAA mode draws clips, strokes, fills, and even/odd with |
| // different stencil settings, so these can't be batched. |
| needsBarrierMask |= kDrawContentsMask; |
| if (platformFeatures.supportsBlendAdvancedKHR) |
| { |
| // If using KHR_blend_equation_advanced, we also need to |
| // stop batching between blend modes in order to change the |
| // blend equation. |
| needsBarrierMask |= kBlendModeMask; |
| } |
| // MSAA barriers only need to prevent batching of draws for now. |
| // If we also need a dstBlend barrier, that will be decided |
| // later. |
| neededBarriers = BarrierFlags::drawBatchBreak; |
| break; |
| } |
| |
| // Write out the draw data from the sorted draw list, and build up a |
| // condensed/batched list of low-level draws. |
| constexpr int64_t BEGIN_KEY = std::numeric_limits<int64_t>::min(); |
| int64_t priorSignedKey = BEGIN_KEY; |
| for (const int64_t signedKey : indirectDrawList) |
| { |
| assert(signedKey >= priorSignedKey); |
| // The first draw always gets barriers because we need the barriers |
| // after the initial clears, loads, etc. |
| if (priorSignedKey == BEGIN_KEY || |
| (priorSignedKey & needsBarrierMask) != |
| (signedKey & needsBarrierMask)) |
| { |
| m_pendingBarriers |= neededBarriers; |
| } |
| int64_t key = abs(signedKey); |
| uint32_t drawIndex = (key & kDrawIndexMask) >> kDrawIndexShift; |
| int subpassIndex = key & kSubpassIndexMask; |
| if (signedKey < 0) |
| { |
| // Negative keys are a prepass. Update the subpassIndex to be |
| // negative. |
| subpassIndex = -1 - subpassIndex; |
| } |
| // FIXME: m_currentZIndex shouldn't be a stateful variable; it |
| // should be passed to pushToRenderContext() instead. |
| m_currentZIndex = math::lossless_numeric_cast<uint32_t>( |
| abs(key >> static_cast<int64_t>(kDrawGroupShift))); |
| m_draws[drawIndex]->pushToRenderContext(this, subpassIndex); |
| priorSignedKey = signedKey; |
| } |
| |
| // Atomic mode needs one more draw to resolve all the pixels. |
| if (m_ctx->frameInterlockMode() == gpu::InterlockMode::atomics) |
| { |
| m_drawList |
| .emplace_back(m_ctx->perFrameAllocator(), |
| DrawType::renderPassResolve, |
| gpu::ShaderMiscFlags::none, |
| 1, |
| 0, |
| BlendMode::srcOver, |
| ImageSampler::LinearClamp(), |
| BarrierFlags::plsAtomicPreResolve) |
| .shaderFeatures = m_combinedShaderFeatures; |
| } |
| } |
| |
| // Write out the draws to the feather atlas. Do this after the main draws |
| // (even though the atlas ones execute first) so that our path info and Z |
| // index are decided and available to pushAtlasTessellation(). |
| if (!m_pendingAtlasDraws.empty()) |
| { |
| TAABB<uint16_t> fullAtlasViewport = {0, |
| 0, |
| m_flushDesc.atlasContentWidth, |
| m_flushDesc.atlasContentHeight}; |
| gpu::AtlasDrawBatch* currentBatch = |
| m_ctx->m_perFrameAllocator.makePODArray<gpu::AtlasDrawBatch>( |
| m_pendingAtlasDraws.size()); |
| // Iterate the atlas draws 4 times, once for each combination of |
| // fill/stroke and scissored/unscissored, so the draws come out sorted |
| // and the draws without a scissor can be batched together. |
| for (bool stroked : {false, true}) |
| { |
| if (stroked) |
| { |
| m_flushDesc.atlasStrokeBatches = currentBatch; |
| } |
| else |
| { |
| m_flushDesc.atlasFillBatches = currentBatch; |
| } |
| for (bool scissored : {false, true}) |
| { |
| gpu::AtlasDrawBatch* lastBatch = nullptr; |
| for (PathDraw* draw : m_pendingAtlasDraws) |
| { |
| if (draw->isStroke() != stroked || |
| draw->atlasScissorEnabled() != scissored) |
| { |
| continue; |
| } |
| uint32_t tessVertexCount, tessBaseVertex; |
| draw->pushAtlasTessellation(this, |
| &tessVertexCount, |
| &tessBaseVertex); |
| if (tessVertexCount == 0) |
| { |
| continue; |
| } |
| uint32_t patchCount = |
| tessVertexCount / gpu::kMidpointFanPatchSegmentSpan; |
| uint32_t basePatch = |
| tessBaseVertex / gpu::kMidpointFanPatchSegmentSpan; |
| assert(patchCount * gpu::kMidpointFanPatchSegmentSpan == |
| tessVertexCount); |
| assert(basePatch * gpu::kMidpointFanPatchSegmentSpan == |
| tessBaseVertex); |
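| // Scissored draws each get their own batch (each carries its own |
| // scissor rect); unscissored draws whose patches are contiguous get |
| // merged into the previous batch. |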
| if (lastBatch == nullptr || scissored) |
| { |
| lastBatch = currentBatch++; |
| *lastBatch = { |
| .scissor = scissored ? draw->atlasScissor() |
| : fullAtlasViewport, |
| .patchCount = patchCount, |
| .basePatch = basePatch, |
| }; |
| } |
| else |
| { |
| assert(lastBatch->basePatch + lastBatch->patchCount == |
| basePatch); |
| lastBatch->patchCount += patchCount; |
| } |
| } |
| } |
| if (stroked) |
| { |
| m_flushDesc.atlasStrokeBatchCount = |
| currentBatch - m_flushDesc.atlasStrokeBatches; |
| } |
| else |
| { |
| m_flushDesc.atlasFillBatchCount = |
| currentBatch - m_flushDesc.atlasFillBatches; |
| } |
| } |
| assert(m_flushDesc.atlasFillBatchCount + |
| m_flushDesc.atlasStrokeBatchCount == |
| currentBatch - m_flushDesc.atlasFillBatches); |
| assert(m_flushDesc.atlasFillBatchCount + |
| m_flushDesc.atlasStrokeBatchCount <= |
| m_pendingAtlasDraws.size()); |
| } |
| |
| // Pad our buffers to 256-byte alignment. |
| m_ctx->m_pathData.push_back_n(nullptr, m_pathPaddingCount); |
| m_ctx->m_paintData.push_back_n(nullptr, m_paintPaddingCount); |
| m_ctx->m_paintAuxData.push_back_n(nullptr, m_paintAuxPaddingCount); |
| m_ctx->m_contourData.push_back_n(nullptr, m_contourPaddingCount); |
| m_ctx->m_gradSpanData.push_back_n(nullptr, m_gradSpanPaddingCount); |
| |
| assert(m_ctx->m_pathData.elementsWritten() == |
| m_flushDesc.firstPath + m_resourceCounts.pathCount + |
| m_pathPaddingCount); |
| assert(m_ctx->m_paintData.elementsWritten() == |
| m_flushDesc.firstPaint + m_resourceCounts.pathCount + |
| m_paintPaddingCount); |
| assert(m_ctx->m_paintAuxData.elementsWritten() == |
| m_flushDesc.firstPaintAux + m_resourceCounts.pathCount + |
| m_paintAuxPaddingCount); |
| assert(m_ctx->m_contourData.elementsWritten() == |
| m_flushDesc.firstContour + m_resourceCounts.contourCount + |
| m_contourPaddingCount); |
| assert(m_ctx->m_gradSpanData.elementsWritten() == |
| m_flushDesc.firstGradSpan + m_pendingGradSpanCount + |
| m_gradSpanPaddingCount); |
| assert(m_midpointFanTessVertexIdx == m_midpointFanTessEndLocation); |
| assert(m_outerCubicTessVertexIdx == m_outerCubicTessEndLocation); |
| |
| // Some of the flushDescriptor's data isn't known until after |
| // writeResources(). Update it now that it's known. |
| m_flushDesc.combinedShaderFeatures = m_combinedShaderFeatures; |
| m_flushDesc.atomicFixedFunctionColorOutput = |
| m_ctx->frameInterlockMode() == InterlockMode::atomics && |
| !(m_combinedShaderFeatures & ShaderFeatures::ENABLE_ADVANCED_BLEND); |
| |
| if (m_coverageBufferLength > 0) |
| { |
| assert(m_flushDesc.interlockMode == |
| gpu::InterlockMode::clockwiseAtomic); |
| // The coverage buffer prefix gets reset to zero when the buffer is |
| // reallocated, so wait until here to get the prefix. |
| m_flushDesc.coverageBufferPrefix = m_ctx->incrementCoverageBufferPrefix( |
| &m_flushDesc.needsCoverageBufferClear); |
| } |
| |
| m_flushDesc.tessVertexSpanCount = math::lossless_numeric_cast<uint32_t>( |
| m_ctx->m_tessSpanData.elementsWritten() - |
| m_flushDesc.firstTessVertexSpan); |
| |
| m_flushDesc.hasTriangleVertices = |
| m_ctx->m_triangleVertexData.bytesWritten() != |
| initialTriangleVertexDataSize; |
| |
| m_flushDesc.drawList = &m_drawList; |
| |
| // Write out the uniforms for this flush now that the flushDescriptor is |
| // complete. |
| m_ctx->m_flushUniformData.emplace_back(m_flushDesc, platformFeatures); |
| } |
| |
| void RenderContext::setResourceSizes(ResourceAllocationCounts allocs, |
| bool forceRealloc) |
| { |
| RIVE_PROF_SCOPE() |
| #if 0 |
| class Logger |
| { |
| public: |
| void logSize(const char* name, |
| size_t oldSize, |
| size_t newSize, |
| size_t newSizeInBytes) |
| { |
| m_totalSizeInBytes += newSizeInBytes; |
| if (oldSize == newSize) |
| { |
| return; |
| } |
| if (!m_hasChanged) |
| { |
| printf("RenderContext::setResourceSizes():\n"); |
| m_hasChanged = true; |
| } |
| printf(" resize %s: %zu -> %zu (%zu KiB)\n", |
| name, |
| oldSize, |
| newSize, |
| newSizeInBytes >> 10); |
| } |
| |
| void logTextureSize(const char* widthName, |
| const char* heightName, |
| size_t oldWidth, |
| size_t oldHeight, |
| size_t newWidth, |
| size_t newHeight, |
| size_t bytesPerPixel) |
| { |
| m_totalSizeInBytes += newHeight * newWidth * bytesPerPixel; |
| if (oldWidth == newWidth && oldHeight == newHeight) |
| { |
| return; |
| } |
| if (!m_hasChanged) |
| { |
| printf("RenderContext::setResourceSizes():\n"); |
| m_hasChanged = true; |
| } |
| printf(" resize %s x %s: %zu x %zu -> %zu x %zu (%zu KiB)\n", |
| widthName, |
| heightName, |
| oldWidth, |
| oldHeight, |
| newWidth, |
| newHeight, |
| (newHeight * newWidth * bytesPerPixel) >> 10); |
| } |
| |
| ~Logger() |
| { |
| if (!m_hasChanged) |
| { |
| return; |
| } |
| printf(" TOTAL GPU resource usage: %zu KiB\n", |
| m_totalSizeInBytes >> 10); |
| } |
| |
| private: |
| size_t m_totalSizeInBytes = 0; |
| bool m_hasChanged = false; |
| } logger; |
| #define LOG_BUFFER_RING_SIZE(NAME, ITEM_SIZE_IN_BYTES) \ |
| logger.logSize(#NAME, \ |
| m_currentResourceAllocations.NAME, \ |
| allocs.NAME, \ |
| allocs.NAME* ITEM_SIZE_IN_BYTES* gpu::kBufferRingSize) |
| #define LOG_TEXTURE_HEIGHT(NAME, BYTES_PER_ROW) \ |
| logger.logSize(#NAME, \ |
| m_currentResourceAllocations.NAME, \ |
| allocs.NAME, \ |
| allocs.NAME* BYTES_PER_ROW) |
| #define LOG_TEXTURE_SIZE(WIDTH_NAME, HEIGHT_NAME, BYTES_PER_PIXEL) \ |
| logger.logTextureSize(#WIDTH_NAME, \ |
| #HEIGHT_NAME, \ |
| m_currentResourceAllocations.WIDTH_NAME, \ |
| m_currentResourceAllocations.HEIGHT_NAME, \ |
| allocs.WIDTH_NAME, \ |
| allocs.HEIGHT_NAME, \ |
| BYTES_PER_PIXEL) |
| #define LOG_BUFFER_SIZE(NAME, BYTES_PER_ELEMENT) \ |
| logger.logSize(#NAME, \ |
| m_currentResourceAllocations.NAME, \ |
| allocs.NAME, \ |
| allocs.NAME* BYTES_PER_ELEMENT) |
| #else |
| #define LOG_BUFFER_RING_SIZE(NAME, ITEM_SIZE_IN_BYTES) |
| #define LOG_TEXTURE_HEIGHT(NAME, BYTES_PER_ROW) |
| #define LOG_TEXTURE_SIZE(WIDTH_NAME, HEIGHT_NAME, BYTES_PER_PIXEL) |
| #define LOG_BUFFER_SIZE(NAME, BYTES_PER_ELEMENT) |
| #endif |
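| // Each resource below is only reallocated when its requested size differs |
| // from the current allocation (or when forceRealloc is set); the LOG_* |
| // macros compile away unless the "#if 0" logging block above is enabled. |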
| |
| LOG_BUFFER_RING_SIZE(flushUniformBufferCount, sizeof(gpu::FlushUniforms)); |
| if (allocs.flushUniformBufferCount != |
| m_currentResourceAllocations.flushUniformBufferCount || |
| forceRealloc) |
| { |
| m_impl->resizeFlushUniformBuffer(allocs.flushUniformBufferCount * |
| sizeof(gpu::FlushUniforms)); |
| } |
| |
| LOG_BUFFER_RING_SIZE(imageDrawUniformBufferCount, |
| sizeof(gpu::ImageDrawUniforms)); |
| if (allocs.imageDrawUniformBufferCount != |
| m_currentResourceAllocations.imageDrawUniformBufferCount || |
| forceRealloc) |
| { |
| m_impl->resizeImageDrawUniformBuffer( |
| allocs.imageDrawUniformBufferCount * |
| sizeof(gpu::ImageDrawUniforms)); |
| } |
| |
| LOG_BUFFER_RING_SIZE(pathBufferCount, sizeof(gpu::PathData)); |
| if (allocs.pathBufferCount != |
| m_currentResourceAllocations.pathBufferCount || |
| forceRealloc) |
| { |
| m_impl->resizePathBuffer(allocs.pathBufferCount * sizeof(gpu::PathData), |
| gpu::PathData::kBufferStructure); |
| } |
| |
| LOG_BUFFER_RING_SIZE(paintBufferCount, sizeof(gpu::PaintData)); |
| if (allocs.paintBufferCount != |
| m_currentResourceAllocations.paintBufferCount || |
| forceRealloc) |
| { |
| m_impl->resizePaintBuffer(allocs.paintBufferCount * |
| sizeof(gpu::PaintData), |
| gpu::PaintData::kBufferStructure); |
| } |
| |
| LOG_BUFFER_RING_SIZE(paintAuxBufferCount, sizeof(gpu::PaintAuxData)); |
| if (allocs.paintAuxBufferCount != |
| m_currentResourceAllocations.paintAuxBufferCount || |
| forceRealloc) |
| { |
| m_impl->resizePaintAuxBuffer(allocs.paintAuxBufferCount * |
| sizeof(gpu::PaintAuxData), |
| gpu::PaintAuxData::kBufferStructure); |
| } |
| |
| LOG_BUFFER_RING_SIZE(contourBufferCount, sizeof(gpu::ContourData)); |
| if (allocs.contourBufferCount != |
| m_currentResourceAllocations.contourBufferCount || |
| forceRealloc) |
| { |
| m_impl->resizeContourBuffer(allocs.contourBufferCount * |
| sizeof(gpu::ContourData), |
| gpu::ContourData::kBufferStructure); |
| } |
| |
| LOG_BUFFER_RING_SIZE(gradSpanBufferCount, sizeof(gpu::GradientSpan)); |
| if (allocs.gradSpanBufferCount != |
| m_currentResourceAllocations.gradSpanBufferCount || |
| forceRealloc) |
| { |
| m_impl->resizeGradSpanBuffer(allocs.gradSpanBufferCount * |
| sizeof(gpu::GradientSpan)); |
| } |
| |
| LOG_BUFFER_RING_SIZE(tessSpanBufferCount, sizeof(gpu::TessVertexSpan)); |
| if (allocs.tessSpanBufferCount != |
| m_currentResourceAllocations.tessSpanBufferCount || |
| forceRealloc) |
| { |
| m_impl->resizeTessVertexSpanBuffer(allocs.tessSpanBufferCount * |
| sizeof(gpu::TessVertexSpan)); |
| } |
| |
| LOG_BUFFER_RING_SIZE(triangleVertexBufferCount, |
| sizeof(gpu::TriangleVertex)); |
| if (allocs.triangleVertexBufferCount != |
| m_currentResourceAllocations.triangleVertexBufferCount || |
| forceRealloc) |
| { |
| m_impl->resizeTriangleVertexBuffer(allocs.triangleVertexBufferCount * |
| sizeof(gpu::TriangleVertex)); |
| } |
| |
| assert(allocs.gradTextureHeight <= kMaxTextureHeight); |
| LOG_TEXTURE_HEIGHT(gradTextureHeight, gpu::kGradTextureWidth * 4); |
| if (allocs.gradTextureHeight != |
| m_currentResourceAllocations.gradTextureHeight || |
| forceRealloc) |
| { |
| m_impl->resizeGradientTexture( |
| gpu::kGradTextureWidth, |
| math::lossless_numeric_cast<uint32_t>(allocs.gradTextureHeight)); |
| } |
| |
| assert(allocs.tessTextureHeight <= kMaxTextureHeight); |
| LOG_TEXTURE_HEIGHT(tessTextureHeight, gpu::kTessTextureWidth * 4 * 4); |
| if (allocs.tessTextureHeight != |
| m_currentResourceAllocations.tessTextureHeight || |
| forceRealloc) |
| { |
| m_impl->resizeTessellationTexture( |
| gpu::kTessTextureWidth, |
| math::lossless_numeric_cast<uint32_t>(allocs.tessTextureHeight)); |
| } |
| |
| assert(allocs.atlasTextureWidth <= atlasMaxSize() || |
| allocs.atlasTextureWidth <= frameDescriptor().renderTargetWidth); |
| assert(allocs.atlasTextureHeight <= atlasMaxSize() || |
| allocs.atlasTextureHeight <= frameDescriptor().renderTargetHeight); |
| LOG_TEXTURE_SIZE(atlasTextureWidth, atlasTextureHeight, sizeof(uint16_t)); |
| if (allocs.atlasTextureWidth != |
| m_currentResourceAllocations.atlasTextureWidth || |
| allocs.atlasTextureHeight != |
| m_currentResourceAllocations.atlasTextureHeight || |
| forceRealloc) |
| { |
| m_impl->resizeAtlasTexture( |
| math::lossless_numeric_cast<uint32_t>(allocs.atlasTextureWidth), |
| math::lossless_numeric_cast<uint32_t>(allocs.atlasTextureHeight)); |
| } |
| |
| assert(allocs.coverageBufferLength <= |
| platformFeatures().maxCoverageBufferLength); |
| LOG_BUFFER_SIZE(coverageBufferLength, sizeof(uint32_t)); |
| if (allocs.coverageBufferLength != |
| m_currentResourceAllocations.coverageBufferLength || |
| forceRealloc) |
| { |
| m_impl->resizeCoverageBuffer(allocs.coverageBufferLength * |
| sizeof(uint32_t)); |
| // Start the coverageBufferPrefix over at zero. This ensures the new |
| // buffer gets cleared, because the only criterion for clearing it is |
| // the prefix wrapping around to 0. |
| m_coverageBufferPrefix = 0; |
| } |
| |
| m_currentResourceAllocations = allocs; |
| } |
| |
| void RenderContext::mapResourceBuffers( |
| const ResourceAllocationCounts& mapCounts) |
| { |
| RIVE_PROF_SCOPE() |
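| // Only map the buffers this frame actually needs; the asserts verify that |
| // each mapped region has room for everything the frame will write. |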
| if (mapCounts.flushUniformBufferCount > 0) |
| { |
| m_flushUniformData.mapElements( |
| m_impl.get(), |
| &RenderContextImpl::mapFlushUniformBuffer, |
| mapCounts.flushUniformBufferCount); |
| } |
| assert(m_flushUniformData.hasRoomFor(mapCounts.flushUniformBufferCount)); |
| |
| if (mapCounts.imageDrawUniformBufferCount > 0) |
| { |
| m_imageDrawUniformData.mapElements( |
| m_impl.get(), |
| &RenderContextImpl::mapImageDrawUniformBuffer, |
| mapCounts.imageDrawUniformBufferCount); |
| } |
| assert(m_imageDrawUniformData.hasRoomFor( |
| mapCounts.imageDrawUniformBufferCount)); |
| |
| if (mapCounts.pathBufferCount > 0) |
| { |
| m_pathData.mapElements(m_impl.get(), |
| &RenderContextImpl::mapPathBuffer, |
| mapCounts.pathBufferCount); |
| } |
| assert(m_pathData.hasRoomFor(mapCounts.pathBufferCount)); |
| |
| if (mapCounts.paintBufferCount > 0) |
| { |
| m_paintData.mapElements(m_impl.get(), |
| &RenderContextImpl::mapPaintBuffer, |
| mapCounts.paintBufferCount); |
| } |
| assert(m_paintData.hasRoomFor(mapCounts.paintBufferCount)); |
| |
| if (mapCounts.paintAuxBufferCount > 0) |
| { |
| m_paintAuxData.mapElements(m_impl.get(), |
| &RenderContextImpl::mapPaintAuxBuffer, |
| mapCounts.paintAuxBufferCount); |
| } |
| assert(m_paintAuxData.hasRoomFor(mapCounts.paintAuxBufferCount)); |
| |
| if (mapCounts.contourBufferCount > 0) |
| { |
| m_contourData.mapElements(m_impl.get(), |
| &RenderContextImpl::mapContourBuffer, |
| mapCounts.contourBufferCount); |
| } |
| assert(m_contourData.hasRoomFor(mapCounts.contourBufferCount)); |
| |
| if (mapCounts.gradSpanBufferCount > 0) |
| { |
| m_gradSpanData.mapElements(m_impl.get(), |
| &RenderContextImpl::mapGradSpanBuffer, |
| mapCounts.gradSpanBufferCount); |
| } |
| assert(m_gradSpanData.hasRoomFor(mapCounts.gradSpanBufferCount)); |
| |
| if (mapCounts.tessSpanBufferCount > 0) |
| { |
| m_tessSpanData.mapElements(m_impl.get(), |
| &RenderContextImpl::mapTessVertexSpanBuffer, |
| mapCounts.tessSpanBufferCount); |
| } |
| assert(m_tessSpanData.hasRoomFor(mapCounts.tessSpanBufferCount)); |
| |
| if (mapCounts.triangleVertexBufferCount > 0) |
| { |
| m_triangleVertexData.mapElements( |
| m_impl.get(), |
| &RenderContextImpl::mapTriangleVertexBuffer, |
| mapCounts.triangleVertexBufferCount); |
| } |
| assert( |
| m_triangleVertexData.hasRoomFor(mapCounts.triangleVertexBufferCount)); |
| } |
| |
| void RenderContext::unmapResourceBuffers( |
| const ResourceAllocationCounts& mapCounts) |
| { |
| RIVE_PROF_SCOPE() |
| if (m_flushUniformData) |
| { |
| m_flushUniformData.unmapElements( |
| m_impl.get(), |
| &RenderContextImpl::unmapFlushUniformBuffer, |
| mapCounts.flushUniformBufferCount); |
| } |
| if (m_imageDrawUniformData) |
| { |
| m_imageDrawUniformData.unmapElements( |
| m_impl.get(), |
| &RenderContextImpl::unmapImageDrawUniformBuffer, |
| mapCounts.imageDrawUniformBufferCount); |
| } |
| if (m_pathData) |
| { |
| m_pathData.unmapElements(m_impl.get(), |
| &RenderContextImpl::unmapPathBuffer, |
| mapCounts.pathBufferCount); |
| } |
| if (m_paintData) |
| { |
| m_paintData.unmapElements(m_impl.get(), |
| &RenderContextImpl::unmapPaintBuffer, |
| mapCounts.paintBufferCount); |
| } |
| if (m_paintAuxData) |
| { |
| m_paintAuxData.unmapElements(m_impl.get(), |
| &RenderContextImpl::unmapPaintAuxBuffer, |
| mapCounts.paintAuxBufferCount); |
| } |
| if (m_contourData) |
| { |
| m_contourData.unmapElements(m_impl.get(), |
| &RenderContextImpl::unmapContourBuffer, |
| mapCounts.contourBufferCount); |
| } |
| if (m_gradSpanData) |
| { |
| m_gradSpanData.unmapElements(m_impl.get(), |
| &RenderContextImpl::unmapGradSpanBuffer, |
| mapCounts.gradSpanBufferCount); |
| } |
| if (m_tessSpanData) |
| { |
| m_tessSpanData.unmapElements( |
| m_impl.get(), |
| &RenderContextImpl::unmapTessVertexSpanBuffer, |
| mapCounts.tessSpanBufferCount); |
| } |
| if (m_triangleVertexData) |
| { |
| m_triangleVertexData.unmapElements( |
| m_impl.get(), |
| &RenderContextImpl::unmapTriangleVertexBuffer, |
| mapCounts.triangleVertexBufferCount); |
| } |
| } |
| |
| uint32_t RenderContext::incrementCoverageBufferPrefix( |
| bool* needsCoverageBufferClear) |
| { |
| RIVE_PROF_SCOPE() |
| assert(m_didBeginFrame); |
| assert(frameInterlockMode() == gpu::InterlockMode::clockwiseAtomic); |
| do |
| { |
| if (m_coverageBufferPrefix == 0) |
| { |
| // When the prefix wraps around to 0, we need to clear the coverage |
| // buffer because our shaders require coverageBufferPrefix to be |
| // monotonically increasing. |
| *needsCoverageBufferClear = true; |
| } |
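| // Bump the prefix by one full coverage range. If this wraps the prefix |
| // back to exactly 0, loop again so it stays nonzero and the clear flag |
| // gets set. |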
| m_coverageBufferPrefix += 1 << CLOCKWISE_COVERAGE_BIT_COUNT; |
| } while (m_coverageBufferPrefix == 0); |
| |
| return m_coverageBufferPrefix; |
| } |
| |
| uint32_t RenderContext::LogicalFlush::allocateMidpointFanTessVertices( |
| uint32_t count) |
| { |
| uint32_t location = m_midpointFanTessVertexIdx; |
| m_midpointFanTessVertexIdx += count; |
| assert(m_midpointFanTessVertexIdx <= m_midpointFanTessEndLocation); |
| return location; |
| } |
| |
| uint32_t RenderContext::LogicalFlush::allocateOuterCubicTessVertices( |
| uint32_t count) |
| { |
| uint32_t location = m_outerCubicTessVertexIdx; |
| m_outerCubicTessVertexIdx += count; |
| assert(m_outerCubicTessVertexIdx <= m_outerCubicTessEndLocation); |
| return location; |
| } |
| |
| uint32_t RenderContext::LogicalFlush::pushPath(const PathDraw* draw) |
| { |
| RIVE_PROF_SCOPE() |
| assert(m_hasDoneLayout); |
| |
| ++m_currentPathID; |
| assert(0 < m_currentPathID && m_currentPathID <= m_ctx->m_maxPathID); |
| |
| m_ctx->m_pathData.set_back(draw->matrix(), |
| draw->strokeRadius(), |
| draw->featherRadius(), |
| m_currentZIndex, |
| draw->atlasTransform(), |
| draw->coverageBufferRange()); |
| m_ctx->m_paintData.set_back(draw->drawContents(), |
| draw->paintType(), |
| draw->simplePaintValue(), |
| m_gradTextureLayout, |
| draw->clipID(), |
| draw->hasClipRect(), |
| draw->blendMode()); |
| m_ctx->m_paintAuxData.set_back(draw->matrix(), |
| draw->paintType(), |
| draw->simplePaintValue(), |
| draw->gradient(), |
| draw->imageTexture(), |
| draw->clipRectInverseMatrix(), |
| m_flushDesc.renderTarget, |
| m_ctx->platformFeatures()); |
| |
| assert(m_flushDesc.firstPath + m_currentPathID + 1 == |
| m_ctx->m_pathData.elementsWritten()); |
| assert(m_flushDesc.firstPaint + m_currentPathID + 1 == |
| m_ctx->m_paintData.elementsWritten()); |
| assert(m_flushDesc.firstPaintAux + m_currentPathID + 1 == |
| m_ctx->m_paintAuxData.elementsWritten()); |
| |
| return m_currentPathID; |
| } |
| |
| RenderContext::TessellationWriter::TessellationWriter( |
| LogicalFlush* flush, |
| uint32_t pathID, |
| gpu::ContourDirections contourDirections, |
| uint32_t forwardTessVertexCount, |
| uint32_t forwardTessLocation, |
| uint32_t mirroredTessVertexCount, |
| uint32_t mirroredTessLocation) : |
| m_flush(flush), |
| m_tessSpanData(m_flush->m_ctx->m_tessSpanData), |
| m_pathID(pathID), |
| m_contourDirections(contourDirections), |
| m_pathTessLocation(forwardTessLocation), |
| m_pathMirroredTessLocation(mirroredTessLocation) |
| { |
| RIVE_PROF_SCOPE() |
| RIVE_DEBUG_CODE(m_expectedPathTessEndLocation = |
| m_pathTessLocation + forwardTessVertexCount;) |
| RIVE_DEBUG_CODE(m_expectedPathMirroredTessEndLocation = |
| m_pathMirroredTessLocation - mirroredTessVertexCount;) |
| assert(m_flush->m_hasDoneLayout); |
| assert(m_flush->m_ctx->m_pathData.elementsWritten() > 0); |
| assert(forwardTessVertexCount == 0 || mirroredTessVertexCount == 0 || |
| forwardTessVertexCount == mirroredTessVertexCount); |
| assert(!gpu::ContourDirectionsAreDoubleSided(m_contourDirections) || |
| forwardTessVertexCount == mirroredTessVertexCount); |
| assert(m_pathTessLocation >= 0); |
| assert(m_pathMirroredTessLocation <= kMaxTessellationVertexCount); |
| assert(m_expectedPathTessEndLocation <= kMaxTessellationVertexCount); |
| assert(m_expectedPathMirroredTessEndLocation >= 0); |
| } |
| |
| RenderContext::TessellationWriter::~TessellationWriter() |
| { |
| assert(m_pathTessLocation == m_expectedPathTessEndLocation); |
| assert(m_pathMirroredTessLocation == m_expectedPathMirroredTessEndLocation); |
| } |
| |
| uint32_t RenderContext::LogicalFlush::pushContour(uint32_t pathID, |
| Vec2D midpoint, |
| bool isStroke, |
| bool closed, |
| uint32_t vertexIndex0) |
| { |
| RIVE_PROF_SCOPE() |
| assert(pathID != 0); |
| assert(isStroke || closed); |
| |
| if (isStroke) |
| { |
| midpoint.x = closed ? 1 : 0; |
| } |
| m_ctx->m_contourData.emplace_back(midpoint, pathID, vertexIndex0); |
| |
| ++m_currentContourID; |
| assert(0 < m_currentContourID && m_currentContourID <= gpu::kMaxContourID); |
| assert(m_flushDesc.firstContour + m_currentContourID == |
| m_ctx->m_contourData.elementsWritten()); |
| return m_currentContourID; |
| } |
| |
| uint32_t RenderContext::TessellationWriter::pushContour( |
| Vec2D midpoint, |
| bool isStroke, |
| bool closed, |
| uint32_t paddingVertexCount) |
| { |
| RIVE_PROF_SCOPE() |
| // The first curve of the contour will be pre-padded with |
| // 'paddingVertexCount' tessellation vertices, colocated at T=0. The caller |
| // must use this argument to align the end of the contour on a boundary of |
| // the patch size. (See math::padding_to_align_up().) |
| m_nextCubicPaddingVertexCount = paddingVertexCount; |
| |
| return m_flush->pushContour(m_pathID, |
| midpoint, |
| isStroke, |
| closed, |
| nextVertexIndex()); |
| } |
| |
| void RenderContext::TessellationWriter::pushCubic( |
| const Vec2D pts[4], |
| gpu::ContourDirections contourDirections, |
| Vec2D joinTangent, |
| uint32_t parametricSegmentCount, |
| uint32_t polarSegmentCount, |
| uint32_t joinSegmentCount, |
| uint32_t contourIDWithFlags) |
| { |
| RIVE_PROF_SCOPE() |
| assert(0 <= parametricSegmentCount && |
| parametricSegmentCount <= kMaxParametricSegments); |
| assert(0 <= polarSegmentCount && polarSegmentCount <= kMaxPolarSegments); |
| assert(joinSegmentCount > 0); |
| assert((contourIDWithFlags & CONTOUR_ID_MASK) == |
| (m_flush->m_currentContourID & CONTOUR_ID_MASK)); |
| // contourID can't be zero. |
| assert((contourIDWithFlags & CONTOUR_ID_MASK) != 0); |
| // contourID can't be out of range in the contour buffer. (Contour buffer |
| // indices are 1-based.) |
| assert((contourIDWithFlags & CONTOUR_ID_MASK) <= |
| m_flush->desc().contourCount); |
| |
| // Polar and parametric segments share the same beginning and ending |
| // vertices, so the merged *vertex* count is equal to the sum of polar and |
| // parametric *segment* counts. |
| uint32_t curveMergedVertexCount = |
| parametricSegmentCount + polarSegmentCount; |
| // -1 because the curve and join share an ending/beginning vertex. |
| uint32_t totalVertexCount = m_nextCubicPaddingVertexCount + |
| curveMergedVertexCount + joinSegmentCount - 1; |
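| // (e.g., hypothetically: 4 parametric + 3 polar segments merge into 7 |
| // vertices; a 2-segment join that shares a vertex with the curve brings |
| // the total to 7 + 2 - 1 = 8, plus any padding vertices.) |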
| |
| // Only the first curve of a contour gets padding vertices. |
| m_nextCubicPaddingVertexCount = 0; |
| |
| switch (contourDirections) |
| { |
| case gpu::ContourDirections::forward: |
| pushTessellationSpans(pts, |
| joinTangent, |
| totalVertexCount, |
| parametricSegmentCount, |
| polarSegmentCount, |
| joinSegmentCount, |
| contourIDWithFlags); |
| break; |
| case gpu::ContourDirections::reverse: |
| pushMirroredTessellationSpans(pts, |
| joinTangent, |
| totalVertexCount, |
| parametricSegmentCount, |
| polarSegmentCount, |
| joinSegmentCount, |
| contourIDWithFlags); |
| break; |
| case gpu::ContourDirections::reverseThenForward: |
| case gpu::ContourDirections::forwardThenReverse: |
| // m_pathTessLocation and m_pathMirroredTessLocation are already |
| // configured, so at this point we don't need to handle |
| // reverseThenForward or forwardThenReverse differently. |
| pushDoubleSidedTessellationSpans(pts, |
| joinTangent, |
| totalVertexCount, |
| parametricSegmentCount, |
| polarSegmentCount, |
| joinSegmentCount, |
| contourIDWithFlags); |
| break; |
| } |
| } |
| |
| RIVE_ALWAYS_INLINE void RenderContext::TessellationWriter:: |
| pushTessellationSpans(const Vec2D pts[4], |
| Vec2D joinTangent, |
| uint32_t totalVertexCount, |
| uint32_t parametricSegmentCount, |
| uint32_t polarSegmentCount, |
| uint32_t joinSegmentCount, |
| uint32_t contourIDWithFlags) |
| { |
| RIVE_PROF_SCOPE() |
| assert(totalVertexCount > 0); |
| |
| uint32_t y = m_pathTessLocation / kTessTextureWidth; |
| int32_t x0 = m_pathTessLocation % kTessTextureWidth; |
| int32_t x1 = x0 + totalVertexCount; |
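| // If the span crosses the right edge of the tess texture (x1 > width), it |
| // gets emitted once per row it touches; each wrapped copy starts behind |
| // the left edge so the portion clipped off the previous row reappears at |
| // the start of the next one. (e.g., hypothetically, with width W, a span |
| // at x0 = W - 3 spanning 8 vertices is emitted at (y, [W-3, W+5]) and |
| // again at (y+1, [-3, 5]).) |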
| for (;;) |
| { |
| m_tessSpanData.set_back(pts, |
| joinTangent, |
| static_cast<float>(y), |
| x0, |
| x1, |
| parametricSegmentCount, |
| polarSegmentCount, |
| joinSegmentCount, |
| contourIDWithFlags); |
| if (x1 > static_cast<int32_t>(kTessTextureWidth)) |
| { |
| // The span was too long to fit on the current line. Wrap and draw |
| // it again, this time behind the left edge of the texture so we |
| // capture what got clipped off last time. |
| ++y; |
| x0 -= kTessTextureWidth; |
| x1 -= kTessTextureWidth; |
| continue; |
| } |
| break; |
| } |
| assert(y == |
| (m_pathTessLocation + totalVertexCount - 1) / kTessTextureWidth); |
| |
| m_pathTessLocation += totalVertexCount; |
| assert(m_pathTessLocation <= m_expectedPathTessEndLocation); |
| } |
| |
| RIVE_ALWAYS_INLINE void RenderContext::TessellationWriter:: |
| pushMirroredTessellationSpans(const Vec2D pts[4], |
| Vec2D joinTangent, |
| uint32_t totalVertexCount, |
| uint32_t parametricSegmentCount, |
| uint32_t polarSegmentCount, |
| uint32_t joinSegmentCount, |
| uint32_t contourIDWithFlags) |
| { |
| assert(totalVertexCount > 0); |
| |
| uint32_t reflectionY = (m_pathMirroredTessLocation - 1) / kTessTextureWidth; |
| int32_t reflectionX0 = |
| (m_pathMirroredTessLocation - 1) % kTessTextureWidth + 1; |
| int32_t reflectionX1 = reflectionX0 - totalVertexCount; |
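| // m_pathMirroredTessLocation is an exclusive upper bound, so the |
| // "- 1 ... + 1" math places reflectionX0 just past the last mirrored |
| // vertex on its row; the mirrored span is then written right-to-left |
| // (reflectionX1 < reflectionX0). |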
| |
| for (;;) |
| { |
| m_tessSpanData.set_back(pts, |
| joinTangent, |
| static_cast<float>(reflectionY), |
| reflectionX0, |
| reflectionX1, |
| parametricSegmentCount, |
| polarSegmentCount, |
| joinSegmentCount, |
| contourIDWithFlags); |
| if (reflectionX1 < 0) |
| { |
| --reflectionY; |
| reflectionX0 += kTessTextureWidth; |
| reflectionX1 += kTessTextureWidth; |
| continue; |
| } |
| break; |
| } |
| |
| m_pathMirroredTessLocation -= totalVertexCount; |
| assert(m_pathMirroredTessLocation >= m_expectedPathMirroredTessEndLocation); |
| } |
| |
| RIVE_ALWAYS_INLINE void RenderContext::TessellationWriter:: |
| pushDoubleSidedTessellationSpans(const Vec2D pts[4], |
| Vec2D joinTangent, |
| uint32_t totalVertexCount, |
| uint32_t parametricSegmentCount, |
| uint32_t polarSegmentCount, |
| uint32_t joinSegmentCount, |
| uint32_t contourIDWithFlags) |
| { |
| assert(totalVertexCount > 0); |
| |
| int32_t y = m_pathTessLocation / kTessTextureWidth; |
| int32_t x0 = m_pathTessLocation % kTessTextureWidth; |
| int32_t x1 = x0 + totalVertexCount; |
| |
| uint32_t reflectionY = (m_pathMirroredTessLocation - 1) / kTessTextureWidth; |
| int32_t reflectionX0 = |
| (m_pathMirroredTessLocation - 1) % kTessTextureWidth + 1; |
| int32_t reflectionX1 = reflectionX0 - totalVertexCount; |
| |
| for (;;) |
| { |
| m_tessSpanData.set_back(pts, |
| joinTangent, |
| static_cast<float>(y), |
| x0, |
| x1, |
| static_cast<float>(reflectionY), |
| reflectionX0, |
| reflectionX1, |
| parametricSegmentCount, |
| polarSegmentCount, |
| joinSegmentCount, |
| contourIDWithFlags); |
| if (x1 > static_cast<int32_t>(kTessTextureWidth) || reflectionX1 < 0) |
| { |
| // Either the span or its reflection was too long to fit on the |
| // current line. Wrap and draw both of them again, this time beyond |
| // the opposite edge of the texture so we capture what got clipped |
| // off last time. |
| ++y; |
| x0 -= kTessTextureWidth; |
| x1 -= kTessTextureWidth; |
| |
| --reflectionY; |
| reflectionX0 += kTessTextureWidth; |
| reflectionX1 += kTessTextureWidth; |
| continue; |
| } |
| break; |
| } |
| |
| m_pathTessLocation += totalVertexCount; |
| assert(m_pathTessLocation <= m_expectedPathTessEndLocation); |
| |
| m_pathMirroredTessLocation -= totalVertexCount; |
| assert(m_pathMirroredTessLocation >= m_expectedPathMirroredTessEndLocation); |
| } |
| |
| void RenderContext::LogicalFlush::pushPaddingVertices(uint32_t count, |
| uint32_t tessLocation) |
| { |
| RIVE_PROF_SCOPE() |
| assert(m_hasDoneLayout); |
| assert(count > 0); |
| |
| constexpr static Vec2D kEmptyCubic[4]{}; |
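| // Write 'count' colocated vertices from a degenerate cubic at |
| // 'tessLocation'. These are padding vertices that keep the tessellation |
| // texture aligned on patch boundaries, so they use pathID 0 and an |
| // invalid contour ID. |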
| TessellationWriter(this, |
| /*pathID=*/0, |
| gpu::ContourDirections::forward, |
| count, |
| tessLocation) |
| .pushTessellationSpans(kEmptyCubic, |
| {0, 0}, |
| count, |
| 0, |
| 0, |
| 1, |
| INVALID_CONTOUR_ID_WITH_FLAGS); |
| } |
| |
| void RenderContext::LogicalFlush::pushMidpointFanDraw( |
| const PathDraw* draw, |
| gpu::DrawType drawType, |
| uint32_t tessVertexCount, |
| uint32_t tessLocation, |
| gpu::ShaderMiscFlags shaderMiscFlags) |
| { |
| RIVE_PROF_SCOPE() |
| assert(m_hasDoneLayout); |
| |
| uint32_t baseInstance = math::lossless_numeric_cast<uint32_t>( |
| tessLocation / kMidpointFanPatchSegmentSpan); |
| // flush() is responsible for alignment. |
| assert(baseInstance * kMidpointFanPatchSegmentSpan == tessLocation); |
| |
| uint32_t instanceCount = tessVertexCount / kMidpointFanPatchSegmentSpan; |
| // flush() is responsible for alignment. |
| assert(instanceCount * kMidpointFanPatchSegmentSpan == tessVertexCount); |
| |
| pushPathDraw(draw, drawType, shaderMiscFlags, instanceCount, baseInstance); |
| } |
| |
| void RenderContext::LogicalFlush::pushOuterCubicsDraw( |
| const PathDraw* draw, |
| gpu::DrawType drawType, |
| uint32_t tessVertexCount, |
| uint32_t tessLocation, |
| gpu::ShaderMiscFlags shaderMiscFlags) |
| { |
| RIVE_PROF_SCOPE() |
| assert(m_hasDoneLayout); |
| |
| uint32_t baseInstance = math::lossless_numeric_cast<uint32_t>( |
| tessLocation / kOuterCurvePatchSegmentSpan); |
| // flush() is responsible for alignment. |
| assert(baseInstance * kOuterCurvePatchSegmentSpan == tessLocation); |
| |
| uint32_t instanceCount = tessVertexCount / kOuterCurvePatchSegmentSpan; |
| // flush() is responsible for alignment. |
| assert(instanceCount * kOuterCurvePatchSegmentSpan == tessVertexCount); |
| |
| pushPathDraw(draw, drawType, shaderMiscFlags, instanceCount, baseInstance); |
| } |
| |
| size_t RenderContext::LogicalFlush::pushInteriorTriangulationDraw( |
| const PathDraw* draw, |
| uint32_t pathID, |
| gpu::WindingFaces windingFaces, |
| gpu::ShaderMiscFlags shaderMiscFlags) |
| { |
| RIVE_PROF_SCOPE() |
| assert(m_hasDoneLayout); |
| assert(pathID != 0); |
| |
| uint32_t baseVertex = math::lossless_numeric_cast<uint32_t>( |
| m_ctx->m_triangleVertexData.elementsWritten()); |
| size_t actualVertexCount = |
| draw->triangulator()->polysToTriangles(pathID, |
| windingFaces, |
| &m_ctx->m_triangleVertexData); |
| assert(baseVertex + actualVertexCount == |
| m_ctx->m_triangleVertexData.elementsWritten()); |
| if (actualVertexCount > 0) |
| { |
| pushPathDraw(draw, |
| DrawType::interiorTriangulation, |
| shaderMiscFlags, |
| math::lossless_numeric_cast<uint32_t>(actualVertexCount), |
| baseVertex); |
| } |
| return actualVertexCount; |
| } |
| |
| void RenderContext::LogicalFlush::pushAtlasBlit(PathDraw* draw, uint32_t pathID) |
| { |
| RIVE_PROF_SCOPE() |
| auto baseVertex = math::lossless_numeric_cast<uint32_t>( |
| m_ctx->m_triangleVertexData.elementsWritten()); |
| auto [l, t, r, b] = AABB(draw->pixelBounds()); |
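| // Emit the blit as two triangles covering the draw's pixel bounds: |
| // (l,b), (l,t), (r,b) and then (r,b), (l,t), (r,t). |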
| m_ctx->m_triangleVertexData.emplace_back(Vec2D{l, b}, 1, pathID); |
| m_ctx->m_triangleVertexData.emplace_back(Vec2D{l, t}, 1, pathID); |
| m_ctx->m_triangleVertexData.emplace_back(Vec2D{r, b}, 1, pathID); |
| m_ctx->m_triangleVertexData.emplace_back(Vec2D{r, b}, 1, pathID); |
| m_ctx->m_triangleVertexData.emplace_back(Vec2D{l, t}, 1, pathID); |
| m_ctx->m_triangleVertexData.emplace_back(Vec2D{r, t}, 1, pathID); |
| pushPathDraw(draw, |
| DrawType::atlasBlit, |
| gpu::ShaderMiscFlags::none, |
| 6, |
| baseVertex); |
| } |
| |
| void RenderContext::LogicalFlush::pushImageRectDraw(ImageRectDraw* draw) |
| { |
| RIVE_PROF_SCOPE() |
| assert(m_hasDoneLayout); |
| |
| // If we support image paints for paths, the client should use pushPath() |
| // with an image paint instead of calling this method. |
| assert(!m_ctx->frameSupportsImagePaintForPaths()); |
| |
| size_t imageDrawDataOffset = m_ctx->m_imageDrawUniformData.bytesWritten(); |
| m_ctx->m_imageDrawUniformData.emplace_back(draw->matrix(), |
| draw->opacity(), |
| draw->clipRectInverseMatrix(), |
| draw->clipID(), |
| draw->blendMode(), |
| m_currentZIndex); |
| |
| DrawBatch& batch = pushDraw(draw, |
| DrawType::imageRect, |
| gpu::ShaderMiscFlags::none, |
| PaintType::image, |
| 1, |
| 0); |
| batch.imageDrawDataOffset = |
| math::lossless_numeric_cast<uint32_t>(imageDrawDataOffset); |
| } |
| |
| void RenderContext::LogicalFlush::pushImageMeshDraw(ImageMeshDraw* draw) |
| { |
| RIVE_PROF_SCOPE() |
| assert(m_hasDoneLayout); |
| |
| size_t imageDrawDataOffset = m_ctx->m_imageDrawUniformData.bytesWritten(); |
| m_ctx->m_imageDrawUniformData.emplace_back(draw->matrix(), |
| draw->opacity(), |
| draw->clipRectInverseMatrix(), |
| draw->clipID(), |
| draw->blendMode(), |
| m_currentZIndex); |
| |
| DrawBatch& batch = pushDraw(draw, |
| DrawType::imageMesh, |
| gpu::ShaderMiscFlags::none, |
| PaintType::image, |
| draw->indexCount(), |
| 0); |
| batch.vertexBuffer = draw->vertexBuffer(); |
| batch.uvBuffer = draw->uvBuffer(); |
| batch.indexBuffer = draw->indexBuffer(); |
| batch.imageDrawDataOffset = |
| math::lossless_numeric_cast<uint32_t>(imageDrawDataOffset); |
| } |
| |
| void RenderContext::LogicalFlush::pushStencilClipResetDraw( |
| StencilClipReset* draw) |
| { |
| RIVE_PROF_SCOPE() |
| assert(m_hasDoneLayout); |
| |
| uint32_t baseVertex = math::lossless_numeric_cast<uint32_t>( |
| m_ctx->m_triangleVertexData.elementsWritten()); |
| auto [l, t, r, b] = AABB(getClipInfo(draw->previousClipID()).contentBounds); |
| uint32_t z = m_currentZIndex; |
| assert(AABB(l, t, r, b).round() == draw->pixelBounds()); |
| assert(draw->resourceCounts().maxTriangleVertexCount == 6); |
| assert(m_ctx->m_triangleVertexData.hasRoomFor(6)); |
| m_ctx->m_triangleVertexData.emplace_back(Vec2D{l, b}, 0, z); |
| m_ctx->m_triangleVertexData.emplace_back(Vec2D{l, t}, 0, z); |
| m_ctx->m_triangleVertexData.emplace_back(Vec2D{r, b}, 0, z); |
| m_ctx->m_triangleVertexData.emplace_back(Vec2D{r, b}, 0, z); |
| m_ctx->m_triangleVertexData.emplace_back(Vec2D{l, t}, 0, z); |
| m_ctx->m_triangleVertexData.emplace_back(Vec2D{r, t}, 0, z); |
| pushDraw(draw, |
| DrawType::msaaStencilClipReset, |
| gpu::ShaderMiscFlags::none, |
| PaintType::clipUpdate, |
| 6, |
| baseVertex); |
| } |
| |
| gpu::DrawBatch& RenderContext::LogicalFlush::pushPathDraw( |
| const PathDraw* draw, |
| DrawType drawType, |
| gpu::ShaderMiscFlags shaderMiscFlags, |
| uint32_t vertexCount, |
| uint32_t baseVertex) |
| { |
| RIVE_PROF_SCOPE() |
| assert(m_hasDoneLayout); |
| |
| DrawBatch& batch = pushDraw(draw, |
| drawType, |
| shaderMiscFlags, |
| draw->paintType(), |
| vertexCount, |
| baseVertex); |
| |
| auto pathShaderFeatures = gpu::ShaderFeatures::NONE; |
| if (draw->featherRadius() != 0 && |
| drawType != gpu::DrawType::interiorTriangulation && |
| drawType != gpu::DrawType::atlasBlit) |
| { |
| pathShaderFeatures |= ShaderFeatures::ENABLE_FEATHER; |
| } |
| if (draw->drawContents() & gpu::DrawContents::evenOddFill) |
| { |
| assert(!(shaderMiscFlags & gpu::ShaderMiscFlags::clockwiseFill)); |
| pathShaderFeatures |= ShaderFeatures::ENABLE_EVEN_ODD; |
| } |
| constexpr static gpu::DrawContents NESTED_CLIP_FLAGS = |
| gpu::DrawContents::clipUpdate | gpu::DrawContents::activeClip; |
| if ((draw->drawContents() & NESTED_CLIP_FLAGS) == NESTED_CLIP_FLAGS) |
| { |
| pathShaderFeatures |= ShaderFeatures::ENABLE_NESTED_CLIPPING; |
| } |
| batch.shaderFeatures |= |
| pathShaderFeatures & m_ctx->m_frameShaderFeaturesMask; |
| m_combinedShaderFeatures |= batch.shaderFeatures; |
| assert( |
| (batch.shaderFeatures & |
| gpu::ShaderFeaturesMaskFor(drawType, m_ctx->frameInterlockMode())) == |
| batch.shaderFeatures); |
| return batch; |
| } |
| |
| RIVE_ALWAYS_INLINE static bool can_combine_draw_contents( |
| gpu::InterlockMode interlockMode, |
| gpu::DrawContents batchContents, |
| const Draw* draw) |
| { |
| // Feathered fills should never attempt to combine with fills, strokes, or |
| // feathered strokes because they use a different DrawType. |
| assert((batchContents & gpu::DrawContents::featheredFill).bits() == |
| (draw->drawContents() & gpu::DrawContents::featheredFill).bits()); |
| |
| constexpr static auto ANY_FILL = gpu::DrawContents::clockwiseFill | |
| gpu::DrawContents::evenOddFill | |
| gpu::DrawContents::nonZeroFill; |
| // Raster ordering uses a different shader for clockwise fills, so we |
| // can't combine both legacy and clockwise fills into the same draw. |
| if (interlockMode == gpu::InterlockMode::rasterOrdering && |
| // Anything can be combined if either the existing batch or the new draw |
| // doesn't have fills yet. |
| (batchContents & ANY_FILL) && (draw->drawContents() & ANY_FILL)) |
| { |
| assert(!(draw->drawContents() & gpu::DrawContents::stroke)); |
| return (batchContents & gpu::DrawContents::clockwiseFill).bits() == |
| (draw->drawContents() & gpu::DrawContents::clockwiseFill).bits(); |
| } |
| return true; |
| } |
| |
| RIVE_ALWAYS_INLINE static bool can_combine_draw_images( |
| const Texture* currentDrawTexture, |
| const Texture* nextDrawTexture, |
| const ImageSampler currentImageSamplerKey, |
| const ImageSampler nextImageSamplerKey) |
| { |
| if (currentDrawTexture == nullptr || nextDrawTexture == nullptr) |
| { |
| // We can always combine two draws if one or both do not use an image |
| // paint. |
| return true; |
| } |
| // Since the image paint's texture must be bound to a specific slot, we |
| // can't combine draws that use different textures. |
| return (currentDrawTexture == nextDrawTexture) && |
| (currentImageSamplerKey == nextImageSamplerKey); |
| } |
| |
| gpu::DrawBatch& RenderContext::LogicalFlush::pushDraw( |
| const Draw* draw, |
| DrawType drawType, |
| gpu::ShaderMiscFlags shaderMiscFlags, |
| gpu::PaintType paintType, |
| uint32_t elementCount, |
| uint32_t baseElement) |
| { |
| RIVE_PROF_SCOPE() |
| assert(m_hasDoneLayout); |
| assert(elementCount > 0); |
| |
| bool canMergeWithPreviousBatch; |
| switch (drawType) |
| { |
| case DrawType::midpointFanPatches: |
| case DrawType::midpointFanCenterAAPatches: |
| case DrawType::outerCurvePatches: |
| case DrawType::interiorTriangulation: |
| case DrawType::atlasBlit: |
| case DrawType::msaaStrokes: |
| case DrawType::msaaMidpointFanBorrowedCoverage: |
| case DrawType::msaaMidpointFans: |
| case DrawType::msaaMidpointFanStencilReset: |
| case DrawType::msaaMidpointFanPathsStencil: |
| case DrawType::msaaMidpointFanPathsCover: |
| case DrawType::msaaOuterCubics: |
| case DrawType::msaaStencilClipReset: |
| if (!m_drawList.empty() && m_pendingBarriers == BarrierFlags::none) |
| { |
| const DrawBatch& currentBatch = m_drawList.tail(); |
| canMergeWithPreviousBatch = |
| currentBatch.drawType == drawType && |
| currentBatch.shaderMiscFlags == shaderMiscFlags && |
| can_combine_draw_contents(m_ctx->frameInterlockMode(), |
| currentBatch.drawContents, |
| draw) && |
| can_combine_draw_images(currentBatch.imageTexture, |
| draw->imageTexture(), |
| currentBatch.imageSampler, |
| draw->imageSampler()); |
| if (canMergeWithPreviousBatch && |
| currentBatch.baseElement + currentBatch.elementCount != |
| baseElement) |
| { |
| // In MSAA mode, multiple subpasses reference the same |
| // tessellation data. Although rare, this breaks the |
| // guarantee we have in other modes that mergeable batches |
| // will always have contiguous patches. |
| assert(m_ctx->frameInterlockMode() == |
| gpu::InterlockMode::msaa); |
| canMergeWithPreviousBatch = false; |
| } |
| break; |
| } |
| [[fallthrough]]; |
| |
| // Image draws can't be combined for now because they each have their |
| // own unique uniforms. |
| case DrawType::imageRect: |
| case DrawType::imageMesh: |
| case DrawType::renderPassInitialize: |
| case DrawType::renderPassResolve: |
| canMergeWithPreviousBatch = false; |
| break; |
| } |
| |
| DrawBatch* batch; |
| if (canMergeWithPreviousBatch) |
| { |
| batch = &m_drawList.tail(); |
| assert(m_pendingBarriers == BarrierFlags::none); |
| assert(batch->drawType == drawType); |
| assert(batch->shaderMiscFlags == shaderMiscFlags); |
| assert(batch->baseElement + batch->elementCount == baseElement); |
| batch->elementCount += elementCount; |
| } |
| else |
| { |
| batch = &m_drawList.emplace_back( |
| m_ctx->perFrameAllocator(), |
| drawType, |
| shaderMiscFlags, |
| elementCount, |
| baseElement, |
| draw->blendMode(), |
| draw->imageSampler(), |
| std::exchange(m_pendingBarriers, BarrierFlags::none)); |
| } |
| |
| // If the batch was merged into a previous one, this ensures it was a valid |
| // merge. |
| assert(batch->drawType == drawType); |
| assert(can_combine_draw_images(batch->imageTexture, |
| draw->imageTexture(), |
| batch->imageSampler, |
| draw->imageSampler())); |
| assert(m_pendingBarriers == BarrierFlags::none); |
| |
| auto shaderFeatures = ShaderFeatures::NONE; |
| if (draw->clipID() != 0) |
| { |
| shaderFeatures |= ShaderFeatures::ENABLE_CLIPPING; |
| } |
| if (draw->hasClipRect() && paintType != PaintType::clipUpdate) |
| { |
| shaderFeatures |= ShaderFeatures::ENABLE_CLIP_RECT; |
| } |
| if (paintType != PaintType::clipUpdate && |
| !(shaderMiscFlags & gpu::ShaderMiscFlags::borrowedCoveragePrepass)) |
| { |
| switch (draw->blendMode()) |
| { |
| case BlendMode::hue: |
| case BlendMode::saturation: |
| case BlendMode::color: |
| case BlendMode::luminosity: |
| shaderFeatures |= ShaderFeatures::ENABLE_HSL_BLEND_MODES; |
| [[fallthrough]]; |
| case BlendMode::screen: |
| case BlendMode::overlay: |
| case BlendMode::darken: |
| case BlendMode::lighten: |
| case BlendMode::colorDodge: |
| case BlendMode::colorBurn: |
| case BlendMode::hardLight: |
| case BlendMode::softLight: |
| case BlendMode::difference: |
| case BlendMode::exclusion: |
| case BlendMode::multiply: |
| shaderFeatures |= ShaderFeatures::ENABLE_ADVANCED_BLEND; |
| break; |
| case BlendMode::srcOver: |
| break; |
| } |
| } |
| batch->shaderFeatures |= shaderFeatures & m_ctx->m_frameShaderFeaturesMask; |
| m_combinedShaderFeatures |= batch->shaderFeatures; |
| assert( |
| (batch->shaderFeatures & |
| gpu::ShaderFeaturesMaskFor(drawType, m_ctx->frameInterlockMode())) == |
| batch->shaderFeatures); |
| |
| batch->drawContents |= draw->drawContents(); |
| |
| if (paintType == PaintType::image) |
| { |
| assert(draw->imageTexture() != nullptr); |
| if (batch->imageTexture == nullptr) |
| { |
| batch->imageTexture = draw->imageTexture(); |
| } |
| assert(batch->imageTexture == draw->imageTexture()); |
| } |
| |
| if (m_ctx->frameInterlockMode() == gpu::InterlockMode::msaa) |
| { |
| // msaa can't mix drawContents in a batch. |
| assert(batch->drawContents == draw->drawContents()); |
| // msaa doesn't mix src-over draws with advanced blend draws. |
| assert((batch->shaderFeatures & |
| gpu::ShaderFeatures::ENABLE_ADVANCED_BLEND) == |
| (draw->blendMode() != BlendMode::srcOver)); |
| // If using KHR_blend_equation_advanced, we can't mix blend modes in a |
| // batch. |
| assert(!m_ctx->platformFeatures().supportsBlendAdvancedKHR || |
| batch->firstBlendMode == draw->blendMode()); |
| if (draw->blendMode() != BlendMode::srcOver && |
| !m_ctx->platformFeatures().supportsBlendAdvancedCoherentKHR) |
| { |
| // An implementation-dependent barrier is required between |
| // overlapping draws. Add a "dstBlend" barrier and build up a list |
| // of "dstReads" for the batch. The dstRead list will be required in |
| // the event that the implementation has to handle dstReads by |
| // copying out a texture. |
| // |
| // (But if the draw already has a "nextDstRead" neighbor, do |
| // nothing. It means an earlier subpass will already issue the |
| // barrier and sync this region of the framebuffer. Since nothing |
| // that overlaps will be ordered between that first subpass and us, |
| // that barrier for the first subpass is all we need.) |
| if (draw->nextDstRead() == nullptr) |
| { |
| batch->barriers |= BarrierFlags::dstBlend; |
| batch->dstReadList = draw->addToDstReadList(batch->dstReadList); |
| } |
| } |
| } |
| |
| return *batch; |
| } |
| } // namespace rive::gpu |