Don't allocate the grad or tess textures until flush time We used to allocate the PLS gradient and tessellation textures preemptively, before we knew how large they needed to be (like other vertex resources). We didn't actually need to do this; we can just wait to allocate these textures until flush time when we know exactly how big they need to be. This reduces intermediate flushes since we won't ever run out of tessellation vertices or complex color ramps anymore (unless we exceed the absolute maximum supported number). This also required writing out the simple two-texel gradients to a side buffer, and then copying that side buffer into the gradient texture at flush time (instead of writing the simple ramps directly to the gradient texture to start with). This is overall better because now we only need one gradient texture instead of a ring of them, and we don't have to preserve the texture contents when we render the complex gradients. Diffs= 47c57eae9 Don't allocate the grad or tess textures until flush time (#5656) Co-authored-by: Chris Dalton <99840794+csmartdalton@users.noreply.github.com>
diff --git a/.rive_head b/.rive_head index 1f201a1..2e27078 100644 --- a/.rive_head +++ b/.rive_head
@@ -1 +1 @@ -bae1359bc3f2a516f6831a31a4afc549b137361b +47c57eae9e74fb65903a4d1546c6d577c6feca82
diff --git a/include/rive/pls/buffer_ring.hpp b/include/rive/pls/buffer_ring.hpp index d2996a5..e099a2e 100644 --- a/include/rive/pls/buffer_ring.hpp +++ b/include/rive/pls/buffer_ring.hpp
@@ -148,6 +148,27 @@ const size_t m_texelsPerItem; }; +// Buffer ring implementation for buffers that only exist on the CPU. +class CPUOnlyBufferRing : public BufferRingImpl +{ +public: + CPUOnlyBufferRing(size_t capacity, size_t itemSizeInBytes) : + BufferRingImpl(capacity, itemSizeInBytes) + { + for (int i = 0; i < kBufferRingSize; ++i) + m_cpuBuffers[i].reset(new char[capacity * itemSizeInBytes]); + } + + const void* submittedata() const { return m_cpuBuffers[submittedBufferIdx()].get(); } + +protected: + void* onMapBuffer(int bufferIdx) override { return m_cpuBuffers[bufferIdx].get(); } + void onUnmapAndSubmitBuffer(int bufferIdx, size_t bytesWritten) override {} + +private: + std::unique_ptr<char[]> m_cpuBuffers[kBufferRingSize]; +}; + // Wrapper for an abstract BufferRingImpl that supports mapping buffers, writing an array of items // of the same type, and submitting for rendering. //
diff --git a/include/rive/pls/d3d/pls_render_context_d3d.hpp b/include/rive/pls/d3d/pls_render_context_d3d.hpp index 1979daf..b0c7b1c 100644 --- a/include/rive/pls/d3d/pls_render_context_d3d.hpp +++ b/include/rive/pls/d3d/pls_render_context_d3d.hpp
@@ -50,25 +50,21 @@ size_t itemSizeInBytes) override; std::unique_ptr<TexelBufferRing> makeTexelBufferRing(TexelBufferRing::Format, - Renderable, size_t widthInItems, size_t height, size_t texelsPerItem, int textureIdx, TexelBufferRing::Filter) override; - std::unique_ptr<BufferRingImpl> makeUniformBufferRing(size_t capacity, - size_t itemSizeInBytes) override; + std::unique_ptr<BufferRingImpl> makePixelUnpackBufferRing(size_t capacity, + size_t itemSizeInBytes) override; + std::unique_ptr<BufferRingImpl> makeUniformBufferRing(size_t itemSizeInBytes) override; + + void allocateGradientTexture(size_t height) override; void allocateTessellationTexture(size_t height) override; - void onFlush(FlushType, - LoadAction, - size_t gradSpanCount, - size_t gradSpansHeight, - size_t tessVertexSpanCount, - size_t tessDataHeight, - bool needsClipBuffer) override; + void onFlush(const FlushDescriptor&) override; void setPipelineLayoutAndShaders(DrawType, const ShaderFeatures&); @@ -76,6 +72,11 @@ ComPtr<ID3D11Device> m_gpu; ComPtr<ID3D11DeviceContext> m_gpuContext; + + ComPtr<ID3D11Texture2D> m_gradTexture; + ComPtr<ID3D11ShaderResourceView> m_gradTextureSRV; + ComPtr<ID3D11RenderTargetView> m_gradTextureRTV; + ComPtr<ID3D11Texture2D> m_tessTexture; ComPtr<ID3D11ShaderResourceView> m_tessTextureSRV; ComPtr<ID3D11RenderTargetView> m_tessTextureRTV;
diff --git a/include/rive/pls/gl/pls_render_context_gl.hpp b/include/rive/pls/gl/pls_render_context_gl.hpp index 2098c47..4a37f2b 100644 --- a/include/rive/pls/gl/pls_render_context_gl.hpp +++ b/include/rive/pls/gl/pls_render_context_gl.hpp
@@ -129,25 +129,21 @@ size_t itemSizeInBytes) override; std::unique_ptr<TexelBufferRing> makeTexelBufferRing(TexelBufferRing::Format, - Renderable, size_t widthInItems, size_t height, size_t texelsPerItem, int textureIdx, TexelBufferRing::Filter) override; - std::unique_ptr<BufferRingImpl> makeUniformBufferRing(size_t capacity, - size_t sizeInBytes) override; + std::unique_ptr<BufferRingImpl> makePixelUnpackBufferRing(size_t capacity, + size_t itemSizeInBytes) override; + std::unique_ptr<BufferRingImpl> makeUniformBufferRing(size_t sizeInBytes) override; + + void allocateGradientTexture(size_t height) override; void allocateTessellationTexture(size_t height) override; - void onFlush(FlushType, - LoadAction, - size_t gradSpanCount, - size_t gradSpansHeight, - size_t tessVertexSpanCount, - size_t tessDataHeight, - bool needsClipBuffer) override; + void onFlush(const FlushDescriptor&) override; // GL state wrapping. void bindProgram(GLuint); @@ -164,6 +160,7 @@ GLuint m_colorRampProgram; GLuint m_colorRampVAO; GLuint m_colorRampFBO; + GLuint m_gradientTexture = 0; // Tessellation texture rendering. GLuint m_tessellateProgram;
diff --git a/include/rive/pls/metal/pls_render_context_metal.h b/include/rive/pls/metal/pls_render_context_metal.h index ef3ef48..6e37cc8 100644 --- a/include/rive/pls/metal/pls_render_context_metal.h +++ b/include/rive/pls/metal/pls_render_context_metal.h
@@ -34,16 +34,17 @@ size_t itemSizeInBytes) override; std::unique_ptr<TexelBufferRing> makeTexelBufferRing(TexelBufferRing::Format, - Renderable, size_t widthInItems, size_t height, size_t texelsPerItem, int textureIdx, TexelBufferRing::Filter) override; - std::unique_ptr<BufferRingImpl> makeUniformBufferRing(size_t capacity, - size_t itemSizeInBytes) override; + std::unique_ptr<BufferRingImpl> makePixelUnpackBufferRing(size_t capacity, + size_t itemSizeInBytes) override; + std::unique_ptr<BufferRingImpl> makeUniformBufferRing(size_t itemSizeInBytes) override; + void allocateGradientTexture(size_t height) override; void allocateTessellationTexture(size_t height) override; void onBeginFrame() override { lockNextBufferRingIndex(); } @@ -53,13 +54,7 @@ // next buffers in our rings. void lockNextBufferRingIndex(); - void onFlush(FlushType, - LoadAction, - size_t gradSpanCount, - size_t gradSpansHeight, - size_t tessVertexSpanCount, - size_t tessDataHeight, - bool needsClipBuffer) override; + void onFlush(const FlushDescriptor&) override; const id<MTLDevice> m_gpu; const id<MTLCommandQueue> m_queue; @@ -69,6 +64,7 @@ // Renders color ramps to the gradient texture. class ColorRampPipeline; std::unique_ptr<ColorRampPipeline> m_colorRampPipeline; + id<MTLTexture> m_gradientTexture = nullptr; // Renders tessellated vertices to the tessellation texture. class TessellatePipeline;
diff --git a/include/rive/pls/pls_render_context.hpp b/include/rive/pls/pls_render_context.hpp index f13c98a..065b4cd 100644 --- a/include/rive/pls/pls_render_context.hpp +++ b/include/rive/pls/pls_render_context.hpp
@@ -331,23 +331,19 @@ virtual std::unique_ptr<BufferRingImpl> makeVertexBufferRing(size_t capacity, size_t itemSizeInBytes) = 0; - enum class Renderable : bool - { - no, - yes - }; - virtual std::unique_ptr<TexelBufferRing> makeTexelBufferRing(TexelBufferRing::Format, - Renderable, size_t widthInItems, size_t height, size_t texelsPerItem, int textureIdx, TexelBufferRing::Filter) = 0; - virtual std::unique_ptr<BufferRingImpl> makeUniformBufferRing(size_t capacity, - size_t sizeInBytes) = 0; + virtual std::unique_ptr<BufferRingImpl> makePixelUnpackBufferRing(size_t capacity, + size_t itemSizeInBytes) = 0; + virtual std::unique_ptr<BufferRingImpl> makeUniformBufferRing(size_t sizeInBytes) = 0; + + virtual void allocateGradientTexture(size_t height) = 0; virtual void allocateTessellationTexture(size_t height) = 0; const TexelBufferRing* pathBufferRing() @@ -358,17 +354,15 @@ { return static_cast<const TexelBufferRing*>(m_contourBuffer.impl()); } - const TexelBufferRing* gradTexelBufferRing() const + const BufferRingImpl* simpleColorRampsBufferRing() const { - return static_cast<const TexelBufferRing*>(m_gradTexelBuffer.impl()); + return m_simpleColorRampsBuffer.impl(); } const BufferRingImpl* gradSpanBufferRing() const { return m_gradSpanBuffer.impl(); } const BufferRingImpl* tessSpanBufferRing() { return m_tessSpanBuffer.impl(); } const BufferRingImpl* triangleBufferRing() { return m_triangleBuffer.impl(); } const BufferRingImpl* uniformBufferRing() const { return m_uniformBuffer.impl(); } - size_t gradTextureRowsForSimpleRamps() const { return m_gradTextureRowsForSimpleRamps; } - virtual void onBeginFrame() {} // Indicates how much blendMode support will be needed in the "uber" draw shader. 
@@ -413,13 +407,21 @@ } fragmentFeatures; }; - virtual void onFlush(FlushType, - LoadAction, - size_t gradSpanCount, - size_t gradSpansHeight, - size_t tessVertexSpanCount, - size_t tessDataHeight, - bool needsClipBuffer) = 0; + struct FlushDescriptor + { + FlushType flushType; + LoadAction loadAction; + size_t complexGradSpanCount; + size_t tessVertexSpanCount; + uint16_t simpleGradTexelsWidth; + uint16_t simpleGradTexelsHeight; + uint32_t complexGradRowsTop; + uint32_t complexGradRowsHeight; + uint32_t tessDataHeight; + bool needsClipBuffer; + }; + + virtual void onFlush(const FlushDescriptor&) = 0; const PlatformFeatures m_platformFeatures; const size_t m_maxPathID; @@ -539,14 +541,19 @@ // Capacities of all our GPU resource allocations. struct GPUResourceLimits { + // Resources allocated at the beginning of a frame (before we actually know how big they + // will need to be). size_t maxPathID; size_t maxContourID; size_t maxSimpleGradients; - size_t maxComplexGradients; size_t maxComplexGradientSpans; size_t maxTessellationSpans; - size_t maxTessellationVertices; - size_t maxTriangleVertices; + + // Resources allocated at flush time (after we already know exactly how big they need to + // be). 
+ size_t triangleVertexBufferSize; + size_t gradientTextureHeight; + size_t tessellationTextureHeight; // "*this = max(*this, other)" void accumulateMax(const GPUResourceLimits& other) @@ -554,13 +561,14 @@ maxPathID = std::max(maxPathID, other.maxPathID); maxContourID = std::max(maxContourID, other.maxContourID); maxSimpleGradients = std::max(maxSimpleGradients, other.maxSimpleGradients); - maxComplexGradients = std::max(maxComplexGradients, other.maxComplexGradients); maxComplexGradientSpans = std::max(maxComplexGradientSpans, other.maxComplexGradientSpans); maxTessellationSpans = std::max(maxTessellationSpans, other.maxTessellationSpans); - maxTessellationVertices = - std::max(maxTessellationVertices, other.maxTessellationVertices); - maxTriangleVertices = std::max(maxTriangleVertices, other.maxTriangleVertices); + triangleVertexBufferSize = + std::max(triangleVertexBufferSize, other.triangleVertexBufferSize); + gradientTextureHeight = std::max(gradientTextureHeight, other.gradientTextureHeight); + tessellationTextureHeight = + std::max(tessellationTextureHeight, other.tessellationTextureHeight); static_assert(sizeof(*this) == sizeof(size_t) * 8); // Make sure we got every field. 
} @@ -575,19 +583,21 @@ scaled.maxContourID = static_cast<double>(maxContourID) * scaleFactor; if (maxSimpleGradients > threshold.maxSimpleGradients) scaled.maxSimpleGradients = static_cast<double>(maxSimpleGradients) * scaleFactor; - if (maxComplexGradients > threshold.maxComplexGradients) - scaled.maxComplexGradients = static_cast<double>(maxComplexGradients) * scaleFactor; if (maxComplexGradientSpans > threshold.maxComplexGradientSpans) scaled.maxComplexGradientSpans = static_cast<double>(maxComplexGradientSpans) * scaleFactor; if (maxTessellationSpans > threshold.maxTessellationSpans) scaled.maxTessellationSpans = static_cast<double>(maxTessellationSpans) * scaleFactor; - if (maxTessellationVertices > threshold.maxTessellationVertices) - scaled.maxTessellationVertices = - static_cast<double>(maxTessellationVertices) * scaleFactor; - if (maxTriangleVertices > threshold.maxTriangleVertices) - scaled.maxTriangleVertices = static_cast<double>(maxTriangleVertices) * scaleFactor; + if (triangleVertexBufferSize > threshold.triangleVertexBufferSize) + scaled.triangleVertexBufferSize = + static_cast<double>(triangleVertexBufferSize) * scaleFactor; + if (gradientTextureHeight > threshold.gradientTextureHeight) + scaled.gradientTextureHeight = + static_cast<double>(gradientTextureHeight) * scaleFactor; + if (tessellationTextureHeight > threshold.tessellationTextureHeight) + scaled.tessellationTextureHeight = + static_cast<double>(tessellationTextureHeight) * scaleFactor; static_assert(sizeof(*this) == sizeof(size_t) * 8); // Make sure we got every field. return scaled; } @@ -597,6 +607,18 @@ { return makeScaledIfLarger(GPUResourceLimits{}, scaleFactor); } + + // The resources we allocate at flush time don't need to grow preemptively, since we don't + // have to allocate them until we know exactly how big they need to be. This method provides + // a way to reset them to zero, thus preventing them from growing preemptively. 
+ GPUResourceLimits resetFlushTimeLimits() const + { + GPUResourceLimits noFlushTimeLimits = *this; + noFlushTimeLimits.triangleVertexBufferSize = 0; + noFlushTimeLimits.gradientTextureHeight = 0; + noFlushTimeLimits.tessellationTextureHeight = 0; + return noFlushTimeLimits; + } }; // Reallocate any GPU resource whose size in 'targetUsage' is larger than its size in @@ -633,15 +655,15 @@ BufferRing<PathData> m_pathBuffer; BufferRing<ContourData> m_contourBuffer; - BufferRing<TwoTexelRamp> m_gradTexelBuffer; // Simple gradients get written by the CPU. - BufferRing<GradientSpan> m_gradSpanBuffer; // Complex gradients get rendered by the GPU. + BufferRing<TwoTexelRamp> m_simpleColorRampsBuffer; // Simple gradients get written by the CPU. + BufferRing<GradientSpan> m_gradSpanBuffer; // Complex gradients get rendered by the GPU. BufferRing<TessVertexSpan> m_tessSpanBuffer; BufferRing<TriangleVertex> m_triangleBuffer; BufferRing<FlushUniforms> m_uniformBuffer; // How many rows of the gradient texture are dedicated to simple (two-texel) ramps? // This is also the y-coordinate at which the complex color ramps begin. - size_t m_gradTextureRowsForSimpleRamps = 0; + size_t m_reservedGradTextureRowsForSimpleRamps = 0; // Per-frame state. FrameDescriptor m_frameDescriptor;
diff --git a/renderer/d3d/pls_render_context_d3d.cpp b/renderer/d3d/pls_render_context_d3d.cpp index 1c9f526..7f1ef56 100644 --- a/renderer/d3d/pls_render_context_d3d.cpp +++ b/renderer/d3d/pls_render_context_d3d.cpp
@@ -393,12 +393,19 @@ D3D11_BIND_VERTEX_BUFFER); } -std::unique_ptr<BufferRingImpl> PLSRenderContextD3D::makeUniformBufferRing(size_t capacity, - size_t itemSizeInBytes) +std::unique_ptr<BufferRingImpl> PLSRenderContextD3D::makePixelUnpackBufferRing( + size_t capacity, + size_t itemSizeInBytes) +{ + // It appears impossible to update a D3D texture from a GPU buffer. + return std::make_unique<CPUOnlyBufferRing>(capacity, itemSizeInBytes); +} + +std::unique_ptr<BufferRingImpl> PLSRenderContextD3D::makeUniformBufferRing(size_t itemSizeInBytes) { return std::make_unique<BufferRingD3D>(m_gpu.Get(), m_gpuContext, - capacity, + 1, itemSizeInBytes, D3D11_BIND_CONSTANT_BUFFER); } @@ -409,7 +416,6 @@ TexelBufferD3D(ID3D11Device* gpu, ComPtr<ID3D11DeviceContext> gpuContext, Format format, - bool renderable, size_t widthInItems, size_t height, size_t texelsPerItem) : @@ -417,27 +423,15 @@ { DXGI_FORMAT formatD3D = d3d_format(format); UINT bindFlags = D3D11_BIND_SHADER_RESOURCE; - if (renderable) - { - bindFlags |= D3D11_BIND_RENDER_TARGET; - } for (size_t i = 0; i < kBufferRingSize; ++i) { m_textures[i] = make_simple_2d_texture(gpu, formatD3D, widthInTexels(), height, bindFlags); m_srvs[i] = make_simple_2d_srv(gpu, m_textures[i].Get(), formatD3D); - if (renderable) - { - m_rtvs[i] = make_simple_2d_rtv(gpu, m_textures[i].Get(), formatD3D); - } } } ID3D11ShaderResourceView* submittedSRV() const { return m_srvs[submittedBufferIdx()].Get(); } - ID3D11RenderTargetView* submittedRenderTargetView() const - { - return m_rtvs[submittedBufferIdx()].Get(); - } private: void submitTexels(int textureIdx, size_t updateWidthInTexels, size_t updateHeight) override @@ -460,12 +454,10 @@ ComPtr<ID3D11DeviceContext> m_gpuContext; ComPtr<ID3D11Texture2D> m_textures[kBufferRingSize]; ComPtr<ID3D11ShaderResourceView> m_srvs[kBufferRingSize]; - ComPtr<ID3D11RenderTargetView> m_rtvs[kBufferRingSize]; }; std::unique_ptr<TexelBufferRing> PLSRenderContextD3D::makeTexelBufferRing( TexelBufferRing::Format 
format, - Renderable renderable, size_t widthInItems, size_t height, size_t texelsPerItem, @@ -475,7 +467,6 @@ return std::make_unique<TexelBufferD3D>(m_gpu.Get(), m_gpuContext, format, - renderable == Renderable::yes, widthInItems, height, texelsPerItem); @@ -524,6 +515,19 @@ return rcp(new PLSRenderTargetD3D(m_gpu.Get(), width, height)); } +void PLSRenderContextD3D::allocateGradientTexture(size_t height) +{ + m_gradTexture = make_simple_2d_texture(m_gpu.Get(), + DXGI_FORMAT_R8G8B8A8_UNORM, + kGradTextureWidth, + height, + D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE); + m_gradTextureSRV = + make_simple_2d_srv(m_gpu.Get(), m_gradTexture.Get(), DXGI_FORMAT_R8G8B8A8_UNORM); + m_gradTextureRTV = + make_simple_2d_rtv(m_gpu.Get(), m_gradTexture.Get(), DXGI_FORMAT_R8G8B8A8_UNORM); +} + void PLSRenderContextD3D::allocateTessellationTexture(size_t height) { m_tessTexture = make_simple_2d_texture(m_gpu.Get(), @@ -666,31 +670,20 @@ return static_cast<const TexelBufferD3D*>(texelBufferRing)->submittedSRV(); } -static ID3D11RenderTargetView* submitted_rtv(const TexelBufferRing* texelBufferRing) -{ - return static_cast<const TexelBufferD3D*>(texelBufferRing)->submittedRenderTargetView(); -} - -void PLSRenderContextD3D::onFlush(FlushType flushType, - LoadAction loadAction, - size_t gradSpanCount, - size_t gradSpansHeight, - size_t tessVertexSpanCount, - size_t tessDataHeight, - bool needsClipBuffer) +void PLSRenderContextD3D::onFlush(const FlushDescriptor& desc) { auto renderTarget = static_cast<const PLSRenderTargetD3D*>(frameDescriptor().renderTarget.get()); constexpr static UINT kZero[4]{}; - if (loadAction == LoadAction::clear) + if (desc.loadAction == LoadAction::clear) { float clearColor4f[4]; UnpackColorToRGBA32F(frameDescriptor().clearColor, clearColor4f); m_gpuContext->ClearUnorderedAccessViewFloat(renderTarget->m_targetUAV.Get(), clearColor4f); } m_gpuContext->ClearUnorderedAccessViewUint(renderTarget->m_coverageUAV.Get(), kZero); - if (needsClipBuffer) + if 
(desc.needsClipBuffer) { m_gpuContext->ClearUnorderedAccessViewUint(renderTarget->m_clipUAV.Get(), kZero); } @@ -699,7 +692,7 @@ m_gpuContext->VSSetConstantBuffers(0, std::size(cbuffers), cbuffers); // Render the complex color ramps to the gradient texture. - if (gradSpanCount > 0) + if (desc.complexGradSpanCount > 0) { ID3D11Buffer* gradSpanBuffer = submitted_buffer(gradSpanBufferRing()); UINT gradStride = sizeof(GradientSpan); @@ -711,9 +704,9 @@ m_gpuContext->VSSetShader(m_colorRampVertexShader.Get(), NULL, 0); D3D11_VIEWPORT viewport = {0, - static_cast<float>(gradTextureRowsForSimpleRamps()), + static_cast<float>(desc.complexGradRowsTop), static_cast<float>(kGradTextureWidth), - static_cast<float>(gradSpansHeight), + static_cast<float>(desc.complexGradRowsHeight), 0, 1}; m_gpuContext->RSSetViewports(1, &viewport); @@ -721,14 +714,29 @@ m_gpuContext->PSSetShaderResources(0, 0, NULL); m_gpuContext->PSSetShader(m_colorRampPixelShader.Get(), NULL, 0); - ID3D11RenderTargetView* rtv = submitted_rtv(gradTexelBufferRing()); - m_gpuContext->OMSetRenderTargets(1, &rtv, NULL); + m_gpuContext->OMSetRenderTargets(1, m_gradTextureRTV.GetAddressOf(), NULL); - m_gpuContext->DrawInstanced(4, gradSpanCount, 0, 0); + m_gpuContext->DrawInstanced(4, desc.complexGradSpanCount, 0, 0); + } + + // Copy the simple color ramps to the gradient texture. + if (desc.simpleGradTexelsHeight > 0) + { + D3D11_BOX box; + box.left = 0; + box.right = desc.simpleGradTexelsWidth; + box.top = 0; + box.bottom = desc.simpleGradTexelsHeight; + box.front = 0; + box.back = 1; + const void* data = + static_cast<const CPUOnlyBufferRing*>(simpleColorRampsBufferRing())->submittedata(); + m_gpuContext + ->UpdateSubresource(m_gradTexture.Get(), 0, &box, data, kGradTextureWidth * 4, 0); } // Tessellate all curves into vertices in the tessellation texture. 
- if (tessVertexSpanCount > 0) + if (desc.tessVertexSpanCount > 0) { ID3D11Buffer* tessSpanBuffer = submitted_buffer(tessSpanBufferRing()); UINT tessStride = sizeof(TessVertexSpan); @@ -746,8 +754,12 @@ static_assert(kContourTextureIdx == 2); m_gpuContext->VSSetShaderResources(0, std::size(vsTextureViews), vsTextureViews); - D3D11_VIEWPORT viewport = - {0, 0, static_cast<float>(kTessTextureWidth), static_cast<float>(tessDataHeight), 0, 1}; + D3D11_VIEWPORT viewport = {0, + 0, + static_cast<float>(kTessTextureWidth), + static_cast<float>(desc.tessDataHeight), + 0, + 1}; m_gpuContext->RSSetViewports(1, &viewport); m_gpuContext->PSSetShaderResources(0, 0, NULL); @@ -756,7 +768,7 @@ m_gpuContext->OMSetRenderTargets(1, m_tessTextureRTV.GetAddressOf(), NULL); // Draw two instances per TessVertexSpan: one normal and one optional reflection. - m_gpuContext->DrawInstanced(4, tessVertexSpanCount * 2, 0, 0); + m_gpuContext->DrawInstanced(4, desc.tessVertexSpanCount * 2, 0, 0); if (m_isIntel) { @@ -813,8 +825,7 @@ m_gpuContext->RSSetState(m_debugWireframeState.Get()); } - ID3D11ShaderResourceView* gradTextureView = submitted_srv(gradTexelBufferRing()); - m_gpuContext->PSSetShaderResources(kGradTextureIdx, 1, &gradTextureView); + m_gpuContext->PSSetShaderResources(kGradTextureIdx, 1, m_gradTextureSRV.GetAddressOf()); for (const Draw& draw : m_drawList) {
diff --git a/renderer/gl/buffer_ring_gl.cpp b/renderer/gl/buffer_ring_gl.cpp index cc962dd..53909d6 100644 --- a/renderer/gl/buffer_ring_gl.cpp +++ b/renderer/gl/buffer_ring_gl.cpp
@@ -111,6 +111,7 @@ glBindTexture(GL_TEXTURE_2D, m_ids[textureIdx]); if (updateWidthInTexels > 0 && updateHeight > 0) { + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); glTexSubImage2D(GL_TEXTURE_2D, 0, 0,
diff --git a/renderer/gl/pls_render_context_gl.cpp b/renderer/gl/pls_render_context_gl.cpp index 4619613..48445dd 100644 --- a/renderer/gl/pls_render_context_gl.cpp +++ b/renderer/gl/pls_render_context_gl.cpp
@@ -180,11 +180,12 @@ glDeleteProgram(m_colorRampProgram); glDeleteVertexArrays(1, &m_colorRampVAO); glDeleteFramebuffers(1, &m_colorRampFBO); + glDeleteTextures(1, &m_gradientTexture); glDeleteProgram(m_tessellateProgram); glDeleteVertexArrays(1, &m_tessellateVAO); - glDeleteTextures(1, &m_tessVertexTexture); glDeleteFramebuffers(1, &m_tessellateFBO); + glDeleteTextures(1, &m_tessVertexTexture); glDeleteVertexArrays(1, &m_drawVAO); glDeleteBuffers(1, &m_patchVerticesBuffer); @@ -199,7 +200,6 @@ std::unique_ptr<TexelBufferRing> PLSRenderContextGL::makeTexelBufferRing( TexelBufferRing::Format format, - Renderable, size_t widthInItems, size_t height, size_t texelsPerItem, @@ -214,10 +214,37 @@ filter); } -std::unique_ptr<BufferRingImpl> PLSRenderContextGL::makeUniformBufferRing(size_t capacity, - size_t sizeInBytes) +std::unique_ptr<BufferRingImpl> PLSRenderContextGL::makePixelUnpackBufferRing( + size_t capacity, + size_t itemSizeInBytes) { - return std::make_unique<BufferGL>(GL_UNIFORM_BUFFER, capacity, sizeInBytes); + return std::make_unique<BufferGL>(GL_PIXEL_UNPACK_BUFFER, capacity, itemSizeInBytes); +} + +std::unique_ptr<BufferRingImpl> PLSRenderContextGL::makeUniformBufferRing(size_t sizeInBytes) +{ + return std::make_unique<BufferGL>(GL_UNIFORM_BUFFER, 1, sizeInBytes); +} + +void PLSRenderContextGL::allocateGradientTexture(size_t height) +{ + glDeleteTextures(1, &m_gradientTexture); + + glGenTextures(1, &m_gradientTexture); + glActiveTexture(GL_TEXTURE0 + kGLTexIdxOffset + kGradTextureIdx); + glBindTexture(GL_TEXTURE_2D, m_gradientTexture); + glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA8, kGradTextureWidth, height); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + + glBindFramebuffer(GL_FRAMEBUFFER, m_colorRampFBO); + 
glFramebufferTexture2D(GL_FRAMEBUFFER, + GL_COLOR_ATTACHMENT0, + GL_TEXTURE_2D, + m_gradientTexture, + 0); } void PLSRenderContextGL::allocateTessellationTexture(size_t height) @@ -358,41 +385,43 @@ return static_cast<const BufferGL*>(bufferRing)->submittedBufferID(); } -static GLuint gl_texture_id(const TexelBufferRing* texelBufferRing) -{ - return static_cast<const TexelBufferGL*>(texelBufferRing)->submittedTextureID(); -} - -void PLSRenderContextGL::onFlush(FlushType flushType, - LoadAction loadAction, - size_t gradSpanCount, - size_t gradSpansHeight, - size_t tessVertexSpanCount, - size_t tessDataHeight, - bool needsClipBuffer) +void PLSRenderContextGL::onFlush(const FlushDescriptor& desc) { // All programs use the same set of per-flush uniforms. glBindBufferBase(GL_UNIFORM_BUFFER, 0, gl_buffer_id(uniformBufferRing())); - // Render the complex color ramps to the gradient texture. - if (gradSpanCount > 0) + // Render the complex color ramps into the gradient texture. + if (desc.complexGradSpanCount > 0) { glBindBuffer(GL_ARRAY_BUFFER, gl_buffer_id(gradSpanBufferRing())); bindVAO(m_colorRampVAO); glVertexAttribIPointer(0, 4, GL_UNSIGNED_INT, 0, nullptr); - glViewport(0, gradTextureRowsForSimpleRamps(), kGradTextureWidth, gradSpansHeight); + glViewport(0, desc.complexGradRowsTop, kGradTextureWidth, desc.complexGradRowsHeight); glBindFramebuffer(GL_FRAMEBUFFER, m_colorRampFBO); - glFramebufferTexture2D(GL_FRAMEBUFFER, - GL_COLOR_ATTACHMENT0, - GL_TEXTURE_2D, - gl_texture_id(gradTexelBufferRing()), - 0); bindProgram(m_colorRampProgram); - glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, gradSpanCount); + GLenum colorAttachment0 = GL_COLOR_ATTACHMENT0; + glInvalidateFramebuffer(GL_FRAMEBUFFER, 1, &colorAttachment0); + glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, desc.complexGradSpanCount); + } + + // Copy the simple color ramps to the gradient texture. 
+ if (desc.simpleGradTexelsHeight > 0) + { + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, gl_buffer_id(simpleColorRampsBufferRing())); + glActiveTexture(GL_TEXTURE0 + kGLTexIdxOffset + kGradTextureIdx); + glTexSubImage2D(GL_TEXTURE_2D, + 0, + 0, + 0, + desc.simpleGradTexelsWidth, + desc.simpleGradTexelsHeight, + GL_RGBA, + GL_UNSIGNED_BYTE, + nullptr); } // Tessellate all curves into vertices in the tessellation texture. - if (tessVertexSpanCount > 0) + if (desc.tessVertexSpanCount > 0) { glBindBuffer(GL_ARRAY_BUFFER, gl_buffer_id(tessSpanBufferRing())); bindVAO(m_tessellateVAO); @@ -410,11 +439,13 @@ GL_UNSIGNED_INT, sizeof(TessVertexSpan), reinterpret_cast<const void*>(offsetof(TessVertexSpan, x0x1))); - glViewport(0, 0, kTessTextureWidth, tessDataHeight); + glViewport(0, 0, kTessTextureWidth, desc.tessDataHeight); glBindFramebuffer(GL_FRAMEBUFFER, m_tessellateFBO); bindProgram(m_tessellateProgram); + GLenum colorAttachment0 = GL_COLOR_ATTACHMENT0; + glInvalidateFramebuffer(GL_FRAMEBUFFER, 1, &colorAttachment0); // Draw two instances per TessVertexSpan: one normal and one optional reflection. - glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, tessVertexSpanCount * 2); + glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, desc.tessVertexSpanCount * 2); } // Compile the draw programs before activating pixel local storage. @@ -446,7 +477,10 @@ } #endif - m_plsImpl->activatePixelLocalStorage(this, renderTarget(), loadAction, needsClipBuffer); + m_plsImpl->activatePixelLocalStorage(this, + renderTarget(), + desc.loadAction, + desc.needsClipBuffer); // Execute the DrawList. for (const Draw& draw : m_drawList)
diff --git a/renderer/metal/pls_render_context_metal.mm b/renderer/metal/pls_render_context_metal.mm index 97a407e..cbf5479 100644 --- a/renderer/metal/pls_render_context_metal.mm +++ b/renderer/metal/pls_render_context_metal.mm
@@ -246,7 +246,6 @@ std::unique_ptr<TexelBufferRing> PLSRenderContextMetal::makeTexelBufferRing( TexelBufferRing::Format format, - Renderable, size_t widthInItems, size_t height, size_t texelsPerItem, @@ -256,12 +255,30 @@ return std::make_unique<TexelBufferMetal>(m_gpu, format, widthInItems, height, texelsPerItem); } -std::unique_ptr<BufferRingImpl> PLSRenderContextMetal::makeUniformBufferRing(size_t capacity, - size_t itemSizeInBytes) +std::unique_ptr<BufferRingImpl> PLSRenderContextMetal::makePixelUnpackBufferRing( + size_t capacity, size_t itemSizeInBytes) { return std::make_unique<BufferMetal>(m_gpu, capacity, itemSizeInBytes); } +std::unique_ptr<BufferRingImpl> PLSRenderContextMetal::makeUniformBufferRing(size_t itemSizeInBytes) +{ + return std::make_unique<BufferMetal>(m_gpu, 1, itemSizeInBytes); +} + +void PLSRenderContextMetal::allocateGradientTexture(size_t height) +{ + MTLTextureDescriptor* desc = [[MTLTextureDescriptor alloc] init]; + desc.pixelFormat = MTLPixelFormatRGBA8Unorm; + desc.width = kGradTextureWidth; + desc.height = height; + desc.usage = MTLTextureUsageRenderTarget | MTLTextureUsageShaderRead; + desc.textureType = MTLTextureType2D; + desc.mipmapLevelCount = 1; + desc.storageMode = MTLStorageModePrivate; + m_gradientTexture = [m_gpu newTextureWithDescriptor:desc]; +} + void PLSRenderContextMetal::allocateTessellationTexture(size_t height) { MTLTextureDescriptor* desc = [[MTLTextureDescriptor alloc] init]; @@ -293,24 +310,18 @@ return static_cast<const TexelBufferMetal*>(texelBufferRing)->submittedTexture(); } -void PLSRenderContextMetal::onFlush(FlushType flushType, - LoadAction loadAction, - size_t gradSpanCount, - size_t gradSpansHeight, - size_t tessVertexSpanCount, - size_t tessDataHeight, - bool needsClipBuffer) +void PLSRenderContextMetal::onFlush(const FlushDescriptor& desc) { auto* renderTarget = static_cast<const PLSRenderTargetMetal*>(frameDescriptor().renderTarget.get()); id<MTLCommandBuffer> commandBuffer = [m_queue 
commandBuffer]; // Render the complex color ramps to the gradient texture. - if (gradSpanCount > 0) + if (desc.complexGradSpanCount > 0) { MTLRenderPassDescriptor* gradPass = [MTLRenderPassDescriptor renderPassDescriptor]; gradPass.renderTargetWidth = kGradTextureWidth; - gradPass.renderTargetHeight = gradTextureRowsForSimpleRamps() + gradSpansHeight; + gradPass.renderTargetHeight = desc.complexGradRowsTop + desc.complexGradRowsHeight; // TODO: Uploading the "simple" gradient texels directly to this texture requires us to use // MTLLoadActionLoad here, in addition to triple buffering the entire gradient texture. We // should consider different approaches: @@ -321,16 +332,16 @@ // "gradPass.renderTargetHeight" in combination with MTLLoadActionDontCare. (Still // requires triple buffering of the gradient texture.) // - gradPass.colorAttachments[0].loadAction = MTLLoadActionLoad; + gradPass.colorAttachments[0].loadAction = MTLLoadActionDontCare; gradPass.colorAttachments[0].storeAction = MTLStoreActionStore; - gradPass.colorAttachments[0].texture = mtl_texture(gradTexelBufferRing()); + gradPass.colorAttachments[0].texture = m_gradientTexture; id<MTLRenderCommandEncoder> gradEncoder = [commandBuffer renderCommandEncoderWithDescriptor:gradPass]; [gradEncoder setViewport:(MTLViewport){0.f, - static_cast<double>(gradTextureRowsForSimpleRamps()), + static_cast<double>(desc.complexGradRowsTop), kGradTextureWidth, - static_cast<float>(gradSpansHeight), + static_cast<float>(desc.complexGradRowsHeight), 0.0, 1.0}]; [gradEncoder setRenderPipelineState:m_colorRampPipeline->pipelineState()]; @@ -340,16 +351,34 @@ [gradEncoder drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4 - instanceCount:gradSpanCount]; + instanceCount:desc.complexGradSpanCount]; [gradEncoder endEncoding]; } + // Copy the simple color ramps to the gradient texture. 
+ if (desc.simpleGradTexelsHeight > 0) + { + id<MTLBlitCommandEncoder> blitEncoder = [commandBuffer blitCommandEncoder]; + [blitEncoder copyFromBuffer:mtl_buffer(simpleColorRampsBufferRing()) + sourceOffset:0 + sourceBytesPerRow:kGradTextureWidth * 4 + sourceBytesPerImage:desc.simpleGradTexelsHeight * kGradTextureWidth * 4 + sourceSize:MTLSizeMake( + desc.simpleGradTexelsWidth, desc.simpleGradTexelsHeight, 1) + toTexture:m_gradientTexture + destinationSlice:0 + destinationLevel:0 + destinationOrigin:MTLOriginMake(0, 0, 0)]; + + [blitEncoder endEncoding]; + } + // Tessellate all curves into vertices in the tessellation texture. - if (tessVertexSpanCount > 0) + if (desc.tessVertexSpanCount > 0) { MTLRenderPassDescriptor* tessPass = [MTLRenderPassDescriptor renderPassDescriptor]; tessPass.renderTargetWidth = kTessTextureWidth; - tessPass.renderTargetHeight = tessDataHeight; + tessPass.renderTargetHeight = desc.tessDataHeight; tessPass.colorAttachments[0].loadAction = MTLLoadActionDontCare; tessPass.colorAttachments[0].storeAction = MTLStoreActionStore; tessPass.colorAttachments[0].texture = m_tessVertexTexture; @@ -359,7 +388,7 @@ [tessEncoder setViewport:(MTLViewport){0.f, 0.f, kTessTextureWidth, - static_cast<float>(tessDataHeight), + static_cast<float>(desc.tessDataHeight), 0.0, 1.0}]; [tessEncoder setRenderPipelineState:m_tessPipeline->pipelineState()]; @@ -372,14 +401,14 @@ [tessEncoder drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4 - instanceCount:tessVertexSpanCount * 2]; + instanceCount:desc.tessVertexSpanCount * 2]; [tessEncoder endEncoding]; } // Set up the render pass that draws path patches and triangles. 
MTLRenderPassDescriptor* pass = [MTLRenderPassDescriptor renderPassDescriptor]; pass.colorAttachments[0].texture = renderTarget->targetTexture(); - if (loadAction == LoadAction::clear) + if (desc.loadAction == LoadAction::clear) { float cc[4]; UnpackColorToRGBA32F(frameDescriptor().clearColor, cc); @@ -417,7 +446,7 @@ [encoder setVertexTexture:m_tessVertexTexture atIndex:kTessVertexTextureIdx]; [encoder setVertexTexture:mtl_texture(pathBufferRing()) atIndex:kPathTextureIdx]; [encoder setVertexTexture:mtl_texture(contourBufferRing()) atIndex:kContourTextureIdx]; - [encoder setFragmentTexture:mtl_texture(gradTexelBufferRing()) atIndex:kGradTextureIdx]; + [encoder setFragmentTexture:m_gradientTexture atIndex:kGradTextureIdx]; [encoder setCullMode:MTLCullModeBack]; if (frameDescriptor().wireframe) { @@ -478,7 +507,7 @@ thisFlushLock.unlock(); }]; - if (flushType == FlushType::intermediate) + if (desc.flushType == FlushType::intermediate) { // The frame isn't complete yet. The caller will begin preparing a new flush immediately // after this method returns, so lock buffers for the next flush now.
diff --git a/renderer/pls_render_context.cpp b/renderer/pls_render_context.cpp index 566c600..c7b6456 100644 --- a/renderer/pls_render_context.cpp +++ b/renderer/pls_render_context.cpp
@@ -19,6 +19,18 @@ // When we exceed the capacity of a GPU resource mid-flush, double it immediately. constexpr static double kGPUResourceIntermediateGrowthFactor = 2; +constexpr size_t kMinSimpleColorRampRows = 1; +constexpr size_t kMaxSimpleColorRampRows = 256; // 65k simple gradients. + +constexpr size_t kMinComplexGradients = 31; +constexpr size_t kMinGradTextureHeight = kMinSimpleColorRampRows + kMinComplexGradients; +constexpr size_t kMaxGradTextureHeight = 2048; // TODO: Move this variable to PlatformFeatures. +constexpr size_t kMaxComplexGradients = kMaxGradTextureHeight - kMaxSimpleColorRampRows; + +constexpr size_t kMinTessTextureHeight = 32; +constexpr size_t kMaxTessTextureHeight = 2048; // GL_MAX_TEXTURE_SIZE spec minimum. +constexpr size_t kMaxTessellationVertices = kMaxTessTextureHeight * kTessTextureWidth; + uint32_t PLSRenderContext::ShaderFeatures::getPreprocessorDefines(SourceType sourceType) const { uint32_t defines = 0; @@ -157,9 +169,9 @@ } // How tall to make a resource texture in order to support the given number of items. -constexpr static size_t resource_texture_height(size_t widthInItems, size_t itemCount) +template <size_t WidthInItems> constexpr static size_t resource_texture_height(size_t itemCount) { - return (itemCount + widthInItems - 1) / widthInItems; + return (itemCount + WidthInItems - 1) / WidthInItems; } void PLSRenderContext::allocateGPUResources( @@ -206,20 +218,26 @@ #define COUNT_RESOURCE_SIZE(SIZE_IN_BYTES) #endif + // One-time allocation of the uniform buffer ring. + if (m_uniformBuffer.impl() == nullptr) + { + m_uniformBuffer.reset(makeUniformBufferRing(sizeof(FlushUniforms))); + } + COUNT_RESOURCE_SIZE(m_uniformBuffer.totalSizeInBytes()); + // Path data texture ring. constexpr size_t kMinPathIDCount = kPathTextureWidthInItems * 32; // 32 texels tall. 
- size_t targetMaxPathID = resource_texture_height(kPathTextureWidthInItems, targets.maxPathID) * + size_t targetMaxPathID = resource_texture_height<kPathTextureWidthInItems>(targets.maxPathID) * kPathTextureWidthInItems; targetMaxPathID = std::clamp(targetMaxPathID, kMinPathIDCount, m_maxPathID); size_t targetPathTextureHeight = - resource_texture_height(kPathTextureWidthInItems, targetMaxPathID); + resource_texture_height<kPathTextureWidthInItems>(targetMaxPathID); size_t currentPathTextureHeight = - resource_texture_height(kPathTextureWidthInItems, m_currentResourceLimits.maxPathID); + resource_texture_height<kPathTextureWidthInItems>(m_currentResourceLimits.maxPathID); if (shouldReallocate(targetPathTextureHeight, currentPathTextureHeight)) { assert(!m_pathBuffer.mapped()); m_pathBuffer.reset(makeTexelBufferRing(TexelBufferRing::Format::rgba32ui, - Renderable::no, kPathTextureWidthInItems, targetPathTextureHeight, kPathTexelsPerItem, @@ -236,18 +254,17 @@ // Contour data texture ring. constexpr size_t kMinContourIDCount = kContourTextureWidthInItems * 32; // 32 texels tall. 
size_t targetMaxContourID = - resource_texture_height(kContourTextureWidthInItems, targets.maxContourID) * + resource_texture_height<kContourTextureWidthInItems>(targets.maxContourID) * kContourTextureWidthInItems; targetMaxContourID = std::clamp(targetMaxContourID, kMinContourIDCount, kMaxContourID); size_t targetContourTextureHeight = - resource_texture_height(kContourTextureWidthInItems, targetMaxContourID); + resource_texture_height<kContourTextureWidthInItems>(targetMaxContourID); size_t currentContourTextureHeight = - resource_texture_height(kContourTextureWidthInItems, m_currentResourceLimits.maxContourID); + resource_texture_height<kContourTextureWidthInItems>(m_currentResourceLimits.maxContourID); if (shouldReallocate(targetContourTextureHeight, currentContourTextureHeight)) { assert(!m_contourBuffer.mapped()); m_contourBuffer.reset(makeTexelBufferRing(TexelBufferRing::Format::rgba32ui, - Renderable::no, kContourTextureWidthInItems, targetContourTextureHeight, kContourTexelsPerItem, @@ -261,45 +278,30 @@ } COUNT_RESOURCE_SIZE(m_contourBuffer.totalSizeInBytes()); - // Gradient texture ring. - constexpr size_t kMinSimpleGradientsHeight = 1; - constexpr size_t kMaxSimpleGradientsHeight = 256; // 65k simple gradients. - constexpr size_t kMinComplexGradients = 31; - constexpr size_t kMaxComplexGradients = - 2048 - kMaxSimpleGradientsHeight; // GL_MAX_TEXTURE_SIZE spec minimum. + // Simple gradient color ramp pixel unpack buffer ring. 
size_t targetSimpleGradientRows = - resource_texture_height(kGradTextureWidth, targets.maxSimpleGradients); + resource_texture_height<kGradTextureWidthInSimpleRamps>(targets.maxSimpleGradients); targetSimpleGradientRows = - std::clamp(targetSimpleGradientRows, kMinSimpleGradientsHeight, kMaxSimpleGradientsHeight); - size_t targetComplexGradients = - std::clamp(targets.maxComplexGradients, kMinComplexGradients, kMaxComplexGradients); - size_t targetGradTextureHeight = targetSimpleGradientRows + targetComplexGradients; - size_t currentGradTextureHeight = - resource_texture_height(kGradTextureWidthInSimpleRamps, - m_currentResourceLimits.maxSimpleGradients) + - m_currentResourceLimits.maxComplexGradients; + std::clamp(targetSimpleGradientRows, kMinSimpleColorRampRows, kMaxSimpleColorRampRows); assert(m_currentResourceLimits.maxSimpleGradients % kGradTextureWidthInSimpleRamps == 0); - if (shouldReallocate(targetGradTextureHeight, currentGradTextureHeight)) + assert(m_reservedGradTextureRowsForSimpleRamps == + resource_texture_height<kGradTextureWidthInSimpleRamps>( + m_currentResourceLimits.maxSimpleGradients)); + if (shouldReallocate(targetSimpleGradientRows, m_reservedGradTextureRowsForSimpleRamps)) { - assert(!m_gradTexelBuffer.mapped()); - m_gradTexelBuffer.reset(makeTexelBufferRing(TexelBufferRing::Format::rgba8, - Renderable::yes, - kGradTextureWidthInSimpleRamps, - targetGradTextureHeight, - 2, // 2 texels per simple ramp. 
- kGradTextureIdx, - TexelBufferRing::Filter::linear)); - LOG_CHANGED_SIZE("gradient texture height", - m_gradTextureRowsForSimpleRamps + - m_currentResourceLimits.maxComplexGradients, - targetGradTextureHeight, - m_gradTexelBuffer.totalSizeInBytes()); - m_gradTextureRowsForSimpleRamps = targetSimpleGradientRows; + assert(!m_simpleColorRampsBuffer.mapped()); + m_simpleColorRampsBuffer.reset( + makePixelUnpackBufferRing(targetSimpleGradientRows * kGradTextureWidthInSimpleRamps, + sizeof(TwoTexelRamp))); + LOG_CHANGED_SIZE("maxSimpleGradients", + m_reservedGradTextureRowsForSimpleRamps * kGradTextureWidthInSimpleRamps, + targetSimpleGradientRows * kGradTextureWidthInSimpleRamps, + m_simpleColorRampsBuffer.totalSizeInBytes()); m_currentResourceLimits.maxSimpleGradients = targetSimpleGradientRows * kGradTextureWidthInSimpleRamps; - m_currentResourceLimits.maxComplexGradients = targetComplexGradients; + m_reservedGradTextureRowsForSimpleRamps = targetSimpleGradientRows; } - COUNT_RESOURCE_SIZE(m_gradTexelBuffer.totalSizeInBytes()); + COUNT_RESOURCE_SIZE(m_simpleColorRampsBuffer.totalSizeInBytes()); // Instance buffer ring for rendering complex gradients. constexpr size_t kMinComplexGradientSpans = kMinComplexGradients * 32; @@ -321,9 +323,7 @@ } COUNT_RESOURCE_SIZE(m_gradSpanBuffer.totalSizeInBytes()); - // Instance buffer ring for rendering path tessellation data. - constexpr size_t kMinTessTextureHeight = 32; - constexpr size_t kMaxTessTextureHeight = 2048; // GL_MAX_TEXTURE_SIZE spec minimum. + // Instance buffer ring for rendering path tessellation vertices. 
constexpr size_t kMinTessellationSpans = kMinTessTextureHeight * kTessTextureWidth / 4; const size_t maxTessellationSpans = kMaxTessTextureHeight * kTessTextureWidth / 8; // ~100MiB size_t targetTessellationSpans = @@ -345,45 +345,50 @@ constexpr size_t kMinTriangleVertices = 3072 * 3; // 324 KiB // Triangle vertices don't have a maximum limit; we let the other components be the limiting // factor and allocate whatever buffer size we need at flush time. - size_t targetTriangleVertices = std::max(targets.maxTriangleVertices, kMinTriangleVertices); - if (shouldReallocate(targetTriangleVertices, m_currentResourceLimits.maxTriangleVertices)) + size_t targetTriangleVertices = + std::max(targets.triangleVertexBufferSize, kMinTriangleVertices); + if (shouldReallocate(targetTriangleVertices, m_currentResourceLimits.triangleVertexBufferSize)) { assert(!m_triangleBuffer.mapped()); m_triangleBuffer.reset( makeVertexBufferRing(targetTriangleVertices, sizeof(TriangleVertex))); - LOG_CHANGED_SIZE("maxTriangleVertices", - m_currentResourceLimits.maxTriangleVertices, + LOG_CHANGED_SIZE("triangleVertexBufferSize", + m_currentResourceLimits.triangleVertexBufferSize, targetTriangleVertices, m_triangleBuffer.totalSizeInBytes()); - m_currentResourceLimits.maxTriangleVertices = targetTriangleVertices; + m_currentResourceLimits.triangleVertexBufferSize = targetTriangleVertices; } COUNT_RESOURCE_SIZE(m_triangleBuffer.totalSizeInBytes()); - // Texture that that path tessellation data is rendered into. + // Gradient color ramp texture. 
+ size_t targetGradTextureHeight = + std::clamp(targets.gradientTextureHeight, kMinGradTextureHeight, kMaxGradTextureHeight); + if (shouldReallocate(targetGradTextureHeight, m_currentResourceLimits.gradientTextureHeight)) + { + allocateGradientTexture(targetGradTextureHeight); + LOG_CHANGED_SIZE("gradientTextureHeight", + m_currentResourceLimits.gradientTextureHeight, + targetGradTextureHeight, + targetGradTextureHeight * kGradTextureWidth * 4 * sizeof(uint8_t)); + m_currentResourceLimits.gradientTextureHeight = targetGradTextureHeight; + } + COUNT_RESOURCE_SIZE((m_currentResourceLimits.gradientTextureHeight) * kGradTextureWidth * 4 * + sizeof(uint8_t)); + + // Texture that path tessellation data is rendered into. size_t targetTessTextureHeight = - std::clamp(resource_texture_height(kTessTextureWidth, targets.maxTessellationVertices), - kMinTessTextureHeight, - kMaxTessTextureHeight); - if (shouldReallocate(targetTessTextureHeight * kTessTextureWidth, - m_currentResourceLimits.maxTessellationVertices)) + std::clamp(targets.tessellationTextureHeight, kMinTessTextureHeight, kMaxTessTextureHeight); + if (shouldReallocate(targetTessTextureHeight, + m_currentResourceLimits.tessellationTextureHeight)) { allocateTessellationTexture(targetTessTextureHeight); - LOG_CHANGED_SIZE("tessellation texture height", - resource_texture_height(kTessTextureWidth, - m_currentResourceLimits.maxTessellationVertices), + LOG_CHANGED_SIZE("tessellationTextureHeight", + m_currentResourceLimits.tessellationTextureHeight, targetTessTextureHeight, targetTessTextureHeight * kTessTextureWidth * 4 * sizeof(uint32_t)); - m_currentResourceLimits.maxTessellationVertices = - targetTessTextureHeight * kTessTextureWidth; + m_currentResourceLimits.tessellationTextureHeight = targetTessTextureHeight; } COUNT_RESOURCE_SIZE(targetTessTextureHeight * kTessTextureWidth * 4 * sizeof(uint32_t)); - - // One-time allocation of the uniform buffer ring. 
- if (m_uniformBuffer.impl() == nullptr) - { - m_uniformBuffer.reset(makeUniformBufferRing(1, sizeof(FlushUniforms))); - } - COUNT_RESOURCE_SIZE(m_uniformBuffer.totalSizeInBytes()); } void PLSRenderContext::beginFrame(FrameDescriptor&& frameDescriptor) @@ -392,7 +397,9 @@ // Auto-grow GPU allocations to service the maximum recent usage. If the recent usage is larger // than the current allocation, scale it by an additional kGPUResourcePadding since we have to // reallocate anyway. - growExceededGPUResources(m_maxRecentResourceUsage, kGPUResourcePadding); + // Also don't preemptively grow the resources we allocate at flush time, since we can just + // allocate the right sizes once we know exactly how big they need to be. + growExceededGPUResources(m_maxRecentResourceUsage.resetFlushTimeLimits(), kGPUResourcePadding); m_frameDescriptor = std::move(frameDescriptor); m_isFirstFlushOfFrame = true; onBeginFrame(); @@ -454,11 +461,6 @@ newLimits.maxTessellationSpans = maxTessellationSpans; needsRealloc = true; } - if (newLimits.maxTessellationVertices < maxTessVertexCountWithInternalPadding) - { - newLimits.maxTessellationVertices = maxTessVertexCountWithInternalPadding; - needsRealloc = true; - } assert(!m_pathBuffer.mapped()); assert(!m_contourBuffer.mapped()); assert(!m_tessSpanBuffer.mapped()); @@ -478,16 +480,14 @@ if (m_currentPathID + pathCount <= m_currentResourceLimits.maxPathID && m_currentContourID + contourCount <= m_currentResourceLimits.maxContourID && m_tessSpanBuffer.hasRoomFor(maxTessellationSpans) && - m_tessVertexCount + maxTessVertexCountWithInternalPadding <= - m_currentResourceLimits.maxTessellationVertices) + m_tessVertexCount + maxTessVertexCountWithInternalPadding <= kMaxTessellationVertices) { assert(m_pathBuffer.hasRoomFor(pathCount)); assert(m_contourBuffer.hasRoomFor(contourCount)); RIVE_DEBUG_CODE(m_expectedTessVertexCountAtNextReserve = m_tessVertexCount + tessVertexCounter.totalVertexCountIncludingReflectionsAndPadding()); - 
assert(m_expectedTessVertexCountAtNextReserve <= - m_currentResourceLimits.maxTessellationVertices); + assert(m_expectedTessVertexCountAtNextReserve <= kMaxTessellationVertices); return true; } @@ -517,10 +517,6 @@ const float* stops = gradient->stops(); size_t stopCount = gradient->count(); - // Even if all our color ramps end up being rendered on the GPU, ensuring the buffer ring is - // mapped causes the texture ring to cycle, which is what we want. - m_gradTexelBuffer.ensureMapped(); - uint32_t row, left, right; if (stopCount == 2 && stops[0] == 0) { @@ -544,7 +540,8 @@ return false; } rampTexelsIdx = m_simpleGradients.size() * 2; - m_gradTexelBuffer.set_back(colors); + m_simpleColorRampsBuffer.ensureMapped(); + m_simpleColorRampsBuffer.set_back(colors); m_simpleGradients.insert({simpleKey, rampTexelsIdx}); } row = rampTexelsIdx / kGradTextureWidth; @@ -564,10 +561,10 @@ } else { - if (m_complexGradients.size() >= m_currentResourceLimits.maxComplexGradients) + if (m_complexGradients.size() >= kMaxComplexGradients) { - // We ran out of rows for complex gradients in the texture. The caller needs to - // flush and try again. + // We ran out of the maximum supported number of complex gradients. The caller needs + // to issue an intermediate flush. return false; } @@ -599,8 +596,8 @@ // Push "GradientSpan" instances that will render each section of the color ramp. ColorInt lastColor = colors[0]; uint32_t lastXFixed = 0; - // The viewport will start at m_gradTextureRowsForSimpleRamps when rendering color - // ramps. + // The viewport will start at m_reservedGradTextureRowsForSimpleRamps when rendering + // color ramps. uint32_t y = static_cast<uint32_t>(m_complexGradients.size()); // "stop * w + .5" converts a stop position to an x-coordinate in the gradient texture. // Stops should be aligned (ideally) on pixel centers to prevent bleed. 
@@ -618,7 +615,7 @@ } m_gradSpanBuffer.set_back(lastXFixed, 65535u, y, lastColor, lastColor); - row = m_gradTextureRowsForSimpleRamps + m_complexGradients.size(); + row = m_reservedGradTextureRowsForSimpleRamps + m_complexGradients.size(); m_complexGradients.emplace(std::move(key), row); } } @@ -689,7 +686,7 @@ RIVE_DEBUG_CODE(m_expectedTessVertexCountAtEndOfPath = m_tessVertexCount + tessVertexCountWithoutPadding); assert(m_expectedTessVertexCountAtEndOfPath <= m_expectedTessVertexCountAtNextReserve); - assert(m_expectedTessVertexCountAtEndOfPath <= m_currentResourceLimits.maxTessellationVertices); + assert(m_expectedTessVertexCountAtEndOfPath <= kMaxTessellationVertices); } void PLSRenderContext::pushContour(Vec2D midpoint, bool closed, uint32_t paddingVertexCount) @@ -768,7 +765,7 @@ constexpr static uint32_t kInvalidContourID = 0; assert(m_tessVertexCount == m_expectedTessVertexCountAtEndOfPath); RIVE_DEBUG_CODE(m_expectedTessVertexCountAtEndOfPath = m_tessVertexCount + count;) - assert(m_expectedTessVertexCountAtEndOfPath <= m_currentResourceLimits.maxTessellationVertices); + assert(m_expectedTessVertexCountAtEndOfPath <= kMaxTessellationVertices); pushTessellationSpans(kEmptyCubic, {0, 0}, count, 0, 0, 1, kInvalidContourID); assert(m_tessVertexCount == m_expectedTessVertexCountAtEndOfPath); } @@ -942,20 +939,43 @@ pushPaddingVertices(1); } + // Since we don't write these resources until flush time, we can wait to resize them until now, + // when we know exactly how large they need to be. 
+ GPUResourceLimits newLimitsForFlushTimeResources{}; + bool needsFlushTimeRealloc = false; + assert(m_triangleBuffer.capacity() == m_currentResourceLimits.triangleVertexBufferSize); + if (m_currentResourceLimits.triangleVertexBufferSize < m_maxTriangleVertexCount) + { + newLimitsForFlushTimeResources.triangleVertexBufferSize = m_maxTriangleVertexCount; + needsFlushTimeRealloc = true; + } + size_t requiredGradTextureHeight = + m_reservedGradTextureRowsForSimpleRamps + m_complexGradients.size(); + if (m_currentResourceLimits.gradientTextureHeight < requiredGradTextureHeight) + { + newLimitsForFlushTimeResources.gradientTextureHeight = requiredGradTextureHeight; + needsFlushTimeRealloc = true; + } + size_t requiredTessTextureHeight = + resource_texture_height<kTessTextureWidth>(m_tessVertexCount); + if (m_currentResourceLimits.tessellationTextureHeight < requiredTessTextureHeight) + { + newLimitsForFlushTimeResources.tessellationTextureHeight = requiredTessTextureHeight; + needsFlushTimeRealloc = true; + } + if (needsFlushTimeRealloc) + { + growExceededGPUResources(newLimitsForFlushTimeResources, kGPUResourcePadding); + } if (m_maxTriangleVertexCount > 0) { - // Since we don't generate the triangle buffer until flush time, we can resize it now if it - // isn't large enough. - // TODO: More resources can be handled this way, e.g., the tessellation texture. 
- if (m_triangleBuffer.capacity() < m_maxTriangleVertexCount) - { - GPUResourceLimits newLimitsForTriangles{}; - newLimitsForTriangles.maxTriangleVertices = m_maxTriangleVertexCount; - growExceededGPUResources(newLimitsForTriangles, kGPUResourcePadding); - } m_triangleBuffer.ensureMapped(); assert(m_triangleBuffer.hasRoomFor(m_maxTriangleVertexCount)); } + assert(m_complexGradients.size() <= + m_currentResourceLimits.gradientTextureHeight - m_reservedGradTextureRowsForSimpleRamps); + assert(m_tessVertexCount <= + m_currentResourceLimits.tessellationTextureHeight * kTessTextureWidth); // Finish calculating our DrawList. bool needsClipBuffer = false; @@ -990,14 +1010,15 @@ assert(drawIdx == m_drawListCount); // Determine how much to draw. + size_t simpleColorRampCount = m_simpleColorRampsBuffer.bytesWritten() / sizeof(TwoTexelRamp); size_t gradSpanCount = m_gradSpanBuffer.bytesWritten() / sizeof(GradientSpan); size_t tessVertexSpanCount = m_tessSpanBuffer.bytesWritten() / sizeof(TessVertexSpan); - size_t tessDataHeight = resource_texture_height(kTessTextureWidth, m_tessVertexCount); + size_t tessDataHeight = resource_texture_height<kTessTextureWidth>(m_tessVertexCount); // Upload all non-empty buffers before flushing. m_pathBuffer.submit(); m_contourBuffer.submit(); - m_gradTexelBuffer.submit(); + m_simpleColorRampsBuffer.submit(); m_gradSpanBuffer.submit(); m_tessSpanBuffer.submit(); m_triangleBuffer.submit(); @@ -1007,7 +1028,7 @@ tessDataHeight, m_frameDescriptor.renderTarget->width(), m_frameDescriptor.renderTarget->height(), - gradTexelBufferRing()->height(), + m_currentResourceLimits.gradientTextureHeight, m_platformFeatures); if (!bits_equal(&m_cachedUniformData, &uniformData)) { @@ -1017,34 +1038,43 @@ m_cachedUniformData = uniformData; } - onFlush(flushType, - m_isFirstFlushOfFrame ? 
frameDescriptor().loadAction : LoadAction::preserveRenderTarget, - gradSpanCount, - m_complexGradients.size(), - tessVertexSpanCount, - tessDataHeight, - needsClipBuffer); + FlushDescriptor flushDesc; + flushDesc.flushType = flushType; + flushDesc.loadAction = + m_isFirstFlushOfFrame ? frameDescriptor().loadAction : LoadAction::preserveRenderTarget; + flushDesc.complexGradSpanCount = gradSpanCount; + flushDesc.tessVertexSpanCount = tessVertexSpanCount; + flushDesc.simpleGradTexelsWidth = std::min(simpleColorRampCount * 2, kGradTextureWidth); + flushDesc.simpleGradTexelsHeight = + resource_texture_height<kGradTextureWidthInSimpleRamps>(simpleColorRampCount); + flushDesc.complexGradRowsTop = m_reservedGradTextureRowsForSimpleRamps; + flushDesc.complexGradRowsHeight = m_complexGradients.size(); + flushDesc.tessDataHeight = tessDataHeight; + flushDesc.needsClipBuffer = needsClipBuffer; + onFlush(flushDesc); m_currentFrameResourceUsage.maxPathID += m_currentPathID; m_currentFrameResourceUsage.maxContourID += m_currentContourID; m_currentFrameResourceUsage.maxSimpleGradients += m_simpleGradients.size(); - m_currentFrameResourceUsage.maxComplexGradients += m_complexGradients.size(); m_currentFrameResourceUsage.maxComplexGradientSpans += gradSpanCount; m_currentFrameResourceUsage.maxTessellationSpans += tessVertexSpanCount; - m_currentFrameResourceUsage.maxTessellationVertices += m_tessVertexCount; - // Since we can defer allocating the triangle buffer until flush time, when we know exactly how - // many vertices it will need, we don't need to proactively count all the flushes in the frame. - // A simple max() will suffice. 
- m_currentFrameResourceUsage.maxTriangleVertices = - std::max(m_currentFrameResourceUsage.maxTriangleVertices, m_maxTriangleVertexCount); + m_currentFrameResourceUsage.triangleVertexBufferSize += m_maxTriangleVertexCount; + m_currentFrameResourceUsage.gradientTextureHeight += + resource_texture_height<kGradTextureWidthInSimpleRamps>(m_simpleGradients.size()) + + m_complexGradients.size(); + m_currentFrameResourceUsage.tessellationTextureHeight += + resource_texture_height<kTessTextureWidth>(m_tessVertexCount); static_assert(sizeof(m_currentFrameResourceUsage) == sizeof(size_t) * 8); // Make sure we got every field. if (flushType == FlushType::intermediate) { // Intermediate flushes in a single frame are BAD. If the current frame's accumulative usage - // (across all flushes) of any resource is larger than the current allocation, double it. - growExceededGPUResources(m_currentFrameResourceUsage, kGPUResourceIntermediateGrowthFactor); + // (across all flushes) of any resource is larger than the current allocation, double it! + // Also don't preemptively grow the resources we allocate at flush time, since we can just + // allocate the right sizes once we know exactly how big they need to be. + growExceededGPUResources(m_currentFrameResourceUsage.resetFlushTimeLimits(), + kGPUResourceIntermediateGrowthFactor); } else {