Add microprofile support (#11403) c83919a247
* Initial commit

* Update macros

* Add GPU Markers

* Fix builds without microprofile

* minor updates

* clang format

* Update profiler.cpp

* clang format

* Name Main Thread

* Update profiler_macros.h

* Fix end flip

* Update fiddle_context_gl.cpp

* clang format

* Update rive_build_config.lua

* Update rive_build_config.lua

* forked microprofile so I can use a tag

* Update render_context_d3d_impl.cpp

* clang

Co-authored-by: John White <aliasbinman@gmail.com>
diff --git a/.rive_head b/.rive_head
index 6f7cde7..5404d80 100644
--- a/.rive_head
+++ b/.rive_head
@@ -1 +1 @@
-28e95e51e7823ed149fa5fc323ccc9751bca5356
+c83919a2477577deac9afdc2f22b83b9093e4b93
diff --git a/build/rive_build_config.lua b/build/rive_build_config.lua
index 9bde82a..8900b74 100644
--- a/build/rive_build_config.lua
+++ b/build/rive_build_config.lua
@@ -143,6 +143,13 @@
     defines({ 'RIVE_OPTICK' })
     RIVE_OPTICK_URL = 'bombomby/optick'
     RIVE_OPTICK_VERSION = '1.4.0.0'
+end
+
+newoption({ trigger = 'with_microprofile', description = 'use microprofile profiler' })
+if _OPTIONS['with_microprofile'] then
+    defines({ 'RIVE_MICROPROFILE' })
+    RIVE_MICROPROFILE_URL = 'aliasbinman/microprofile'
+    RIVE_MICROPROFILE_VERSION = 'rivebuild'
 end 
 
 location(RIVE_BUILD_OUT)
diff --git a/dependencies/premake5_microprofile.lua b/dependencies/premake5_microprofile.lua
new file mode 100644
index 0000000..577f103
--- /dev/null
+++ b/dependencies/premake5_microprofile.lua
@@ -0,0 +1,11 @@
+local dependency = require('dependency')
+microprofile = dependency.github(RIVE_MICROPROFILE_URL, RIVE_MICROPROFILE_VERSION)
+project('microprofile')
+do
+    kind('StaticLib')
+    language('C++')
+    cppdialect('C++11')
+    -- Only embed.c builds here; MICROPROFILE_IMPL lives in src/profiler/profiler.cpp.
+    files({ microprofile .. '/src/embed.c' })
+    includedirs({ microprofile })
+end
diff --git a/include/rive/profiler/profiler_macros.h b/include/rive/profiler/profiler_macros.h
index d6e3603..f62e4d5 100644
--- a/include/rive/profiler/profiler_macros.h
+++ b/include/rive/profiler/profiler_macros.h
@@ -14,17 +14,65 @@
 // Add to new threads
 
 #if defined(RIVE_OPTICK) // Optick integration
+
 #include "optick_core.h"
 #include "optick.h"
+#define RIVE_PROF_INIT()
 #define RIVE_PROF_FRAME() OPTICK_FRAME("RiveFrame")
 #define RIVE_PROF_SCOPE() OPTICK_EVENT()
 #define RIVE_PROF_SCOPENAME(name) OPTICK_EVENT(name)
+#define RIVE_PROF_GPUNAME(name)
+
 #define RIVE_PROF_TAG(cat, tag) OPTICK_TAG(cat, tag)
 #define RIVE_PROF_THREAD(name) OPTICK_THREAD(name)
+#define RIVE_PROF_DRAW()
+#define RIVE_PROF_TOGGLEDRAW()
+#define RIVE_PROF_GPUSUBMIT(i)
+#define RIVE_PROF_GPUFLIP()
+#define RIVE_PROF_ENDFRAME()
+
+#elif defined(RIVE_MICROPROFILE) // Microprofile integration
+// Feature switches must be defined before the header they configure.
+#define MICROPROFILE_WEBSERVER 1
+#define MICROPROFILE_GPU_TIMERS 1
+#include "microprofile.h"
+#include "microprofiledraw.h"
+#include "microprofileui.h"
+
+#define RIVE_PROF_INIT()                                                       \
+    MicroProfileSetEnableAllGroups(true);                                      \
+    MicroProfileSetForceEnable(true);                                          \
+    MicroProfileWebServerStart();                                              \
+    MicroProfileOnThreadCreate("MainThread");                                  \
+    MicroProfileInit();
+
+#define RIVE_PROF_FRAME()
+#define RIVE_PROF_SCOPE()                                                      \
+    MICROPROFILE_SCOPEI(__FILE__, __FUNCTION__, 0xffffffff);
+#define RIVE_PROF_SCOPENAME(name)                                              \
+    MICROPROFILE_SCOPEI("group", name, 0xffffffff);
+#define RIVE_PROF_GPUNAME(name) MICROPROFILE_SCOPEGPUI(name, 0xffffffff);
+#define RIVE_PROF_TAG(cat, tag)
+#define RIVE_PROF_THREAD(name)
+#define RIVE_PROF_DRAW() MicroProfileDraw();
+#define RIVE_PROF_TOGGLEDRAW() // MicroProfileToggleDisplayMode();
+#define RIVE_PROF_GPUSUBMIT(i) MicroProfileGpuSubmit(i);
+#define RIVE_PROF_GPUFLIP() // MicroProfileGpuFlip();
+#define RIVE_PROF_ENDFRAME() MicroProfileFlip();
+
 #else // No profiler selected - fallback to no-op
+
+#define RIVE_PROF_INIT()
 #define RIVE_PROF_FRAME()
 #define RIVE_PROF_SCOPE()
 #define RIVE_PROF_SCOPENAME(name)
+#define RIVE_PROF_GPUNAME(name)
 #define RIVE_PROF_TAG(cat, tag)
 #define RIVE_PROF_THREAD(name)
+#define RIVE_PROF_DRAW()
+#define RIVE_PROF_TOGGLEDRAW()
+#define RIVE_PROF_GPUSUBMIT(i)
+#define RIVE_PROF_GPUFLIP()
+#define RIVE_PROF_ENDFRAME()
+
 #endif
diff --git a/premake5_v2.lua b/premake5_v2.lua
index 3ad75fa..8075862 100644
--- a/premake5_v2.lua
+++ b/premake5_v2.lua
@@ -42,6 +42,11 @@
     dofile(path.join(dependencies, 'premake5_optick.lua'))
 end
 
+if _OPTIONS['with_microprofile'] then
+    dofile(path.join(dependencies, 'premake5_microprofile.lua'))
+end
+
+
 if _OPTIONS['with_rive_scripting'] then
     local scripting = require(path.join(path.getabsolute('scripting/'), 'premake5'))
     luau = scripting.luau
@@ -125,6 +130,10 @@
         includedirs({ optick .. '/src' })
     end
 
+    if _OPTIONS['with_microprofile'] then
+        includedirs({ microprofile })
+    end
+
     filter('system:macosx or system:ios')
     do
         files({ 'src/text/font_hb_apple.mm' })
diff --git a/renderer/path_fiddle/fiddle_context_d3d.cpp b/renderer/path_fiddle/fiddle_context_d3d.cpp
index f2878bd..8699b67 100644
--- a/renderer/path_fiddle/fiddle_context_d3d.cpp
+++ b/renderer/path_fiddle/fiddle_context_d3d.cpp
@@ -193,6 +193,8 @@
         if (!m_isHeadless)
             m_swapchain->Present(0, 0);
 
+        RIVE_PROF_ENDFRAME()
+
         m_renderTarget->setTargetTexture(nullptr);
     }
 
diff --git a/renderer/path_fiddle/fiddle_context_d3d12.cpp b/renderer/path_fiddle/fiddle_context_d3d12.cpp
index 244f8ae..e76770e 100644
--- a/renderer/path_fiddle/fiddle_context_d3d12.cpp
+++ b/renderer/path_fiddle/fiddle_context_d3d12.cpp
@@ -1,4 +1,5 @@
 #include "fiddle_context.hpp"
+#include "rive/profiler/profiler_macros.h"
 
 #if !defined(_WIN32) || defined(RIVE_UNREAL)
 
@@ -160,6 +161,10 @@
                                          IID_PPV_ARGS(&m_copyCommandQueue)));
         NAME_RAW_D3D12_OBJECT(m_copyCommandQueue);
 
+#if defined(RIVE_MICROPROFILE)
+        MicroProfileGpuInitD3D12(m_device.Get(), m_commandQueue.Get());
+#endif
+
         for (auto i = 0; i < FrameCount; ++i)
         {
             VERIFY_OK(m_device->CreateCommandAllocator(
@@ -431,6 +436,9 @@
 
         VERIFY_OK(m_commandQueue->Signal(m_fence.Get(),
                                          m_previousFrames[m_frameIndex]));
+
+        RIVE_PROF_GPUSUBMIT(0);
+        RIVE_PROF_ENDFRAME()
     }
 
     void flushPLSContext(RenderTarget* offscreenRenderTarget) final
@@ -447,7 +455,9 @@
 
         VERIFY_OK(m_copyCommandList->Reset(m_copyAllocators[m_frameIndex].Get(),
                                            NULL));
-
+#if defined(RIVE_MICROPROFILE)
+        MicroProfileGpuSetContext(m_commandList.Get());
+#endif
         RenderContextD3D12Impl::CommandLists cmdLists = {
             m_copyCommandList.Get(),
             m_commandList.Get()};
@@ -575,6 +585,8 @@
 
         if (!m_isHeadless)
             m_swapChain->Present(0, 0);
+
+        RIVE_PROF_GPUFLIP();
     }
 
 private:
diff --git a/renderer/path_fiddle/fiddle_context_gl.cpp b/renderer/path_fiddle/fiddle_context_gl.cpp
index 9145d83..894b47d 100644
--- a/renderer/path_fiddle/fiddle_context_gl.cpp
+++ b/renderer/path_fiddle/fiddle_context_gl.cpp
@@ -13,6 +13,7 @@
 #include "rive/renderer/rive_renderer.hpp"
 #include "rive/renderer/gl/render_context_gl_impl.hpp"
 #include "rive/renderer/gl/render_target_gl.hpp"
+#include "rive/profiler/profiler_macros.h"
 
 #ifdef RIVE_WEBGL
 #include <emscripten/emscripten.h>
@@ -169,6 +170,7 @@
                               GL_NEAREST);
             glDisable(GL_SCISSOR_TEST);
         }
+        RIVE_PROF_ENDFRAME()
     }
 
 protected:
diff --git a/renderer/path_fiddle/microprofilestub.cpp b/renderer/path_fiddle/microprofilestub.cpp
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/renderer/path_fiddle/microprofilestub.cpp
diff --git a/renderer/path_fiddle/path_fiddle.cpp b/renderer/path_fiddle/path_fiddle.cpp
index 9fcff3f..117f968 100644
--- a/renderer/path_fiddle/path_fiddle.cpp
+++ b/renderer/path_fiddle/path_fiddle.cpp
@@ -341,7 +341,15 @@
                 clockwiseFill = !clockwiseFill;
                 break;
             case GLFW_KEY_P:
-                paused = !paused;
+                if (!shift)
+                {
+                    paused = !paused;
+                }
+                else
+                {
+                    RIVE_PROF_TOGGLEDRAW();
+                }
+
                 break;
             case GLFW_KEY_H:
                 if (!shift)
@@ -427,6 +435,8 @@
     setvbuf(stdout, NULL, _IONBF, 0);
     setvbuf(stderr, NULL, _IONBF, 0);
 
+    RIVE_PROF_INIT()
+
 #ifdef DEBUG
     options.enableVulkanCoreValidationLayers = true;
 #endif
diff --git a/renderer/premake5.lua b/renderer/premake5.lua
index 57d5a28..11f807b 100644
--- a/renderer/premake5.lua
+++ b/renderer/premake5.lua
@@ -94,6 +94,11 @@
             externalincludedirs({ optick .. '/src'})
         end
 
+        if _OPTIONS['with_microprofile'] then
+            links({'microprofile'})
+            externalincludedirs({ microprofile})
+        end
+
         if rive_target_os == 'windows' then
             externalincludedirs({
                 dx12_headers .. '/include/directx',
diff --git a/renderer/premake5_pls_renderer.lua b/renderer/premake5_pls_renderer.lua
index 8139c78..ebebf77 100644
--- a/renderer/premake5_pls_renderer.lua
+++ b/renderer/premake5_pls_renderer.lua
@@ -183,6 +183,10 @@
     optick = dependency.github(RIVE_OPTICK_URL, RIVE_OPTICK_VERSION)
 end
 
+if _OPTIONS['with_microprofile'] then
+    microprofile = dependency.github(RIVE_MICROPROFILE_URL, RIVE_MICROPROFILE_VERSION)
+end
+
 project('rive_pls_renderer')
 do
     kind('StaticLib')
@@ -204,6 +208,10 @@
         includedirs({optick .. '/src'})
     end
 
+    if _OPTIONS['with_microprofile'] then
+        includedirs({ microprofile })
+    end
+
     if _OPTIONS['with_vulkan'] then
         externalincludedirs({
             vulkan_headers .. '/include',
diff --git a/renderer/src/d3d11/render_context_d3d_impl.cpp b/renderer/src/d3d11/render_context_d3d_impl.cpp
index 551aa4e..b72fd86 100644
--- a/renderer/src/d3d11/render_context_d3d_impl.cpp
+++ b/renderer/src/d3d11/render_context_d3d_impl.cpp
@@ -7,6 +7,7 @@
 #include "rive/renderer/d3d/d3d_constants.hpp"
 
 #include "rive/renderer/texture.hpp"
+#include "rive/profiler/profiler_macros.h"
 
 #include <D3DCompiler.h>
 
@@ -408,6 +409,9 @@
     ComPtr<ID3D11DeviceContext> gpuContext,
     const D3DContextOptions& contextOptions)
 {
+#if defined(RIVE_MICROPROFILE)
+    MicroProfileGpuInitD3D11(gpu.Get());
+#endif
     D3DCapabilities d3dCapabilities;
     D3D11_FEATURE_DATA_D3D11_OPTIONS2 d3d11Options2;
 
@@ -1544,6 +1548,8 @@
     // Tessellate all curves into vertices in the tessellation texture.
     if (desc.tessVertexSpanCount > 0)
     {
+        RIVE_PROF_GPUNAME("Tessellate Curves");
+
         ID3D11Buffer* tessSpanBuffer =
             flush_buffer(m_gpuContext.Get(), tessSpanBufferRing());
         UINT tessStride = sizeof(TessVertexSpan);
@@ -1626,6 +1632,8 @@
     // Render the atlas if we have any offscreen feathers.
     if ((desc.atlasFillBatchCount | desc.atlasStrokeBatchCount) != 0)
     {
+        RIVE_PROF_GPUNAME("atlasRender");
+
         float clearZero[4]{};
         m_gpuContext->ClearRenderTargetView(m_atlasTextureRTV.Get(), clearZero);
 
@@ -1708,59 +1716,69 @@
     }
 
     // Setup and clear the PLS textures.
-    switch (desc.colorLoadAction)
     {
-        case gpu::LoadAction::clear:
-            if (desc.fixedFunctionColorOutput)
-            {
-                float clearColor4f[4];
-                UnpackColorToRGBA32FPremul(desc.colorClearValue, clearColor4f);
-                m_gpuContext->ClearRenderTargetView(renderTarget->targetRTV(),
-                                                    clearColor4f);
-            }
-            else if (m_d3dCapabilities.supportsTypedUAVLoadStore)
-            {
-                float clearColor4f[4];
-                UnpackColorToRGBA32FPremul(desc.colorClearValue, clearColor4f);
-                m_gpuContext->ClearUnorderedAccessViewFloat(
-                    renderTarget->targetUAV(),
-                    clearColor4f);
-            }
-            else
-            {
-                UINT clearColorui[4] = {
-                    gpu::SwizzleRiveColorToRGBAPremul(desc.colorClearValue)};
-                m_gpuContext->ClearUnorderedAccessViewUint(
-                    renderTarget->targetUAV(),
-                    clearColorui);
-            }
-            break;
-        case gpu::LoadAction::preserveRenderTarget:
-            if (!desc.fixedFunctionColorOutput &&
-                !renderTarget->targetTextureSupportsUAV())
-            {
-                // We're rendering to an offscreen UAV and preserving the
-                // target. Copy the target texture over.
-                blit_sub_rect(m_gpuContext.Get(),
-                              renderTarget->offscreenTexture(),
-                              renderTarget->targetTexture(),
-                              desc.renderTargetUpdateBounds);
-            }
-            break;
-        case gpu::LoadAction::dontCare:
-            break;
+        RIVE_PROF_GPUNAME("clearPLSTextures");
+        switch (desc.colorLoadAction)
+        {
+
+            case gpu::LoadAction::clear:
+                if (desc.fixedFunctionColorOutput)
+                {
+                    float clearColor4f[4];
+                    UnpackColorToRGBA32FPremul(desc.colorClearValue,
+                                               clearColor4f);
+                    m_gpuContext->ClearRenderTargetView(
+                        renderTarget->targetRTV(),
+                        clearColor4f);
+                }
+                else if (m_d3dCapabilities.supportsTypedUAVLoadStore)
+                {
+                    float clearColor4f[4];
+                    UnpackColorToRGBA32FPremul(desc.colorClearValue,
+                                               clearColor4f);
+                    m_gpuContext->ClearUnorderedAccessViewFloat(
+                        renderTarget->targetUAV(),
+                        clearColor4f);
+                }
+                else
+                {
+                    UINT clearColorui[4] = {gpu::SwizzleRiveColorToRGBAPremul(
+                        desc.colorClearValue)};
+                    m_gpuContext->ClearUnorderedAccessViewUint(
+                        renderTarget->targetUAV(),
+                        clearColorui);
+                }
+                break;
+            case gpu::LoadAction::preserveRenderTarget:
+                if (!desc.fixedFunctionColorOutput &&
+                    !renderTarget->targetTextureSupportsUAV())
+                {
+                    // We're rendering to an offscreen UAV and preserving the
+                    // target. Copy the target texture over.
+                    blit_sub_rect(m_gpuContext.Get(),
+                                  renderTarget->offscreenTexture(),
+                                  renderTarget->targetTexture(),
+                                  desc.renderTargetUpdateBounds);
+                }
+                break;
+            case gpu::LoadAction::dontCare:
+                break;
+        }
+        if (desc.combinedShaderFeatures & gpu::ShaderFeatures::ENABLE_CLIPPING)
+        {
+            constexpr static UINT kZero[4]{};
+            m_gpuContext->ClearUnorderedAccessViewUint(renderTarget->clipUAV(),
+                                                       kZero);
+        }
+        {
+            UINT coverageClear[4]{desc.coverageClearValue};
+            m_gpuContext->ClearUnorderedAccessViewUint(
+                renderTarget->coverageUAV(),
+                coverageClear);
+        }
     }
-    if (desc.combinedShaderFeatures & gpu::ShaderFeatures::ENABLE_CLIPPING)
-    {
-        constexpr static UINT kZero[4]{};
-        m_gpuContext->ClearUnorderedAccessViewUint(renderTarget->clipUAV(),
-                                                   kZero);
-    }
-    {
-        UINT coverageClear[4]{desc.coverageClearValue};
-        m_gpuContext->ClearUnorderedAccessViewUint(renderTarget->coverageUAV(),
-                                                   coverageClear);
-    }
+
+    RIVE_PROF_GPUNAME("DrawList");
 
     // Execute the DrawList.
     ID3D11RenderTargetView* targetRTV =
@@ -1892,6 +1910,7 @@
                                                 &drawUniforms,
                                                 0,
                                                 0);
+                RIVE_PROF_GPUNAME("Patches");
                 m_gpuContext->DrawIndexedInstanced(PatchIndexCount(drawType),
                                                    batch.elementCount,
                                                    PatchBaseIndex(drawType),
@@ -1906,11 +1925,16 @@
                     D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
                 m_gpuContext->RSSetState(
                     m_backCulledRasterState[desc.wireframe].Get());
+                // Constant label (matches D3D12): microprofile caches the scope
+                // token in a static, so a ternary name would only apply once.
+                RIVE_PROF_GPUNAME("interiorTriangulation||atlasBlit");
                 m_gpuContext->Draw(batch.elementCount, batch.baseElement);
                 break;
             }
             case DrawType::imageRect:
             {
+                RIVE_PROF_GPUNAME("imageRect");
+
                 m_gpuContext->IASetPrimitiveTopology(
                     D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
                 m_gpuContext->IASetIndexBuffer(m_imageRectIndexBuffer.Get(),
@@ -1932,6 +1956,7 @@
             }
             case DrawType::imageMesh:
             {
+                RIVE_PROF_GPUNAME("imageMesh");
                 LITE_RTTI_CAST_OR_BREAK(vertexBuffer,
                                         RenderBufferD3DImpl*,
                                         batch.vertexBuffer);
@@ -1972,6 +1997,9 @@
                 break;
             }
             case DrawType::renderPassResolve:
+            {
+                RIVE_PROF_GPUNAME("renderPassResolve");
+
                 assert(desc.interlockMode == gpu::InterlockMode::atomics);
                 m_gpuContext->IASetPrimitiveTopology(
                     D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
@@ -2010,7 +2038,8 @@
                         NULL);
                 }
                 m_gpuContext->Draw(4, 0);
-                break;
+            }
+            break;
             case DrawType::msaaStrokes:
             case DrawType::msaaMidpointFanBorrowedCoverage:
             case DrawType::msaaMidpointFans:
@@ -2027,6 +2056,8 @@
     if (desc.interlockMode == gpu::InterlockMode::rasterOrdering &&
         !renderTarget->targetTextureSupportsUAV())
     {
+        RIVE_PROF_GPUNAME("blit_sub_rect");
+
         // We rendered to an offscreen UAV and did not resolve to the
         // renderTarget. Copy back to the main target.
         assert(!desc.fixedFunctionColorOutput);
diff --git a/renderer/src/d3d12/render_context_d3d12_impl.cpp b/renderer/src/d3d12/render_context_d3d12_impl.cpp
index 8c9b117..a83e623 100644
--- a/renderer/src/d3d12/render_context_d3d12_impl.cpp
+++ b/renderer/src/d3d12/render_context_d3d12_impl.cpp
@@ -4,6 +4,8 @@
 
 #include "rive/renderer/d3d12/render_context_d3d12_impl.hpp"
 #include "rive/renderer/d3d/d3d_constants.hpp"
+#include "rive/profiler/profiler_macros.h"
+
 // needed for root sig and heap constants
 #include "shaders/d3d/root.sig"
 
@@ -1108,6 +1110,8 @@
 
     if (desc.tessVertexSpanCount)
     {
+        RIVE_PROF_GPUNAME("Tessellate Curves");
+
         m_resourceManager->transition(cmdList,
                                       m_tesselationTexture.get(),
                                       D3D12_RESOURCE_STATE_RENDER_TARGET);
@@ -1190,6 +1194,7 @@
 
     if ((desc.atlasFillBatchCount | desc.atlasStrokeBatchCount) != 0)
     {
+        RIVE_PROF_GPUNAME("atlasRender");
         m_resourceManager->transition(cmdList,
                                       m_atlasTexture.get(),
                                       D3D12_RESOURCE_STATE_RENDER_TARGET);
@@ -1278,88 +1283,95 @@
     cmdList->RSSetScissorRects(1, &scissorRect);
 
     // Setup and clear the PLS textures.
-
-    if (desc.fixedFunctionColorOutput)
     {
-        m_resourceManager->transition(cmdList,
-                                      targetTexture,
-                                      D3D12_RESOURCE_STATE_RENDER_TARGET);
-
-        auto rtvHandle = m_rtvHeap->cpuHandleForIndex(TARGET_RTV_HEAP_OFFSET);
-        cmdList->OMSetRenderTargets(1, &rtvHandle, FALSE, nullptr);
-
-        if (desc.colorLoadAction == gpu::LoadAction::clear)
+        RIVE_PROF_GPUNAME("clearPLSTextures");
+        if (desc.fixedFunctionColorOutput)
         {
-            float clearColor4f[4];
-            UnpackColorToRGBA32FPremul(desc.colorClearValue, clearColor4f);
-            cmdList->ClearRenderTargetView(rtvHandle, clearColor4f, 0, nullptr);
-        }
-    }
-    else // !desc.fixedFunctionColorOutput
-    {
-        if (renderTarget->targetTextureSupportsUAV())
-        {
-            m_resourceManager->transition(
-                cmdList,
-                targetTexture,
-                D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
-        }
+            m_resourceManager->transition(cmdList,
+                                          targetTexture,
+                                          D3D12_RESOURCE_STATE_RENDER_TARGET);
 
-        if (desc.colorLoadAction == gpu::LoadAction::clear)
-        {
-            auto tex = renderTarget->targetTextureSupportsUAV()
-                           ? renderTarget->targetTexture()->resource()
-                           : renderTarget->offscreenTexture()->resource();
+            auto rtvHandle =
+                m_rtvHeap->cpuHandleForIndex(TARGET_RTV_HEAP_OFFSET);
+            cmdList->OMSetRenderTargets(1, &rtvHandle, FALSE, nullptr);
 
-            if (m_capabilities.supportsTypedUAVLoadStore)
+            if (desc.colorLoadAction == gpu::LoadAction::clear)
             {
                 float clearColor4f[4];
                 UnpackColorToRGBA32FPremul(desc.colorClearValue, clearColor4f);
-
-                auto gpuHandle = m_srvUavCbvHeap->gpuHandleForIndex(
-                    ATOMIC_COLOR_HEAP_OFFSET);
-                auto cpuHandle = m_cpuSrvUavCbvHeap->cpuHandleForIndex(
-                    ATOMIC_COLOR_HEAP_OFFSET);
-                m_resourceManager->clearUAV(cmdList,
-                                            tex,
-                                            gpuHandle,
-                                            cpuHandle,
-                                            clearColor4f,
-                                            desc.interlockMode ==
-                                                InterlockMode::atomics);
-            }
-            else
-            {
-                UINT clearColorui[4] = {
-                    gpu::SwizzleRiveColorToRGBAPremul(desc.colorClearValue)};
-
-                auto gpuHandle = m_srvUavCbvHeap->gpuHandleForIndex(
-                    ATOMIC_COLOR_HEAP_OFFSET);
-                auto cpuHandle = m_cpuSrvUavCbvHeap->cpuHandleForIndex(
-                    ATOMIC_COLOR_HEAP_OFFSET);
-
-                m_resourceManager->clearUAV(cmdList,
-                                            tex,
-                                            gpuHandle,
-                                            cpuHandle,
-                                            clearColorui,
-                                            desc.interlockMode ==
-                                                InterlockMode::atomics);
+                cmdList->ClearRenderTargetView(rtvHandle,
+                                               clearColor4f,
+                                               0,
+                                               nullptr);
             }
         }
-        if (desc.colorLoadAction == gpu::LoadAction::preserveRenderTarget &&
-            !renderTarget->targetTextureSupportsUAV())
+        else // !desc.fixedFunctionColorOutput
         {
-            auto offscreenTex = renderTarget->offscreenTexture();
-            blitSubRect(cmdList,
-                        offscreenTex,
-                        renderTarget->targetTexture(),
-                        desc.renderTargetUpdateBounds);
+            if (renderTarget->targetTextureSupportsUAV())
+            {
+                m_resourceManager->transition(
+                    cmdList,
+                    targetTexture,
+                    D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
+            }
 
-            m_resourceManager->transition(
-                cmdList,
-                offscreenTex,
-                D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
+            if (desc.colorLoadAction == gpu::LoadAction::clear)
+            {
+                auto tex = renderTarget->targetTextureSupportsUAV()
+                               ? renderTarget->targetTexture()->resource()
+                               : renderTarget->offscreenTexture()->resource();
+
+                if (m_capabilities.supportsTypedUAVLoadStore)
+                {
+                    float clearColor4f[4];
+                    UnpackColorToRGBA32FPremul(desc.colorClearValue,
+                                               clearColor4f);
+
+                    auto gpuHandle = m_srvUavCbvHeap->gpuHandleForIndex(
+                        ATOMIC_COLOR_HEAP_OFFSET);
+                    auto cpuHandle = m_cpuSrvUavCbvHeap->cpuHandleForIndex(
+                        ATOMIC_COLOR_HEAP_OFFSET);
+                    m_resourceManager->clearUAV(cmdList,
+                                                tex,
+                                                gpuHandle,
+                                                cpuHandle,
+                                                clearColor4f,
+                                                desc.interlockMode ==
+                                                    InterlockMode::atomics);
+                }
+                else
+                {
+                    UINT clearColorui[4] = {gpu::SwizzleRiveColorToRGBAPremul(
+                        desc.colorClearValue)};
+
+                    auto gpuHandle = m_srvUavCbvHeap->gpuHandleForIndex(
+                        ATOMIC_COLOR_HEAP_OFFSET);
+                    auto cpuHandle = m_cpuSrvUavCbvHeap->cpuHandleForIndex(
+                        ATOMIC_COLOR_HEAP_OFFSET);
+
+                    m_resourceManager->clearUAV(cmdList,
+                                                tex,
+                                                gpuHandle,
+                                                cpuHandle,
+                                                clearColorui,
+                                                desc.interlockMode ==
+                                                    InterlockMode::atomics);
+                }
+            }
+            if (desc.colorLoadAction == gpu::LoadAction::preserveRenderTarget &&
+                !renderTarget->targetTextureSupportsUAV())
+            {
+                auto offscreenTex = renderTarget->offscreenTexture();
+                blitSubRect(cmdList,
+                            offscreenTex,
+                            renderTarget->targetTexture(),
+                            desc.renderTargetUpdateBounds);
+
+                m_resourceManager->transition(
+                    cmdList,
+                    offscreenTex,
+                    D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
+            }
         }
     }
 
@@ -1412,6 +1424,7 @@
 
     m_heapDescriptorOffset += imageDescriptorOffset;
 
+    RIVE_PROF_GPUNAME("DrawList");
     for (const DrawBatch& batch : *desc.drawList)
     {
         assert(batch.elementCount != 0);
@@ -1544,6 +1557,7 @@
             case DrawType::midpointFanCenterAAPatches:
             case DrawType::outerCurvePatches:
             {
+                RIVE_PROF_GPUNAME("Patches");
                 cmdList->IASetPrimitiveTopology(
                     D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
                 auto IBV = m_pathPatchIndexBuffer->indexBufferView();
@@ -1562,6 +1576,7 @@
             case DrawType::interiorTriangulation:
             case DrawType::atlasBlit:
             {
+                RIVE_PROF_GPUNAME("interiorTriangulation||atlasBlit");
                 cmdList->IASetPrimitiveTopology(
                     D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
                 cmdList->DrawInstanced(batch.elementCount,
@@ -1572,6 +1587,8 @@
             }
             case DrawType::imageRect:
             {
+                RIVE_PROF_GPUNAME("imageRect");
+
                 cmdList->IASetPrimitiveTopology(
                     D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
                 auto IBV = m_imageRectIndexBuffer->indexBufferView();
@@ -1592,6 +1609,8 @@
             }
             case DrawType::imageMesh:
             {
+                RIVE_PROF_GPUNAME("imageMesh");
+
                 LITE_RTTI_CAST_OR_BREAK(vertexBuffer,
                                         RenderBufferD3D12Impl*,
                                         batch.vertexBuffer);
@@ -1633,11 +1652,16 @@
                 break;
             }
             case DrawType::renderPassResolve:
+            {
+                RIVE_PROF_GPUNAME("renderPassResolve");
+
                 assert(desc.interlockMode == gpu::InterlockMode::atomics);
                 cmdList->IASetPrimitiveTopology(
                     D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
                 cmdList->DrawInstanced(4, 1, 0, 0);
                 break;
+            }
+
             case DrawType::msaaStrokes:
             case DrawType::msaaMidpointFanBorrowedCoverage:
             case DrawType::msaaMidpointFans:
@@ -1654,6 +1678,8 @@
     if (desc.interlockMode == gpu::InterlockMode::rasterOrdering &&
         !renderTarget->targetTextureSupportsUAV())
     {
+        RIVE_PROF_GPUNAME("blit_sub_rect");
+
         // We rendered to an offscreen UAV and did not resolve to the
         // renderTarget. Copy back to the main target.
         assert(!desc.fixedFunctionColorOutput);
diff --git a/src/profiler/profiler.cpp b/src/profiler/profiler.cpp
new file mode 100644
index 0000000..0f8d2da
--- /dev/null
+++ b/src/profiler/profiler.cpp
@@ -0,0 +1,8 @@
+#ifdef RIVE_MICROPROFILE
+#define MICROPROFILE_IMPL
+#if defined(RIVE_WINDOWS)
+#define MICROPROFILE_GPU_TIMERS_D3D11 1
+#define MICROPROFILE_GPU_TIMERS_D3D12 1
+#endif // RIVE_WINDOWS
+#include "microprofile.h"
+#endif // RIVE_MICROPROFILE