| /* |
| * Copyright 2023 Rive |
| */ |
| |
| #include "rive/pls/metal/pls_render_context_metal_impl.h" |
| |
| #include "background_shader_compiler.h" |
| #include "rive/pls/buffer_ring.hpp" |
| #include "rive/pls/pls_image.hpp" |
| #include "shaders/constants.glsl" |
| #include <sstream> |
| |
| #include "shaders/out/generated/color_ramp.exports.h" |
| #include "shaders/out/generated/tessellate.exports.h" |
| |
| #ifdef RIVE_IOS_SIMULATOR |
| #import <mach-o/arch.h> |
| #endif |
| |
| namespace rive::pls |
| { |
| #ifdef RIVE_IOS |
| #include "shaders/out/generated/rive_pls_ios.metallib.c" |
| #elif defined(RIVE_IOS_SIMULATOR) |
| #include "shaders/out/generated/rive_pls_ios_simulator.metallib.c" |
| #else |
| #include "shaders/out/generated/rive_pls_macosx.metallib.c" |
| #endif |
| |
// Compiles a render pipeline state from "desc", aborting the process on failure.
// Pipeline creation only fails on programmer error (bad shader/descriptor combination),
// so there is no recovery path.
static id<MTLRenderPipelineState> make_pipeline_state(id<MTLDevice> gpu,
                                                      MTLRenderPipelineDescriptor* desc)
{
    // Per Cocoa convention, the error out-parameter is only written on failure; success or
    // failure is indicated by the returned object. Initialize to nil rather than a
    // placeholder error so we never print a bogus message.
    NSError* err = nil;
    id<MTLRenderPipelineState> state = [gpu newRenderPipelineStateWithDescriptor:desc error:&err];
    if (state == nil)
    {
        fprintf(stderr, "Failed to create MTLRenderPipelineState.\n");
        if (err != nil)
        {
            fprintf(stderr, "%s\n", err.localizedDescription.UTF8String);
        }
        assert(0);
        exit(-1);
    }
    return state;
}
| |
// Renders color ramps to the gradient texture.
class PLSRenderContextMetalImpl::ColorRampPipeline
{
public:
    ColorRampPipeline(id<MTLDevice> gpu, id<MTLLibrary> plsLibrary)
    {
        // The gradient texture is RGBA8; build a single pipeline targeting that format.
        MTLRenderPipelineDescriptor* pipelineDesc = [[MTLRenderPipelineDescriptor alloc] init];
        pipelineDesc.vertexFunction = [plsLibrary newFunctionWithName:@GLSL_colorRampVertexMain];
        pipelineDesc.fragmentFunction =
            [plsLibrary newFunctionWithName:@GLSL_colorRampFragmentMain];
        pipelineDesc.colorAttachments[0].pixelFormat = MTLPixelFormatRGBA8Unorm;
        m_pipelineState = make_pipeline_state(gpu, pipelineDesc);
    }

    id<MTLRenderPipelineState> pipelineState() const { return m_pipelineState; }

private:
    id<MTLRenderPipelineState> m_pipelineState;
};
| |
// Renders tessellated vertices to the tessellation texture.
class PLSRenderContextMetalImpl::TessellatePipeline
{
public:
    TessellatePipeline(id<MTLDevice> gpu, id<MTLLibrary> plsLibrary)
    {
        // The tessellation texture is RGBA32Uint; build a single pipeline for that format.
        MTLRenderPipelineDescriptor* pipelineDesc = [[MTLRenderPipelineDescriptor alloc] init];
        pipelineDesc.vertexFunction = [plsLibrary newFunctionWithName:@GLSL_tessellateVertexMain];
        pipelineDesc.fragmentFunction =
            [plsLibrary newFunctionWithName:@GLSL_tessellateFragmentMain];
        pipelineDesc.colorAttachments[0].pixelFormat = MTLPixelFormatRGBA32Uint;
        m_pipelineState = make_pipeline_state(gpu, pipelineDesc);
    }

    id<MTLRenderPipelineState> pipelineState() const { return m_pipelineState; }

private:
    id<MTLRenderPipelineState> m_pipelineState;
};
| |
// Renders paths to the main render target.
class PLSRenderContextMetalImpl::DrawPipeline
{
public:
    // Precompiled functions are embedded in namespaces. Return the fully qualified name of the
    // desired function, including its namespace.
    // NOTE(review): the precompiledLibrary parameter is unused in this method body — presumably
    // kept for signature symmetry; confirm before removing.
    static NSString* GetPrecompiledFunctionName(DrawType drawType,
                                                ShaderFeatures shaderFeatures,
                                                id<MTLLibrary> precompiledLibrary,
                                                const char* functionBaseName)
    {
        // Each feature corresponds to a specific index in the namespaceID. These must stay in
        // sync with generate_draw_combinations.py.
        // namespaceID[0] encodes the draw type (interiorTriangulation or not); namespaceID[1..6]
        // encode the six shader-feature bits, one character per feature.
        char namespaceID[] = "0000000";
        if (drawType == DrawType::interiorTriangulation)
        {
            namespaceID[0] = '1';
        }
        for (size_t i = 0; i < pls::kShaderFeatureCount; ++i)
        {
            ShaderFeatures feature = static_cast<ShaderFeatures>(1 << i);
            if (shaderFeatures & feature)
            {
                namespaceID[i + 1] = '1';
            }
            // Verify the bit order assumed by the "namespaceID[i + 1]" indexing above.
            static_assert((int)ShaderFeatures::ENABLE_CLIPPING == 1 << 0);
            static_assert((int)ShaderFeatures::ENABLE_CLIP_RECT == 1 << 1);
            static_assert((int)ShaderFeatures::ENABLE_ADVANCED_BLEND == 1 << 2);
            static_assert((int)ShaderFeatures::ENABLE_EVEN_ODD == 1 << 3);
            static_assert((int)ShaderFeatures::ENABLE_NESTED_CLIPPING == 1 << 4);
            static_assert((int)ShaderFeatures::ENABLE_HSL_BLEND_MODES == 1 << 5);
        }

        // Path draws use the 'p' namespace prefix; image meshes use 'm'. The remaining draw
        // types have no precompiled variants.
        char namespacePrefix;
        switch (drawType)
        {
            case DrawType::midpointFanPatches:
            case DrawType::outerCurvePatches:
            case DrawType::interiorTriangulation:
                namespacePrefix = 'p';
                break;
            case DrawType::imageRect:
                RIVE_UNREACHABLE();
            case DrawType::imageMesh:
                namespacePrefix = 'm';
                break;
            case DrawType::plsAtomicInitialize:
            case DrawType::plsAtomicResolve:
            case DrawType::stencilClipReset:
                RIVE_UNREACHABLE();
        }

        return
            [NSString stringWithFormat:@"%c%s::%s", namespacePrefix, namespaceID, functionBaseName];
    }

    // Builds the two pipeline-state variants (RGBA8 and BGRA8 framebuffer formats) for one
    // (drawType, interlockMode, shaderFeatures) combination, using the named vertex/fragment
    // functions from "library".
    DrawPipeline(id<MTLDevice> gpu,
                 id<MTLLibrary> library,
                 NSString* vertexFunctionName,
                 NSString* fragmentFunctionName,
                 pls::DrawType drawType,
                 pls::InterlockMode interlockMode,
                 pls::ShaderFeatures shaderFeatures)
    {
        // Shared pipeline construction, parameterized only on the framebuffer pixel format.
        auto makePipelineState = [=](id<MTLFunction> vertexMain,
                                     id<MTLFunction> fragmentMain,
                                     MTLPixelFormat pixelFormat) {
            MTLRenderPipelineDescriptor* desc = [[MTLRenderPipelineDescriptor alloc] init];
            desc.vertexFunction = vertexMain;
            desc.fragmentFunction = fragmentMain;

            auto* framebuffer = desc.colorAttachments[FRAMEBUFFER_PLANE_IDX];
            framebuffer.pixelFormat = pixelFormat;

            switch (interlockMode)
            {
                case pls::InterlockMode::rasterOrdering:
                    // In rasterOrdering mode, the PLS planes are accessed as color attachments.
                    desc.colorAttachments[COVERAGE_PLANE_IDX].pixelFormat = MTLPixelFormatR32Uint;
                    desc.colorAttachments[CLIP_PLANE_IDX].pixelFormat = MTLPixelFormatR32Uint;
                    desc.colorAttachments[ORIGINAL_DST_COLOR_PLANE_IDX].pixelFormat = pixelFormat;
                    break;
                case pls::InterlockMode::atomics:
                    // In atomic mode, the PLS planes are accessed as device buffers. We only use
                    // the "framebuffer" attachment configured above.
                    if (pls::ShadersEmitColorToRasterPipeline(interlockMode, shaderFeatures))
                    {
                        // The shader expects a "src-over" blend function in order to implement
                        // antialiasing and opacity.
                        framebuffer.blendingEnabled = TRUE;
                        framebuffer.sourceRGBBlendFactor = MTLBlendFactorOne;
                        framebuffer.destinationRGBBlendFactor = MTLBlendFactorOneMinusSourceAlpha;
                        framebuffer.rgbBlendOperation = MTLBlendOperationAdd;
                        framebuffer.sourceAlphaBlendFactor = MTLBlendFactorOne;
                        framebuffer.destinationAlphaBlendFactor = MTLBlendFactorOneMinusSourceAlpha;
                        framebuffer.alphaBlendOperation = MTLBlendOperationAdd;
                        framebuffer.writeMask = MTLColorWriteMaskAll;
                    }
                    else if (drawType == pls::DrawType::plsAtomicResolve)
                    {
                        // We're resolving from the offscreen color buffer to the framebuffer
                        // attachment. Write out the final color directly without any blend modes.
                        framebuffer.blendingEnabled = FALSE;
                        framebuffer.writeMask = MTLColorWriteMaskAll;
                    }
                    else
                    {
                        // This pipeline renders by storing to the offscreen color buffer; disable
                        // writes to the framebuffer attachment.
                        framebuffer.blendingEnabled = FALSE;
                        framebuffer.writeMask = MTLColorWriteMaskNone;
                    }
                    break;
                case pls::InterlockMode::depthStencil:
                    // Not used by this backend's draw pipelines.
                    RIVE_UNREACHABLE();
            }
            return make_pipeline_state(gpu, desc);
        };
        id<MTLFunction> vertexMain = [library newFunctionWithName:vertexFunctionName];
        id<MTLFunction> fragmentMain = [library newFunctionWithName:fragmentFunctionName];
        m_pipelineStateRGBA8 =
            makePipelineState(vertexMain, fragmentMain, MTLPixelFormatRGBA8Unorm);
        m_pipelineStateBGRA8 =
            makePipelineState(vertexMain, fragmentMain, MTLPixelFormatBGRA8Unorm);
    }

    // Returns the pipeline-state variant for the render target's pixel format. RGBA-ordered
    // formats (including sRGB and 16F variants) map to the RGBA8 pipeline; BGRA-ordered
    // formats map to the BGRA8 pipeline.
    id<MTLRenderPipelineState> pipelineState(MTLPixelFormat pixelFormat) const
    {
        assert(pixelFormat == MTLPixelFormatRGBA8Unorm ||
               pixelFormat == MTLPixelFormatRGBA16Float ||
               pixelFormat == MTLPixelFormatRGBA8Unorm_sRGB ||
               pixelFormat == MTLPixelFormatBGRA8Unorm ||
               pixelFormat == MTLPixelFormatBGRA8Unorm_sRGB);

        switch (pixelFormat)
        {
            case MTLPixelFormatRGBA8Unorm_sRGB:
            case MTLPixelFormatRGBA8Unorm:
            case MTLPixelFormatRGBA16Float:
                return m_pipelineStateRGBA8;
            default:
                return m_pipelineStateBGRA8;
        }
    }

private:
    id<MTLRenderPipelineState> m_pipelineStateRGBA8;
    id<MTLRenderPipelineState> m_pipelineStateBGRA8;
};
| |
#ifdef RIVE_IOS
// Reports whether the GPU is in the Apple4 (or later) family. The supportsFamily: query
// itself requires iOS 13, so older systems conservatively report false.
static bool is_apple_ios_silicon(id<MTLDevice> gpu)
{
    bool isApple4OrLater = false;
    if (@available(iOS 13, *))
    {
        isApple4OrLater = [gpu supportsFamily:MTLGPUFamilyApple4];
    }
    return isApple4OrLater;
}
#endif
| |
// A BufferRing backed by CPU-visible (shared storage mode) MTLBuffers.
class BufferRingMetalImpl : public BufferRing
{
public:
    // Returns a new ring, or nullptr when the requested capacity is zero.
    static std::unique_ptr<BufferRingMetalImpl> Make(id<MTLDevice> gpu, size_t capacityInBytes)
    {
        if (capacityInBytes == 0)
        {
            return nullptr;
        }
        return std::make_unique<BufferRingMetalImpl>(gpu, capacityInBytes);
    }

    BufferRingMetalImpl(id<MTLDevice> gpu, size_t capacityInBytes) : BufferRing(capacityInBytes)
    {
        // Allocate one shared-storage MTLBuffer per slot in the ring.
        for (auto& buffer : m_buffers)
        {
            buffer = [gpu newBufferWithLength:capacityInBytes
                                      options:MTLResourceStorageModeShared];
        }
    }

    // The buffer most recently submitted via the BufferRing protocol.
    id<MTLBuffer> submittedBuffer() const { return m_buffers[submittedBufferIdx()]; }

protected:
    // Shared-storage buffers are always CPU-addressable; just hand out the contents pointer.
    void* onMapBuffer(int bufferIdx, size_t mapSizeInBytes) override
    {
        return m_buffers[bufferIdx].contents;
    }

    // Nothing to flush in shared storage mode.
    void onUnmapAndSubmitBuffer(int bufferIdx, size_t mapSizeInBytes) override {}

private:
    id<MTLBuffer> m_buffers[kBufferRingSize];
};
| |
// Creates a PLSRenderContext whose backing implementation targets the given Metal device.
std::unique_ptr<PLSRenderContext> PLSRenderContextMetalImpl::MakeContext(
    id<MTLDevice> gpu, const ContextOptions& contextOptions)
{
    // Wrap the raw pointer manually (rather than make_unique) before handing ownership off to
    // the context.
    std::unique_ptr<PLSRenderContextMetalImpl> impl(
        new PLSRenderContextMetalImpl(gpu, contextOptions));
    return std::make_unique<PLSRenderContext>(std::move(impl));
}
| |
// Probes the device/platform for feature support, loads the precompiled metallib, and
// allocates the fixed GPU resources (tessellation/patch/imageRect buffers) used by every flush.
PLSRenderContextMetalImpl::PLSRenderContextMetalImpl(id<MTLDevice> gpu,
                                                     const ContextOptions& contextOptions) :
    m_contextOptions(contextOptions), m_gpu(gpu)
{
    // It appears, so far, that we don't need to use flat interpolation for path IDs on any Apple
    // device, and it's faster not to.
    m_platformFeatures.avoidFlatVaryings = true;
    m_platformFeatures.invertOffscreenY = true;
#ifdef RIVE_IOS
    m_platformFeatures.supportsRasterOrdering = true;
    if (!is_apple_ios_silicon(m_gpu))
    {
        // The PowerVR GPU, at least on A10, has fp16 precision issues. We can't use the bottom
        // 3 bits of the path and clip IDs in order for our equality testing to work.
        m_platformFeatures.pathIDGranularity = 8;
    }
#elif defined(RIVE_IOS_SIMULATOR)
    // The simulator does not support framebuffer reads. Fall back on atomic mode.
    m_platformFeatures.supportsRasterOrdering = false;
#else
    m_platformFeatures.supportsRasterOrdering =
        [m_gpu supportsFamily:MTLGPUFamilyApple1] && !contextOptions.disableFramebufferReads;
#endif
    m_platformFeatures.atomicPLSMustBeInitializedAsDraw = true;

#ifdef RIVE_IOS
    // Atomic barriers are never used on iOS, but if we ever did need them, we would use
    // rasterOrderGroups.
    m_metalFeatures.atomicBarrierType = AtomicBarrierType::rasterOrderGroup;
#elif defined(RIVE_IOS_SIMULATOR)
    const NXArchInfo* hostArchitecture = NXGetLocalArchInfo();
    if (strncmp(hostArchitecture->name, "arm64", 5) == 0)
    {
        // The simulator doesn't advertise support for raster order groups, but they appear to work
        // anyway on an Apple-Silicon-hosted simulator. Use rasterOrderGroup in this case because
        // it's much faster than renderPassBreak. (On Intel/AMD this doesn't matter anyway because
        // renderPassBreaks are cheap and actually faster than rasterOrderGroups.)
        m_metalFeatures.atomicBarrierType = AtomicBarrierType::rasterOrderGroup;
    }
    else
    {
        m_metalFeatures.atomicBarrierType = AtomicBarrierType::renderPassBreak;
    }
#else
    // Use real memory barriers for atomic mode if they're available.
    // "GPU devices in Apple3 through Apple9 families don’t support memory barriers that include the
    // MTLRenderStages.fragment or .tile stages in the after argument..."
    if (([m_gpu supportsFamily:MTLGPUFamilyCommon2] || [m_gpu supportsFamily:MTLGPUFamilyMac2]) &&
        ![m_gpu supportsFamily:MTLGPUFamilyApple3])
    {
        m_metalFeatures.atomicBarrierType = AtomicBarrierType::memoryBarrier;
    }
    else if (m_gpu.rasterOrderGroupsSupported)
    {
        m_metalFeatures.atomicBarrierType = AtomicBarrierType::rasterOrderGroup;
    }
    else
    {
        m_metalFeatures.atomicBarrierType = AtomicBarrierType::renderPassBreak;
    }
#endif

    m_backgroundShaderCompiler = std::make_unique<BackgroundShaderCompiler>(m_gpu, m_metalFeatures);

    // Load the precompiled shaders.
    dispatch_data_t metallibData = dispatch_data_create(
#ifdef RIVE_IOS
        rive_pls_ios_metallib,
        rive_pls_ios_metallib_len,
#elif defined(RIVE_IOS_SIMULATOR)
        rive_pls_ios_simulator_metallib,
        rive_pls_ios_simulator_metallib_len,
#else
        rive_pls_macosx_metallib,
        rive_pls_macosx_metallib_len,
#endif
        nil,
        nil);
    // The error out-parameter is only written on failure; determine success from the returned
    // library and only print the error when Metal actually provided one.
    NSError* err = nil;
    m_plsPrecompiledLibrary = [m_gpu newLibraryWithData:metallibData error:&err];
    if (m_plsPrecompiledLibrary == nil)
    {
        fprintf(stderr, "Failed to load pls metallib.\n");
        if (err != nil)
        {
            fprintf(stderr, "%s\n", err.localizedDescription.UTF8String);
        }
        exit(-1);
    }

    m_colorRampPipeline = std::make_unique<ColorRampPipeline>(m_gpu, m_plsPrecompiledLibrary);
    m_tessPipeline = std::make_unique<TessellatePipeline>(m_gpu, m_plsPrecompiledLibrary);
    m_tessSpanIndexBuffer = [m_gpu newBufferWithBytes:pls::kTessSpanIndices
                                               length:sizeof(pls::kTessSpanIndices)
                                              options:MTLResourceStorageModeShared];

    // The precompiled static library has a fully-featured shader for each drawType in
    // "rasterOrdering" mode. We load these at initialization and use them while waiting for the
    // background compiler to generate more specialized, higher performance shaders.
    if (m_platformFeatures.supportsRasterOrdering)
    {
        for (auto drawType :
             {DrawType::midpointFanPatches, DrawType::interiorTriangulation, DrawType::imageMesh})
        {
            pls::ShaderFeatures allShaderFeatures =
                pls::ShaderFeaturesMaskFor(drawType, pls::InterlockMode::rasterOrdering);
            uint32_t pipelineKey = ShaderUniqueKey(drawType,
                                                   allShaderFeatures,
                                                   pls::InterlockMode::rasterOrdering,
                                                   pls::ShaderMiscFlags::none);
            // Vertex shaders only care about the vertex-relevant subset of the feature bits.
            m_drawPipelines[pipelineKey] = std::make_unique<DrawPipeline>(
                m_gpu,
                m_plsPrecompiledLibrary,
                DrawPipeline::GetPrecompiledFunctionName(drawType,
                                                         allShaderFeatures &
                                                             pls::kVertexShaderFeaturesMask,
                                                         m_plsPrecompiledLibrary,
                                                         GLSL_drawVertexMain),
                DrawPipeline::GetPrecompiledFunctionName(
                    drawType, allShaderFeatures, m_plsPrecompiledLibrary, GLSL_drawFragmentMain),
                drawType,
                pls::InterlockMode::rasterOrdering,
                allShaderFeatures);
        }
    }

    // Create vertex and index buffers for the different PLS patches.
    m_pathPatchVertexBuffer =
        [m_gpu newBufferWithLength:kPatchVertexBufferCount * sizeof(PatchVertex)
                           options:MTLResourceStorageModeShared];
    m_pathPatchIndexBuffer = [m_gpu newBufferWithLength:kPatchIndexBufferCount * sizeof(uint16_t)
                                                options:MTLResourceStorageModeShared];
    GeneratePatchBufferData(reinterpret_cast<PatchVertex*>(m_pathPatchVertexBuffer.contents),
                            reinterpret_cast<uint16_t*>(m_pathPatchIndexBuffer.contents));

    // Set up the imageRect rendering buffers. (pls::InterlockMode::atomics only.)
    m_imageRectVertexBuffer = [m_gpu newBufferWithBytes:pls::kImageRectVertices
                                                 length:sizeof(pls::kImageRectVertices)
                                                options:MTLResourceStorageModeShared];
    m_imageRectIndexBuffer = [m_gpu newBufferWithBytes:pls::kImageRectIndices
                                                length:sizeof(pls::kImageRectIndices)
                                               options:MTLResourceStorageModeShared];
}
| |
// Nothing explicit to release: ARC and the smart-pointer members handle all cleanup.
PLSRenderContextMetalImpl::~PLSRenderContextMetalImpl() {}
| |
// If the GPU supports framebuffer reads (called "programmable blending" in the feature tables),
// PLS planes besides the main framebuffer can exist in ephemeral "memoryless" storage. This
// means their contents are never actually written to main memory, and they only exist in fast
// tiled memory.
static id<MTLTexture> make_pls_memoryless_texture(id<MTLDevice> gpu,
                                                  MTLPixelFormat pixelFormat,
                                                  uint32_t width,
                                                  uint32_t height)
{
    MTLTextureDescriptor* textureDesc = [[MTLTextureDescriptor alloc] init];
    textureDesc.pixelFormat = pixelFormat;
    textureDesc.width = width;
    textureDesc.height = height;
    textureDesc.textureType = MTLTextureType2D;
    textureDesc.mipmapLevelCount = 1;
    // Memoryless contents never leave tile memory, so render-target usage is all we need.
    textureDesc.usage = MTLTextureUsageRenderTarget;
    textureDesc.storageMode = MTLStorageModeMemoryless;
    return [gpu newTextureWithDescriptor:textureDesc];
}
| |
PLSRenderTargetMetal::PLSRenderTargetMetal(id<MTLDevice> gpu,
                                           MTLPixelFormat pixelFormat,
                                           uint32_t width,
                                           uint32_t height,
                                           const PlatformFeatures& platformFeatures) :
    PLSRenderTarget(width, height), m_gpu(gpu), m_pixelFormat(pixelFormat)
{
    // The client supplies the actual target texture later via setTargetTexture().
    m_targetTexture = nil;
    if (!platformFeatures.supportsRasterOrdering)
    {
        // Without raster ordering there are no memoryless PLS planes to allocate.
        return;
    }
    // Raster ordering accesses the PLS planes as color attachments, which can live entirely in
    // memoryless (tiled) storage.
    m_coverageMemorylessTexture =
        make_pls_memoryless_texture(gpu, MTLPixelFormatR32Uint, width, height);
    m_clipMemorylessTexture =
        make_pls_memoryless_texture(gpu, MTLPixelFormatR32Uint, width, height);
    m_originalDstColorMemorylessTexture =
        make_pls_memoryless_texture(gpu, m_pixelFormat, width, height);
}
| |
// Attaches (or detaches, when nil) the texture that draws will ultimately render into.
void PLSRenderTargetMetal::setTargetTexture(id<MTLTexture> texture)
{
    if (texture != nil)
    {
        assert(compatibleWith(texture));
    }
    m_targetTexture = texture;
}
| |
// Creates a render target sized and formatted for this context's device.
rcp<PLSRenderTargetMetal> PLSRenderContextMetalImpl::makeRenderTarget(MTLPixelFormat pixelFormat,
                                                                      uint32_t width,
                                                                      uint32_t height)
{
    auto* renderTarget =
        new PLSRenderTargetMetal(m_gpu, pixelFormat, width, height, m_platformFeatures);
    return rcp(renderTarget);
}
| |
// A RenderBuffer backed by a ring of CPU-visible MTLBuffers (or a single buffer when the
// contents are only ever mapped once).
class RenderBufferMetalImpl : public lite_rtti_override<RenderBuffer, RenderBufferMetalImpl>
{
public:
    RenderBufferMetalImpl(RenderBufferType renderBufferType,
                          RenderBufferFlags renderBufferFlags,
                          size_t sizeInBytes,
                          id<MTLDevice> gpu) :
        lite_rtti_override(renderBufferType, renderBufferFlags, sizeInBytes), m_gpu(gpu)
    {
        // A buffer that is mapped only once at initialization never has multiple copies in
        // flight, so one backing buffer suffices; otherwise allocate the full ring.
        int bufferCount = pls::kBufferRingSize;
        if (flags() & RenderBufferFlags::mappedOnceAtInitialization)
        {
            bufferCount = 1;
        }
        for (int i = 0; i < bufferCount; ++i)
        {
            m_buffers[i] = [gpu newBufferWithLength:sizeInBytes
                                            options:MTLResourceStorageModeShared];
        }
    }

    // The buffer most recently handed out by onMap().
    id<MTLBuffer> submittedBuffer() const { return m_buffers[m_submittedBufferIdx]; }

protected:
    void* onMap() override
    {
        // Advance to the next slot in the ring before handing out its contents. (Asserts if a
        // mapped-once buffer gets mapped a second time, since slot 1+ was never allocated.)
        m_submittedBufferIdx = (m_submittedBufferIdx + 1) % pls::kBufferRingSize;
        assert(m_buffers[m_submittedBufferIdx] != nil);
        return m_buffers[m_submittedBufferIdx].contents;
    }

    // Shared storage mode needs no explicit flush on unmap.
    void onUnmap() override {}

private:
    id<MTLDevice> m_gpu;
    id<MTLBuffer> m_buffers[pls::kBufferRingSize];
    int m_submittedBufferIdx = -1;
};
| |
// Factory for client-facing vertex/index render buffers.
rcp<RenderBuffer> PLSRenderContextMetalImpl::makeRenderBuffer(RenderBufferType type,
                                                              RenderBufferFlags flags,
                                                              size_t sizeInBytes)
{
    return make_rcp<RenderBufferMetalImpl>(type, flags, sizeInBytes, m_gpu);
}
| |
// An RGBA8 image texture whose mipmap chain is generated lazily on the GPU.
class PLSTextureMetalImpl : public PLSTexture
{
public:
    PLSTextureMetalImpl(id<MTLDevice> gpu,
                        uint32_t width,
                        uint32_t height,
                        uint32_t mipLevelCount,
                        const uint8_t imageDataRGBA[]) :
        PLSTexture(width, height)
    {
        // Allocate a shader-readable RGBA8 texture with the requested mipmap chain.
        MTLTextureDescriptor* textureDesc = [[MTLTextureDescriptor alloc] init];
        textureDesc.pixelFormat = MTLPixelFormatRGBA8Unorm;
        textureDesc.width = width;
        textureDesc.height = height;
        textureDesc.mipmapLevelCount = mipLevelCount;
        textureDesc.usage = MTLTextureUsageShaderRead;
        textureDesc.storageMode = MTLStorageModeShared;
        textureDesc.textureType = MTLTextureType2D;
        m_texture = [gpu newTextureWithDescriptor:textureDesc];

        // Upload level 0 now; the remaining levels are filled in by ensureMipmaps().
        [m_texture replaceRegion:MTLRegionMake2D(0, 0, width, height)
                     mipmapLevel:0
                       withBytes:imageDataRGBA
                     bytesPerRow:width * 4];
    }

    // Encodes a GPU mipmap-generation pass if the mips haven't been generated yet.
    void ensureMipmaps(id<MTLCommandBuffer> commandBuffer) const
    {
        if (!m_mipsDirty)
        {
            return;
        }
        id<MTLBlitCommandEncoder> blitEncoder = [commandBuffer blitCommandEncoder];
        [blitEncoder generateMipmapsForTexture:m_texture];
        [blitEncoder endEncoding];
        m_mipsDirty = false;
    }

    id<MTLTexture> texture() const { return m_texture; }

private:
    id<MTLTexture> m_texture;
    mutable bool m_mipsDirty = true;
};
| |
// Factory for image textures; uploads level 0 immediately and defers mip generation.
rcp<PLSTexture> PLSRenderContextMetalImpl::makeImageTexture(uint32_t width,
                                                            uint32_t height,
                                                            uint32_t mipLevelCount,
                                                            const uint8_t imageDataRGBA[])
{
    return make_rcp<PLSTextureMetalImpl>(m_gpu, width, height, mipLevelCount, imageDataRGBA);
}
| |
// Uniform data uses a standard shared-storage buffer ring. Returns nullptr for 0 capacity.
std::unique_ptr<BufferRing> PLSRenderContextMetalImpl::makeUniformBufferRing(size_t capacityInBytes)
{
    return BufferRingMetalImpl::Make(m_gpu, capacityInBytes);
}
| |
// Storage buffers are plain MTLBuffers in this backend, so the structure layout parameter is
// unused. Returns nullptr for 0 capacity.
std::unique_ptr<BufferRing> PLSRenderContextMetalImpl::makeStorageBufferRing(
    size_t capacityInBytes, pls::StorageBufferStructure)
{
    return BufferRingMetalImpl::Make(m_gpu, capacityInBytes);
}
| |
// Vertex data uses a standard shared-storage buffer ring. Returns nullptr for 0 capacity.
std::unique_ptr<BufferRing> PLSRenderContextMetalImpl::makeVertexBufferRing(size_t capacityInBytes)
{
    return BufferRingMetalImpl::Make(m_gpu, capacityInBytes);
}
| |
// Texture-upload staging data uses a standard shared-storage buffer ring. Returns nullptr for
// 0 capacity.
std::unique_ptr<BufferRing> PLSRenderContextMetalImpl::makeTextureTransferBufferRing(
    size_t capacityInBytes)
{
    return BufferRingMetalImpl::Make(m_gpu, capacityInBytes);
}
| |
// Allocates a 2D, GPU-private texture that can be both rendered to and read from shaders, or
// returns nil if either dimension is 0. Shared by the gradient and tessellation textures,
// which differ only in pixel format.
static id<MTLTexture> make_shader_read_render_texture(id<MTLDevice> gpu,
                                                      MTLPixelFormat pixelFormat,
                                                      uint32_t width,
                                                      uint32_t height)
{
    if (width == 0 || height == 0)
    {
        return nil;
    }
    MTLTextureDescriptor* desc = [[MTLTextureDescriptor alloc] init];
    desc.pixelFormat = pixelFormat;
    desc.width = width;
    desc.height = height;
    desc.usage = MTLTextureUsageRenderTarget | MTLTextureUsageShaderRead;
    desc.textureType = MTLTextureType2D;
    desc.mipmapLevelCount = 1;
    desc.storageMode = MTLStorageModePrivate;
    return [gpu newTextureWithDescriptor:desc];
}

// (Re)allocates the RGBA8 gradient texture, releasing it when either dimension is 0.
void PLSRenderContextMetalImpl::resizeGradientTexture(uint32_t width, uint32_t height)
{
    m_gradientTexture =
        make_shader_read_render_texture(m_gpu, MTLPixelFormatRGBA8Unorm, width, height);
}

// (Re)allocates the RGBA32Uint tessellation texture, releasing it when either dimension is 0.
void PLSRenderContextMetalImpl::resizeTessellationTexture(uint32_t width, uint32_t height)
{
    m_tessVertexTexture =
        make_shader_read_render_texture(m_gpu, MTLPixelFormatRGBA32Uint, width, height);
}
| |
// Returns a pipeline that can render the requested (drawType, shaderFeatures, interlockMode,
// shaderMiscFlags) combination. If the exact pipeline hasn't finished compiling in the
// background yet, schedules it (if needed) and falls back on a compatible superset-of-features
// pipeline so rendering never stalls (unless synchronous compilation was requested).
const PLSRenderContextMetalImpl::DrawPipeline* PLSRenderContextMetalImpl::
    findCompatibleDrawPipeline(pls::DrawType drawType,
                               pls::ShaderFeatures shaderFeatures,
                               pls::InterlockMode interlockMode,
                               pls::ShaderMiscFlags shaderMiscFlags)
{
    uint32_t pipelineKey =
        pls::ShaderUniqueKey(drawType, shaderFeatures, interlockMode, shaderMiscFlags);
    auto pipelineIter = m_drawPipelines.find(pipelineKey);
    if (pipelineIter == m_drawPipelines.end())
    {
        // The shader for this pipeline hasn't been scheduled for compiling yet. Schedule it to
        // compile in the background.
        m_backgroundShaderCompiler->pushJob({
            .drawType = drawType,
            .shaderFeatures = shaderFeatures,
            .interlockMode = interlockMode,
            .shaderMiscFlags = shaderMiscFlags,
        });
        // Insert a null placeholder so the same job is never scheduled twice.
        pipelineIter = m_drawPipelines.insert({pipelineKey, nullptr}).first;
    }

    if (pipelineIter->second != nullptr)
    {
        // The pipeline is fully compiled and loaded.
        return pipelineIter->second.get();
    }

    // The shader for this pipeline hasn't finished compiling yet. Start by finding a fully-featured
    // superset of features whose pipeline we can fall back on while waiting for it to compile.
    ShaderFeatures fullyFeaturedPipelineFeatures =
        pls::ShaderFeaturesMaskFor(drawType, interlockMode);
    if (interlockMode == pls::InterlockMode::atomics)
    {
        // Never add ENABLE_ADVANCED_BLEND to an atomic pipeline that doesn't use advanced blend,
        // since in atomic mode, the shaders behave differently depending on whether advanced blend
        // is enabled.
        fullyFeaturedPipelineFeatures &= shaderFeatures | ~ShaderFeatures::ENABLE_ADVANCED_BLEND;
        // Never add ENABLE_CLIPPING to an atomic pipeline that doesn't use clipping; it will cause
        // a "missing buffer binding" validation error because the shader will define an (unused)
        // clipBuffer, but we won't bind anything to it.
        fullyFeaturedPipelineFeatures &= shaderFeatures | ~ShaderFeatures::ENABLE_CLIPPING;
    }
    shaderFeatures &= fullyFeaturedPipelineFeatures;

    // Fully-featured "rasterOrdering" pipelines should have already been pre-loaded from the static
    // library.
    assert(shaderFeatures != fullyFeaturedPipelineFeatures ||
           interlockMode != pls::InterlockMode::rasterOrdering);

    // Poll to see if the shader is actually done compiling, but only wait if it's a fully-featured
    // pipeline. Otherwise, we can fall back on the fully-featured pipeline while we wait for
    // compilation.
    BackgroundCompileJob job;
    bool shouldWaitForBackgroundCompilation = shaderFeatures == fullyFeaturedPipelineFeatures ||
                                              m_contextOptions.synchronousShaderCompilations;
    while (m_backgroundShaderCompiler->popFinishedJob(&job, shouldWaitForBackgroundCompilation))
    {
        // Build every finished job into a pipeline, not just the one we're looking for.
        uint32_t jobKey = pls::ShaderUniqueKey(
            job.drawType, job.shaderFeatures, job.interlockMode, job.shaderMiscFlags);
        m_drawPipelines[jobKey] = std::make_unique<DrawPipeline>(m_gpu,
                                                                 job.compiledLibrary,
                                                                 @GLSL_drawVertexMain,
                                                                 @GLSL_drawFragmentMain,
                                                                 job.drawType,
                                                                 job.interlockMode,
                                                                 job.shaderFeatures);
        if (jobKey == pipelineKey)
        {
            // The shader we wanted was actually done compiling and pending being built into a
            // pipeline.
            return pipelineIter->second.get();
        }
    }

    // The shader for this feature set hasn't finished compiling. Use the pipeline that has
    // all features enabled while we wait for it to finish.
    assert(shaderFeatures != fullyFeaturedPipelineFeatures);
    return findCompatibleDrawPipeline(
        drawType, fullyFeaturedPipelineFeatures, interlockMode, shaderMiscFlags);
}
| |
// Advances to the next slot in the buffer rings before the CPU starts writing new frame data.
void PLSRenderContextMetalImpl::prepareToMapBuffers()
{
    // Wait until the GPU finishes rendering flush "N + 1 - kBufferRingSize". This ensures it
    // is safe for the CPU to begin modifying the next buffers in our rings.
    // (The lock is presumably released when that GPU flush completes -- the release site is
    // outside this chunk; confirm in the flush/completion handler.)
    m_bufferRingIdx = (m_bufferRingIdx + 1) % kBufferRingSize;
    m_bufferRingLocks[m_bufferRingIdx].lock();
}
| |
// Fetches the currently-submitted MTLBuffer from a BufferRing, which is always a
// BufferRingMetalImpl in this backend.
static id<MTLBuffer> mtl_buffer(const BufferRing* bufferRing)
{
    assert(bufferRing != nullptr);
    auto* metalRing = static_cast<const BufferRingMetalImpl*>(bufferRing);
    return metalRing->submittedBuffer();
}
| |
// Builds an MTLViewport covering the given rectangle, with the full [0, 1] depth range.
static MTLViewport make_viewport(uint32_t x, uint32_t y, uint32_t width, uint32_t height)
{
    MTLViewport viewport;
    viewport.originX = x;
    viewport.originY = y;
    viewport.width = width;
    viewport.height = height;
    viewport.znear = 0;
    viewport.zfar = 1;
    return viewport;
}
| |
// Begins the render pass for a flush's draws and binds all the per-flush state: viewport,
// flush uniforms, tessellation/gradient textures, per-draw data buffers, and (in atomic mode)
// the PLS plane buffers. The caller encodes the draws and ends encoding.
id<MTLRenderCommandEncoder> PLSRenderContextMetalImpl::makeRenderPassForDraws(
    const pls::FlushDescriptor& flushDesc,
    MTLRenderPassDescriptor* passDesc,
    id<MTLCommandBuffer> commandBuffer)
{
    auto* renderTarget = static_cast<PLSRenderTargetMetal*>(flushDesc.renderTarget);

    id<MTLRenderCommandEncoder> encoder =
        [commandBuffer renderCommandEncoderWithDescriptor:passDesc];

    [encoder setViewport:make_viewport(0, 0, renderTarget->width(), renderTarget->height())];
    // The flush uniforms are read by both vertex and fragment stages.
    [encoder setVertexBuffer:mtl_buffer(flushUniformBufferRing())
                      offset:flushDesc.flushUniformDataOffsetInBytes
                     atIndex:FLUSH_UNIFORM_BUFFER_IDX];
    [encoder setFragmentBuffer:mtl_buffer(flushUniformBufferRing())
                        offset:flushDesc.flushUniformDataOffsetInBytes
                       atIndex:FLUSH_UNIFORM_BUFFER_IDX];
    [encoder setVertexTexture:m_tessVertexTexture atIndex:TESS_VERTEX_TEXTURE_IDX];
    [encoder setFragmentTexture:m_gradientTexture atIndex:GRAD_TEXTURE_IDX];
    if (flushDesc.pathCount > 0)
    {
        // Each buffer is bound at an offset to this flush's first record.
        [encoder setVertexBuffer:mtl_buffer(pathBufferRing())
                          offset:flushDesc.firstPath * sizeof(pls::PathData)
                         atIndex:PATH_BUFFER_IDX];
        // Paint data is consumed by the fragment stage in atomic mode, and by the vertex stage
        // otherwise.
        if (flushDesc.interlockMode == pls::InterlockMode::atomics)
        {
            [encoder setFragmentBuffer:mtl_buffer(paintBufferRing())
                                offset:flushDesc.firstPaint * sizeof(pls::PaintData)
                               atIndex:PAINT_BUFFER_IDX];
            [encoder setFragmentBuffer:mtl_buffer(paintAuxBufferRing())
                                offset:flushDesc.firstPaintAux * sizeof(pls::PaintAuxData)
                               atIndex:PAINT_AUX_BUFFER_IDX];
        }
        else
        {
            [encoder setVertexBuffer:mtl_buffer(paintBufferRing())
                              offset:flushDesc.firstPaint * sizeof(pls::PaintData)
                             atIndex:PAINT_BUFFER_IDX];
            [encoder setVertexBuffer:mtl_buffer(paintAuxBufferRing())
                              offset:flushDesc.firstPaintAux * sizeof(pls::PaintAuxData)
                             atIndex:PAINT_AUX_BUFFER_IDX];
        }
    }
    if (flushDesc.contourCount > 0)
    {
        [encoder setVertexBuffer:mtl_buffer(contourBufferRing())
                          offset:flushDesc.firstContour * sizeof(pls::ContourData)
                         atIndex:CONTOUR_BUFFER_IDX];
    }
    if (flushDesc.interlockMode == pls::InterlockMode::atomics)
    {
        // In atomic mode, the PLS planes are buffers that we need to bind separately.
        // Since the PLS plane indices collide with other buffer bindings, offset the binding
        // indices of these buffers by DEFAULT_BINDINGS_SET_SIZE.
        if (!pls::ShadersEmitColorToRasterPipeline(flushDesc.interlockMode,
                                                   flushDesc.combinedShaderFeatures))
        {
            // Color is accumulated in an offscreen buffer instead of the raster pipeline.
            [encoder setFragmentBuffer:renderTarget->colorAtomicBuffer()
                                offset:0
                               atIndex:FRAMEBUFFER_PLANE_IDX + DEFAULT_BINDINGS_SET_SIZE];
        }
        [encoder setFragmentBuffer:renderTarget->coverageAtomicBuffer()
                            offset:0
                           atIndex:COVERAGE_PLANE_IDX + DEFAULT_BINDINGS_SET_SIZE];
        if (flushDesc.combinedShaderFeatures & pls::ShaderFeatures::ENABLE_CLIPPING)
        {
            // Only bound when clipping is in use; the shader omits clipBuffer otherwise.
            [encoder setFragmentBuffer:renderTarget->clipAtomicBuffer()
                                offset:0
                               atIndex:CLIP_PLANE_IDX + DEFAULT_BINDINGS_SET_SIZE];
        }
    }
    if (flushDesc.wireframe)
    {
        [encoder setTriangleFillMode:MTLTriangleFillModeLines];
    }
    return encoder;
}
| |
// Encodes one full flush into the caller-provided command buffer:
//   1. render complex color ramps into the gradient texture,
//   2. blit pre-baked simple color ramps into the gradient texture,
//   3. tessellate curves into the tessellation texture,
//   4. pre-generate image mipmaps,
//   5. execute the draw list inside a pixel-local-storage render pass.
// The command buffer is only encoded here, never committed; the caller owns submission.
void PLSRenderContextMetalImpl::flush(const FlushDescriptor& desc)
{
    auto* renderTarget = static_cast<PLSRenderTargetMetal*>(desc.renderTarget);
    id<MTLCommandBuffer> commandBuffer = (__bridge id<MTLCommandBuffer>)desc.externalCommandBuffer;

    // Render the complex color ramps to the gradient texture.
    if (desc.complexGradSpanCount > 0)
    {
        MTLRenderPassDescriptor* gradPass = [MTLRenderPassDescriptor renderPassDescriptor];
        gradPass.renderTargetWidth = kGradTextureWidth;
        gradPass.renderTargetHeight = desc.complexGradRowsTop + desc.complexGradRowsHeight;
        gradPass.colorAttachments[0].loadAction = MTLLoadActionDontCare;
        gradPass.colorAttachments[0].storeAction = MTLStoreActionStore;
        gradPass.colorAttachments[0].texture = m_gradientTexture;

        id<MTLRenderCommandEncoder> gradEncoder =
            [commandBuffer renderCommandEncoderWithDescriptor:gradPass];
        // Only the rows assigned to complex gradients are rendered; the viewport confines
        // rasterization to that band of the gradient texture.
        [gradEncoder setViewport:make_viewport(0,
                                               static_cast<double>(desc.complexGradRowsTop),
                                               kGradTextureWidth,
                                               static_cast<float>(desc.complexGradRowsHeight))];
        [gradEncoder setRenderPipelineState:m_colorRampPipeline->pipelineState()];
        [gradEncoder setVertexBuffer:mtl_buffer(flushUniformBufferRing())
                              offset:desc.flushUniformDataOffsetInBytes
                             atIndex:FLUSH_UNIFORM_BUFFER_IDX];
        [gradEncoder setVertexBuffer:mtl_buffer(gradSpanBufferRing())
                              offset:desc.firstComplexGradSpan * sizeof(pls::GradientSpan)
                             atIndex:0];
        [gradEncoder setCullMode:MTLCullModeBack];
        // One instanced quad per gradient span.
        [gradEncoder drawPrimitives:MTLPrimitiveTypeTriangleStrip
                        vertexStart:0
                        vertexCount:4
                      instanceCount:desc.complexGradSpanCount];
        [gradEncoder endEncoding];
    }

    // Copy the simple color ramps to the gradient texture.
    if (desc.simpleGradTexelsHeight > 0)
    {
        id<MTLBlitCommandEncoder> textureBlitEncoder = [commandBuffer blitCommandEncoder];
        [textureBlitEncoder
                 copyFromBuffer:mtl_buffer(simpleColorRampsBufferRing())
                   sourceOffset:desc.simpleGradDataOffsetInBytes
              sourceBytesPerRow:kGradTextureWidth * 4
            sourceBytesPerImage:desc.simpleGradTexelsHeight * kGradTextureWidth * 4
                     sourceSize:MTLSizeMake(
                                    desc.simpleGradTexelsWidth, desc.simpleGradTexelsHeight, 1)
                      toTexture:m_gradientTexture
               destinationSlice:0
               destinationLevel:0
              destinationOrigin:MTLOriginMake(0, 0, 0)];
        [textureBlitEncoder endEncoding];
    }

    // Tessellate all curves into vertices in the tessellation texture.
    if (desc.tessVertexSpanCount > 0)
    {
        MTLRenderPassDescriptor* tessPass = [MTLRenderPassDescriptor renderPassDescriptor];
        tessPass.renderTargetWidth = kTessTextureWidth;
        tessPass.renderTargetHeight = desc.tessDataHeight;
        tessPass.colorAttachments[0].loadAction = MTLLoadActionDontCare;
        tessPass.colorAttachments[0].storeAction = MTLStoreActionStore;
        tessPass.colorAttachments[0].texture = m_tessVertexTexture;

        id<MTLRenderCommandEncoder> tessEncoder =
            [commandBuffer renderCommandEncoderWithDescriptor:tessPass];
        [tessEncoder setViewport:make_viewport(0, 0, kTessTextureWidth, desc.tessDataHeight)];
        [tessEncoder setRenderPipelineState:m_tessPipeline->pipelineState()];
        [tessEncoder setVertexBuffer:mtl_buffer(flushUniformBufferRing())
                              offset:desc.flushUniformDataOffsetInBytes
                             atIndex:FLUSH_UNIFORM_BUFFER_IDX];
        [tessEncoder setVertexBuffer:mtl_buffer(tessSpanBufferRing())
                              offset:desc.firstTessVertexSpan * sizeof(pls::TessVertexSpan)
                             atIndex:0];
        // The tessellation shader reads path and contour records; those buffers are only
        // populated when there is at least one of each.
        assert(desc.pathCount > 0);
        [tessEncoder setVertexBuffer:mtl_buffer(pathBufferRing())
                              offset:desc.firstPath * sizeof(pls::PathData)
                             atIndex:PATH_BUFFER_IDX];
        assert(desc.contourCount > 0);
        [tessEncoder setVertexBuffer:mtl_buffer(contourBufferRing())
                              offset:desc.firstContour * sizeof(pls::ContourData)
                             atIndex:CONTOUR_BUFFER_IDX];
        [tessEncoder setCullMode:MTLCullModeBack];
        // One instanced index-buffer draw per tessellation span.
        [tessEncoder drawIndexedPrimitives:MTLPrimitiveTypeTriangle
                                indexCount:std::size(pls::kTessSpanIndices)
                                 indexType:MTLIndexTypeUInt16
                               indexBuffer:m_tessSpanIndexBuffer
                         indexBufferOffset:0
                             instanceCount:desc.tessVertexSpanCount];
        [tessEncoder endEncoding];
    }

    // Generate mipmaps if needed. (Must happen before the main render pass begins, since
    // mipmap generation uses its own blit encoder.)
    for (const DrawBatch& batch : *desc.drawList)
    {
        // Bind the appropriate image texture, if any.
        if (auto imageTextureMetal = static_cast<const PLSTextureMetalImpl*>(batch.imageTexture))
        {
            imageTextureMetal->ensureMipmaps(commandBuffer);
        }
    }

    // Set up a render pass to do the final rendering using (some form of) pixel local storage.
    MTLRenderPassDescriptor* pass = [MTLRenderPassDescriptor renderPassDescriptor];
    pass.renderTargetWidth = desc.renderTargetUpdateBounds.right;
    pass.renderTargetHeight = desc.renderTargetUpdateBounds.bottom;
    pass.colorAttachments[FRAMEBUFFER_PLANE_IDX].texture = renderTarget->targetTexture();
    switch (desc.colorLoadAction)
    {
        case pls::LoadAction::clear:
        {
            float cc[4];
            UnpackColorToRGBA32F(desc.clearColor, cc);
            pass.colorAttachments[FRAMEBUFFER_PLANE_IDX].loadAction = MTLLoadActionClear;
            pass.colorAttachments[FRAMEBUFFER_PLANE_IDX].clearColor =
                MTLClearColorMake(cc[0], cc[1], cc[2], cc[3]);
            break;
        }
        case pls::LoadAction::preserveRenderTarget:
            pass.colorAttachments[FRAMEBUFFER_PLANE_IDX].loadAction = MTLLoadActionLoad;
            break;
        case pls::LoadAction::dontCare:
            pass.colorAttachments[FRAMEBUFFER_PLANE_IDX].loadAction = MTLLoadActionDontCare;
            break;
    }
    pass.colorAttachments[FRAMEBUFFER_PLANE_IDX].storeAction = MTLStoreActionStore;

    // In atomic mode, advanced blends have to render through an offscreen color buffer in order to
    // read destination color. This offscreen color buffer gets transferred to the main framebuffer
    // during the final "plsAtomicResolve" operation.
    bool usesOffscreenColorBuffer = false;

    if (desc.interlockMode == pls::InterlockMode::rasterOrdering)
    {
        // In rasterOrdering mode, the PLS planes are accessed as color attachments.
        // FIXED: the storeActions below previously used
        // "interlockMode == atomics ? Store : DontCare", which is unreachable dead code here --
        // this branch has already established interlockMode == rasterOrdering, so the ternary
        // always evaluated to DontCare. These planes are memoryless scratch textures that only
        // live for the duration of the render pass, so DontCare is the correct (and only valid)
        // store action.
        pass.colorAttachments[COVERAGE_PLANE_IDX].texture =
            renderTarget->m_coverageMemorylessTexture;
        pass.colorAttachments[COVERAGE_PLANE_IDX].loadAction = MTLLoadActionClear;
        pass.colorAttachments[COVERAGE_PLANE_IDX].clearColor =
            MTLClearColorMake(desc.coverageClearValue, 0, 0, 0);
        pass.colorAttachments[COVERAGE_PLANE_IDX].storeAction = MTLStoreActionDontCare;

        pass.colorAttachments[CLIP_PLANE_IDX].texture = renderTarget->m_clipMemorylessTexture;
        pass.colorAttachments[CLIP_PLANE_IDX].loadAction = MTLLoadActionClear;
        pass.colorAttachments[CLIP_PLANE_IDX].clearColor = MTLClearColorMake(0, 0, 0, 0);
        pass.colorAttachments[CLIP_PLANE_IDX].storeAction = MTLStoreActionDontCare;

        pass.colorAttachments[ORIGINAL_DST_COLOR_PLANE_IDX].texture =
            renderTarget->m_originalDstColorMemorylessTexture;
        pass.colorAttachments[ORIGINAL_DST_COLOR_PLANE_IDX].loadAction = MTLLoadActionDontCare;
        pass.colorAttachments[ORIGINAL_DST_COLOR_PLANE_IDX].storeAction = MTLStoreActionDontCare;
    }
    else
    {
        assert(desc.interlockMode == pls::InterlockMode::atomics);
        usesOffscreenColorBuffer =
            !pls::ShadersEmitColorToRasterPipeline(desc.interlockMode, desc.combinedShaderFeatures);
        if (usesOffscreenColorBuffer &&
            desc.colorLoadAction == pls::LoadAction::preserveRenderTarget)
        {
            // Since we need to preserve the renderTarget during load, and since we're rendering
            // to an offscreen color buffer, we have to literally copy the renderTarget into the
            // color buffer.
            id<MTLBlitCommandEncoder> copyEncoder = [commandBuffer blitCommandEncoder];
            auto updateOrigin = MTLOriginMake(
                desc.renderTargetUpdateBounds.left, desc.renderTargetUpdateBounds.top, 0);
            auto updateSize = MTLSizeMake(
                desc.renderTargetUpdateBounds.width(), desc.renderTargetUpdateBounds.height(), 1);
            // The atomic color buffer is laid out as a full-width uint32 image; only the dirty
            // update rectangle is copied.
            [copyEncoder copyFromTexture:renderTarget->targetTexture()
                             sourceSlice:0
                             sourceLevel:0
                            sourceOrigin:updateOrigin
                              sourceSize:updateSize
                                toBuffer:renderTarget->colorAtomicBuffer()
                       destinationOffset:(updateOrigin.y * renderTarget->width() + updateOrigin.x) *
                                         sizeof(uint32_t)
                  destinationBytesPerRow:renderTarget->width() * sizeof(uint32_t)
                destinationBytesPerImage:renderTarget->height() * renderTarget->width() *
                                         sizeof(uint32_t)];
            [copyEncoder endEncoding];
        }
    }

    // Execute the DrawList.
    id<MTLRenderCommandEncoder> encoder = makeRenderPassForDraws(desc, pass, commandBuffer);
    for (const DrawBatch& batch : *desc.drawList)
    {
        if (batch.elementCount == 0)
        {
            continue;
        }

        // Setup the pipeline for this specific drawType and shaderFeatures. Atomic mode compiles
        // one uber-shader per flush, so it uses the combined feature set.
        pls::ShaderFeatures shaderFeatures = desc.interlockMode == pls::InterlockMode::atomics
                                                 ? desc.combinedShaderFeatures
                                                 : batch.shaderFeatures;
        pls::ShaderMiscFlags shaderMiscFlags = pls::ShaderMiscFlags::none;
        if (usesOffscreenColorBuffer)
        {
            if (batch.drawType == pls::DrawType::plsAtomicResolve)
            {
                // Atomic mode can always do a coalesced resolve when rendering to its offscreen
                // color buffer.
                shaderMiscFlags |= pls::ShaderMiscFlags::coalescedResolveAndTransfer;
            }
            else if (batch.drawType == pls::DrawType::plsAtomicInitialize)
            {
                if (desc.colorLoadAction == pls::LoadAction::clear)
                {
                    shaderMiscFlags |= pls::ShaderMiscFlags::storeColorClear;
                }
                else if (desc.colorLoadAction == pls::LoadAction::preserveRenderTarget &&
                         renderTarget->pixelFormat() == MTLPixelFormatBGRA8Unorm)
                {
                    // We already copied the renderTarget to our color buffer, but since the target
                    // is BGRA, we also need to swizzle it to RGBA before it's ready for PLS.
                    shaderMiscFlags |= pls::ShaderMiscFlags::swizzleColorBGRAToRGBA;
                }
            }
        }
        id<MTLRenderPipelineState> drawPipelineState =
            findCompatibleDrawPipeline(
                batch.drawType, shaderFeatures, desc.interlockMode, shaderMiscFlags)
                ->pipelineState(renderTarget->pixelFormat());

        // Bind the appropriate image texture, if any.
        if (auto imageTextureMetal = static_cast<const PLSTextureMetalImpl*>(batch.imageTexture))
        {
            [encoder setFragmentTexture:imageTextureMetal->texture() atIndex:IMAGE_TEXTURE_IDX];
        }

        DrawType drawType = batch.drawType;
        switch (drawType)
        {
            case DrawType::midpointFanPatches:
            case DrawType::outerCurvePatches:
            {
                // Draw PLS patches that connect the tessellation vertices.
                [encoder setRenderPipelineState:drawPipelineState];
                [encoder setVertexBuffer:m_pathPatchVertexBuffer offset:0 atIndex:0];
                [encoder setCullMode:MTLCullModeBack];
                // Don't use baseInstance in order to run on Apple GPU Family 2.
                // TODO: Use baseInstance instead once we deprecate Apple2.
                [encoder setVertexBytes:&batch.baseElement
                                 length:sizeof(uint32_t)
                                atIndex:PATH_BASE_INSTANCE_UNIFORM_BUFFER_IDX];
                [encoder drawIndexedPrimitives:MTLPrimitiveTypeTriangle
                                    indexCount:PatchIndexCount(drawType)
                                     indexType:MTLIndexTypeUInt16
                                   indexBuffer:m_pathPatchIndexBuffer
                             indexBufferOffset:PatchBaseIndex(drawType) * sizeof(uint16_t)
                                 instanceCount:batch.elementCount];
                break;
            }
            case DrawType::interiorTriangulation:
            {
                [encoder setRenderPipelineState:drawPipelineState];
                [encoder setVertexBuffer:mtl_buffer(triangleBufferRing()) offset:0 atIndex:0];
                [encoder setCullMode:MTLCullModeBack];
                [encoder drawPrimitives:MTLPrimitiveTypeTriangle
                            vertexStart:batch.baseElement
                            vertexCount:batch.elementCount];
                break;
            }
            case DrawType::imageRect:
            case DrawType::imageMesh:
            {
                [encoder setRenderPipelineState:drawPipelineState];
                [encoder setVertexBuffer:mtl_buffer(imageDrawUniformBufferRing())
                                  offset:batch.imageDrawDataOffset
                                 atIndex:IMAGE_DRAW_UNIFORM_BUFFER_IDX];
                [encoder setFragmentBuffer:mtl_buffer(imageDrawUniformBufferRing())
                                    offset:batch.imageDrawDataOffset
                                   atIndex:IMAGE_DRAW_UNIFORM_BUFFER_IDX];
                // Image draws can be back-facing; don't cull.
                [encoder setCullMode:MTLCullModeNone];
                if (drawType == DrawType::imageRect)
                {
                    assert(desc.interlockMode == pls::InterlockMode::atomics);
                    [encoder setVertexBuffer:m_imageRectVertexBuffer offset:0 atIndex:0];
                    [encoder drawIndexedPrimitives:MTLPrimitiveTypeTriangle
                                        indexCount:std::size(pls::kImageRectIndices)
                                         indexType:MTLIndexTypeUInt16
                                       indexBuffer:m_imageRectIndexBuffer
                                 indexBufferOffset:0];
                }
                else
                {
                    LITE_RTTI_CAST_OR_BREAK(
                        vertexBuffer, const RenderBufferMetalImpl*, batch.vertexBuffer);
                    LITE_RTTI_CAST_OR_BREAK(uvBuffer, const RenderBufferMetalImpl*, batch.uvBuffer);
                    LITE_RTTI_CAST_OR_BREAK(
                        indexBuffer, const RenderBufferMetalImpl*, batch.indexBuffer);
                    [encoder setVertexBuffer:vertexBuffer->submittedBuffer() offset:0 atIndex:0];
                    [encoder setVertexBuffer:uvBuffer->submittedBuffer() offset:0 atIndex:1];
                    [encoder drawIndexedPrimitives:MTLPrimitiveTypeTriangle
                                        indexCount:batch.elementCount
                                         indexType:MTLIndexTypeUInt16
                                       indexBuffer:indexBuffer->submittedBuffer()
                                 indexBufferOffset:batch.baseElement * sizeof(uint16_t)];
                }
                break;
            }
            case DrawType::plsAtomicInitialize:
            case DrawType::plsAtomicResolve:
            {
                // Full-screen quad that initializes or resolves the atomic PLS planes.
                assert(desc.interlockMode == pls::InterlockMode::atomics);
                [encoder setRenderPipelineState:drawPipelineState];
                [encoder drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4];
                break;
            }
            case DrawType::stencilClipReset:
            {
                RIVE_UNREACHABLE();
            }
        }
        // Atomic mode requires a barrier between overlapping draws so fragment shaders observe
        // each other's plane writes in order.
        if (desc.interlockMode == pls::InterlockMode::atomics && batch.needsBarrier)
        {
            switch (m_metalFeatures.atomicBarrierType)
            {
                case AtomicBarrierType::memoryBarrier:
                {
#if !defined(RIVE_IOS) && !defined(RIVE_IOS_SIMULATOR)
                    if (@available(macOS 10.14, *))
                    {
                        [encoder memoryBarrierWithScope:MTLBarrierScopeBuffers |
                                                        MTLBarrierScopeRenderTargets
                                            afterStages:MTLRenderStageFragment
                                           beforeStages:MTLRenderStageFragment];
                        break;
                    }
#endif
                    // atomicBarrierType shouldn't be "memoryBarrier" in this case.
                    RIVE_UNREACHABLE();
                }
                case AtomicBarrierType::rasterOrderGroup:
                    // Raster order groups synchronize implicitly; nothing to encode.
                    break;
                case AtomicBarrierType::renderPassBreak:
                    // On very old hardware that can't support barriers, we just take a sledge
                    // hammer and break the entire render pass between overlapping draws.
                    // TODO: Is there a lighter way to achieve this?
                    [encoder endEncoding];
                    pass.colorAttachments[FRAMEBUFFER_PLANE_IDX].loadAction = MTLLoadActionLoad;
                    encoder = makeRenderPassForDraws(desc, pass, commandBuffer);
                    break;
            }
        }
    }
    [encoder endEncoding];

    if (desc.isFinalFlushOfFrame)
    {
        // Schedule a callback that will unlock the buffers used by this flush, after the GPU has
        // finished rendering with them. This unblocks the CPU from reusing them in a future flush.
        std::mutex& thisFlushLock = m_bufferRingLocks[m_bufferRingIdx];
        [commandBuffer addCompletedHandler:^(id<MTLCommandBuffer>) {
          assert(!thisFlushLock.try_lock()); // The mutex should already be locked.
          thisFlushLock.unlock();
        }];
    }
}
| } // namespace rive::pls |