| /* |
| * Copyright 2023 Rive |
| */ |
| |
| #include "rive/pls/metal/pls_render_context_metal_impl.h" |
| |
| #include "background_shader_compiler.h" |
| #include "rive/pls/buffer_ring.hpp" |
| #include "rive/pls/pls_image.hpp" |
| #include "shaders/constants.glsl" |
| #include <sstream> |
| |
| #include "shaders/out/generated/color_ramp.exports.h" |
| #include "shaders/out/generated/tessellate.exports.h" |
| |
| #ifdef RIVE_IOS_SIMULATOR |
| #import <mach-o/arch.h> |
| #endif |
| |
| namespace rive::pls |
| { |
| #ifdef RIVE_IOS |
| #include "shaders/out/generated/rive_pls_ios.metallib.c" |
| #elif defined(RIVE_IOS_SIMULATOR) |
| #include "shaders/out/generated/rive_pls_ios_simulator.metallib.c" |
| #else |
| #include "shaders/out/generated/rive_pls_macosx.metallib.c" |
| #endif |
| |
// Compiles a render pipeline state from "desc", aborting the process on failure.
// Pipeline creation only fails on programmer error (bad shader/descriptor combination),
// so there is no recovery path.
static id<MTLRenderPipelineState> make_pipeline_state(id<MTLDevice> gpu,
                                                      MTLRenderPipelineDescriptor* desc)
{
    // Per Cocoa convention, the error out-parameter is only written on failure; success or
    // failure is indicated by the returned object. Initialize to nil rather than a
    // placeholder error so we never print a bogus message.
    NSError* err = nil;
    id<MTLRenderPipelineState> state = [gpu newRenderPipelineStateWithDescriptor:desc error:&err];
    if (state == nil)
    {
        fprintf(stderr, "Failed to create MTLRenderPipelineState.\n");
        if (err != nil)
        {
            fprintf(stderr, "%s\n", err.localizedDescription.UTF8String);
        }
        assert(0);
        exit(-1);
    }
    return state;
}
| |
// Renders color ramps to the gradient texture.
class PLSRenderContextMetalImpl::ColorRampPipeline
{
public:
    ColorRampPipeline(id<MTLDevice> gpu, id<MTLLibrary> plsLibrary)
    {
        // The gradient texture is RGBA8; build a single pipeline targeting that format.
        MTLRenderPipelineDescriptor* pipelineDesc = [[MTLRenderPipelineDescriptor alloc] init];
        pipelineDesc.vertexFunction = [plsLibrary newFunctionWithName:@GLSL_colorRampVertexMain];
        pipelineDesc.fragmentFunction =
            [plsLibrary newFunctionWithName:@GLSL_colorRampFragmentMain];
        pipelineDesc.colorAttachments[0].pixelFormat = MTLPixelFormatRGBA8Unorm;
        m_pipelineState = make_pipeline_state(gpu, pipelineDesc);
    }

    id<MTLRenderPipelineState> pipelineState() const { return m_pipelineState; }

private:
    id<MTLRenderPipelineState> m_pipelineState;
};
| |
// Renders tessellated vertices to the tessellation texture.
class PLSRenderContextMetalImpl::TessellatePipeline
{
public:
    TessellatePipeline(id<MTLDevice> gpu, id<MTLLibrary> plsLibrary)
    {
        // The tessellation texture is RGBA32Uint; build a single pipeline for that format.
        MTLRenderPipelineDescriptor* pipelineDesc = [[MTLRenderPipelineDescriptor alloc] init];
        pipelineDesc.vertexFunction = [plsLibrary newFunctionWithName:@GLSL_tessellateVertexMain];
        pipelineDesc.fragmentFunction =
            [plsLibrary newFunctionWithName:@GLSL_tessellateFragmentMain];
        pipelineDesc.colorAttachments[0].pixelFormat = MTLPixelFormatRGBA32Uint;
        m_pipelineState = make_pipeline_state(gpu, pipelineDesc);
    }

    id<MTLRenderPipelineState> pipelineState() const { return m_pipelineState; }

private:
    id<MTLRenderPipelineState> m_pipelineState;
};
| |
// Renders paths to the main render target.
class PLSRenderContextMetalImpl::DrawPipeline
{
public:
    // Precompiled functions are embedded in namespaces. Return the fully qualified name of the
    // desired function, including its namespace.
    // NOTE(review): the precompiledLibrary parameter is unused in this method body — presumably
    // kept for signature symmetry; confirm before removing.
    static NSString* GetPrecompiledFunctionName(DrawType drawType,
                                                ShaderFeatures shaderFeatures,
                                                id<MTLLibrary> precompiledLibrary,
                                                const char* functionBaseName)
    {
        // Each feature corresponds to a specific index in the namespaceID. These must stay in
        // sync with generate_draw_combinations.py.
        // namespaceID[0] encodes the draw type (interiorTriangulation or not); namespaceID[1..6]
        // encode the six shader-feature bits, one character per feature.
        char namespaceID[] = "0000000";
        if (drawType == DrawType::interiorTriangulation)
        {
            namespaceID[0] = '1';
        }
        for (size_t i = 0; i < pls::kShaderFeatureCount; ++i)
        {
            ShaderFeatures feature = static_cast<ShaderFeatures>(1 << i);
            if (shaderFeatures & feature)
            {
                namespaceID[i + 1] = '1';
            }
            // Verify the bit order assumed by the "namespaceID[i + 1]" indexing above.
            static_assert((int)ShaderFeatures::ENABLE_CLIPPING == 1 << 0);
            static_assert((int)ShaderFeatures::ENABLE_CLIP_RECT == 1 << 1);
            static_assert((int)ShaderFeatures::ENABLE_ADVANCED_BLEND == 1 << 2);
            static_assert((int)ShaderFeatures::ENABLE_EVEN_ODD == 1 << 3);
            static_assert((int)ShaderFeatures::ENABLE_NESTED_CLIPPING == 1 << 4);
            static_assert((int)ShaderFeatures::ENABLE_HSL_BLEND_MODES == 1 << 5);
        }

        // Path draws use the 'p' namespace prefix; image meshes use 'm'. The remaining draw
        // types have no precompiled variants.
        char namespacePrefix;
        switch (drawType)
        {
            case DrawType::midpointFanPatches:
            case DrawType::outerCurvePatches:
            case DrawType::interiorTriangulation:
                namespacePrefix = 'p';
                break;
            case DrawType::imageRect:
                RIVE_UNREACHABLE();
            case DrawType::imageMesh:
                namespacePrefix = 'm';
                break;
            case DrawType::plsAtomicInitialize:
            case DrawType::plsAtomicResolve:
            case DrawType::stencilClipReset:
                RIVE_UNREACHABLE();
        }

        return
            [NSString stringWithFormat:@"%c%s::%s", namespacePrefix, namespaceID, functionBaseName];
    }

    // Builds the two pipeline-state variants (RGBA8 and BGRA8 framebuffer formats) for one
    // (drawType, interlockMode, shaderFeatures) combination, using the named vertex/fragment
    // functions from "library".
    DrawPipeline(id<MTLDevice> gpu,
                 id<MTLLibrary> library,
                 NSString* vertexFunctionName,
                 NSString* fragmentFunctionName,
                 pls::DrawType drawType,
                 pls::InterlockMode interlockMode,
                 pls::ShaderFeatures shaderFeatures)
    {
        // Shared pipeline construction, parameterized only on the framebuffer pixel format.
        auto makePipelineState = [=](id<MTLFunction> vertexMain,
                                     id<MTLFunction> fragmentMain,
                                     MTLPixelFormat pixelFormat) {
            MTLRenderPipelineDescriptor* desc = [[MTLRenderPipelineDescriptor alloc] init];
            desc.vertexFunction = vertexMain;
            desc.fragmentFunction = fragmentMain;

            auto* framebuffer = desc.colorAttachments[FRAMEBUFFER_PLANE_IDX];
            framebuffer.pixelFormat = pixelFormat;

            switch (interlockMode)
            {
                case pls::InterlockMode::rasterOrdering:
                    // In rasterOrdering mode, the PLS planes are accessed as color attachments.
                    desc.colorAttachments[COVERAGE_PLANE_IDX].pixelFormat = MTLPixelFormatR32Uint;
                    desc.colorAttachments[CLIP_PLANE_IDX].pixelFormat = MTLPixelFormatR32Uint;
                    desc.colorAttachments[ORIGINAL_DST_COLOR_PLANE_IDX].pixelFormat = pixelFormat;
                    break;
                case pls::InterlockMode::atomics:
                    // In atomic mode, the PLS planes are accessed as device buffers. We only use
                    // the "framebuffer" attachment configured above.
                    if (pls::ShadersEmitColorToRasterPipeline(interlockMode, shaderFeatures))
                    {
                        // The shader expects a "src-over" blend function in order to implement
                        // antialiasing and opacity.
                        framebuffer.blendingEnabled = TRUE;
                        framebuffer.sourceRGBBlendFactor = MTLBlendFactorOne;
                        framebuffer.destinationRGBBlendFactor = MTLBlendFactorOneMinusSourceAlpha;
                        framebuffer.rgbBlendOperation = MTLBlendOperationAdd;
                        framebuffer.sourceAlphaBlendFactor = MTLBlendFactorOne;
                        framebuffer.destinationAlphaBlendFactor = MTLBlendFactorOneMinusSourceAlpha;
                        framebuffer.alphaBlendOperation = MTLBlendOperationAdd;
                        framebuffer.writeMask = MTLColorWriteMaskAll;
                    }
                    else if (drawType == pls::DrawType::plsAtomicResolve)
                    {
                        // We're resolving from the offscreen color buffer to the framebuffer
                        // attachment. Write out the final color directly without any blend modes.
                        framebuffer.blendingEnabled = FALSE;
                        framebuffer.writeMask = MTLColorWriteMaskAll;
                    }
                    else
                    {
                        // This pipeline renders by storing to the offscreen color buffer; disable
                        // writes to the framebuffer attachment.
                        framebuffer.blendingEnabled = FALSE;
                        framebuffer.writeMask = MTLColorWriteMaskNone;
                    }
                    break;
                case pls::InterlockMode::depthStencil:
                    // Not used by this backend's draw pipelines.
                    RIVE_UNREACHABLE();
            }
            return make_pipeline_state(gpu, desc);
        };
        id<MTLFunction> vertexMain = [library newFunctionWithName:vertexFunctionName];
        id<MTLFunction> fragmentMain = [library newFunctionWithName:fragmentFunctionName];
        m_pipelineStateRGBA8 =
            makePipelineState(vertexMain, fragmentMain, MTLPixelFormatRGBA8Unorm);
        m_pipelineStateBGRA8 =
            makePipelineState(vertexMain, fragmentMain, MTLPixelFormatBGRA8Unorm);
    }

    // Returns the pipeline-state variant for the render target's pixel format. RGBA-ordered
    // formats (including sRGB and 16F variants) map to the RGBA8 pipeline; BGRA-ordered
    // formats map to the BGRA8 pipeline.
    id<MTLRenderPipelineState> pipelineState(MTLPixelFormat pixelFormat) const
    {
        assert(pixelFormat == MTLPixelFormatRGBA8Unorm ||
               pixelFormat == MTLPixelFormatRGBA16Float ||
               pixelFormat == MTLPixelFormatRGBA8Unorm_sRGB ||
               pixelFormat == MTLPixelFormatBGRA8Unorm ||
               pixelFormat == MTLPixelFormatBGRA8Unorm_sRGB);

        switch (pixelFormat)
        {
            case MTLPixelFormatRGBA8Unorm_sRGB:
            case MTLPixelFormatRGBA8Unorm:
            case MTLPixelFormatRGBA16Float:
                return m_pipelineStateRGBA8;
            default:
                return m_pipelineStateBGRA8;
        }
    }

private:
    id<MTLRenderPipelineState> m_pipelineStateRGBA8;
    id<MTLRenderPipelineState> m_pipelineStateBGRA8;
};
| |
#ifdef RIVE_IOS
// Reports whether the GPU is in the Apple4 (or later) family. The supportsFamily: query
// itself requires iOS 13, so older systems conservatively report false.
static bool is_apple_ios_silicon(id<MTLDevice> gpu)
{
    bool isApple4OrLater = false;
    if (@available(iOS 13, *))
    {
        isApple4OrLater = [gpu supportsFamily:MTLGPUFamilyApple4];
    }
    return isApple4OrLater;
}
#endif
| |
// A BufferRing backed by CPU-visible (shared storage mode) MTLBuffers.
class BufferRingMetalImpl : public BufferRing
{
public:
    // Returns a new ring, or nullptr when the requested capacity is zero.
    static std::unique_ptr<BufferRingMetalImpl> Make(id<MTLDevice> gpu, size_t capacityInBytes)
    {
        if (capacityInBytes == 0)
        {
            return nullptr;
        }
        return std::make_unique<BufferRingMetalImpl>(gpu, capacityInBytes);
    }

    BufferRingMetalImpl(id<MTLDevice> gpu, size_t capacityInBytes) : BufferRing(capacityInBytes)
    {
        // Allocate one shared-storage MTLBuffer per slot in the ring.
        for (auto& buffer : m_buffers)
        {
            buffer = [gpu newBufferWithLength:capacityInBytes
                                      options:MTLResourceStorageModeShared];
        }
    }

    // The buffer most recently submitted via the BufferRing protocol.
    id<MTLBuffer> submittedBuffer() const { return m_buffers[submittedBufferIdx()]; }

protected:
    // Shared-storage buffers are always CPU-addressable; just hand out the contents pointer.
    void* onMapBuffer(int bufferIdx, size_t mapSizeInBytes) override
    {
        return m_buffers[bufferIdx].contents;
    }

    // Nothing to flush in shared storage mode.
    void onUnmapAndSubmitBuffer(int bufferIdx, size_t mapSizeInBytes) override {}

private:
    id<MTLBuffer> m_buffers[kBufferRingSize];
};
| |
// Creates a PLSRenderContext whose backing implementation targets the given Metal device.
std::unique_ptr<PLSRenderContext> PLSRenderContextMetalImpl::MakeContext(
    id<MTLDevice> gpu, const ContextOptions& contextOptions)
{
    // Wrap the raw pointer manually (rather than make_unique) before handing ownership off to
    // the context.
    std::unique_ptr<PLSRenderContextMetalImpl> impl(
        new PLSRenderContextMetalImpl(gpu, contextOptions));
    return std::make_unique<PLSRenderContext>(std::move(impl));
}
| |
// Probes the device/platform for feature support, loads the precompiled metallib, and
// allocates the fixed GPU resources (tessellation/patch/imageRect buffers) used by every flush.
PLSRenderContextMetalImpl::PLSRenderContextMetalImpl(id<MTLDevice> gpu,
                                                     const ContextOptions& contextOptions) :
    m_contextOptions(contextOptions), m_gpu(gpu)
{
    // It appears, so far, that we don't need to use flat interpolation for path IDs on any Apple
    // device, and it's faster not to.
    m_platformFeatures.avoidFlatVaryings = true;
    m_platformFeatures.invertOffscreenY = true;
#ifdef RIVE_IOS
    m_platformFeatures.supportsRasterOrdering = true;
    if (!is_apple_ios_silicon(m_gpu))
    {
        // The PowerVR GPU, at least on A10, has fp16 precision issues. We can't use the bottom
        // 3 bits of the path and clip IDs in order for our equality testing to work.
        m_platformFeatures.pathIDGranularity = 8;
    }
#elif defined(RIVE_IOS_SIMULATOR)
    // The simulator does not support framebuffer reads. Fall back on atomic mode.
    m_platformFeatures.supportsRasterOrdering = false;
#else
    m_platformFeatures.supportsRasterOrdering =
        [m_gpu supportsFamily:MTLGPUFamilyApple1] && !contextOptions.disableFramebufferReads;
#endif
    m_platformFeatures.atomicPLSMustBeInitializedAsDraw = true;

#ifdef RIVE_IOS
    // Atomic barriers are never used on iOS, but if we ever did need them, we would use
    // rasterOrderGroups.
    m_metalFeatures.atomicBarrierType = AtomicBarrierType::rasterOrderGroup;
#elif defined(RIVE_IOS_SIMULATOR)
    const NXArchInfo* hostArchitecture = NXGetLocalArchInfo();
    if (strncmp(hostArchitecture->name, "arm64", 5) == 0)
    {
        // The simulator doesn't advertise support for raster order groups, but they appear to work
        // anyway on an Apple-Silicon-hosted simulator. Use rasterOrderGroup in this case because
        // it's much faster than renderPassBreak. (On Intel/AMD this doesn't matter anyway because
        // renderPassBreaks are cheap and actually faster than rasterOrderGroups.)
        m_metalFeatures.atomicBarrierType = AtomicBarrierType::rasterOrderGroup;
    }
    else
    {
        m_metalFeatures.atomicBarrierType = AtomicBarrierType::renderPassBreak;
    }
#else
    // Use real memory barriers for atomic mode if they're available.
    // "GPU devices in Apple3 through Apple9 families don’t support memory barriers that include the
    // MTLRenderStages.fragment or .tile stages in the after argument..."
    if (([m_gpu supportsFamily:MTLGPUFamilyCommon2] || [m_gpu supportsFamily:MTLGPUFamilyMac2]) &&
        ![m_gpu supportsFamily:MTLGPUFamilyApple3])
    {
        m_metalFeatures.atomicBarrierType = AtomicBarrierType::memoryBarrier;
    }
    else if (m_gpu.rasterOrderGroupsSupported)
    {
        m_metalFeatures.atomicBarrierType = AtomicBarrierType::rasterOrderGroup;
    }
    else
    {
        m_metalFeatures.atomicBarrierType = AtomicBarrierType::renderPassBreak;
    }
#endif

    m_backgroundShaderCompiler = std::make_unique<BackgroundShaderCompiler>(m_gpu, m_metalFeatures);

    // Load the precompiled shaders.
    dispatch_data_t metallibData = dispatch_data_create(
#ifdef RIVE_IOS
        rive_pls_ios_metallib,
        rive_pls_ios_metallib_len,
#elif defined(RIVE_IOS_SIMULATOR)
        rive_pls_ios_simulator_metallib,
        rive_pls_ios_simulator_metallib_len,
#else
        rive_pls_macosx_metallib,
        rive_pls_macosx_metallib_len,
#endif
        nil,
        nil);
    // The error out-parameter is only written on failure; determine success from the returned
    // library and only print the error when Metal actually provided one.
    NSError* err = nil;
    m_plsPrecompiledLibrary = [m_gpu newLibraryWithData:metallibData error:&err];
    if (m_plsPrecompiledLibrary == nil)
    {
        fprintf(stderr, "Failed to load pls metallib.\n");
        if (err != nil)
        {
            fprintf(stderr, "%s\n", err.localizedDescription.UTF8String);
        }
        exit(-1);
    }

    m_colorRampPipeline = std::make_unique<ColorRampPipeline>(m_gpu, m_plsPrecompiledLibrary);
    m_tessPipeline = std::make_unique<TessellatePipeline>(m_gpu, m_plsPrecompiledLibrary);
    m_tessSpanIndexBuffer = [m_gpu newBufferWithBytes:pls::kTessSpanIndices
                                               length:sizeof(pls::kTessSpanIndices)
                                              options:MTLResourceStorageModeShared];

    // The precompiled static library has a fully-featured shader for each drawType in
    // "rasterOrdering" mode. We load these at initialization and use them while waiting for the
    // background compiler to generate more specialized, higher performance shaders.
    if (m_platformFeatures.supportsRasterOrdering)
    {
        for (auto drawType :
             {DrawType::midpointFanPatches, DrawType::interiorTriangulation, DrawType::imageMesh})
        {
            pls::ShaderFeatures allShaderFeatures =
                pls::ShaderFeaturesMaskFor(drawType, pls::InterlockMode::rasterOrdering);
            uint32_t pipelineKey = ShaderUniqueKey(drawType,
                                                   allShaderFeatures,
                                                   pls::InterlockMode::rasterOrdering,
                                                   pls::ShaderMiscFlags::none);
            // Vertex shaders only care about the vertex-relevant subset of the feature bits.
            m_drawPipelines[pipelineKey] = std::make_unique<DrawPipeline>(
                m_gpu,
                m_plsPrecompiledLibrary,
                DrawPipeline::GetPrecompiledFunctionName(drawType,
                                                         allShaderFeatures &
                                                             pls::kVertexShaderFeaturesMask,
                                                         m_plsPrecompiledLibrary,
                                                         GLSL_drawVertexMain),
                DrawPipeline::GetPrecompiledFunctionName(
                    drawType, allShaderFeatures, m_plsPrecompiledLibrary, GLSL_drawFragmentMain),
                drawType,
                pls::InterlockMode::rasterOrdering,
                allShaderFeatures);
        }
    }

    // Create vertex and index buffers for the different PLS patches.
    m_pathPatchVertexBuffer =
        [m_gpu newBufferWithLength:kPatchVertexBufferCount * sizeof(PatchVertex)
                           options:MTLResourceStorageModeShared];
    m_pathPatchIndexBuffer = [m_gpu newBufferWithLength:kPatchIndexBufferCount * sizeof(uint16_t)
                                                options:MTLResourceStorageModeShared];
    GeneratePatchBufferData(reinterpret_cast<PatchVertex*>(m_pathPatchVertexBuffer.contents),
                            reinterpret_cast<uint16_t*>(m_pathPatchIndexBuffer.contents));

    // Set up the imageRect rendering buffers. (pls::InterlockMode::atomics only.)
    m_imageRectVertexBuffer = [m_gpu newBufferWithBytes:pls::kImageRectVertices
                                                 length:sizeof(pls::kImageRectVertices)
                                                options:MTLResourceStorageModeShared];
    m_imageRectIndexBuffer = [m_gpu newBufferWithBytes:pls::kImageRectIndices
                                                length:sizeof(pls::kImageRectIndices)
                                               options:MTLResourceStorageModeShared];
}
| |
// Nothing explicit to release: ARC and the smart-pointer members handle all cleanup.
PLSRenderContextMetalImpl::~PLSRenderContextMetalImpl() {}
| |
// If the GPU supports framebuffer reads (called "programmable blending" in the feature tables),
// PLS planes besides the main framebuffer can exist in ephemeral "memoryless" storage. This
// means their contents are never actually written to main memory, and they only exist in fast
// tiled memory.
static id<MTLTexture> make_pls_memoryless_texture(id<MTLDevice> gpu,
                                                  MTLPixelFormat pixelFormat,
                                                  uint32_t width,
                                                  uint32_t height)
{
    MTLTextureDescriptor* textureDesc = [[MTLTextureDescriptor alloc] init];
    textureDesc.pixelFormat = pixelFormat;
    textureDesc.width = width;
    textureDesc.height = height;
    textureDesc.textureType = MTLTextureType2D;
    textureDesc.mipmapLevelCount = 1;
    // Memoryless contents never leave tile memory, so render-target usage is all we need.
    textureDesc.usage = MTLTextureUsageRenderTarget;
    textureDesc.storageMode = MTLStorageModeMemoryless;
    return [gpu newTextureWithDescriptor:textureDesc];
}
| |
PLSRenderTargetMetal::PLSRenderTargetMetal(id<MTLDevice> gpu,
                                           MTLPixelFormat pixelFormat,
                                           uint32_t width,
                                           uint32_t height,
                                           const PlatformFeatures& platformFeatures) :
    PLSRenderTarget(width, height), m_gpu(gpu), m_pixelFormat(pixelFormat)
{
    // The client supplies the actual target texture later via setTargetTexture().
    m_targetTexture = nil;
    if (!platformFeatures.supportsRasterOrdering)
    {
        // Without raster ordering there are no memoryless PLS planes to allocate.
        return;
    }
    // Raster ordering accesses the PLS planes as color attachments, which can live entirely in
    // memoryless (tiled) storage.
    m_coverageMemorylessTexture =
        make_pls_memoryless_texture(gpu, MTLPixelFormatR32Uint, width, height);
    m_clipMemorylessTexture =
        make_pls_memoryless_texture(gpu, MTLPixelFormatR32Uint, width, height);
    m_originalDstColorMemorylessTexture =
        make_pls_memoryless_texture(gpu, m_pixelFormat, width, height);
}
| |
// Attaches (or detaches, when nil) the texture that draws will ultimately render into.
void PLSRenderTargetMetal::setTargetTexture(id<MTLTexture> texture)
{
    if (texture != nil)
    {
        assert(compatibleWith(texture));
    }
    m_targetTexture = texture;
}
| |
// Creates a render target sized and formatted for this context's device.
rcp<PLSRenderTargetMetal> PLSRenderContextMetalImpl::makeRenderTarget(MTLPixelFormat pixelFormat,
                                                                      uint32_t width,
                                                                      uint32_t height)
{
    auto* renderTarget =
        new PLSRenderTargetMetal(m_gpu, pixelFormat, width, height, m_platformFeatures);
    return rcp(renderTarget);
}
| |
// A RenderBuffer backed by a ring of CPU-visible MTLBuffers (or a single buffer when the
// contents are only ever mapped once).
class RenderBufferMetalImpl : public lite_rtti_override<RenderBuffer, RenderBufferMetalImpl>
{
public:
    RenderBufferMetalImpl(RenderBufferType renderBufferType,
                          RenderBufferFlags renderBufferFlags,
                          size_t sizeInBytes,
                          id<MTLDevice> gpu) :
        lite_rtti_override(renderBufferType, renderBufferFlags, sizeInBytes), m_gpu(gpu)
    {
        // A buffer that is mapped only once at initialization never has multiple copies in
        // flight, so one backing buffer suffices; otherwise allocate the full ring.
        int bufferCount = pls::kBufferRingSize;
        if (flags() & RenderBufferFlags::mappedOnceAtInitialization)
        {
            bufferCount = 1;
        }
        for (int i = 0; i < bufferCount; ++i)
        {
            m_buffers[i] = [gpu newBufferWithLength:sizeInBytes
                                            options:MTLResourceStorageModeShared];
        }
    }

    // The buffer most recently handed out by onMap().
    id<MTLBuffer> submittedBuffer() const { return m_buffers[m_submittedBufferIdx]; }

protected:
    void* onMap() override
    {
        // Advance to the next slot in the ring before handing out its contents. (Asserts if a
        // mapped-once buffer gets mapped a second time, since slot 1+ was never allocated.)
        m_submittedBufferIdx = (m_submittedBufferIdx + 1) % pls::kBufferRingSize;
        assert(m_buffers[m_submittedBufferIdx] != nil);
        return m_buffers[m_submittedBufferIdx].contents;
    }

    // Shared storage mode needs no explicit flush on unmap.
    void onUnmap() override {}

private:
    id<MTLDevice> m_gpu;
    id<MTLBuffer> m_buffers[pls::kBufferRingSize];
    int m_submittedBufferIdx = -1;
};
| |
// Factory for client-facing vertex/index render buffers.
rcp<RenderBuffer> PLSRenderContextMetalImpl::makeRenderBuffer(RenderBufferType type,
                                                              RenderBufferFlags flags,
                                                              size_t sizeInBytes)
{
    return make_rcp<RenderBufferMetalImpl>(type, flags, sizeInBytes, m_gpu);
}
| |
// An RGBA8 image texture whose mipmap chain is generated lazily on the GPU.
class PLSTextureMetalImpl : public PLSTexture
{
public:
    PLSTextureMetalImpl(id<MTLDevice> gpu,
                        uint32_t width,
                        uint32_t height,
                        uint32_t mipLevelCount,
                        const uint8_t imageDataRGBA[]) :
        PLSTexture(width, height)
    {
        // Allocate a shader-readable RGBA8 texture with the requested mipmap chain.
        MTLTextureDescriptor* textureDesc = [[MTLTextureDescriptor alloc] init];
        textureDesc.pixelFormat = MTLPixelFormatRGBA8Unorm;
        textureDesc.width = width;
        textureDesc.height = height;
        textureDesc.mipmapLevelCount = mipLevelCount;
        textureDesc.usage = MTLTextureUsageShaderRead;
        textureDesc.storageMode = MTLStorageModeShared;
        textureDesc.textureType = MTLTextureType2D;
        m_texture = [gpu newTextureWithDescriptor:textureDesc];

        // Upload level 0 now; the remaining levels are filled in by ensureMipmaps().
        [m_texture replaceRegion:MTLRegionMake2D(0, 0, width, height)
                     mipmapLevel:0
                       withBytes:imageDataRGBA
                     bytesPerRow:width * 4];
    }

    // Encodes a GPU mipmap-generation pass if the mips haven't been generated yet.
    void ensureMipmaps(id<MTLCommandBuffer> commandBuffer) const
    {
        if (!m_mipsDirty)
        {
            return;
        }
        id<MTLBlitCommandEncoder> blitEncoder = [commandBuffer blitCommandEncoder];
        [blitEncoder generateMipmapsForTexture:m_texture];
        [blitEncoder endEncoding];
        m_mipsDirty = false;
    }

    id<MTLTexture> texture() const { return m_texture; }

private:
    id<MTLTexture> m_texture;
    mutable bool m_mipsDirty = true;
};
| |
// Factory for image textures; uploads level 0 immediately and defers mip generation.
rcp<PLSTexture> PLSRenderContextMetalImpl::makeImageTexture(uint32_t width,
                                                            uint32_t height,
                                                            uint32_t mipLevelCount,
                                                            const uint8_t imageDataRGBA[])
{
    return make_rcp<PLSTextureMetalImpl>(m_gpu, width, height, mipLevelCount, imageDataRGBA);
}
| |
// Uniform data uses a standard shared-storage buffer ring. Returns nullptr for 0 capacity.
std::unique_ptr<BufferRing> PLSRenderContextMetalImpl::makeUniformBufferRing(size_t capacityInBytes)
{
    return BufferRingMetalImpl::Make(m_gpu, capacityInBytes);
}
| |
// Storage buffers are plain MTLBuffers in this backend, so the structure layout parameter is
// unused. Returns nullptr for 0 capacity.
std::unique_ptr<BufferRing> PLSRenderContextMetalImpl::makeStorageBufferRing(
    size_t capacityInBytes, pls::StorageBufferStructure)
{
    return BufferRingMetalImpl::Make(m_gpu, capacityInBytes);
}
| |
// Vertex data uses a standard shared-storage buffer ring. Returns nullptr for 0 capacity.
std::unique_ptr<BufferRing> PLSRenderContextMetalImpl::makeVertexBufferRing(size_t capacityInBytes)
{
    return BufferRingMetalImpl::Make(m_gpu, capacityInBytes);
}
| |
// Texture-upload staging data uses a standard shared-storage buffer ring. Returns nullptr for
// 0 capacity.
std::unique_ptr<BufferRing> PLSRenderContextMetalImpl::makeTextureTransferBufferRing(
    size_t capacityInBytes)
{
    return BufferRingMetalImpl::Make(m_gpu, capacityInBytes);
}
| |
// Allocates a 2D, GPU-private texture that can be both rendered to and read from shaders, or
// returns nil if either dimension is 0. Shared by the gradient and tessellation textures,
// which differ only in pixel format.
static id<MTLTexture> make_shader_read_render_texture(id<MTLDevice> gpu,
                                                      MTLPixelFormat pixelFormat,
                                                      uint32_t width,
                                                      uint32_t height)
{
    if (width == 0 || height == 0)
    {
        return nil;
    }
    MTLTextureDescriptor* desc = [[MTLTextureDescriptor alloc] init];
    desc.pixelFormat = pixelFormat;
    desc.width = width;
    desc.height = height;
    desc.usage = MTLTextureUsageRenderTarget | MTLTextureUsageShaderRead;
    desc.textureType = MTLTextureType2D;
    desc.mipmapLevelCount = 1;
    desc.storageMode = MTLStorageModePrivate;
    return [gpu newTextureWithDescriptor:desc];
}

// (Re)allocates the RGBA8 gradient texture, releasing it when either dimension is 0.
void PLSRenderContextMetalImpl::resizeGradientTexture(uint32_t width, uint32_t height)
{
    m_gradientTexture =
        make_shader_read_render_texture(m_gpu, MTLPixelFormatRGBA8Unorm, width, height);
}

// (Re)allocates the RGBA32Uint tessellation texture, releasing it when either dimension is 0.
void PLSRenderContextMetalImpl::resizeTessellationTexture(uint32_t width, uint32_t height)
{
    m_tessVertexTexture =
        make_shader_read_render_texture(m_gpu, MTLPixelFormatRGBA32Uint, width, height);
}
| |
// Returns a pipeline that can render the requested (drawType, shaderFeatures, interlockMode,
// shaderMiscFlags) combination. If the exact pipeline hasn't finished compiling in the
// background yet, schedules it (if needed) and falls back on a compatible superset-of-features
// pipeline so rendering never stalls (unless synchronous compilation was requested).
const PLSRenderContextMetalImpl::DrawPipeline* PLSRenderContextMetalImpl::
    findCompatibleDrawPipeline(pls::DrawType drawType,
                               pls::ShaderFeatures shaderFeatures,
                               pls::InterlockMode interlockMode,
                               pls::ShaderMiscFlags shaderMiscFlags)
{
    uint32_t pipelineKey =
        pls::ShaderUniqueKey(drawType, shaderFeatures, interlockMode, shaderMiscFlags);
    auto pipelineIter = m_drawPipelines.find(pipelineKey);
    if (pipelineIter == m_drawPipelines.end())
    {
        // The shader for this pipeline hasn't been scheduled for compiling yet. Schedule it to
        // compile in the background.
        m_backgroundShaderCompiler->pushJob({
            .drawType = drawType,
            .shaderFeatures = shaderFeatures,
            .interlockMode = interlockMode,
            .shaderMiscFlags = shaderMiscFlags,
        });
        // Insert a null placeholder so the same job is never scheduled twice.
        pipelineIter = m_drawPipelines.insert({pipelineKey, nullptr}).first;
    }

    if (pipelineIter->second != nullptr)
    {
        // The pipeline is fully compiled and loaded.
        return pipelineIter->second.get();
    }

    // The shader for this pipeline hasn't finished compiling yet. Start by finding a fully-featured
    // superset of features whose pipeline we can fall back on while waiting for it to compile.
    ShaderFeatures fullyFeaturedPipelineFeatures =
        pls::ShaderFeaturesMaskFor(drawType, interlockMode);
    if (interlockMode == pls::InterlockMode::atomics)
    {
        // Never add ENABLE_ADVANCED_BLEND to an atomic pipeline that doesn't use advanced blend,
        // since in atomic mode, the shaders behave differently depending on whether advanced blend
        // is enabled.
        fullyFeaturedPipelineFeatures &= shaderFeatures | ~ShaderFeatures::ENABLE_ADVANCED_BLEND;
        // Never add ENABLE_CLIPPING to an atomic pipeline that doesn't use clipping; it will cause
        // a "missing buffer binding" validation error because the shader will define an (unused)
        // clipBuffer, but we won't bind anything to it.
        fullyFeaturedPipelineFeatures &= shaderFeatures | ~ShaderFeatures::ENABLE_CLIPPING;
    }
    shaderFeatures &= fullyFeaturedPipelineFeatures;

    // Fully-featured "rasterOrdering" pipelines should have already been pre-loaded from the static
    // library.
    assert(shaderFeatures != fullyFeaturedPipelineFeatures ||
           interlockMode != pls::InterlockMode::rasterOrdering);

    // Poll to see if the shader is actually done compiling, but only wait if it's a fully-featured
    // pipeline. Otherwise, we can fall back on the fully-featured pipeline while we wait for
    // compilation.
    BackgroundCompileJob job;
    bool shouldWaitForBackgroundCompilation = shaderFeatures == fullyFeaturedPipelineFeatures ||
                                              m_contextOptions.synchronousShaderCompilations;
    while (m_backgroundShaderCompiler->popFinishedJob(&job, shouldWaitForBackgroundCompilation))
    {
        // Build every finished job into a pipeline, not just the one we're looking for.
        uint32_t jobKey = pls::ShaderUniqueKey(
            job.drawType, job.shaderFeatures, job.interlockMode, job.shaderMiscFlags);
        m_drawPipelines[jobKey] = std::make_unique<DrawPipeline>(m_gpu,
                                                                 job.compiledLibrary,
                                                                 @GLSL_drawVertexMain,
                                                                 @GLSL_drawFragmentMain,
                                                                 job.drawType,
                                                                 job.interlockMode,
                                                                 job.shaderFeatures);
        if (jobKey == pipelineKey)
        {
            // The shader we wanted was actually done compiling and pending being built into a
            // pipeline.
            return pipelineIter->second.get();
        }
    }

    // The shader for this feature set hasn't finished compiling. Use the pipeline that has
    // all features enabled while we wait for it to finish.
    assert(shaderFeatures != fullyFeaturedPipelineFeatures);
    return findCompatibleDrawPipeline(
        drawType, fullyFeaturedPipelineFeatures, interlockMode, shaderMiscFlags);
}
| |
// Advances to the next slot in the buffer rings before the CPU starts writing new frame data.
void PLSRenderContextMetalImpl::prepareToMapBuffers()
{
    // Wait until the GPU finishes rendering flush "N + 1 - kBufferRingSize". This ensures it
    // is safe for the CPU to begin modifying the next buffers in our rings.
    // (The lock is presumably released when that GPU flush completes -- the release site is
    // outside this chunk; confirm in the flush/completion handler.)
    m_bufferRingIdx = (m_bufferRingIdx + 1) % kBufferRingSize;
    m_bufferRingLocks[m_bufferRingIdx].lock();
}
| |
// Fetches the currently-submitted MTLBuffer from a BufferRing, which is always a
// BufferRingMetalImpl in this backend.
static id<MTLBuffer> mtl_buffer(const BufferRing* bufferRing)
{
    assert(bufferRing != nullptr);
    auto* metalRing = static_cast<const BufferRingMetalImpl*>(bufferRing);
    return metalRing->submittedBuffer();
}
| |
// Builds an MTLViewport covering the given rectangle, with the full [0, 1] depth range.
static MTLViewport make_viewport(uint32_t x, uint32_t y, uint32_t width, uint32_t height)
{
    MTLViewport viewport;
    viewport.originX = x;
    viewport.originY = y;
    viewport.width = width;
    viewport.height = height;
    viewport.znear = 0;
    viewport.zfar = 1;
    return viewport;
}
| |
// Begins the render pass for a flush's draws and binds all the per-flush state: viewport,
// flush uniforms, tessellation/gradient textures, per-draw data buffers, and (in atomic mode)
// the PLS plane buffers. The caller encodes the draws and ends encoding.
id<MTLRenderCommandEncoder> PLSRenderContextMetalImpl::makeRenderPassForDraws(
    const pls::FlushDescriptor& flushDesc,
    MTLRenderPassDescriptor* passDesc,
    id<MTLCommandBuffer> commandBuffer)
{
    auto* renderTarget = static_cast<PLSRenderTargetMetal*>(flushDesc.renderTarget);

    id<MTLRenderCommandEncoder> encoder =
        [commandBuffer renderCommandEncoderWithDescriptor:passDesc];

    [encoder setViewport:make_viewport(0, 0, renderTarget->width(), renderTarget->height())];
    // The flush uniforms are read by both vertex and fragment stages.
    [encoder setVertexBuffer:mtl_buffer(flushUniformBufferRing())
                      offset:flushDesc.flushUniformDataOffsetInBytes
                     atIndex:FLUSH_UNIFORM_BUFFER_IDX];
    [encoder setFragmentBuffer:mtl_buffer(flushUniformBufferRing())
                        offset:flushDesc.flushUniformDataOffsetInBytes
                       atIndex:FLUSH_UNIFORM_BUFFER_IDX];
    [encoder setVertexTexture:m_tessVertexTexture atIndex:TESS_VERTEX_TEXTURE_IDX];
    [encoder setFragmentTexture:m_gradientTexture atIndex:GRAD_TEXTURE_IDX];
    if (flushDesc.pathCount > 0)
    {
        // Each buffer is bound at an offset to this flush's first record.
        [encoder setVertexBuffer:mtl_buffer(pathBufferRing())
                          offset:flushDesc.firstPath * sizeof(pls::PathData)
                         atIndex:PATH_BUFFER_IDX];
        // Paint data is consumed by the fragment stage in atomic mode, and by the vertex stage
        // otherwise.
        if (flushDesc.interlockMode == pls::InterlockMode::atomics)
        {
            [encoder setFragmentBuffer:mtl_buffer(paintBufferRing())
                                offset:flushDesc.firstPaint * sizeof(pls::PaintData)
                               atIndex:PAINT_BUFFER_IDX];
            [encoder setFragmentBuffer:mtl_buffer(paintAuxBufferRing())
                                offset:flushDesc.firstPaintAux * sizeof(pls::PaintAuxData)
                               atIndex:PAINT_AUX_BUFFER_IDX];
        }
        else
        {
            [encoder setVertexBuffer:mtl_buffer(paintBufferRing())
                              offset:flushDesc.firstPaint * sizeof(pls::PaintData)
                             atIndex:PAINT_BUFFER_IDX];
            [encoder setVertexBuffer:mtl_buffer(paintAuxBufferRing())
                              offset:flushDesc.firstPaintAux * sizeof(pls::PaintAuxData)
                             atIndex:PAINT_AUX_BUFFER_IDX];
        }
    }
    if (flushDesc.contourCount > 0)
    {
        [encoder setVertexBuffer:mtl_buffer(contourBufferRing())
                          offset:flushDesc.firstContour * sizeof(pls::ContourData)
                         atIndex:CONTOUR_BUFFER_IDX];
    }
    if (flushDesc.interlockMode == pls::InterlockMode::atomics)
    {
        // In atomic mode, the PLS planes are buffers that we need to bind separately.
        // Since the PLS plane indices collide with other buffer bindings, offset the binding
        // indices of these buffers by DEFAULT_BINDINGS_SET_SIZE.
        if (!pls::ShadersEmitColorToRasterPipeline(flushDesc.interlockMode,
                                                   flushDesc.combinedShaderFeatures))
        {
            // Color is accumulated in an offscreen buffer instead of the raster pipeline.
            [encoder setFragmentBuffer:renderTarget->colorAtomicBuffer()
                                offset:0
                               atIndex:FRAMEBUFFER_PLANE_IDX + DEFAULT_BINDINGS_SET_SIZE];
        }
        [encoder setFragmentBuffer:renderTarget->coverageAtomicBuffer()
                            offset:0
                           atIndex:COVERAGE_PLANE_IDX + DEFAULT_BINDINGS_SET_SIZE];
        if (flushDesc.combinedShaderFeatures & pls::ShaderFeatures::ENABLE_CLIPPING)
        {
            // Only bound when clipping is in use; the shader omits clipBuffer otherwise.
            [encoder setFragmentBuffer:renderTarget->clipAtomicBuffer()
                                offset:0
                               atIndex:CLIP_PLANE_IDX + DEFAULT_BINDINGS_SET_SIZE];
        }
    }
    if (flushDesc.wireframe)
    {
        [encoder setTriangleFillMode:MTLTriangleFillModeLines];
    }
    return encoder;
}
| |
// Encodes one full flush into the caller-provided command buffer:
//   1. render complex color ramps into the gradient texture,
//   2. blit pre-baked simple color ramps into the gradient texture,
//   3. tessellate curves into the tessellation texture,
//   4. pre-generate image mipmaps,
//   5. execute the draw list inside a pixel-local-storage render pass.
// The command buffer is only encoded here, never committed; the caller owns submission.
void PLSRenderContextMetalImpl::flush(const FlushDescriptor& desc)
{
    auto* renderTarget = static_cast<PLSRenderTargetMetal*>(desc.renderTarget);
    id<MTLCommandBuffer> commandBuffer = (__bridge id<MTLCommandBuffer>)desc.externalCommandBuffer;

    // Render the complex color ramps to the gradient texture.
    if (desc.complexGradSpanCount > 0)
    {
        MTLRenderPassDescriptor* gradPass = [MTLRenderPassDescriptor renderPassDescriptor];
        gradPass.renderTargetWidth = kGradTextureWidth;
        gradPass.renderTargetHeight = desc.complexGradRowsTop + desc.complexGradRowsHeight;
        gradPass.colorAttachments[0].loadAction = MTLLoadActionDontCare;
        gradPass.colorAttachments[0].storeAction = MTLStoreActionStore;
        gradPass.colorAttachments[0].texture = m_gradientTexture;

        id<MTLRenderCommandEncoder> gradEncoder =
            [commandBuffer renderCommandEncoderWithDescriptor:gradPass];
        // Only the rows assigned to complex gradients are rendered; the viewport confines
        // rasterization to that band of the gradient texture.
        [gradEncoder setViewport:make_viewport(0,
                                               static_cast<double>(desc.complexGradRowsTop),
                                               kGradTextureWidth,
                                               static_cast<float>(desc.complexGradRowsHeight))];
        [gradEncoder setRenderPipelineState:m_colorRampPipeline->pipelineState()];
        [gradEncoder setVertexBuffer:mtl_buffer(flushUniformBufferRing())
                              offset:desc.flushUniformDataOffsetInBytes
                             atIndex:FLUSH_UNIFORM_BUFFER_IDX];
        [gradEncoder setVertexBuffer:mtl_buffer(gradSpanBufferRing())
                              offset:desc.firstComplexGradSpan * sizeof(pls::GradientSpan)
                             atIndex:0];
        [gradEncoder setCullMode:MTLCullModeBack];
        // One instanced quad per gradient span.
        [gradEncoder drawPrimitives:MTLPrimitiveTypeTriangleStrip
                        vertexStart:0
                        vertexCount:4
                      instanceCount:desc.complexGradSpanCount];
        [gradEncoder endEncoding];
    }

    // Copy the simple color ramps to the gradient texture.
    if (desc.simpleGradTexelsHeight > 0)
    {
        id<MTLBlitCommandEncoder> textureBlitEncoder = [commandBuffer blitCommandEncoder];
        [textureBlitEncoder
                 copyFromBuffer:mtl_buffer(simpleColorRampsBufferRing())
                   sourceOffset:desc.simpleGradDataOffsetInBytes
              sourceBytesPerRow:kGradTextureWidth * 4
            sourceBytesPerImage:desc.simpleGradTexelsHeight * kGradTextureWidth * 4
                     sourceSize:MTLSizeMake(
                                    desc.simpleGradTexelsWidth, desc.simpleGradTexelsHeight, 1)
                      toTexture:m_gradientTexture
               destinationSlice:0
               destinationLevel:0
              destinationOrigin:MTLOriginMake(0, 0, 0)];
        [textureBlitEncoder endEncoding];
    }

    // Tessellate all curves into vertices in the tessellation texture.
    if (desc.tessVertexSpanCount > 0)
    {
        MTLRenderPassDescriptor* tessPass = [MTLRenderPassDescriptor renderPassDescriptor];
        tessPass.renderTargetWidth = kTessTextureWidth;
        tessPass.renderTargetHeight = desc.tessDataHeight;
        tessPass.colorAttachments[0].loadAction = MTLLoadActionDontCare;
        tessPass.colorAttachments[0].storeAction = MTLStoreActionStore;
        tessPass.colorAttachments[0].texture = m_tessVertexTexture;

        id<MTLRenderCommandEncoder> tessEncoder =
            [commandBuffer renderCommandEncoderWithDescriptor:tessPass];
        [tessEncoder setViewport:make_viewport(0, 0, kTessTextureWidth, desc.tessDataHeight)];
        [tessEncoder setRenderPipelineState:m_tessPipeline->pipelineState()];
        [tessEncoder setVertexBuffer:mtl_buffer(flushUniformBufferRing())
                              offset:desc.flushUniformDataOffsetInBytes
                             atIndex:FLUSH_UNIFORM_BUFFER_IDX];
        [tessEncoder setVertexBuffer:mtl_buffer(tessSpanBufferRing())
                              offset:desc.firstTessVertexSpan * sizeof(pls::TessVertexSpan)
                             atIndex:0];
        // The tessellation shader reads path and contour records; those buffers are only
        // populated when there is at least one of each.
        assert(desc.pathCount > 0);
        [tessEncoder setVertexBuffer:mtl_buffer(pathBufferRing())
                              offset:desc.firstPath * sizeof(pls::PathData)
                             atIndex:PATH_BUFFER_IDX];
        assert(desc.contourCount > 0);
        [tessEncoder setVertexBuffer:mtl_buffer(contourBufferRing())
                              offset:desc.firstContour * sizeof(pls::ContourData)
                             atIndex:CONTOUR_BUFFER_IDX];
        [tessEncoder setCullMode:MTLCullModeBack];
        // One instanced index-buffer draw per tessellation span.
        [tessEncoder drawIndexedPrimitives:MTLPrimitiveTypeTriangle
                                indexCount:std::size(pls::kTessSpanIndices)
                                 indexType:MTLIndexTypeUInt16
                               indexBuffer:m_tessSpanIndexBuffer
                         indexBufferOffset:0
                             instanceCount:desc.tessVertexSpanCount];
        [tessEncoder endEncoding];
    }

    // Generate mipmaps if needed. (Must happen before the main render pass begins, since
    // mipmap generation uses its own blit encoder.)
    for (const DrawBatch& batch : *desc.drawList)
    {
        // Bind the appropriate image texture, if any.
        if (auto imageTextureMetal = static_cast<const PLSTextureMetalImpl*>(batch.imageTexture))
        {
            imageTextureMetal->ensureMipmaps(commandBuffer);
        }
    }

    // Set up a render pass to do the final rendering using (some form of) pixel local storage.
    MTLRenderPassDescriptor* pass = [MTLRenderPassDescriptor renderPassDescriptor];
    pass.renderTargetWidth = desc.renderTargetUpdateBounds.right;
    pass.renderTargetHeight = desc.renderTargetUpdateBounds.bottom;
    pass.colorAttachments[FRAMEBUFFER_PLANE_IDX].texture = renderTarget->targetTexture();
    switch (desc.colorLoadAction)
    {
        case pls::LoadAction::clear:
        {
            float cc[4];
            UnpackColorToRGBA32F(desc.clearColor, cc);
            pass.colorAttachments[FRAMEBUFFER_PLANE_IDX].loadAction = MTLLoadActionClear;
            pass.colorAttachments[FRAMEBUFFER_PLANE_IDX].clearColor =
                MTLClearColorMake(cc[0], cc[1], cc[2], cc[3]);
            break;
        }
        case pls::LoadAction::preserveRenderTarget:
            pass.colorAttachments[FRAMEBUFFER_PLANE_IDX].loadAction = MTLLoadActionLoad;
            break;
        case pls::LoadAction::dontCare:
            pass.colorAttachments[FRAMEBUFFER_PLANE_IDX].loadAction = MTLLoadActionDontCare;
            break;
    }
    pass.colorAttachments[FRAMEBUFFER_PLANE_IDX].storeAction = MTLStoreActionStore;

    // In atomic mode, advanced blends have to render through an offscreen color buffer in order to
    // read destination color. This offscreen color buffer gets transferred to the main framebuffer
    // during the final "plsAtomicResolve" operation.
    bool usesOffscreenColorBuffer = false;

    if (desc.interlockMode == pls::InterlockMode::rasterOrdering)
    {
        // In rasterOrdering mode, the PLS planes are accessed as color attachments.
        // FIXED: the storeActions below previously used
        // "interlockMode == atomics ? Store : DontCare", which is unreachable dead code here --
        // this branch has already established interlockMode == rasterOrdering, so the ternary
        // always evaluated to DontCare. These planes are memoryless scratch textures that only
        // live for the duration of the render pass, so DontCare is the correct (and only valid)
        // store action.
        pass.colorAttachments[COVERAGE_PLANE_IDX].texture =
            renderTarget->m_coverageMemorylessTexture;
        pass.colorAttachments[COVERAGE_PLANE_IDX].loadAction = MTLLoadActionClear;
        pass.colorAttachments[COVERAGE_PLANE_IDX].clearColor =
            MTLClearColorMake(desc.coverageClearValue, 0, 0, 0);
        pass.colorAttachments[COVERAGE_PLANE_IDX].storeAction = MTLStoreActionDontCare;

        pass.colorAttachments[CLIP_PLANE_IDX].texture = renderTarget->m_clipMemorylessTexture;
        pass.colorAttachments[CLIP_PLANE_IDX].loadAction = MTLLoadActionClear;
        pass.colorAttachments[CLIP_PLANE_IDX].clearColor = MTLClearColorMake(0, 0, 0, 0);
        pass.colorAttachments[CLIP_PLANE_IDX].storeAction = MTLStoreActionDontCare;

        pass.colorAttachments[ORIGINAL_DST_COLOR_PLANE_IDX].texture =
            renderTarget->m_originalDstColorMemorylessTexture;
        pass.colorAttachments[ORIGINAL_DST_COLOR_PLANE_IDX].loadAction = MTLLoadActionDontCare;
        pass.colorAttachments[ORIGINAL_DST_COLOR_PLANE_IDX].storeAction = MTLStoreActionDontCare;
    }
    else
    {
        assert(desc.interlockMode == pls::InterlockMode::atomics);
        usesOffscreenColorBuffer =
            !pls::ShadersEmitColorToRasterPipeline(desc.interlockMode, desc.combinedShaderFeatures);
        if (usesOffscreenColorBuffer &&
            desc.colorLoadAction == pls::LoadAction::preserveRenderTarget)
        {
            // Since we need to preserve the renderTarget during load, and since we're rendering
            // to an offscreen color buffer, we have to literally copy the renderTarget into the
            // color buffer.
            id<MTLBlitCommandEncoder> copyEncoder = [commandBuffer blitCommandEncoder];
            auto updateOrigin = MTLOriginMake(
                desc.renderTargetUpdateBounds.left, desc.renderTargetUpdateBounds.top, 0);
            auto updateSize = MTLSizeMake(
                desc.renderTargetUpdateBounds.width(), desc.renderTargetUpdateBounds.height(), 1);
            // The atomic color buffer is laid out as a full-width uint32 image; only the dirty
            // update rectangle is copied.
            [copyEncoder copyFromTexture:renderTarget->targetTexture()
                             sourceSlice:0
                             sourceLevel:0
                            sourceOrigin:updateOrigin
                              sourceSize:updateSize
                                toBuffer:renderTarget->colorAtomicBuffer()
                       destinationOffset:(updateOrigin.y * renderTarget->width() + updateOrigin.x) *
                                         sizeof(uint32_t)
                  destinationBytesPerRow:renderTarget->width() * sizeof(uint32_t)
                destinationBytesPerImage:renderTarget->height() * renderTarget->width() *
                                         sizeof(uint32_t)];
            [copyEncoder endEncoding];
        }
    }

    // Execute the DrawList.
    id<MTLRenderCommandEncoder> encoder = makeRenderPassForDraws(desc, pass, commandBuffer);
    for (const DrawBatch& batch : *desc.drawList)
    {
        if (batch.elementCount == 0)
        {
            continue;
        }

        // Setup the pipeline for this specific drawType and shaderFeatures. Atomic mode compiles
        // one uber-shader per flush, so it uses the combined feature set.
        pls::ShaderFeatures shaderFeatures = desc.interlockMode == pls::InterlockMode::atomics
                                                 ? desc.combinedShaderFeatures
                                                 : batch.shaderFeatures;
        pls::ShaderMiscFlags shaderMiscFlags = pls::ShaderMiscFlags::none;
        if (usesOffscreenColorBuffer)
        {
            if (batch.drawType == pls::DrawType::plsAtomicResolve)
            {
                // Atomic mode can always do a coalesced resolve when rendering to its offscreen
                // color buffer.
                shaderMiscFlags |= pls::ShaderMiscFlags::coalescedResolveAndTransfer;
            }
            else if (batch.drawType == pls::DrawType::plsAtomicInitialize)
            {
                if (desc.colorLoadAction == pls::LoadAction::clear)
                {
                    shaderMiscFlags |= pls::ShaderMiscFlags::storeColorClear;
                }
                else if (desc.colorLoadAction == pls::LoadAction::preserveRenderTarget &&
                         renderTarget->pixelFormat() == MTLPixelFormatBGRA8Unorm)
                {
                    // We already copied the renderTarget to our color buffer, but since the target
                    // is BGRA, we also need to swizzle it to RGBA before it's ready for PLS.
                    shaderMiscFlags |= pls::ShaderMiscFlags::swizzleColorBGRAToRGBA;
                }
            }
        }
        id<MTLRenderPipelineState> drawPipelineState =
            findCompatibleDrawPipeline(
                batch.drawType, shaderFeatures, desc.interlockMode, shaderMiscFlags)
                ->pipelineState(renderTarget->pixelFormat());

        // Bind the appropriate image texture, if any.
        if (auto imageTextureMetal = static_cast<const PLSTextureMetalImpl*>(batch.imageTexture))
        {
            [encoder setFragmentTexture:imageTextureMetal->texture() atIndex:IMAGE_TEXTURE_IDX];
        }

        DrawType drawType = batch.drawType;
        switch (drawType)
        {
            case DrawType::midpointFanPatches:
            case DrawType::outerCurvePatches:
            {
                // Draw PLS patches that connect the tessellation vertices.
                [encoder setRenderPipelineState:drawPipelineState];
                [encoder setVertexBuffer:m_pathPatchVertexBuffer offset:0 atIndex:0];
                [encoder setCullMode:MTLCullModeBack];
                // Don't use baseInstance in order to run on Apple GPU Family 2.
                // TODO: Use baseInstance instead once we deprecate Apple2.
                [encoder setVertexBytes:&batch.baseElement
                                 length:sizeof(uint32_t)
                                atIndex:PATH_BASE_INSTANCE_UNIFORM_BUFFER_IDX];
                [encoder drawIndexedPrimitives:MTLPrimitiveTypeTriangle
                                    indexCount:PatchIndexCount(drawType)
                                     indexType:MTLIndexTypeUInt16
                                   indexBuffer:m_pathPatchIndexBuffer
                             indexBufferOffset:PatchBaseIndex(drawType) * sizeof(uint16_t)
                                 instanceCount:batch.elementCount];
                break;
            }
            case DrawType::interiorTriangulation:
            {
                [encoder setRenderPipelineState:drawPipelineState];
                [encoder setVertexBuffer:mtl_buffer(triangleBufferRing()) offset:0 atIndex:0];
                [encoder setCullMode:MTLCullModeBack];
                [encoder drawPrimitives:MTLPrimitiveTypeTriangle
                            vertexStart:batch.baseElement
                            vertexCount:batch.elementCount];
                break;
            }
            case DrawType::imageRect:
            case DrawType::imageMesh:
            {
                [encoder setRenderPipelineState:drawPipelineState];
                [encoder setVertexBuffer:mtl_buffer(imageDrawUniformBufferRing())
                                  offset:batch.imageDrawDataOffset
                                 atIndex:IMAGE_DRAW_UNIFORM_BUFFER_IDX];
                [encoder setFragmentBuffer:mtl_buffer(imageDrawUniformBufferRing())
                                    offset:batch.imageDrawDataOffset
                                   atIndex:IMAGE_DRAW_UNIFORM_BUFFER_IDX];
                // Image draws can be back-facing; don't cull.
                [encoder setCullMode:MTLCullModeNone];
                if (drawType == DrawType::imageRect)
                {
                    assert(desc.interlockMode == pls::InterlockMode::atomics);
                    [encoder setVertexBuffer:m_imageRectVertexBuffer offset:0 atIndex:0];
                    [encoder drawIndexedPrimitives:MTLPrimitiveTypeTriangle
                                        indexCount:std::size(pls::kImageRectIndices)
                                         indexType:MTLIndexTypeUInt16
                                       indexBuffer:m_imageRectIndexBuffer
                                 indexBufferOffset:0];
                }
                else
                {
                    LITE_RTTI_CAST_OR_BREAK(
                        vertexBuffer, const RenderBufferMetalImpl*, batch.vertexBuffer);
                    LITE_RTTI_CAST_OR_BREAK(uvBuffer, const RenderBufferMetalImpl*, batch.uvBuffer);
                    LITE_RTTI_CAST_OR_BREAK(
                        indexBuffer, const RenderBufferMetalImpl*, batch.indexBuffer);
                    [encoder setVertexBuffer:vertexBuffer->submittedBuffer() offset:0 atIndex:0];
                    [encoder setVertexBuffer:uvBuffer->submittedBuffer() offset:0 atIndex:1];
                    [encoder drawIndexedPrimitives:MTLPrimitiveTypeTriangle
                                        indexCount:batch.elementCount
                                         indexType:MTLIndexTypeUInt16
                                       indexBuffer:indexBuffer->submittedBuffer()
                                 indexBufferOffset:batch.baseElement * sizeof(uint16_t)];
                }
                break;
            }
            case DrawType::plsAtomicInitialize:
            case DrawType::plsAtomicResolve:
            {
                // Full-screen quad that initializes or resolves the atomic PLS planes.
                assert(desc.interlockMode == pls::InterlockMode::atomics);
                [encoder setRenderPipelineState:drawPipelineState];
                [encoder drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4];
                break;
            }
            case DrawType::stencilClipReset:
            {
                RIVE_UNREACHABLE();
            }
        }
        // Atomic mode requires a barrier between overlapping draws so fragment shaders observe
        // each other's plane writes in order.
        if (desc.interlockMode == pls::InterlockMode::atomics && batch.needsBarrier)
        {
            switch (m_metalFeatures.atomicBarrierType)
            {
                case AtomicBarrierType::memoryBarrier:
                {
#if !defined(RIVE_IOS) && !defined(RIVE_IOS_SIMULATOR)
                    if (@available(macOS 10.14, *))
                    {
                        [encoder memoryBarrierWithScope:MTLBarrierScopeBuffers |
                                                        MTLBarrierScopeRenderTargets
                                            afterStages:MTLRenderStageFragment
                                           beforeStages:MTLRenderStageFragment];
                        break;
                    }
#endif
                    // atomicBarrierType shouldn't be "memoryBarrier" in this case.
                    RIVE_UNREACHABLE();
                }
                case AtomicBarrierType::rasterOrderGroup:
                    // Raster order groups synchronize implicitly; nothing to encode.
                    break;
                case AtomicBarrierType::renderPassBreak:
                    // On very old hardware that can't support barriers, we just take a sledge
                    // hammer and break the entire render pass between overlapping draws.
                    // TODO: Is there a lighter way to achieve this?
                    [encoder endEncoding];
                    pass.colorAttachments[FRAMEBUFFER_PLANE_IDX].loadAction = MTLLoadActionLoad;
                    encoder = makeRenderPassForDraws(desc, pass, commandBuffer);
                    break;
            }
        }
    }
    [encoder endEncoding];

    if (desc.isFinalFlushOfFrame)
    {
        // Schedule a callback that will unlock the buffers used by this flush, after the GPU has
        // finished rendering with them. This unblocks the CPU from reusing them in a future flush.
        std::mutex& thisFlushLock = m_bufferRingLocks[m_bufferRingIdx];
        [commandBuffer addCompletedHandler:^(id<MTLCommandBuffer>) {
          assert(!thisFlushLock.try_lock()); // The mutex should already be locked.
          thisFlushLock.unlock();
        }];
    }
}
| } // namespace rive::pls |