/*
* Copyright 2023 Rive
*/
#include "rive/renderer/metal/render_context_metal_impl.h"
#include "background_shader_compiler.h"
#include "rive/renderer/buffer_ring.hpp"
#include "rive/renderer/texture.hpp"
#include "rive/renderer/rive_render_buffer.hpp"
#include "shaders/constants.glsl"
#include <sstream>
#include "generated/shaders/color_ramp.exports.h"
#include "generated/shaders/tessellate.exports.h"
#if defined(RIVE_IOS_SIMULATOR)
#import <mach-o/arch.h>
#endif
namespace rive::gpu
{
#if defined(RIVE_IOS)
#include "generated/shaders/rive_pls_ios.metallib.c"
#elif defined(RIVE_IOS_SIMULATOR)
#include "generated/shaders/rive_pls_ios_simulator.metallib.c"
#elif defined(RIVE_XROS)
#include "generated/shaders/rive_renderer_xros.metallib.c"
#elif defined(RIVE_XROS_SIMULATOR)
#include "generated/shaders/rive_renderer_xros_simulator.metallib.c"
#elif defined(RIVE_APPLETVOS)
#include "generated/shaders/rive_renderer_appletvos.metallib.c"
#elif defined(RIVE_APPLETVOS_SIMULATOR)
#include "generated/shaders/rive_renderer_appletvsimulator.metallib.c"
#else
#include "generated/shaders/rive_pls_macosx.metallib.c"
#endif
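// Compiles an MTLRenderPipelineState from the given descriptor. Pipeline
// creation failures are not recoverable here, so we log the error and abort.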
static id<MTLRenderPipelineState> make_pipeline_state(
id<MTLDevice> gpu, MTLRenderPipelineDescriptor* desc)
{
NSError* err = [NSError errorWithDomain:@"pipeline_create"
code:201
userInfo:nil];
id<MTLRenderPipelineState> state =
[gpu newRenderPipelineStateWithDescriptor:desc error:&err];
if (!state)
{
fprintf(stderr, "%s\n", err.localizedDescription.UTF8String);
abort();
}
return state;
}
// Renders color ramps to the gradient texture.
class RenderContextMetalImpl::ColorRampPipeline
{
public:
ColorRampPipeline(id<MTLDevice> gpu, id<MTLLibrary> plsLibrary)
{
MTLRenderPipelineDescriptor* desc =
[[MTLRenderPipelineDescriptor alloc] init];
desc.vertexFunction =
[plsLibrary newFunctionWithName:@GLSL_colorRampVertexMain];
desc.fragmentFunction =
[plsLibrary newFunctionWithName:@GLSL_colorRampFragmentMain];
desc.colorAttachments[0].pixelFormat = MTLPixelFormatRGBA8Unorm;
m_pipelineState = make_pipeline_state(gpu, desc);
}
id<MTLRenderPipelineState> pipelineState() const { return m_pipelineState; }
private:
id<MTLRenderPipelineState> m_pipelineState;
};
// Renders tessellated vertices to the tessellation texture.
class RenderContextMetalImpl::TessellatePipeline
{
public:
TessellatePipeline(id<MTLDevice> gpu, id<MTLLibrary> plsLibrary)
{
MTLRenderPipelineDescriptor* desc =
[[MTLRenderPipelineDescriptor alloc] init];
desc.vertexFunction =
[plsLibrary newFunctionWithName:@GLSL_tessellateVertexMain];
desc.fragmentFunction =
[plsLibrary newFunctionWithName:@GLSL_tessellateFragmentMain];
desc.colorAttachments[0].pixelFormat = MTLPixelFormatRGBA32Uint;
m_pipelineState = make_pipeline_state(gpu, desc);
}
id<MTLRenderPipelineState> pipelineState() const { return m_pipelineState; }
private:
id<MTLRenderPipelineState> m_pipelineState;
};
// Renders paths to the main render target.
class RenderContextMetalImpl::DrawPipeline
{
public:
// Precompiled functions are embedded in namespaces. Return the fully
// qualified name of the desired function, including its namespace.
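// e.g., a midpoint-fan draw with only ENABLE_CLIPPING (and no clockwise
// fill) maps to a name of the form "p10000000::<functionBaseName>".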
static NSString* GetPrecompiledFunctionName(
DrawType drawType,
gpu::ShaderFeatures shaderFeatures,
gpu::ShaderMiscFlags shaderMiscFlags,
id<MTLLibrary> precompiledLibrary,
const char* functionBaseName)
{
// Each feature corresponds to a specific index in the namespaceID.
// These must stay in sync with generate_draw_combinations.py.
char namespaceID[] = "00000000";
static_assert(sizeof(namespaceID) == gpu::kShaderFeatureCount +
1 /*DRAW_INTERIOR_TRIANGLES*/ +
1 /*null terminator*/);
for (size_t i = 0; i < gpu::kShaderFeatureCount; ++i)
{
ShaderFeatures feature = static_cast<ShaderFeatures>(1 << i);
if (shaderFeatures & feature)
{
namespaceID[i] = '1';
}
static_assert((int)ShaderFeatures::ENABLE_CLIPPING == 1 << 0);
static_assert((int)ShaderFeatures::ENABLE_CLIP_RECT == 1 << 1);
static_assert((int)ShaderFeatures::ENABLE_ADVANCED_BLEND == 1 << 2);
static_assert((int)ShaderFeatures::ENABLE_FEATHER == 1 << 3);
static_assert((int)ShaderFeatures::ENABLE_EVEN_ODD == 1 << 4);
static_assert((int)ShaderFeatures::ENABLE_NESTED_CLIPPING ==
1 << 5);
static_assert((int)ShaderFeatures::ENABLE_HSL_BLEND_MODES ==
1 << 6);
}
if (drawType == DrawType::interiorTriangulation)
{
namespaceID[gpu::kShaderFeatureCount] = '1';
}
char namespacePrefix;
switch (drawType)
{
case DrawType::midpointFanPatches:
case DrawType::midpointFanCenterAAPatches:
case DrawType::outerCurvePatches:
case DrawType::interiorTriangulation:
namespacePrefix =
(shaderMiscFlags & gpu::ShaderMiscFlags::clockwiseFill)
? 'c'
: 'p';
break;
case DrawType::imageRect:
RIVE_UNREACHABLE();
case DrawType::imageMesh:
namespacePrefix = 'm';
break;
case DrawType::atomicInitialize:
case DrawType::atomicResolve:
case DrawType::stencilClipReset:
RIVE_UNREACHABLE();
}
return [NSString stringWithFormat:@"%c%s::%s",
namespacePrefix,
namespaceID,
functionBaseName];
}
DrawPipeline(id<MTLDevice> gpu,
id<MTLLibrary> library,
NSString* vertexFunctionName,
NSString* fragmentFunctionName,
gpu::DrawType drawType,
gpu::InterlockMode interlockMode,
gpu::ShaderFeatures shaderFeatures,
gpu::ShaderMiscFlags shaderMiscFlags)
{
auto makePipelineState = [=](id<MTLFunction> vertexMain,
id<MTLFunction> fragmentMain,
MTLPixelFormat pixelFormat) {
MTLRenderPipelineDescriptor* desc =
[[MTLRenderPipelineDescriptor alloc] init];
desc.vertexFunction = vertexMain;
desc.fragmentFunction = fragmentMain;
auto* framebuffer = desc.colorAttachments[COLOR_PLANE_IDX];
framebuffer.pixelFormat = pixelFormat;
switch (interlockMode)
{
case gpu::InterlockMode::rasterOrdering:
// In rasterOrdering mode, the PLS planes are accessed as
// color attachments.
desc.colorAttachments[CLIP_PLANE_IDX].pixelFormat =
MTLPixelFormatR32Uint;
desc.colorAttachments[SCRATCH_COLOR_PLANE_IDX].pixelFormat =
pixelFormat;
desc.colorAttachments[COVERAGE_PLANE_IDX].pixelFormat =
MTLPixelFormatR32Uint;
break;
case gpu::InterlockMode::atomics:
// In atomic mode, the PLS planes are accessed as device
// buffers. We only use the "framebuffer" attachment
// configured above.
if (shaderMiscFlags &
gpu::ShaderMiscFlags::fixedFunctionColorOutput)
{
// The shader expects a "src-over" blend function in
// order to implement antialiasing and opacity.
framebuffer.blendingEnabled = TRUE;
framebuffer.sourceRGBBlendFactor = MTLBlendFactorOne;
framebuffer.destinationRGBBlendFactor =
MTLBlendFactorOneMinusSourceAlpha;
framebuffer.rgbBlendOperation = MTLBlendOperationAdd;
framebuffer.sourceAlphaBlendFactor = MTLBlendFactorOne;
framebuffer.destinationAlphaBlendFactor =
MTLBlendFactorOneMinusSourceAlpha;
framebuffer.alphaBlendOperation = MTLBlendOperationAdd;
framebuffer.writeMask = MTLColorWriteMaskAll;
}
else if (drawType == gpu::DrawType::atomicResolve)
{
// We're resolving from the offscreen color buffer to
// the framebuffer attachment. Write out the final color
// directly without any blend modes.
framebuffer.blendingEnabled = FALSE;
framebuffer.writeMask = MTLColorWriteMaskAll;
}
else
{
// This pipeline renders by storing to the offscreen
// color buffer; disable writes to the framebuffer
// attachment.
framebuffer.blendingEnabled = FALSE;
framebuffer.writeMask = MTLColorWriteMaskNone;
}
break;
case gpu::InterlockMode::clockwiseAtomic:
case gpu::InterlockMode::msaa:
RIVE_UNREACHABLE();
}
return make_pipeline_state(gpu, desc);
};
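// Compile both an RGBA8 and a BGRA8 variant up front so render targets of
// either component order can be drawn without building a new pipeline.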
id<MTLFunction> vertexMain =
[library newFunctionWithName:vertexFunctionName];
id<MTLFunction> fragmentMain =
[library newFunctionWithName:fragmentFunctionName];
m_pipelineStateRGBA8 = makePipelineState(
vertexMain, fragmentMain, MTLPixelFormatRGBA8Unorm);
m_pipelineStateBGRA8 = makePipelineState(
vertexMain, fragmentMain, MTLPixelFormatBGRA8Unorm);
}
id<MTLRenderPipelineState> pipelineState(MTLPixelFormat pixelFormat) const
{
assert(pixelFormat == MTLPixelFormatRGBA8Unorm ||
pixelFormat == MTLPixelFormatRGBA16Float ||
pixelFormat == MTLPixelFormatRGBA8Unorm_sRGB ||
pixelFormat == MTLPixelFormatBGRA8Unorm ||
pixelFormat == MTLPixelFormatBGRA8Unorm_sRGB);
switch (pixelFormat)
{
case MTLPixelFormatRGBA8Unorm_sRGB:
case MTLPixelFormatRGBA8Unorm:
case MTLPixelFormatRGBA16Float:
return m_pipelineStateRGBA8;
default:
return m_pipelineStateBGRA8;
}
}
private:
id<MTLRenderPipelineState> m_pipelineStateRGBA8;
id<MTLRenderPipelineState> m_pipelineStateBGRA8;
};
#if defined(RIVE_IOS) || defined(RIVE_XROS) || defined(RIVE_APPLETVOS)
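// MTLGPUFamilyApple4 roughly corresponds to the A11 GPU and newer (Apple's
// fully in-house GPU designs); older devices use PowerVR-derived GPUs.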
static bool is_apple_silicon(id<MTLDevice> gpu)
{
if (@available(iOS 13, tvOS 13, visionOS 1, *))
{
return [gpu supportsFamily:MTLGPUFamilyApple4];
}
return false;
}
#endif
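// A ring of kBufferRingSize shared-storage MTLBuffers. Since the storage mode
// is shared, "mapping" just returns the buffer contents pointer and unmapping
// is a no-op.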
class BufferRingMetalImpl : public BufferRing
{
public:
static std::unique_ptr<BufferRingMetalImpl> Make(id<MTLDevice> gpu,
size_t capacityInBytes)
{
return capacityInBytes != 0
? std::make_unique<BufferRingMetalImpl>(gpu, capacityInBytes)
: nullptr;
}
BufferRingMetalImpl(id<MTLDevice> gpu, size_t capacityInBytes) :
BufferRing(capacityInBytes)
{
for (int i = 0; i < kBufferRingSize; ++i)
{
m_buffers[i] =
[gpu newBufferWithLength:capacityInBytes
options:MTLResourceStorageModeShared];
}
}
id<MTLBuffer> submittedBuffer() const
{
return m_buffers[submittedBufferIdx()];
}
protected:
void* onMapBuffer(int bufferIdx, size_t mapSizeInBytes) override
{
return m_buffers[bufferIdx].contents;
}
void onUnmapAndSubmitBuffer(int bufferIdx, size_t mapSizeInBytes) override
{}
private:
id<MTLBuffer> m_buffers[kBufferRingSize];
};
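// Minimal usage sketch (assuming ContextOptions is default-constructible;
// error handling omitted):
//
//   id<MTLDevice> gpu = MTLCreateSystemDefaultDevice();
//   std::unique_ptr<rive::gpu::RenderContext> context =
//       rive::gpu::RenderContextMetalImpl::MakeContext(gpu, {});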
std::unique_ptr<RenderContext> RenderContextMetalImpl::MakeContext(
id<MTLDevice> gpu, const ContextOptions& contextOptions)
{
auto renderContextImpl = std::unique_ptr<RenderContextMetalImpl>(
new RenderContextMetalImpl(gpu, contextOptions));
return std::make_unique<RenderContext>(std::move(renderContextImpl));
}
RenderContextMetalImpl::RenderContextMetalImpl(
id<MTLDevice> gpu, const ContextOptions& contextOptions) :
m_contextOptions(contextOptions), m_gpu(gpu)
{
// It appears, so far, that we don't need to use flat interpolation for path
// IDs on any Apple device, and it's faster not to.
m_platformFeatures.avoidFlatVaryings = true;
m_platformFeatures.invertOffscreenY = true;
#if defined(RIVE_IOS) || defined(RIVE_XROS) || defined(RIVE_APPLETVOS)
m_platformFeatures.supportsRasterOrdering = true;
m_platformFeatures.supportsFragmentShaderAtomics = false;
if (!is_apple_silicon(m_gpu))
{
// The PowerVR GPU, at least on A10, has fp16 precision issues. We can't
// use the bottom 3 bits of the path and clip IDs in order for our
// equality testing to work.
m_platformFeatures.pathIDGranularity = 8;
}
#elif defined(RIVE_IOS_SIMULATOR) || defined(RIVE_XROS_SIMULATOR) || \
defined(RIVE_APPLETVOS_SIMULATOR)
// The simulator does not support framebuffer reads. Fall back on atomic
// mode.
m_platformFeatures.supportsRasterOrdering = false;
m_platformFeatures.supportsFragmentShaderAtomics = true;
#else
m_platformFeatures.supportsRasterOrdering =
[m_gpu supportsFamily:MTLGPUFamilyApple1] &&
!contextOptions.disableFramebufferReads;
m_platformFeatures.supportsFragmentShaderAtomics = true;
#endif
m_platformFeatures.atomicPLSMustBeInitializedAsDraw = true;
#if defined(RIVE_IOS) || defined(RIVE_XROS) || defined(RIVE_XROS_SIMULATOR) || \
defined(RIVE_APPLETVOS) || defined(RIVE_APPLETVOS_SIMULATOR)
// Atomic barriers are never used on iOS, but if we ever did need them, we
// would use rasterOrderGroups.
m_metalFeatures.atomicBarrierType = AtomicBarrierType::rasterOrderGroup;
#elif defined(RIVE_IOS_SIMULATOR)
const NXArchInfo* hostArchitecture = NXGetLocalArchInfo();
if (strncmp(hostArchitecture->name, "arm64", 5) == 0)
{
// The simulator doesn't advertise support for raster order groups, but
// they appear to work anyway on an Apple-Silicon-hosted simulator. Use
// rasterOrderGroup in this case because it's much faster than
// renderPassBreak. (On Intel/AMD this doesn't matter anyway because
// renderPassBreaks are cheap and actually faster than
// rasterOrderGroups.)
m_metalFeatures.atomicBarrierType = AtomicBarrierType::rasterOrderGroup;
}
else
{
m_metalFeatures.atomicBarrierType = AtomicBarrierType::renderPassBreak;
}
#else
// Use real memory barriers for atomic mode if they're available.
// "GPU devices in Apple3 through Apple9 families don’t support memory
// barriers that include the MTLRenderStages.fragment or .tile stages in the
// after argument..."
if (([m_gpu supportsFamily:MTLGPUFamilyCommon2] ||
[m_gpu supportsFamily:MTLGPUFamilyMac2]) &&
![m_gpu supportsFamily:MTLGPUFamilyApple3])
{
m_metalFeatures.atomicBarrierType = AtomicBarrierType::memoryBarrier;
}
else if (m_gpu.rasterOrderGroupsSupported)
{
m_metalFeatures.atomicBarrierType = AtomicBarrierType::rasterOrderGroup;
}
else
{
m_metalFeatures.atomicBarrierType = AtomicBarrierType::renderPassBreak;
}
#endif
m_backgroundShaderCompiler =
std::make_unique<BackgroundShaderCompiler>(m_gpu, m_metalFeatures);
// Load the precompiled shaders.
dispatch_data_t metallibData = dispatch_data_create(
#if defined(RIVE_IOS)
rive_pls_ios_metallib,
rive_pls_ios_metallib_len,
#elif defined(RIVE_IOS_SIMULATOR)
rive_pls_ios_simulator_metallib,
rive_pls_ios_simulator_metallib_len,
#elif defined(RIVE_XROS)
rive_renderer_xros_metallib,
rive_renderer_xros_metallib_len,
#elif defined(RIVE_XROS_SIMULATOR)
rive_renderer_xros_simulator_metallib,
rive_renderer_xros_simulator_metallib_len,
#elif defined(RIVE_APPLETVOS)
rive_renderer_appletvos_metallib,
rive_renderer_appletvos_metallib_len,
#elif defined(RIVE_APPLETVOS_SIMULATOR)
rive_renderer_appletvsimulator_metallib,
rive_renderer_appletvsimulator_metallib_len,
#else
rive_pls_macosx_metallib,
rive_pls_macosx_metallib_len,
#endif
nil,
nil);
NSError* err = [NSError errorWithDomain:@"metallib_load"
code:200
userInfo:nil];
m_plsPrecompiledLibrary = [m_gpu newLibraryWithData:metallibData
error:&err];
if (m_plsPrecompiledLibrary == nil)
{
fprintf(stderr, "Failed to load pls metallib.\n");
fprintf(stderr, "%s\n", err.localizedDescription.UTF8String);
abort();
}
m_colorRampPipeline =
std::make_unique<ColorRampPipeline>(m_gpu, m_plsPrecompiledLibrary);
MTLTextureDescriptor* desc = [[MTLTextureDescriptor alloc] init];
desc.pixelFormat = MTLPixelFormatR16Float;
desc.width = gpu::GAUSSIAN_TABLE_SIZE;
desc.height = 1;
desc.usage = MTLTextureUsageShaderRead;
desc.textureType = MTLTextureType2D;
desc.mipmapLevelCount = 1;
m_featherTexture = [m_gpu newTextureWithDescriptor:desc];
[m_featherTexture
replaceRegion:MTLRegionMake2D(0, 0, gpu::GAUSSIAN_TABLE_SIZE, 1)
mipmapLevel:0
withBytes:gpu::g_gaussianIntegralTableF16
bytesPerRow:sizeof(gpu::g_gaussianIntegralTableF16)];
m_tessPipeline =
std::make_unique<TessellatePipeline>(m_gpu, m_plsPrecompiledLibrary);
m_tessSpanIndexBuffer =
[m_gpu newBufferWithBytes:gpu::kTessSpanIndices
length:sizeof(gpu::kTessSpanIndices)
options:MTLResourceStorageModeShared];
// The precompiled static library has a fully-featured shader for each
// drawType in "rasterOrdering" mode. We load these at initialization and
// use them while waiting for the background compiler to generate more
// specialized, higher-performance shaders.
if (m_platformFeatures.supportsRasterOrdering)
{
for (auto drawType : {DrawType::midpointFanPatches,
DrawType::interiorTriangulation,
DrawType::imageMesh})
{
for (auto shaderMiscFlags : {gpu::ShaderMiscFlags::none,
gpu::ShaderMiscFlags::clockwiseFill})
{
gpu::ShaderFeatures allShaderFeatures =
gpu::ShaderFeaturesMaskFor(
drawType, gpu::InterlockMode::rasterOrdering);
uint32_t pipelineKey =
ShaderUniqueKey(drawType,
allShaderFeatures,
gpu::InterlockMode::rasterOrdering,
shaderMiscFlags);
m_drawPipelines[pipelineKey] = std::make_unique<DrawPipeline>(
m_gpu,
m_plsPrecompiledLibrary,
DrawPipeline::GetPrecompiledFunctionName(
drawType,
allShaderFeatures & gpu::kVertexShaderFeaturesMask,
gpu::ShaderMiscFlags::none,
m_plsPrecompiledLibrary,
GLSL_drawVertexMain),
DrawPipeline::GetPrecompiledFunctionName(
drawType,
allShaderFeatures,
shaderMiscFlags,
m_plsPrecompiledLibrary,
GLSL_drawFragmentMain),
drawType,
gpu::InterlockMode::rasterOrdering,
allShaderFeatures,
shaderMiscFlags);
}
}
}
// Create vertex and index buffers for the different PLS patches.
m_pathPatchVertexBuffer =
[m_gpu newBufferWithLength:kPatchVertexBufferCount * sizeof(PatchVertex)
options:MTLResourceStorageModeShared];
m_pathPatchIndexBuffer =
[m_gpu newBufferWithLength:kPatchIndexBufferCount * sizeof(uint16_t)
options:MTLResourceStorageModeShared];
GeneratePatchBufferData(
reinterpret_cast<PatchVertex*>(m_pathPatchVertexBuffer.contents),
reinterpret_cast<uint16_t*>(m_pathPatchIndexBuffer.contents));
// Set up the imageRect rendering buffers. (gpu::InterlockMode::atomics
// only.)
m_imageRectVertexBuffer =
[m_gpu newBufferWithBytes:gpu::kImageRectVertices
length:sizeof(gpu::kImageRectVertices)
options:MTLResourceStorageModeShared];
m_imageRectIndexBuffer =
[m_gpu newBufferWithBytes:gpu::kImageRectIndices
length:sizeof(gpu::kImageRectIndices)
options:MTLResourceStorageModeShared];
}
RenderContextMetalImpl::~RenderContextMetalImpl() {}
// If the GPU supports framebuffer reads (called "programmable blending" in the
// feature tables), PLS planes besides the main framebuffer can exist in
// ephemeral "memoryless" storage. This means their contents are never actually
// written to main memory, and they only exist in fast tiled memory.
static id<MTLTexture> make_pls_memoryless_texture(id<MTLDevice> gpu,
MTLPixelFormat pixelFormat,
uint32_t width,
uint32_t height)
{
MTLTextureDescriptor* desc = [[MTLTextureDescriptor alloc] init];
desc.pixelFormat = pixelFormat;
desc.width = width;
desc.height = height;
desc.usage = MTLTextureUsageRenderTarget;
desc.textureType = MTLTextureType2D;
desc.mipmapLevelCount = 1;
desc.storageMode = MTLStorageModeMemoryless;
return [gpu newTextureWithDescriptor:desc];
}
RenderTargetMetal::RenderTargetMetal(id<MTLDevice> gpu,
MTLPixelFormat pixelFormat,
uint32_t width,
uint32_t height,
const PlatformFeatures& platformFeatures) :
RenderTarget(width, height), m_gpu(gpu), m_pixelFormat(pixelFormat)
{
m_targetTexture = nil; // Will be configured later by setTargetTexture().
if (platformFeatures.supportsRasterOrdering)
{
m_coverageMemorylessTexture = make_pls_memoryless_texture(
gpu, MTLPixelFormatR32Uint, width, height);
m_clipMemorylessTexture = make_pls_memoryless_texture(
gpu, MTLPixelFormatR32Uint, width, height);
m_scratchColorMemorylessTexture =
make_pls_memoryless_texture(gpu, m_pixelFormat, width, height);
}
}
void RenderTargetMetal::setTargetTexture(id<MTLTexture> texture)
{
assert(!texture || compatibleWith(texture));
m_targetTexture = texture;
}
rcp<RenderTargetMetal> RenderContextMetalImpl::makeRenderTarget(
MTLPixelFormat pixelFormat, uint32_t width, uint32_t height)
{
return rcp(new RenderTargetMetal(
m_gpu, pixelFormat, width, height, m_platformFeatures));
}
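// Rive render buffer backed by one shared-storage MTLBuffer per ring slot, or
// a single buffer when the contents are mapped only once at initialization.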
class RenderBufferMetalImpl
: public LITE_RTTI_OVERRIDE(RiveRenderBuffer, RenderBufferMetalImpl)
{
public:
RenderBufferMetalImpl(RenderBufferType renderBufferType,
RenderBufferFlags renderBufferFlags,
size_t sizeInBytes,
id<MTLDevice> gpu) :
lite_rtti_override(renderBufferType, renderBufferFlags, sizeInBytes),
m_gpu(gpu)
{
int bufferCount =
flags() & RenderBufferFlags::mappedOnceAtInitialization
? 1
: gpu::kBufferRingSize;
for (int i = 0; i < bufferCount; ++i)
{
m_buffers[i] =
[gpu newBufferWithLength:sizeInBytes
options:MTLResourceStorageModeShared];
}
}
id<MTLBuffer> submittedBuffer() { return m_buffers[frontBufferIdx()]; }
protected:
void* onMap() override
{
assert(m_buffers[backBufferIdx()] != nil);
return m_buffers[backBufferIdx()].contents;
}
void onUnmap() override {}
private:
id<MTLDevice> m_gpu;
id<MTLBuffer> m_buffers[gpu::kBufferRingSize];
int m_submittedBufferIdx = -1;
};
rcp<RenderBuffer> RenderContextMetalImpl::makeRenderBuffer(
RenderBufferType type, RenderBufferFlags flags, size_t sizeInBytes)
{
return make_rcp<RenderBufferMetalImpl>(type, flags, sizeInBytes, m_gpu);
}
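// RGBA8 image texture whose mipmaps are generated lazily on the GPU via a blit
// encoder the first time the texture is used in a flush.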
class TextureMetalImpl : public Texture
{
public:
TextureMetalImpl(id<MTLDevice> gpu,
uint32_t width,
uint32_t height,
uint32_t mipLevelCount,
const uint8_t imageDataRGBA[]) :
Texture(width, height)
{
// Create the texture.
MTLTextureDescriptor* desc = [[MTLTextureDescriptor alloc] init];
desc.pixelFormat = MTLPixelFormatRGBA8Unorm;
desc.width = width;
desc.height = height;
desc.mipmapLevelCount = mipLevelCount;
desc.usage = MTLTextureUsageShaderRead;
desc.textureType = MTLTextureType2D;
m_texture = [gpu newTextureWithDescriptor:desc];
// Specify the top-level image in the mipmap chain.
MTLRegion region = MTLRegionMake2D(0, 0, width, height);
[m_texture replaceRegion:region
mipmapLevel:0
withBytes:imageDataRGBA
bytesPerRow:width * 4];
}
void ensureMipmaps(id<MTLCommandBuffer> commandBuffer) const
{
if (m_mipsDirty)
{
// Generate mipmaps.
id<MTLBlitCommandEncoder> mipEncoder =
[commandBuffer blitCommandEncoder];
[mipEncoder generateMipmapsForTexture:m_texture];
[mipEncoder endEncoding];
m_mipsDirty = false;
}
}
id<MTLTexture> texture() const { return m_texture; }
private:
id<MTLTexture> m_texture;
mutable bool m_mipsDirty = true;
};
rcp<Texture> RenderContextMetalImpl::makeImageTexture(
uint32_t width,
uint32_t height,
uint32_t mipLevelCount,
const uint8_t imageDataRGBA[])
{
return make_rcp<TextureMetalImpl>(
m_gpu, width, height, mipLevelCount, imageDataRGBA);
}
std::unique_ptr<BufferRing> RenderContextMetalImpl::makeUniformBufferRing(
size_t capacityInBytes)
{
return BufferRingMetalImpl::Make(m_gpu, capacityInBytes);
}
std::unique_ptr<BufferRing> RenderContextMetalImpl::makeStorageBufferRing(
size_t capacityInBytes, gpu::StorageBufferStructure)
{
return BufferRingMetalImpl::Make(m_gpu, capacityInBytes);
}
std::unique_ptr<BufferRing> RenderContextMetalImpl::makeVertexBufferRing(
size_t capacityInBytes)
{
return BufferRingMetalImpl::Make(m_gpu, capacityInBytes);
}
void RenderContextMetalImpl::resizeGradientTexture(uint32_t width,
uint32_t height)
{
if (width == 0 || height == 0)
{
m_gradientTexture = nil;
return;
}
MTLTextureDescriptor* desc = [[MTLTextureDescriptor alloc] init];
desc.pixelFormat = MTLPixelFormatRGBA8Unorm;
desc.width = width;
desc.height = height;
desc.usage = MTLTextureUsageRenderTarget | MTLTextureUsageShaderRead;
desc.textureType = MTLTextureType2D;
desc.mipmapLevelCount = 1;
desc.storageMode = MTLStorageModePrivate;
m_gradientTexture = [m_gpu newTextureWithDescriptor:desc];
}
void RenderContextMetalImpl::resizeTessellationTexture(uint32_t width,
uint32_t height)
{
if (width == 0 || height == 0)
{
m_tessVertexTexture = nil;
return;
}
MTLTextureDescriptor* desc = [[MTLTextureDescriptor alloc] init];
desc.pixelFormat = MTLPixelFormatRGBA32Uint;
desc.width = width;
desc.height = height;
desc.usage = MTLTextureUsageRenderTarget | MTLTextureUsageShaderRead;
desc.textureType = MTLTextureType2D;
desc.mipmapLevelCount = 1;
desc.storageMode = MTLStorageModePrivate;
m_tessVertexTexture = [m_gpu newTextureWithDescriptor:desc];
}
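// Returns a pipeline that can draw the requested feature set. If the exact
// pipeline hasn't been compiled yet, this schedules a background compile and
// falls back on a fully-featured pipeline in the meantime.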
const RenderContextMetalImpl::DrawPipeline* RenderContextMetalImpl::
findCompatibleDrawPipeline(gpu::DrawType drawType,
gpu::ShaderFeatures shaderFeatures,
gpu::InterlockMode interlockMode,
gpu::ShaderMiscFlags shaderMiscFlags)
{
uint32_t pipelineKey = gpu::ShaderUniqueKey(
drawType, shaderFeatures, interlockMode, shaderMiscFlags);
auto pipelineIter = m_drawPipelines.find(pipelineKey);
if (pipelineIter == m_drawPipelines.end())
{
// The shader for this pipeline hasn't been scheduled for compiling yet.
// Schedule it to compile in the background.
m_backgroundShaderCompiler->pushJob({
.drawType = drawType,
.shaderFeatures = shaderFeatures,
.interlockMode = interlockMode,
.shaderMiscFlags = shaderMiscFlags,
});
pipelineIter = m_drawPipelines.insert({pipelineKey, nullptr}).first;
}
if (pipelineIter->second != nullptr)
{
// The pipeline is fully compiled and loaded.
return pipelineIter->second.get();
}
// The shader for this pipeline hasn't finished compiling yet. Start by
// finding a fully-featured superset of features whose pipeline we can fall
// back on while waiting for it to compile.
ShaderFeatures fullyFeaturedPipelineFeatures =
gpu::ShaderFeaturesMaskFor(drawType, interlockMode);
if (interlockMode == gpu::InterlockMode::atomics)
{
// Never add ENABLE_ADVANCED_BLEND to an atomic pipeline that doesn't
// use advanced blend, since in atomic mode, the shaders behave
// differently depending on whether advanced blend is enabled.
fullyFeaturedPipelineFeatures &=
shaderFeatures | ~ShaderFeatures::ENABLE_ADVANCED_BLEND;
// Never add ENABLE_CLIPPING to an atomic pipeline that doesn't use
// clipping; it will cause a "missing buffer binding" validation error
// because the shader will define an (unused) clipBuffer, but we won't
// bind anything to it.
fullyFeaturedPipelineFeatures &=
shaderFeatures | ~ShaderFeatures::ENABLE_CLIPPING;
}
shaderFeatures &= fullyFeaturedPipelineFeatures;
// Fully-featured "rasterOrdering" pipelines should have already been
// pre-loaded from the static library.
assert(shaderFeatures != fullyFeaturedPipelineFeatures ||
interlockMode != gpu::InterlockMode::rasterOrdering);
// Poll to see if the shader is actually done compiling, but only wait if
// it's a fully-featured pipeline. Otherwise, we can fall back on the
// fully-featured pipeline while we wait for compilation.
BackgroundCompileJob job;
bool shouldWaitForBackgroundCompilation =
shaderFeatures == fullyFeaturedPipelineFeatures ||
m_contextOptions.synchronousShaderCompilations;
while (m_backgroundShaderCompiler->popFinishedJob(
&job, shouldWaitForBackgroundCompilation))
{
uint32_t jobKey = gpu::ShaderUniqueKey(job.drawType,
job.shaderFeatures,
job.interlockMode,
job.shaderMiscFlags);
m_drawPipelines[jobKey] =
std::make_unique<DrawPipeline>(m_gpu,
job.compiledLibrary,
@GLSL_drawVertexMain,
@GLSL_drawFragmentMain,
job.drawType,
job.interlockMode,
job.shaderFeatures,
job.shaderMiscFlags);
if (jobKey == pipelineKey)
{
// The shader we wanted had actually finished compiling and was
// just waiting to be built into a pipeline.
return pipelineIter->second.get();
}
}
// The shader for this feature set hasn't finished compiling. Use the
// pipeline that has all features enabled while we wait for it to finish.
assert(shaderFeatures != fullyFeaturedPipelineFeatures);
return findCompatibleDrawPipeline(drawType,
fullyFeaturedPipelineFeatures,
interlockMode,
shaderMiscFlags);
}
void RenderContextMetalImpl::prepareToMapBuffers()
{
// Wait until the GPU finishes rendering flush "N + 1 - kBufferRingSize".
// This ensures it is safe for the CPU to begin modifying the next buffers
// in our rings.
m_bufferRingIdx = (m_bufferRingIdx + 1) % kBufferRingSize;
m_bufferRingLocks[m_bufferRingIdx].lock();
}
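// Fetches the MTLBuffer backing the given ring's currently-submitted slot.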
static id<MTLBuffer> mtl_buffer(const BufferRing* bufferRing)
{
assert(bufferRing != nullptr);
return static_cast<const BufferRingMetalImpl*>(bufferRing)
->submittedBuffer();
}
static MTLViewport make_viewport(uint32_t x,
uint32_t y,
uint32_t width,
uint32_t height)
{
return {
static_cast<double>(x),
static_cast<double>(y),
static_cast<double>(width),
static_cast<double>(height),
0,
1,
};
}
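// Creates the render command encoder for the main draw pass and binds the
// resources shared by every draw: flush uniforms, the tessellation, gradient,
// and feather textures, and (when present) the path, paint, contour, and
// atomic-mode PLS buffers.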
id<MTLRenderCommandEncoder> RenderContextMetalImpl::makeRenderPassForDraws(
const gpu::FlushDescriptor& flushDesc,
MTLRenderPassDescriptor* passDesc,
id<MTLCommandBuffer> commandBuffer,
gpu::ShaderMiscFlags baselineShaderMiscFlags)
{
auto* renderTarget =
static_cast<RenderTargetMetal*>(flushDesc.renderTarget);
id<MTLRenderCommandEncoder> encoder =
[commandBuffer renderCommandEncoderWithDescriptor:passDesc];
[encoder
setViewport:make_viewport(
0, 0, renderTarget->width(), renderTarget->height())];
[encoder setVertexBuffer:mtl_buffer(flushUniformBufferRing())
offset:flushDesc.flushUniformDataOffsetInBytes
atIndex:FLUSH_UNIFORM_BUFFER_IDX];
[encoder setFragmentBuffer:mtl_buffer(flushUniformBufferRing())
offset:flushDesc.flushUniformDataOffsetInBytes
atIndex:FLUSH_UNIFORM_BUFFER_IDX];
[encoder setVertexTexture:m_tessVertexTexture
atIndex:TESS_VERTEX_TEXTURE_IDX];
[encoder setFragmentTexture:m_gradientTexture atIndex:GRAD_TEXTURE_IDX];
[encoder setFragmentTexture:m_featherTexture atIndex:FEATHER_TEXTURE_IDX];
if (flushDesc.pathCount > 0)
{
[encoder setVertexBuffer:mtl_buffer(pathBufferRing())
offset:flushDesc.firstPath * sizeof(gpu::PathData)
atIndex:PATH_BUFFER_IDX];
if (flushDesc.interlockMode == gpu::InterlockMode::atomics)
{
[encoder
setFragmentBuffer:mtl_buffer(paintBufferRing())
offset:flushDesc.firstPaint * sizeof(gpu::PaintData)
atIndex:PAINT_BUFFER_IDX];
[encoder setFragmentBuffer:mtl_buffer(paintAuxBufferRing())
offset:flushDesc.firstPaintAux *
sizeof(gpu::PaintAuxData)
atIndex:PAINT_AUX_BUFFER_IDX];
}
else
{
[encoder
setVertexBuffer:mtl_buffer(paintBufferRing())
offset:flushDesc.firstPaint * sizeof(gpu::PaintData)
atIndex:PAINT_BUFFER_IDX];
[encoder setVertexBuffer:mtl_buffer(paintAuxBufferRing())
offset:flushDesc.firstPaintAux *
sizeof(gpu::PaintAuxData)
atIndex:PAINT_AUX_BUFFER_IDX];
}
}
if (flushDesc.contourCount > 0)
{
[encoder
setVertexBuffer:mtl_buffer(contourBufferRing())
offset:flushDesc.firstContour * sizeof(gpu::ContourData)
atIndex:CONTOUR_BUFFER_IDX];
}
if (flushDesc.interlockMode == gpu::InterlockMode::atomics)
{
// In atomic mode, the PLS planes are buffers that we need to bind
// separately. Since the PLS plane indices collide with other buffer
// bindings, offset the binding indices of these buffers by
// DEFAULT_BINDINGS_SET_SIZE.
if (!(baselineShaderMiscFlags &
gpu::ShaderMiscFlags::fixedFunctionColorOutput))
{
[encoder
setFragmentBuffer:renderTarget->colorAtomicBuffer()
offset:0
atIndex:COLOR_PLANE_IDX + DEFAULT_BINDINGS_SET_SIZE];
}
if (flushDesc.combinedShaderFeatures &
gpu::ShaderFeatures::ENABLE_CLIPPING)
{
[encoder
setFragmentBuffer:renderTarget->clipAtomicBuffer()
offset:0
atIndex:CLIP_PLANE_IDX + DEFAULT_BINDINGS_SET_SIZE];
}
[encoder
setFragmentBuffer:renderTarget->coverageAtomicBuffer()
offset:0
atIndex:COVERAGE_PLANE_IDX + DEFAULT_BINDINGS_SET_SIZE];
}
if (flushDesc.wireframe)
{
[encoder setTriangleFillMode:MTLTriangleFillModeLines];
}
return encoder;
}
void RenderContextMetalImpl::flush(const FlushDescriptor& desc)
{
assert(desc.interlockMode != gpu::InterlockMode::clockwiseAtomic);
assert(desc.interlockMode != gpu::InterlockMode::msaa); // TODO: msaa.
auto* renderTarget = static_cast<RenderTargetMetal*>(desc.renderTarget);
id<MTLCommandBuffer> commandBuffer =
(__bridge id<MTLCommandBuffer>)desc.externalCommandBuffer;
// Render the complex color ramps to the gradient texture.
if (desc.gradSpanCount > 0)
{
MTLRenderPassDescriptor* gradPass =
[MTLRenderPassDescriptor renderPassDescriptor];
gradPass.renderTargetWidth = kGradTextureWidth;
gradPass.renderTargetHeight = desc.gradDataHeight;
gradPass.colorAttachments[0].loadAction = MTLLoadActionDontCare;
gradPass.colorAttachments[0].storeAction = MTLStoreActionStore;
gradPass.colorAttachments[0].texture = m_gradientTexture;
id<MTLRenderCommandEncoder> gradEncoder =
[commandBuffer renderCommandEncoderWithDescriptor:gradPass];
[gradEncoder
setViewport:make_viewport(0,
0,
kGradTextureWidth,
static_cast<float>(desc.gradDataHeight))];
[gradEncoder
setRenderPipelineState:m_colorRampPipeline->pipelineState()];
[gradEncoder setVertexBuffer:mtl_buffer(flushUniformBufferRing())
offset:desc.flushUniformDataOffsetInBytes
atIndex:FLUSH_UNIFORM_BUFFER_IDX];
[gradEncoder
setVertexBuffer:mtl_buffer(gradSpanBufferRing())
offset:desc.firstGradSpan * sizeof(gpu::GradientSpan)
atIndex:0];
[gradEncoder setCullMode:MTLCullModeBack];
[gradEncoder drawPrimitives:MTLPrimitiveTypeTriangleStrip
vertexStart:0
vertexCount:gpu::GRAD_SPAN_TRI_STRIP_VERTEX_COUNT
instanceCount:desc.gradSpanCount];
[gradEncoder endEncoding];
}
// Tessellate all curves into vertices in the tessellation texture.
if (desc.tessVertexSpanCount > 0)
{
MTLRenderPassDescriptor* tessPass =
[MTLRenderPassDescriptor renderPassDescriptor];
tessPass.renderTargetWidth = kTessTextureWidth;
tessPass.renderTargetHeight = desc.tessDataHeight;
tessPass.colorAttachments[0].loadAction = MTLLoadActionDontCare;
tessPass.colorAttachments[0].storeAction = MTLStoreActionStore;
tessPass.colorAttachments[0].texture = m_tessVertexTexture;
id<MTLRenderCommandEncoder> tessEncoder =
[commandBuffer renderCommandEncoderWithDescriptor:tessPass];
[tessEncoder
setViewport:make_viewport(
0, 0, kTessTextureWidth, desc.tessDataHeight)];
[tessEncoder setRenderPipelineState:m_tessPipeline->pipelineState()];
[tessEncoder setVertexBuffer:mtl_buffer(flushUniformBufferRing())
offset:desc.flushUniformDataOffsetInBytes
atIndex:FLUSH_UNIFORM_BUFFER_IDX];
[tessEncoder setVertexBuffer:mtl_buffer(tessSpanBufferRing())
offset:desc.firstTessVertexSpan *
sizeof(gpu::TessVertexSpan)
atIndex:0];
assert(desc.pathCount > 0);
[tessEncoder setVertexBuffer:mtl_buffer(pathBufferRing())
offset:desc.firstPath * sizeof(gpu::PathData)
atIndex:PATH_BUFFER_IDX];
assert(desc.contourCount > 0);
[tessEncoder
setVertexBuffer:mtl_buffer(contourBufferRing())
offset:desc.firstContour * sizeof(gpu::ContourData)
atIndex:CONTOUR_BUFFER_IDX];
[tessEncoder setCullMode:MTLCullModeBack];
[tessEncoder drawIndexedPrimitives:MTLPrimitiveTypeTriangle
indexCount:std::size(gpu::kTessSpanIndices)
indexType:MTLIndexTypeUInt16
indexBuffer:m_tessSpanIndexBuffer
indexBufferOffset:0
instanceCount:desc.tessVertexSpanCount];
[tessEncoder endEncoding];
}
// Generate mipmaps if needed.
for (const DrawBatch& batch : *desc.drawList)
{
if (auto imageTextureMetal =
static_cast<const TextureMetalImpl*>(batch.imageTexture))
{
imageTextureMetal->ensureMipmaps(commandBuffer);
}
}
// Set up a render pass to do the final rendering using (some form of) pixel
// local storage.
MTLRenderPassDescriptor* pass =
[MTLRenderPassDescriptor renderPassDescriptor];
pass.renderTargetWidth = desc.renderTargetUpdateBounds.right;
pass.renderTargetHeight = desc.renderTargetUpdateBounds.bottom;
pass.colorAttachments[COLOR_PLANE_IDX].texture =
renderTarget->targetTexture();
switch (desc.colorLoadAction)
{
case gpu::LoadAction::clear:
{
float cc[4];
UnpackColorToRGBA32FPremul(desc.clearColor, cc);
pass.colorAttachments[COLOR_PLANE_IDX].loadAction =
MTLLoadActionClear;
pass.colorAttachments[COLOR_PLANE_IDX].clearColor =
MTLClearColorMake(cc[0], cc[1], cc[2], cc[3]);
break;
}
case gpu::LoadAction::preserveRenderTarget:
pass.colorAttachments[COLOR_PLANE_IDX].loadAction =
MTLLoadActionLoad;
break;
case gpu::LoadAction::dontCare:
pass.colorAttachments[COLOR_PLANE_IDX].loadAction =
MTLLoadActionDontCare;
break;
}
pass.colorAttachments[COLOR_PLANE_IDX].storeAction = MTLStoreActionStore;
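// Flags that apply to every batch in this flush; per-batch flags are OR'd on
// top of these in the draw loop below.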
auto baselineShaderMiscFlags = gpu::ShaderMiscFlags::none;
if (desc.interlockMode == gpu::InterlockMode::rasterOrdering)
{
// In rasterOrdering mode, the PLS planes are accessed as color
// attachments.
pass.colorAttachments[CLIP_PLANE_IDX].texture =
renderTarget->m_clipMemorylessTexture;
pass.colorAttachments[CLIP_PLANE_IDX].loadAction = MTLLoadActionClear;
pass.colorAttachments[CLIP_PLANE_IDX].clearColor =
MTLClearColorMake(0, 0, 0, 0);
pass.colorAttachments[CLIP_PLANE_IDX].storeAction =
MTLStoreActionDontCare;
pass.colorAttachments[SCRATCH_COLOR_PLANE_IDX].texture =
renderTarget->m_scratchColorMemorylessTexture;
pass.colorAttachments[SCRATCH_COLOR_PLANE_IDX].loadAction =
MTLLoadActionDontCare;
pass.colorAttachments[SCRATCH_COLOR_PLANE_IDX].storeAction =
MTLStoreActionDontCare;
pass.colorAttachments[COVERAGE_PLANE_IDX].texture =
renderTarget->m_coverageMemorylessTexture;
pass.colorAttachments[COVERAGE_PLANE_IDX].loadAction =
MTLLoadActionClear;
pass.colorAttachments[COVERAGE_PLANE_IDX].clearColor =
MTLClearColorMake(desc.coverageClearValue, 0, 0, 0);
pass.colorAttachments[COVERAGE_PLANE_IDX].storeAction =
MTLStoreActionDontCare;
}
else if (!(desc.combinedShaderFeatures &
gpu::ShaderFeatures::ENABLE_ADVANCED_BLEND))
{
assert(desc.interlockMode == gpu::InterlockMode::atomics);
baselineShaderMiscFlags |=
gpu::ShaderMiscFlags::fixedFunctionColorOutput;
}
else if (desc.colorLoadAction == gpu::LoadAction::preserveRenderTarget)
{
// Since we need to preserve the renderTarget during load, and since
// we're rendering to an offscreen color buffer, we have to literally
// copy the renderTarget into the color buffer.
assert(desc.interlockMode == gpu::InterlockMode::atomics);
id<MTLBlitCommandEncoder> copyEncoder =
[commandBuffer blitCommandEncoder];
auto updateOrigin = MTLOriginMake(desc.renderTargetUpdateBounds.left,
desc.renderTargetUpdateBounds.top,
0);
auto updateSize = MTLSizeMake(desc.renderTargetUpdateBounds.width(),
desc.renderTargetUpdateBounds.height(),
1);
[copyEncoder copyFromTexture:renderTarget->targetTexture()
sourceSlice:0
sourceLevel:0
sourceOrigin:updateOrigin
sourceSize:updateSize
toBuffer:renderTarget->colorAtomicBuffer()
destinationOffset:(updateOrigin.y * renderTarget->width() +
updateOrigin.x) *
sizeof(uint32_t)
destinationBytesPerRow:renderTarget->width() * sizeof(uint32_t)
destinationBytesPerImage:renderTarget->height() *
renderTarget->width() * sizeof(uint32_t)];
[copyEncoder endEncoding];
}
// Execute the DrawList.
id<MTLRenderCommandEncoder> encoder = makeRenderPassForDraws(
desc, pass, commandBuffer, baselineShaderMiscFlags);
for (const DrawBatch& batch : *desc.drawList)
{
if (batch.elementCount == 0)
{
continue;
}
// Set up the pipeline for this specific drawType and shaderFeatures.
gpu::ShaderFeatures shaderFeatures =
desc.interlockMode == gpu::InterlockMode::atomics
? desc.combinedShaderFeatures
: batch.shaderFeatures;
gpu::ShaderMiscFlags batchMiscFlags = baselineShaderMiscFlags;
if (desc.interlockMode == gpu::InterlockMode::rasterOrdering &&
(batch.drawContents & gpu::DrawContents::clockwiseFill))
{
batchMiscFlags |= gpu::ShaderMiscFlags::clockwiseFill;
}
if (!(batchMiscFlags & gpu::ShaderMiscFlags::fixedFunctionColorOutput))
{
if (batch.drawType == gpu::DrawType::atomicResolve)
{
// Atomic mode can always do a coalesced resolve when rendering
// to an offscreen color buffer.
batchMiscFlags |=
gpu::ShaderMiscFlags::coalescedResolveAndTransfer;
}
else if (batch.drawType == gpu::DrawType::atomicInitialize)
{
if (desc.colorLoadAction == gpu::LoadAction::clear)
{
batchMiscFlags |= gpu::ShaderMiscFlags::storeColorClear;
}
else if (desc.colorLoadAction ==
gpu::LoadAction::preserveRenderTarget &&
renderTarget->pixelFormat() ==
MTLPixelFormatBGRA8Unorm)
{
// We already copied the renderTarget to our color buffer,
// but since the target is BGRA, we also need to swizzle it
// to RGBA before it's ready for PLS.
batchMiscFlags |=
gpu::ShaderMiscFlags::swizzleColorBGRAToRGBA;
}
}
}
id<MTLRenderPipelineState> drawPipelineState =
findCompatibleDrawPipeline(batch.drawType,
shaderFeatures,
desc.interlockMode,
batchMiscFlags)
->pipelineState(renderTarget->pixelFormat());
// Bind the appropriate image texture, if any.
if (auto imageTextureMetal =
static_cast<const TextureMetalImpl*>(batch.imageTexture))
{
[encoder setFragmentTexture:imageTextureMetal->texture()
atIndex:IMAGE_TEXTURE_IDX];
}
DrawType drawType = batch.drawType;
switch (drawType)
{
case DrawType::midpointFanPatches:
case DrawType::midpointFanCenterAAPatches:
case DrawType::outerCurvePatches:
{
// Draw PLS patches that connect the tessellation vertices.
[encoder setRenderPipelineState:drawPipelineState];
[encoder setVertexBuffer:m_pathPatchVertexBuffer
offset:0
atIndex:0];
[encoder setCullMode:MTLCullModeBack];
// Don't use baseInstance in order to run on Apple GPU Family 2.
// TODO: Use baseInstance instead once we deprecate Apple2.
[encoder setVertexBytes:&batch.baseElement
length:sizeof(uint32_t)
atIndex:PATH_BASE_INSTANCE_UNIFORM_BUFFER_IDX];
[encoder drawIndexedPrimitives:MTLPrimitiveTypeTriangle
indexCount:gpu::PatchIndexCount(drawType)
indexType:MTLIndexTypeUInt16
indexBuffer:m_pathPatchIndexBuffer
indexBufferOffset:gpu::PatchBaseIndex(drawType) *
sizeof(uint16_t)
instanceCount:batch.elementCount];
break;
}
case DrawType::interiorTriangulation:
{
[encoder setRenderPipelineState:drawPipelineState];
[encoder setVertexBuffer:mtl_buffer(triangleBufferRing())
offset:0
atIndex:0];
[encoder setCullMode:MTLCullModeBack];
[encoder drawPrimitives:MTLPrimitiveTypeTriangle
vertexStart:batch.baseElement
vertexCount:batch.elementCount];
break;
}
case DrawType::imageRect:
case DrawType::imageMesh:
{
[encoder setRenderPipelineState:drawPipelineState];
[encoder
setVertexBuffer:mtl_buffer(imageDrawUniformBufferRing())
offset:batch.imageDrawDataOffset
atIndex:IMAGE_DRAW_UNIFORM_BUFFER_IDX];
[encoder
setFragmentBuffer:mtl_buffer(imageDrawUniformBufferRing())
offset:batch.imageDrawDataOffset
atIndex:IMAGE_DRAW_UNIFORM_BUFFER_IDX];
[encoder setCullMode:MTLCullModeNone];
if (drawType == DrawType::imageRect)
{
assert(desc.interlockMode == gpu::InterlockMode::atomics);
[encoder setVertexBuffer:m_imageRectVertexBuffer
offset:0
atIndex:0];
[encoder
drawIndexedPrimitives:MTLPrimitiveTypeTriangle
indexCount:std::size(gpu::kImageRectIndices)
indexType:MTLIndexTypeUInt16
indexBuffer:m_imageRectIndexBuffer
indexBufferOffset:0];
}
else
{
LITE_RTTI_CAST_OR_BREAK(vertexBuffer,
RenderBufferMetalImpl*,
batch.vertexBuffer);
LITE_RTTI_CAST_OR_BREAK(
uvBuffer, RenderBufferMetalImpl*, batch.uvBuffer);
LITE_RTTI_CAST_OR_BREAK(
indexBuffer, RenderBufferMetalImpl*, batch.indexBuffer);
[encoder setVertexBuffer:vertexBuffer->submittedBuffer()
offset:0
atIndex:0];
[encoder setVertexBuffer:uvBuffer->submittedBuffer()
offset:0
atIndex:1];
[encoder
drawIndexedPrimitives:MTLPrimitiveTypeTriangle
indexCount:batch.elementCount
indexType:MTLIndexTypeUInt16
indexBuffer:indexBuffer->submittedBuffer()
indexBufferOffset:batch.baseElement *
sizeof(uint16_t)];
}
break;
}
case DrawType::atomicInitialize:
case DrawType::atomicResolve:
{
assert(desc.interlockMode == gpu::InterlockMode::atomics);
[encoder setRenderPipelineState:drawPipelineState];
[encoder drawPrimitives:MTLPrimitiveTypeTriangleStrip
vertexStart:0
vertexCount:4];
break;
}
case DrawType::stencilClipReset:
{
RIVE_UNREACHABLE();
}
}
if (batch.needsBarrier)
{
assert(desc.interlockMode == gpu::InterlockMode::atomics);
switch (m_metalFeatures.atomicBarrierType)
{
case AtomicBarrierType::memoryBarrier:
{
#if defined(RIVE_MACOSX)
if (@available(macOS 10.14, *))
{
[encoder
memoryBarrierWithScope:MTLBarrierScopeBuffers |
MTLBarrierScopeRenderTargets
afterStages:MTLRenderStageFragment
beforeStages:MTLRenderStageFragment];
break;
}
#endif
// atomicBarrierType shouldn't be "memoryBarrier" in this
// case.
RIVE_UNREACHABLE();
}
case AtomicBarrierType::rasterOrderGroup:
break;
case AtomicBarrierType::renderPassBreak:
// On very old hardware that can't support barriers, we just
// take a sledge hammer and break the entire render pass
// between overlapping draws.
// TODO: Is there a lighter way to achieve this?
[encoder endEncoding];
pass.colorAttachments[COLOR_PLANE_IDX].loadAction =
MTLLoadActionLoad;
encoder = makeRenderPassForDraws(
desc, pass, commandBuffer, baselineShaderMiscFlags);
break;
}
}
}
[encoder endEncoding];
if (desc.isFinalFlushOfFrame)
{
// Schedule a callback that will unlock the buffers used by this flush,
// after the GPU has finished rendering with them. This unblocks the CPU
// from reusing them in a future flush.
std::mutex& thisFlushLock = m_bufferRingLocks[m_bufferRingIdx];
[commandBuffer addCompletedHandler:^(id<MTLCommandBuffer>) {
assert(
!thisFlushLock.try_lock()); // The mutex should already be locked.
thisFlushLock.unlock();
}];
}
}
} // namespace rive::gpu