| /* |
| * Copyright 2023 Rive |
| */ |
| |
| #include "rive/pls/d3d/pls_render_context_d3d_impl.hpp" |
| |
| #include "rive/pls/pls_image.hpp" |
| #include "shaders/constants.glsl" |
| |
| #include <D3DCompiler.h> |
| #include <sstream> |
| |
| #include "shaders/out/generated/advanced_blend.glsl.hpp" |
| #include "shaders/out/generated/atomic_draw.glsl.hpp" |
| #include "shaders/out/generated/color_ramp.glsl.hpp" |
| #include "shaders/out/generated/constants.glsl.hpp" |
| #include "shaders/out/generated/common.glsl.hpp" |
| #include "shaders/out/generated/draw_image_mesh.glsl.hpp" |
| #include "shaders/out/generated/draw_path_common.glsl.hpp" |
| #include "shaders/out/generated/draw_path.glsl.hpp" |
| #include "shaders/out/generated/hlsl.glsl.hpp" |
| #include "shaders/out/generated/tessellate.glsl.hpp" |
| |
| // D3D11 doesn't let us bind the framebuffer UAV to slot 0 when there is a color output. Use the |
| // (unused in this case) ORIGINAL_DST_COLOR_PLANE_IDX instead when we are doing a coalesced resolve |
| // and transfer. |
| #define COALESCED_OFFSCREEN_FRAMEBUFFER_PLANE_IDX ORIGINAL_DST_COLOR_PLANE_IDX |
| |
| constexpr static UINT kPatchVertexDataSlot = 0; |
| constexpr static UINT kTriangleVertexDataSlot = 1; |
| constexpr static UINT kImageRectVertexDataSlot = 2; |
| constexpr static UINT kImageMeshVertexDataSlot = 3; |
| constexpr static UINT kImageMeshUVDataSlot = 4; |
| |
| namespace rive::pls |
| { |
| ComPtr<ID3D11Texture2D> make_simple_2d_texture(ID3D11Device* gpu, |
| DXGI_FORMAT format, |
| UINT width, |
| UINT height, |
| UINT mipLevelCount, |
| UINT bindFlags, |
| UINT miscFlags = 0) |
| { |
| D3D11_TEXTURE2D_DESC desc{}; |
| desc.Width = width; |
| desc.Height = height; |
| desc.MipLevels = mipLevelCount; |
| desc.ArraySize = 1; |
| desc.Format = format; |
| desc.SampleDesc.Count = 1; |
| desc.Usage = D3D11_USAGE_DEFAULT; |
| desc.BindFlags = bindFlags; |
| desc.CPUAccessFlags = 0; |
| desc.MiscFlags = miscFlags; |
| |
| ComPtr<ID3D11Texture2D> tex; |
| VERIFY_OK(gpu->CreateTexture2D(&desc, NULL, tex.ReleaseAndGetAddressOf())); |
| return tex; |
| } |
| |
| static ComPtr<ID3D11UnorderedAccessView> make_simple_2d_uav(ID3D11Device* gpu, |
| ID3D11Texture2D* tex, |
| DXGI_FORMAT format) |
| { |
| D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc{}; |
| uavDesc.Format = format; |
| uavDesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D; |
| |
| ComPtr<ID3D11UnorderedAccessView> uav; |
| VERIFY_OK(gpu->CreateUnorderedAccessView(tex, &uavDesc, uav.ReleaseAndGetAddressOf())); |
| return uav; |
| } |
| |
| std::unique_ptr<PLSRenderContext> PLSRenderContextD3DImpl::MakeContext( |
| ComPtr<ID3D11Device> gpu, |
| ComPtr<ID3D11DeviceContext> gpuContext, |
| const ContextOptions& contextOptions) |
| { |
| D3DCapabilities d3dCapabilities; |
| D3D11_FEATURE_DATA_D3D11_OPTIONS2 d3d11Options2; |
| if (SUCCEEDED(gpu->CheckFeatureSupport(D3D11_FEATURE_D3D11_OPTIONS2, |
| &d3d11Options2, |
| sizeof(D3D11_FEATURE_DATA_D3D11_OPTIONS2)))) |
| { |
| d3dCapabilities.supportsRasterizerOrderedViews = d3d11Options2.ROVsSupported; |
| if (d3d11Options2.TypedUAVLoadAdditionalFormats) |
| { |
| // TypedUAVLoadAdditionalFormats is true. Now check if we can both load and |
| // store all formats used by Rive (currently only RGBA8): |
| // https://learn.microsoft.com/en-us/windows/win32/direct3d11/typed-unordered-access-view-loads. |
| D3D11_FEATURE_DATA_FORMAT_SUPPORT2 d3d11Format2{}; |
| d3d11Format2.InFormat = DXGI_FORMAT_R8G8B8A8_UNORM; |
| if (SUCCEEDED(gpu->CheckFeatureSupport(D3D11_FEATURE_FORMAT_SUPPORT2, |
| &d3d11Format2, |
| sizeof(d3d11Format2)))) |
| { |
| constexpr UINT loadStoreFlags = |
| D3D11_FORMAT_SUPPORT2_UAV_TYPED_LOAD | D3D11_FORMAT_SUPPORT2_UAV_TYPED_STORE; |
| d3dCapabilities.supportsTypedUAVLoadStore = |
| (d3d11Format2.OutFormatSupport2 & loadStoreFlags) == loadStoreFlags; |
| } |
| } |
| } |
| if (contextOptions.disableRasterizerOrderedViews) |
| { |
| d3dCapabilities.supportsRasterizerOrderedViews = false; |
| } |
| if (contextOptions.disableTypedUAVLoadStore) |
| { |
| d3dCapabilities.supportsTypedUAVLoadStore = false; |
| } |
| d3dCapabilities.isIntel = contextOptions.isIntel; |
| |
| auto plsContextImpl = std::unique_ptr<PLSRenderContextD3DImpl>( |
| new PLSRenderContextD3DImpl(std::move(gpu), std::move(gpuContext), d3dCapabilities)); |
| return std::make_unique<PLSRenderContext>(std::move(plsContextImpl)); |
| } |
| |
| PLSRenderContextD3DImpl::PLSRenderContextD3DImpl(ComPtr<ID3D11Device> gpu, |
| ComPtr<ID3D11DeviceContext> gpuContext, |
| const D3DCapabilities& d3dCapabilities) : |
| m_d3dCapabilities(d3dCapabilities), m_gpu(std::move(gpu)), m_gpuContext(std::move(gpuContext)) |
| { |
| m_platformFeatures.invertOffscreenY = true; |
| m_platformFeatures.supportsRasterOrdering = d3dCapabilities.supportsRasterizerOrderedViews; |
| |
| // Create a default raster state for path and offscreen draws. |
| D3D11_RASTERIZER_DESC rasterDesc; |
| rasterDesc.FillMode = D3D11_FILL_SOLID; |
| rasterDesc.CullMode = D3D11_CULL_BACK; |
| rasterDesc.FrontCounterClockwise = FALSE; // FrontCounterClockwise must be FALSE in order to |
| // match the winding sense of interior triangulations. |
| rasterDesc.DepthBias = 0; |
| rasterDesc.SlopeScaledDepthBias = 0; |
| rasterDesc.DepthBiasClamp = 0; |
| rasterDesc.DepthClipEnable = FALSE; |
| rasterDesc.ScissorEnable = FALSE; |
| rasterDesc.MultisampleEnable = FALSE; |
| rasterDesc.AntialiasedLineEnable = FALSE; |
| VERIFY_OK(m_gpu->CreateRasterizerState(&rasterDesc, |
| m_backCulledRasterState[0].ReleaseAndGetAddressOf())); |
| |
| // ...And with wireframe for debugging. |
| rasterDesc.FillMode = D3D11_FILL_WIREFRAME; |
| VERIFY_OK(m_gpu->CreateRasterizerState(&rasterDesc, |
| m_backCulledRasterState[1].ReleaseAndGetAddressOf())); |
| |
| // Create a raster state without face culling for drawing image meshes. |
| rasterDesc.FillMode = D3D11_FILL_SOLID; |
| rasterDesc.CullMode = D3D11_CULL_NONE; |
| VERIFY_OK(m_gpu->CreateRasterizerState(&rasterDesc, |
| m_doubleSidedRasterState[0].ReleaseAndGetAddressOf())); |
| |
| // ...And once more with wireframe for debugging. |
| rasterDesc.FillMode = D3D11_FILL_WIREFRAME; |
| VERIFY_OK(m_gpu->CreateRasterizerState(&rasterDesc, |
| m_doubleSidedRasterState[1].ReleaseAndGetAddressOf())); |
| |
| // Compile the shaders that render gradient color ramps. |
| { |
| std::ostringstream s; |
| s << glsl::hlsl << '\n'; |
| s << glsl::constants << '\n'; |
| s << glsl::common << '\n'; |
| s << glsl::color_ramp << '\n'; |
| ComPtr<ID3DBlob> vertexBlob = |
| compileSourceToBlob(GLSL_VERTEX, s.str().c_str(), GLSL_colorRampVertexMain, "vs_5_0"); |
| ComPtr<ID3DBlob> pixelBlob = compileSourceToBlob(GLSL_FRAGMENT, |
| s.str().c_str(), |
| GLSL_colorRampFragmentMain, |
| "ps_5_0"); |
| D3D11_INPUT_ELEMENT_DESC spanDesc = |
| {GLSL_a_span, 0, DXGI_FORMAT_R32G32B32A32_UINT, 0, 0, D3D11_INPUT_PER_INSTANCE_DATA, 1}; |
| VERIFY_OK(m_gpu->CreateInputLayout(&spanDesc, |
| 1, |
| vertexBlob->GetBufferPointer(), |
| vertexBlob->GetBufferSize(), |
| &m_colorRampLayout)); |
| VERIFY_OK(m_gpu->CreateVertexShader(vertexBlob->GetBufferPointer(), |
| vertexBlob->GetBufferSize(), |
| nullptr, |
| &m_colorRampVertexShader)); |
| VERIFY_OK(m_gpu->CreatePixelShader(pixelBlob->GetBufferPointer(), |
| pixelBlob->GetBufferSize(), |
| nullptr, |
| &m_colorRampPixelShader)); |
| } |
| |
| // Compile the tessellation shaders. |
| { |
| std::ostringstream s; |
| s << glsl::hlsl << '\n'; |
| s << glsl::constants << '\n'; |
| s << glsl::common << '\n'; |
| s << glsl::tessellate << '\n'; |
| ComPtr<ID3DBlob> vertexBlob = |
| compileSourceToBlob(GLSL_VERTEX, s.str().c_str(), GLSL_tessellateVertexMain, "vs_5_0"); |
| ComPtr<ID3DBlob> pixelBlob = compileSourceToBlob(GLSL_FRAGMENT, |
| s.str().c_str(), |
| GLSL_tessellateFragmentMain, |
| "ps_5_0"); |
| // Draw two instances per TessVertexSpan: one normal and one optional reflection. |
| D3D11_INPUT_ELEMENT_DESC attribsDesc[] = {{GLSL_a_p0p1_, |
| 0, |
| DXGI_FORMAT_R32G32B32A32_FLOAT, |
| 0, |
| D3D11_APPEND_ALIGNED_ELEMENT, |
| D3D11_INPUT_PER_INSTANCE_DATA, |
| 1}, |
| {GLSL_a_p2p3_, |
| 0, |
| DXGI_FORMAT_R32G32B32A32_FLOAT, |
| 0, |
| D3D11_APPEND_ALIGNED_ELEMENT, |
| D3D11_INPUT_PER_INSTANCE_DATA, |
| 1}, |
| {GLSL_a_joinTan_and_ys, |
| 0, |
| DXGI_FORMAT_R32G32B32A32_FLOAT, |
| 0, |
| D3D11_APPEND_ALIGNED_ELEMENT, |
| D3D11_INPUT_PER_INSTANCE_DATA, |
| 1}, |
| {GLSL_a_args, |
| 0, |
| DXGI_FORMAT_R32G32B32A32_UINT, |
| 0, |
| D3D11_APPEND_ALIGNED_ELEMENT, |
| D3D11_INPUT_PER_INSTANCE_DATA, |
| 1}}; |
| VERIFY_OK(m_gpu->CreateInputLayout(attribsDesc, |
| std::size(attribsDesc), |
| vertexBlob->GetBufferPointer(), |
| vertexBlob->GetBufferSize(), |
| &m_tessellateLayout)); |
| VERIFY_OK(m_gpu->CreateVertexShader(vertexBlob->GetBufferPointer(), |
| vertexBlob->GetBufferSize(), |
| nullptr, |
| &m_tessellateVertexShader)); |
| VERIFY_OK(m_gpu->CreatePixelShader(pixelBlob->GetBufferPointer(), |
| pixelBlob->GetBufferSize(), |
| nullptr, |
| &m_tessellatePixelShader)); |
| |
| m_tessSpanIndexBuffer = makeSimpleImmutableBuffer(sizeof(pls::kTessSpanIndices), |
| D3D11_BIND_INDEX_BUFFER, |
| pls::kTessSpanIndices); |
| } |
| |
| // Set up the path patch rendering buffers. |
| PatchVertex patchVertices[kPatchVertexBufferCount]; |
| uint16_t patchIndices[kPatchIndexBufferCount]; |
| GeneratePatchBufferData(patchVertices, patchIndices); |
| m_patchVertexBuffer = |
| makeSimpleImmutableBuffer(sizeof(patchVertices), D3D11_BIND_VERTEX_BUFFER, patchVertices); |
| m_patchIndexBuffer = |
| makeSimpleImmutableBuffer(sizeof(patchIndices), D3D11_BIND_INDEX_BUFFER, patchIndices); |
| |
| // Set up the imageRect rendering buffers. (pls::InterlockMode::atomics only.) |
| m_imageRectVertexBuffer = makeSimpleImmutableBuffer(sizeof(pls::kImageRectVertices), |
| D3D11_BIND_VERTEX_BUFFER, |
| pls::kImageRectVertices); |
| m_imageRectIndexBuffer = makeSimpleImmutableBuffer(sizeof(pls::kImageRectIndices), |
| D3D11_BIND_INDEX_BUFFER, |
| pls::kImageRectIndices); |
| |
| // Create buffers for uniforms. |
| { |
| D3D11_BUFFER_DESC desc{}; |
| desc.Usage = D3D11_USAGE_DEFAULT; |
| desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; |
| |
| desc.ByteWidth = sizeof(pls::FlushUniforms); |
| desc.StructureByteStride = sizeof(pls::FlushUniforms); |
| VERIFY_OK(m_gpu->CreateBuffer(&desc, nullptr, m_flushUniforms.ReleaseAndGetAddressOf())); |
| |
| desc.ByteWidth = sizeof(DrawUniforms); |
| desc.StructureByteStride = sizeof(DrawUniforms); |
| VERIFY_OK(m_gpu->CreateBuffer(&desc, nullptr, m_drawUniforms.ReleaseAndGetAddressOf())); |
| |
| desc.ByteWidth = sizeof(pls::ImageDrawUniforms); |
| desc.StructureByteStride = sizeof(pls::ImageDrawUniforms); |
| VERIFY_OK( |
| m_gpu->CreateBuffer(&desc, nullptr, m_imageDrawUniforms.ReleaseAndGetAddressOf())); |
| } |
| |
| // Create a linear sampler for the gradient texture. |
| D3D11_SAMPLER_DESC linearSamplerDesc; |
| linearSamplerDesc.Filter = D3D11_FILTER_MIN_MAG_LINEAR_MIP_POINT; |
| linearSamplerDesc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; |
| linearSamplerDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; |
| linearSamplerDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; |
| linearSamplerDesc.MipLODBias = 0.0f; |
| linearSamplerDesc.MaxAnisotropy = 1; |
| linearSamplerDesc.ComparisonFunc = D3D11_COMPARISON_NEVER; |
| linearSamplerDesc.MinLOD = 0; |
| linearSamplerDesc.MaxLOD = 0; |
| VERIFY_OK( |
| m_gpu->CreateSamplerState(&linearSamplerDesc, m_linearSampler.ReleaseAndGetAddressOf())); |
| |
| // Create a mipmap sampler for the image textures. |
| D3D11_SAMPLER_DESC mipmapSamplerDesc; |
| mipmapSamplerDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; |
| mipmapSamplerDesc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; |
| mipmapSamplerDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; |
| mipmapSamplerDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; |
| mipmapSamplerDesc.MipLODBias = 0.0f; |
| mipmapSamplerDesc.MaxAnisotropy = 1; |
| mipmapSamplerDesc.ComparisonFunc = D3D11_COMPARISON_NEVER; |
| mipmapSamplerDesc.MinLOD = 0; |
| mipmapSamplerDesc.MaxLOD = D3D11_FLOAT32_MAX; |
| VERIFY_OK( |
| m_gpu->CreateSamplerState(&mipmapSamplerDesc, m_mipmapSampler.ReleaseAndGetAddressOf())); |
| |
| ID3D11SamplerState* samplers[2] = {m_linearSampler.Get(), m_mipmapSampler.Get()}; |
| static_assert(IMAGE_TEXTURE_IDX == GRAD_TEXTURE_IDX + 1); |
| m_gpuContext->PSSetSamplers(GRAD_TEXTURE_IDX, 2, samplers); |
| |
| D3D11_BLEND_DESC srcOverDesc{}; |
| srcOverDesc.RenderTarget[0].BlendEnable = TRUE; |
| srcOverDesc.RenderTarget[0].SrcBlend = D3D11_BLEND_ONE; |
| srcOverDesc.RenderTarget[0].DestBlend = D3D11_BLEND_INV_SRC_ALPHA; |
| srcOverDesc.RenderTarget[0].BlendOp = D3D11_BLEND_OP_ADD; |
| srcOverDesc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE; |
| srcOverDesc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_INV_SRC_ALPHA; |
| srcOverDesc.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD; |
| srcOverDesc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; |
| VERIFY_OK(m_gpu->CreateBlendState(&srcOverDesc, m_srcOverBlendState.ReleaseAndGetAddressOf())); |
| } |
| |
| ComPtr<ID3D11Texture2D> PLSRenderContextD3DImpl::makeSimple2DTexture(DXGI_FORMAT format, |
| UINT width, |
| UINT height, |
| UINT mipLevelCount, |
| UINT bindFlags, |
| UINT miscFlags) |
| { |
| return make_simple_2d_texture(m_gpu.Get(), |
| format, |
| width, |
| height, |
| mipLevelCount, |
| bindFlags, |
| miscFlags); |
| } |
| |
| ComPtr<ID3D11UnorderedAccessView> PLSRenderContextD3DImpl::makeSimple2DUAV(ID3D11Texture2D* tex, |
| DXGI_FORMAT format) |
| { |
| return make_simple_2d_uav(m_gpu.Get(), tex, format); |
| } |
| |
| ComPtr<ID3D11Buffer> PLSRenderContextD3DImpl::makeSimpleImmutableBuffer(size_t sizeInBytes, |
| UINT bindFlags, |
| const void* data) |
| { |
| D3D11_BUFFER_DESC desc{}; |
| desc.ByteWidth = sizeInBytes; |
| desc.Usage = D3D11_USAGE_IMMUTABLE; |
| desc.BindFlags = bindFlags; |
| desc.StructureByteStride = sizeof(PatchVertex); |
| |
| D3D11_SUBRESOURCE_DATA dataDesc{}; |
| dataDesc.pSysMem = data; |
| |
| ComPtr<ID3D11Buffer> buffer; |
| VERIFY_OK(m_gpu->CreateBuffer(&desc, &dataDesc, buffer.ReleaseAndGetAddressOf())); |
| return buffer; |
| } |
| |
| ComPtr<ID3DBlob> PLSRenderContextD3DImpl::compileSourceToBlob(const char* shaderTypeDefineName, |
| const std::string& commonSource, |
| const char* entrypoint, |
| const char* target) |
| { |
| std::ostringstream source; |
| source << "#define " << shaderTypeDefineName << '\n'; |
| source << commonSource; |
| |
| const std::string& sourceStr = source.str(); |
| ComPtr<ID3DBlob> blob; |
| ComPtr<ID3DBlob> errors; |
| HRESULT hr = D3DCompile(sourceStr.c_str(), |
| sourceStr.length(), |
| nullptr, |
| nullptr, |
| nullptr, |
| entrypoint, |
| target, |
| D3DCOMPILE_ENABLE_STRICTNESS, |
| 0, |
| &blob, |
| &errors); |
| if (errors && errors->GetBufferPointer()) |
| { |
| fprintf(stderr, "Errors or warnings compiling shader.\n"); |
| int l = 1; |
| std::stringstream stream(sourceStr); |
| std::string lineStr; |
| while (std::getline(stream, lineStr, '\n')) |
| { |
| fprintf(stderr, "%4i| %s\n", l++, lineStr.c_str()); |
| } |
| fprintf(stderr, "%s\n", reinterpret_cast<char*>(errors->GetBufferPointer())); |
| exit(-1); |
| } |
| if (FAILED(hr)) |
| { |
| fprintf(stderr, "Failed to compile shader.\n"); |
| exit(-1); |
| } |
| return blob; |
| } |
| |
| class RenderBufferD3DImpl : public lite_rtti_override<RenderBuffer, RenderBufferD3DImpl> |
| { |
| public: |
| RenderBufferD3DImpl(RenderBufferType renderBufferType, |
| RenderBufferFlags renderBufferFlags, |
| size_t sizeInBytes, |
| ComPtr<ID3D11Device> gpu, |
| ComPtr<ID3D11DeviceContext> gpuContext) : |
| lite_rtti_override(renderBufferType, renderBufferFlags, sizeInBytes), |
| m_gpu(std::move(gpu)), |
| m_gpuContext(std::move(gpuContext)) |
| { |
| m_desc.ByteWidth = sizeInBytes; |
| m_desc.BindFlags = |
| type() == RenderBufferType::vertex ? D3D11_BIND_VERTEX_BUFFER : D3D11_BIND_INDEX_BUFFER; |
| if (flags() & RenderBufferFlags::mappedOnceAtInitialization) |
| { |
| m_desc.Usage = D3D11_USAGE_IMMUTABLE; |
| m_desc.CPUAccessFlags = 0; |
| m_mappedMemoryForImmutableBuffer.reset(new char[sizeInBytes]); |
| } |
| else |
| { |
| m_desc.Usage = D3D11_USAGE_DYNAMIC; |
| m_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; |
| VERIFY_OK(m_gpu->CreateBuffer(&m_desc, nullptr, m_buffer.ReleaseAndGetAddressOf())); |
| } |
| } |
| |
| ID3D11Buffer* buffer() const { return m_buffer.Get(); } |
| |
| protected: |
| void* onMap() override |
| { |
| if (flags() & RenderBufferFlags::mappedOnceAtInitialization) |
| { |
| assert(m_mappedMemoryForImmutableBuffer); |
| return m_mappedMemoryForImmutableBuffer.get(); |
| } |
| else |
| { |
| D3D11_MAPPED_SUBRESOURCE mappedSubresource; |
| m_gpuContext->Map(m_buffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedSubresource); |
| return mappedSubresource.pData; |
| } |
| } |
| |
| void onUnmap() override |
| { |
| if (flags() & RenderBufferFlags::mappedOnceAtInitialization) |
| { |
| assert(!m_buffer); |
| D3D11_SUBRESOURCE_DATA bufferDataDesc{}; |
| bufferDataDesc.pSysMem = m_mappedMemoryForImmutableBuffer.get(); |
| VERIFY_OK( |
| m_gpu->CreateBuffer(&m_desc, &bufferDataDesc, m_buffer.ReleaseAndGetAddressOf())); |
| m_mappedMemoryForImmutableBuffer.reset(); // This buffer will only be mapped once. |
| } |
| else |
| { |
| m_gpuContext->Unmap(m_buffer.Get(), 0); |
| } |
| } |
| |
| private: |
| const ComPtr<ID3D11Device> m_gpu; |
| const ComPtr<ID3D11DeviceContext> m_gpuContext; |
| D3D11_BUFFER_DESC m_desc{}; |
| ComPtr<ID3D11Buffer> m_buffer; |
| std::unique_ptr<char[]> m_mappedMemoryForImmutableBuffer; |
| }; |
| |
| rcp<RenderBuffer> PLSRenderContextD3DImpl::makeRenderBuffer(RenderBufferType type, |
| RenderBufferFlags flags, |
| size_t sizeInBytes) |
| { |
| return make_rcp<RenderBufferD3DImpl>(type, flags, sizeInBytes, m_gpu, m_gpuContext); |
| } |
| |
| class PLSTextureD3DImpl : public PLSTexture |
| { |
| public: |
| PLSTextureD3DImpl(PLSRenderContextD3DImpl* plsImpl, |
| UINT width, |
| UINT height, |
| UINT mipLevelCount, |
| const uint8_t imageDataRGBA[]) : |
| PLSTexture(width, height) |
| { |
| m_texture = |
| plsImpl->makeSimple2DTexture(DXGI_FORMAT_R8G8B8A8_UNORM, |
| width, |
| height, |
| mipLevelCount, |
| D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET, |
| D3D11_RESOURCE_MISC_GENERATE_MIPS); |
| |
| // Specify the top-level image in the mipmap chain. |
| D3D11_BOX box; |
| box.left = 0; |
| box.right = width; |
| box.top = 0; |
| box.bottom = height; |
| box.front = 0; |
| box.back = 1; |
| plsImpl->gpuContext() |
| ->UpdateSubresource(m_texture.Get(), 0, &box, imageDataRGBA, width * 4, 0); |
| |
| // Create a view and generate mipmaps. |
| VERIFY_OK(plsImpl->gpu()->CreateShaderResourceView(m_texture.Get(), |
| NULL, |
| m_srv.ReleaseAndGetAddressOf())); |
| plsImpl->gpuContext()->GenerateMips(m_srv.Get()); |
| } |
| |
| ID3D11ShaderResourceView* srv() const { return m_srv.Get(); } |
| ID3D11ShaderResourceView* const* srvAddressOf() const { return m_srv.GetAddressOf(); } |
| |
| private: |
| ComPtr<ID3D11Texture2D> m_texture; |
| ComPtr<ID3D11ShaderResourceView> m_srv; |
| }; |
| |
| rcp<PLSTexture> PLSRenderContextD3DImpl::makeImageTexture(uint32_t width, |
| uint32_t height, |
| uint32_t mipLevelCount, |
| const uint8_t imageDataRGBA[]) |
| { |
| return make_rcp<PLSTextureD3DImpl>(this, width, height, mipLevelCount, imageDataRGBA); |
| } |
| |
| class BufferRingD3D : public BufferRing |
| { |
| public: |
| BufferRingD3D(PLSRenderContextD3DImpl* plsImpl, size_t capacityInBytes, UINT bindFlags) : |
| BufferRingD3D(plsImpl, capacityInBytes, bindFlags, 0, 0) |
| {} |
| |
| ID3D11Buffer* submittedBuffer() const { return m_buffers[submittedBufferIdx()].Get(); } |
| |
| protected: |
| BufferRingD3D(PLSRenderContextD3DImpl* plsImpl, |
| size_t capacityInBytes, |
| UINT bindFlags, |
| UINT elementSizeInBytes, |
| UINT miscFlags) : |
| BufferRing(capacityInBytes), m_gpuContext(plsImpl->gpuContext()) |
| { |
| D3D11_BUFFER_DESC desc{}; |
| desc.ByteWidth = capacityInBytes; |
| desc.Usage = D3D11_USAGE_DEFAULT; |
| desc.BindFlags = bindFlags; |
| desc.CPUAccessFlags = 0; |
| desc.StructureByteStride = elementSizeInBytes; |
| desc.MiscFlags = miscFlags; |
| |
| for (size_t i = 0; i < kBufferRingSize; ++i) |
| { |
| VERIFY_OK(plsImpl->gpu()->CreateBuffer(&desc, |
| nullptr, |
| m_buffers[i].ReleaseAndGetAddressOf())); |
| } |
| } |
| |
| void* onMapBuffer(int bufferIdx, size_t mapSizeInBytes) override |
| { |
| // Use a CPU-side shadow buffer since D3D11 doesn't have an API to map a sub-range. |
| return shadowBuffer(); |
| } |
| |
| void onUnmapAndSubmitBuffer(int bufferIdx, size_t mapSizeInBytes) override |
| { |
| if (mapSizeInBytes == capacityInBytes()) |
| { |
| // Constant buffers don't allow partial updates, so special-case the event where we |
| // update the entire buffer. |
| m_gpuContext |
| ->UpdateSubresource(m_buffers[bufferIdx].Get(), 0, NULL, shadowBuffer(), 0, 0); |
| } |
| else |
| { |
| D3D11_BOX box; |
| box.left = 0; |
| box.right = mapSizeInBytes; |
| box.top = 0; |
| box.bottom = 1; |
| box.front = 0; |
| box.back = 1; |
| m_gpuContext |
| ->UpdateSubresource(m_buffers[bufferIdx].Get(), 0, &box, shadowBuffer(), 0, 0); |
| } |
| } |
| |
| ComPtr<ID3D11DeviceContext> m_gpuContext; |
| ComPtr<ID3D11Buffer> m_buffers[kBufferRingSize]; |
| }; |
| |
| class StructuredBufferRingD3D : public BufferRingD3D |
| { |
| public: |
| StructuredBufferRingD3D(PLSRenderContextD3DImpl* plsImpl, |
| size_t capacityInBytes, |
| UINT elementSizeInBytes) : |
| BufferRingD3D(plsImpl, |
| capacityInBytes, |
| D3D11_BIND_SHADER_RESOURCE, |
| elementSizeInBytes, |
| D3D11_RESOURCE_MISC_BUFFER_STRUCTURED) |
| { |
| assert(capacityInBytes % elementSizeInBytes == 0); |
| } |
| |
| ID3D11ShaderResourceView* replaceSRV(ID3D11Device* gpu, |
| UINT elementCount, |
| UINT firstElement) const |
| { |
| D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc; |
| srvDesc.Format = DXGI_FORMAT_UNKNOWN; |
| srvDesc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER; |
| srvDesc.Buffer.FirstElement = firstElement; |
| srvDesc.Buffer.NumElements = elementCount; |
| |
| VERIFY_OK(gpu->CreateShaderResourceView(m_buffers[submittedBufferIdx()].Get(), |
| &srvDesc, |
| m_currentSRV.ReleaseAndGetAddressOf())); |
| return m_currentSRV.Get(); |
| } |
| |
| protected: |
| mutable ComPtr<ID3D11ShaderResourceView> m_currentSRV; |
| }; |
| |
| std::unique_ptr<BufferRing> PLSRenderContextD3DImpl::makeUniformBufferRing(size_t capacityInBytes) |
| { |
| // In D3D we update uniform data inline with commands, rather than filling a buffer up front. |
| return std::make_unique<HeapBufferRing>(capacityInBytes); |
| } |
| |
| std::unique_ptr<BufferRing> PLSRenderContextD3DImpl::makeStorageBufferRing( |
| size_t capacityInBytes, |
| pls::StorageBufferStructure bufferStructure) |
| { |
| return capacityInBytes != 0 ? std::make_unique<StructuredBufferRingD3D>( |
| this, |
| capacityInBytes, |
| pls::StorageBufferElementSizeInBytes(bufferStructure)) |
| : nullptr; |
| } |
| |
| std::unique_ptr<BufferRing> PLSRenderContextD3DImpl::makeVertexBufferRing(size_t capacityInBytes) |
| { |
| return capacityInBytes != 0 |
| ? std::make_unique<BufferRingD3D>(this, capacityInBytes, D3D11_BIND_VERTEX_BUFFER) |
| : nullptr; |
| } |
| |
| std::unique_ptr<BufferRing> PLSRenderContextD3DImpl::makeTextureTransferBufferRing( |
| size_t capacityInBytes) |
| { |
| // It appears impossible to update a D3D texture from a GPU buffer; store this data on the heap |
| // and upload it to the texture at flush time. |
| return std::make_unique<HeapBufferRing>(capacityInBytes); |
| } |
| |
| PLSRenderTargetD3D::PLSRenderTargetD3D(PLSRenderContextD3DImpl* plsImpl, |
| uint32_t width, |
| uint32_t height) : |
| PLSRenderTarget(width, height), |
| m_gpu(plsImpl->gpu()), |
| m_gpuSupportsTypedUAVLoadStore(plsImpl->d3dCapabilities().supportsTypedUAVLoadStore) |
| {} |
| |
| void PLSRenderTargetD3D::setTargetTexture(ComPtr<ID3D11Texture2D> tex) |
| { |
| if (tex != nullptr) |
| { |
| D3D11_TEXTURE2D_DESC desc; |
| tex->GetDesc(&desc); |
| #ifdef DEBUG |
| assert(desc.Width == width()); |
| assert(desc.Height == height()); |
| assert(desc.Format == DXGI_FORMAT_R8G8B8A8_UNORM || |
| desc.Format == DXGI_FORMAT_B8G8R8A8_UNORM || |
| desc.Format == DXGI_FORMAT_R8G8B8A8_TYPELESS); |
| #endif |
| m_targetTextureSupportsUAV = |
| (desc.BindFlags & D3D11_BIND_UNORDERED_ACCESS) && |
| (m_gpuSupportsTypedUAVLoadStore || desc.Format == DXGI_FORMAT_R8G8B8A8_TYPELESS); |
| m_targetFormat = desc.Format; |
| } |
| else |
| { |
| m_targetTextureSupportsUAV = false; |
| } |
| m_targetTexture = std::move(tex); |
| m_targetRTV = nullptr; |
| m_targetUAV = nullptr; |
| } |
| |
| ID3D11RenderTargetView* PLSRenderTargetD3D::targetRTV() |
| { |
| if (m_targetRTV == nullptr && m_targetTexture != nullptr) |
| { |
| D3D11_RENDER_TARGET_VIEW_DESC desc{}; |
| desc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D; |
| |
| switch (m_targetFormat) |
| { |
| case DXGI_FORMAT_R8G8B8A8_TYPELESS: |
| desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; |
| break; |
| |
| default: |
| desc.Format = m_targetFormat; |
| break; |
| } |
| |
| VERIFY_OK(m_gpu->CreateRenderTargetView(m_targetTexture.Get(), |
| &desc, |
| m_targetRTV.ReleaseAndGetAddressOf())); |
| } |
| return m_targetRTV.Get(); |
| } |
| |
| ID3D11Texture2D* PLSRenderTargetD3D::offscreenTexture() |
| { |
| assert(!m_targetTextureSupportsUAV); |
| if (m_offscreenTexture == nullptr) |
| { |
| m_offscreenTexture = make_simple_2d_texture(m_gpu.Get(), |
| DXGI_FORMAT_R8G8B8A8_TYPELESS, |
| width(), |
| height(), |
| 1, |
| D3D11_BIND_UNORDERED_ACCESS); |
| } |
| return m_offscreenTexture.Get(); |
| } |
| |
| ID3D11UnorderedAccessView* PLSRenderTargetD3D::targetUAV() |
| { |
| if (m_targetUAV == nullptr) |
| { |
| if (auto* uavTexture = |
| m_targetTextureSupportsUAV ? m_targetTexture.Get() : offscreenTexture()) |
| { |
| m_targetUAV = make_simple_2d_uav(m_gpu.Get(), |
| uavTexture, |
| m_gpuSupportsTypedUAVLoadStore ? m_targetFormat |
| : DXGI_FORMAT_R32_UINT); |
| } |
| } |
| return m_targetUAV.Get(); |
| } |
| |
| ID3D11UnorderedAccessView* PLSRenderTargetD3D::coverageUAV() |
| { |
| if (m_coverageTexture == nullptr) |
| { |
| m_coverageTexture = make_simple_2d_texture(m_gpu.Get(), |
| DXGI_FORMAT_R32_UINT, |
| width(), |
| height(), |
| 1, |
| D3D11_BIND_UNORDERED_ACCESS); |
| } |
| if (m_coverageUAV == nullptr) |
| { |
| m_coverageUAV = |
| make_simple_2d_uav(m_gpu.Get(), m_coverageTexture.Get(), DXGI_FORMAT_R32_UINT); |
| } |
| return m_coverageUAV.Get(); |
| } |
| |
| ID3D11UnorderedAccessView* PLSRenderTargetD3D::clipUAV() |
| { |
| if (m_clipTexture == nullptr) |
| { |
| m_clipTexture = make_simple_2d_texture(m_gpu.Get(), |
| DXGI_FORMAT_R32_UINT, |
| width(), |
| height(), |
| 1, |
| D3D11_BIND_UNORDERED_ACCESS); |
| } |
| if (m_clipUAV == nullptr) |
| { |
| m_clipUAV = make_simple_2d_uav(m_gpu.Get(), m_clipTexture.Get(), DXGI_FORMAT_R32_UINT); |
| } |
| return m_clipUAV.Get(); |
| } |
| |
| ID3D11UnorderedAccessView* PLSRenderTargetD3D::originalDstColorUAV() |
| { |
| if (m_originalDstColorTexture == nullptr) |
| { |
| m_originalDstColorTexture = make_simple_2d_texture(m_gpu.Get(), |
| DXGI_FORMAT_R8G8B8A8_TYPELESS, |
| width(), |
| height(), |
| 1, |
| D3D11_BIND_UNORDERED_ACCESS); |
| } |
| if (m_originalDstColorUAV == nullptr) |
| { |
| m_originalDstColorUAV = make_simple_2d_uav( |
| m_gpu.Get(), |
| m_originalDstColorTexture.Get(), |
| m_gpuSupportsTypedUAVLoadStore ? DXGI_FORMAT_R8G8B8A8_UNORM : DXGI_FORMAT_R32_UINT); |
| } |
| return m_originalDstColorUAV.Get(); |
| } |
| |
| void PLSRenderContextD3DImpl::resizeGradientTexture(uint32_t width, uint32_t height) |
| { |
| if (width == 0 || height == 0) |
| { |
| m_gradTexture = nullptr; |
| m_gradTextureSRV = nullptr; |
| m_gradTextureRTV = nullptr; |
| } |
| else |
| { |
| m_gradTexture = makeSimple2DTexture(DXGI_FORMAT_R8G8B8A8_UNORM, |
| width, |
| height, |
| 1, |
| D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE); |
| VERIFY_OK(m_gpu->CreateShaderResourceView(m_gradTexture.Get(), |
| NULL, |
| m_gradTextureSRV.ReleaseAndGetAddressOf())); |
| VERIFY_OK(m_gpu->CreateRenderTargetView(m_gradTexture.Get(), |
| NULL, |
| m_gradTextureRTV.ReleaseAndGetAddressOf())); |
| } |
| } |
| |
| void PLSRenderContextD3DImpl::resizeTessellationTexture(uint32_t width, uint32_t height) |
| { |
| if (width == 0 || height == 0) |
| { |
| m_tessTexture = nullptr; |
| m_tessTextureSRV = nullptr; |
| m_tessTextureRTV = nullptr; |
| } |
| else |
| { |
| m_tessTexture = makeSimple2DTexture(DXGI_FORMAT_R32G32B32A32_UINT, |
| width, |
| height, |
| 1, |
| D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE); |
| VERIFY_OK(m_gpu->CreateShaderResourceView(m_tessTexture.Get(), |
| NULL, |
| m_tessTextureSRV.ReleaseAndGetAddressOf())); |
| VERIFY_OK(m_gpu->CreateRenderTargetView(m_tessTexture.Get(), |
| NULL, |
| m_tessTextureRTV.ReleaseAndGetAddressOf())); |
| } |
| } |
| |
| template <typename HighLevelStruct> |
| ID3D11ShaderResourceView* PLSRenderContextD3DImpl::replaceStructuredBufferSRV( |
| const BufferRing* bufferRing, |
| UINT highLevelStructCount, |
| UINT firstHighLevelStruct) |
| { |
| // Shaders access our storage buffers as arrays of basic types, as opposed to structures. Our |
| // SRV therefore needs to be indexed by the underlying basic type, not the high level structure. |
| constexpr static UINT kUnderlyingTypeSizeInBytes = |
| pls::StorageBufferElementSizeInBytes(HighLevelStruct::kBufferStructure); |
| static_assert(sizeof(HighLevelStruct) % kUnderlyingTypeSizeInBytes == 0); |
| constexpr static UINT kStructIndexMultiplier = |
| sizeof(HighLevelStruct) / kUnderlyingTypeSizeInBytes; |
| return static_cast<const StructuredBufferRingD3D*>(bufferRing) |
| ->replaceSRV(m_gpu.Get(), |
| highLevelStructCount * kStructIndexMultiplier, |
| firstHighLevelStruct * kStructIndexMultiplier); |
| } |
| |
| void PLSRenderContextD3DImpl::setPipelineLayoutAndShaders(DrawType drawType, |
| pls::ShaderFeatures shaderFeatures, |
| pls::InterlockMode interlockMode, |
| pls::ShaderMiscFlags pixelShaderMiscFlags) |
| { |
| uint32_t vertexShaderKey = pls::ShaderUniqueKey(drawType, |
| shaderFeatures & kVertexShaderFeaturesMask, |
| interlockMode, |
| pls::ShaderMiscFlags::none); |
| auto vertexEntry = m_drawVertexShaders.find(vertexShaderKey); |
| |
| uint32_t pixelShaderKey = |
| ShaderUniqueKey(drawType, shaderFeatures, interlockMode, pixelShaderMiscFlags); |
| auto pixelEntry = m_drawPixelShaders.find(pixelShaderKey); |
| |
| if (vertexEntry == m_drawVertexShaders.end() || pixelEntry == m_drawPixelShaders.end()) |
| { |
| std::ostringstream s; |
| for (size_t i = 0; i < kShaderFeatureCount; ++i) |
| { |
| ShaderFeatures feature = static_cast<ShaderFeatures>(1 << i); |
| if (shaderFeatures & feature) |
| { |
| s << "#define " << GetShaderFeatureGLSLName(feature) << '\n'; |
| } |
| } |
| if (m_d3dCapabilities.supportsRasterizerOrderedViews) |
| { |
| if ((interlockMode == pls::InterlockMode::rasterOrdering && |
| drawType != DrawType::interiorTriangulation) || |
| drawType == DrawType::imageMesh) |
| { |
| s << "#define " << GLSL_ENABLE_RASTERIZER_ORDERED_VIEWS << '\n'; |
| } |
| } |
| if (m_d3dCapabilities.supportsTypedUAVLoadStore) |
| { |
| s << "#define " << GLSL_ENABLE_TYPED_UAV_LOAD_STORE << '\n'; |
| } |
| if (pixelShaderMiscFlags & pls::ShaderMiscFlags::coalescedResolveAndTransfer) |
| { |
| s << "#define " << GLSL_COALESCED_PLS_RESOLVE_AND_TRANSFER << '\n'; |
| s << "#define " << GLSL_FRAMEBUFFER_PLANE_IDX_OVERRIDE << ' ' |
| << COALESCED_OFFSCREEN_FRAMEBUFFER_PLANE_IDX << '\n'; |
| } |
| switch (drawType) |
| { |
| case DrawType::midpointFanPatches: |
| case DrawType::outerCurvePatches: |
| s << "#define " << GLSL_DRAW_PATH << '\n'; |
| break; |
| case DrawType::interiorTriangulation: |
| s << "#define " << GLSL_DRAW_INTERIOR_TRIANGLES << '\n'; |
| break; |
| case DrawType::imageRect: |
| assert(interlockMode == pls::InterlockMode::atomics); |
| s << "#define " << GLSL_DRAW_IMAGE << '\n'; |
| s << "#define " << GLSL_DRAW_IMAGE_RECT << '\n'; |
| break; |
| case DrawType::imageMesh: |
| s << "#define " << GLSL_DRAW_IMAGE << '\n'; |
| s << "#define " << GLSL_DRAW_IMAGE_MESH << '\n'; |
| break; |
| case DrawType::plsAtomicResolve: |
| assert(interlockMode == pls::InterlockMode::atomics); |
| s << "#define " << GLSL_DRAW_RENDER_TARGET_UPDATE_BOUNDS << '\n'; |
| s << "#define " << GLSL_RESOLVE_PLS << '\n'; |
| break; |
| case DrawType::plsAtomicInitialize: |
| case DrawType::stencilClipReset: |
| RIVE_UNREACHABLE(); |
| } |
| s << glsl::constants << '\n'; |
| s << glsl::hlsl << '\n'; |
| s << glsl::common << '\n'; |
| if (shaderFeatures & ShaderFeatures::ENABLE_ADVANCED_BLEND) |
| { |
| s << glsl::advanced_blend << '\n'; |
| } |
| switch (drawType) |
| { |
| case DrawType::midpointFanPatches: |
| case DrawType::outerCurvePatches: |
| s << pls::glsl::draw_path_common << '\n'; |
| s << (interlockMode == pls::InterlockMode::rasterOrdering ? pls::glsl::draw_path |
| : pls::glsl::atomic_draw) |
| << '\n'; |
| break; |
| case DrawType::interiorTriangulation: |
| s << pls::glsl::draw_path_common << '\n'; |
| s << (interlockMode == pls::InterlockMode::rasterOrdering ? pls::glsl::draw_path |
| : pls::glsl::atomic_draw) |
| << '\n'; |
| break; |
| case DrawType::imageRect: |
| assert(interlockMode == pls::InterlockMode::atomics); |
| s << pls::glsl::atomic_draw << '\n'; |
| break; |
| case DrawType::imageMesh: |
| s << (interlockMode == pls::InterlockMode::rasterOrdering |
| ? pls::glsl::draw_image_mesh |
| : pls::glsl::atomic_draw) |
| << '\n'; |
| break; |
| case DrawType::plsAtomicResolve: |
| case DrawType::stencilClipReset: |
| assert(interlockMode == pls::InterlockMode::atomics); |
| s << pls::glsl::atomic_draw << '\n'; |
| break; |
| case DrawType::plsAtomicInitialize: |
| RIVE_UNREACHABLE(); |
| } |
| |
| const std::string shader = s.str(); |
| |
| if (vertexEntry == m_drawVertexShaders.end()) |
| { |
| DrawVertexShader drawVertexShader; |
| ComPtr<ID3DBlob> blob = |
| compileSourceToBlob(GLSL_VERTEX, shader.c_str(), GLSL_drawVertexMain, "vs_5_0"); |
| D3D11_INPUT_ELEMENT_DESC layoutDesc[2]; |
| size_t vertexAttribCount; |
| switch (drawType) |
| { |
| case DrawType::midpointFanPatches: |
| case DrawType::outerCurvePatches: |
| layoutDesc[0] = {GLSL_a_patchVertexData, |
| 0, |
| DXGI_FORMAT_R32G32B32A32_FLOAT, |
| kPatchVertexDataSlot, |
| D3D11_APPEND_ALIGNED_ELEMENT, |
| D3D11_INPUT_PER_VERTEX_DATA, |
| 0}; |
| layoutDesc[1] = {GLSL_a_mirroredVertexData, |
| 0, |
| DXGI_FORMAT_R32G32B32A32_FLOAT, |
| kPatchVertexDataSlot, |
| D3D11_APPEND_ALIGNED_ELEMENT, |
| D3D11_INPUT_PER_VERTEX_DATA, |
| 0}; |
| vertexAttribCount = 2; |
| break; |
| case DrawType::interiorTriangulation: |
| layoutDesc[0] = {GLSL_a_triangleVertex, |
| 0, |
| DXGI_FORMAT_R32G32B32_FLOAT, |
| kTriangleVertexDataSlot, |
| 0, |
| D3D11_INPUT_PER_VERTEX_DATA, |
| 0}; |
| vertexAttribCount = 1; |
| break; |
| case DrawType::imageRect: |
| layoutDesc[0] = {GLSL_a_imageRectVertex, |
| 0, |
| DXGI_FORMAT_R32G32B32A32_FLOAT, |
| kImageRectVertexDataSlot, |
| 0, |
| D3D11_INPUT_PER_VERTEX_DATA, |
| 0}; |
| vertexAttribCount = 1; |
| break; |
| case DrawType::imageMesh: |
| layoutDesc[0] = {GLSL_a_position, |
| 0, |
| DXGI_FORMAT_R32G32_FLOAT, |
| kImageMeshVertexDataSlot, |
| D3D11_APPEND_ALIGNED_ELEMENT, |
| D3D11_INPUT_PER_VERTEX_DATA, |
| 0}; |
| layoutDesc[1] = {GLSL_a_texCoord, |
| 0, |
| DXGI_FORMAT_R32G32_FLOAT, |
| kImageMeshUVDataSlot, |
| D3D11_APPEND_ALIGNED_ELEMENT, |
| D3D11_INPUT_PER_VERTEX_DATA, |
| 0}; |
| vertexAttribCount = 2; |
| break; |
| case DrawType::plsAtomicResolve: |
| vertexAttribCount = 0; |
| break; |
| case DrawType::plsAtomicInitialize: |
| case DrawType::stencilClipReset: |
| RIVE_UNREACHABLE(); |
| } |
| VERIFY_OK(m_gpu->CreateInputLayout(layoutDesc, |
| vertexAttribCount, |
| blob->GetBufferPointer(), |
| blob->GetBufferSize(), |
| &drawVertexShader.layout)); |
| VERIFY_OK(m_gpu->CreateVertexShader(blob->GetBufferPointer(), |
| blob->GetBufferSize(), |
| nullptr, |
| &drawVertexShader.shader)); |
| vertexEntry = m_drawVertexShaders.insert({vertexShaderKey, drawVertexShader}).first; |
| } |
| |
| if (pixelEntry == m_drawPixelShaders.end()) |
| { |
| ComPtr<ID3D11PixelShader> pixelShader; |
| ComPtr<ID3DBlob> blob = |
| compileSourceToBlob(GLSL_FRAGMENT, shader.c_str(), GLSL_drawFragmentMain, "ps_5_0"); |
| VERIFY_OK(m_gpu->CreatePixelShader(blob->GetBufferPointer(), |
| blob->GetBufferSize(), |
| nullptr, |
| &pixelShader)); |
| pixelEntry = m_drawPixelShaders.insert({pixelShaderKey, pixelShader}).first; |
| } |
| } |
| |
| m_gpuContext->IASetInputLayout(vertexEntry->second.layout.Get()); |
| m_gpuContext->VSSetShader(vertexEntry->second.shader.Get(), NULL, 0); |
| m_gpuContext->PSSetShader(pixelEntry->second.Get(), NULL, 0); |
| } |
| |
| // Returns the buffer reported by BufferRingD3D::submittedBuffer() for `bufferRing`. |
| // The ring must be non-null (asserted). It is downcast to BufferRingD3D with a static_cast, so no |
| // runtime type check is performed. |
| static ID3D11Buffer* submitted_buffer(const BufferRing* bufferRing) |
| { |
|     assert(bufferRing != nullptr); |
|     const auto* d3dRing = static_cast<const BufferRingD3D*>(bufferRing); |
|     return d3dRing->submittedBuffer(); |
| } |
| |
| // Returns the contents of `bufferRing`, viewed as a HeapBufferRing, as a byte pointer so callers |
| // can add byte offsets to it. The ring must be non-null (asserted); the downcast is a static_cast |
| // and is not checked at runtime. |
| static const char* heap_buffer_contents(const BufferRing* bufferRing) |
| { |
|     assert(bufferRing != nullptr); |
|     return reinterpret_cast<const char*>( |
|         static_cast<const HeapBufferRing*>(bufferRing)->contents()); |
| } |
| |
| // Copies the texels inside `rect` from subresource 0 of `src` to the same x/y position in |
| // subresource 0 of `dst`. The rect's edges are cast to UINT and the copied box spans depth [0, 1). |
| static void blit_sub_rect(ID3D11DeviceContext* gpuContext, |
|                           ID3D11Texture2D* dst, |
|                           ID3D11Texture2D* src, |
|                           const IAABB& rect) |
| { |
|     D3D11_BOX region; |
|     region.left = static_cast<UINT>(rect.left); |
|     region.top = static_cast<UINT>(rect.top); |
|     region.front = 0; |
|     region.right = static_cast<UINT>(rect.right); |
|     region.bottom = static_cast<UINT>(rect.bottom); |
|     region.back = 1; |
|     // The destination offset equals the source box origin, so the region lands in place. |
|     gpuContext->CopySubresourceRegion(dst, 0, region.left, region.top, 0, src, 0, &region); |
| } |
| |
| /* Executes one flush on m_gpuContext, as described by `desc`. |
| * |
| * In order: uploads the per-flush uniforms and binds the uniform and storage buffers, renders the |
| * complex gradient spans and copies the simple color ramps into the gradient texture, tessellates |
| * curves into the tessellation texture, clears/initializes the PLS planes according to |
| * desc.colorLoadAction, and then issues every non-empty batch in desc.drawList. In rasterOrdering |
| * mode with a target texture that does not support UAVs, desc.renderTargetUpdateBounds is finally |
| * copied from the offscreen texture back to the target texture. |
| * |
| * desc.renderTarget is downcast to PLSRenderTargetD3D with a static_cast (no runtime check). |
| */ void PLSRenderContextD3DImpl::flush(const FlushDescriptor& desc) |
| { |
| auto renderTarget = static_cast<PLSRenderTargetD3D*>(desc.renderTarget); |
| |
| m_gpuContext->RSSetState(m_backCulledRasterState[0].Get()); /* Index 0 is the entry selected when desc.wireframe is false. */ |
| m_gpuContext->OMSetBlendState(NULL, NULL, 0xffffffff); |
| |
| // All programs use the same set of per-flush uniforms; upload this flush's copy once up front. |
| m_gpuContext->UpdateSubresource(m_flushUniforms.Get(), |
| 0, |
| NULL, |
| heap_buffer_contents(flushUniformBufferRing()) + |
| desc.flushUniformDataOffsetInBytes, |
| 0, |
| 0); |
| |
| ID3D11Buffer* uniformBuffers[] = {m_flushUniforms.Get(), |
| m_drawUniforms.Get(), |
| m_imageDrawUniforms.Get()}; |
| static_assert(PATH_BASE_INSTANCE_UNIFORM_BUFFER_IDX == FLUSH_UNIFORM_BUFFER_IDX + 1); |
| static_assert(IMAGE_DRAW_UNIFORM_BUFFER_IDX == PATH_BASE_INSTANCE_UNIFORM_BUFFER_IDX + 1); |
| m_gpuContext->VSSetConstantBuffers(FLUSH_UNIFORM_BUFFER_IDX, |
| std::size(uniformBuffers), |
| uniformBuffers); |
| |
| // All programs use the same storage buffers. A slot is null when its count (pathCount or contourCount) is zero. |
| ID3D11ShaderResourceView* storageBufferBufferSRVs[] = { |
| desc.pathCount > 0 ? replaceStructuredBufferSRV<pls::PathData>(pathBufferRing(), |
| desc.pathCount, |
| desc.firstPath) |
| : nullptr, |
| desc.pathCount > 0 ? replaceStructuredBufferSRV<pls::PaintData>(paintBufferRing(), |
| desc.pathCount, |
| desc.firstPaint) |
| : nullptr, |
| desc.pathCount > 0 ? replaceStructuredBufferSRV<pls::PaintAuxData>(paintAuxBufferRing(), |
| desc.pathCount, |
| desc.firstPaintAux) |
| : nullptr, |
| desc.contourCount > 0 ? replaceStructuredBufferSRV<pls::ContourData>(contourBufferRing(), |
| desc.contourCount, |
| desc.firstContour) |
| : nullptr, |
| }; |
| static_assert(PAINT_BUFFER_IDX == PATH_BUFFER_IDX + 1); |
| static_assert(PAINT_AUX_BUFFER_IDX == PAINT_BUFFER_IDX + 1); |
| static_assert(CONTOUR_BUFFER_IDX == PAINT_AUX_BUFFER_IDX + 1); |
| m_gpuContext->VSSetShaderResources(PATH_BUFFER_IDX, |
| std::size(storageBufferBufferSRVs), |
| storageBufferBufferSRVs); |
| if (desc.interlockMode == pls::InterlockMode::atomics) |
| { |
| // Atomic mode also accesses the paint and paintAux buffers from the pixel shader. |
| m_gpuContext->PSSetShaderResources(PAINT_BUFFER_IDX, 2, storageBufferBufferSRVs + 1); |
| } |
| |
| // Render the complex color ramps to the gradient texture (one 4-vertex strip instance per span). |
| if (desc.complexGradSpanCount > 0) |
| { |
| ID3D11Buffer* gradSpanBuffer = submitted_buffer(gradSpanBufferRing()); |
| UINT gradStride = sizeof(GradientSpan); |
| UINT gradOffset = 0; |
| m_gpuContext->IASetVertexBuffers(0, 1, &gradSpanBuffer, &gradStride, &gradOffset); |
| m_gpuContext->IASetInputLayout(m_colorRampLayout.Get()); |
| m_gpuContext->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); |
| |
| m_gpuContext->VSSetShader(m_colorRampVertexShader.Get(), NULL, 0); |
| |
| D3D11_VIEWPORT viewport = {0, |
| static_cast<float>(desc.complexGradRowsTop), |
| static_cast<float>(kGradTextureWidth), |
| static_cast<float>(desc.complexGradRowsHeight), |
| 0, |
| 1}; |
| m_gpuContext->RSSetViewports(1, &viewport); |
| |
| // Unbind the gradient texture from the pixel shader before rendering to it. |
| ID3D11ShaderResourceView* nullTextureView = nullptr; |
| m_gpuContext->PSSetShaderResources(GRAD_TEXTURE_IDX, 1, &nullTextureView); |
| |
| m_gpuContext->PSSetShader(m_colorRampPixelShader.Get(), NULL, 0); |
| |
| m_gpuContext->OMSetRenderTargets(1, m_gradTextureRTV.GetAddressOf(), NULL); |
| |
| m_gpuContext->DrawInstanced(4, desc.complexGradSpanCount, 0, desc.firstComplexGradSpan); |
| } |
| |
| // Copy the simple color ramps to the gradient texture with a direct texture update (no draw call). |
| if (desc.simpleGradTexelsHeight > 0) |
| { |
| assert(desc.simpleGradTexelsHeight * desc.simpleGradTexelsWidth * 4 <= |
| simpleColorRampsBufferRing()->capacityInBytes()); |
| D3D11_BOX box; |
| box.left = 0; |
| box.right = desc.simpleGradTexelsWidth; |
| box.top = 0; |
| box.bottom = desc.simpleGradTexelsHeight; |
| box.front = 0; |
| box.back = 1; |
| m_gpuContext->UpdateSubresource(m_gradTexture.Get(), |
| 0, |
| &box, |
| heap_buffer_contents(simpleColorRampsBufferRing()) + |
| desc.simpleGradDataOffsetInBytes, |
| kGradTextureWidth * 4, |
| 0); |
| } |
| |
| // Tessellate all curves into vertices in the tessellation texture. |
| if (desc.tessVertexSpanCount > 0) |
| { |
| ID3D11Buffer* tessSpanBuffer = submitted_buffer(tessSpanBufferRing()); |
| UINT tessStride = sizeof(TessVertexSpan); |
| UINT tessOffset = 0; |
| m_gpuContext->IASetVertexBuffers(0, 1, &tessSpanBuffer, &tessStride, &tessOffset); |
| m_gpuContext->IASetIndexBuffer(m_tessSpanIndexBuffer.Get(), DXGI_FORMAT_R16_UINT, 0); |
| m_gpuContext->IASetInputLayout(m_tessellateLayout.Get()); |
| m_gpuContext->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); |
| |
| m_gpuContext->VSSetShader(m_tessellateVertexShader.Get(), NULL, 0); |
| |
| // Unbind the tessellation texture from the vertex shader before rendering to it. |
| ID3D11ShaderResourceView* nullTessTextureView = NULL; |
| m_gpuContext->VSSetShaderResources(TESS_VERTEX_TEXTURE_IDX, 1, &nullTessTextureView); |
| |
| D3D11_VIEWPORT viewport = {0, |
| 0, |
| static_cast<float>(kTessTextureWidth), |
| static_cast<float>(desc.tessDataHeight), |
| 0, |
| 1}; |
| m_gpuContext->RSSetViewports(1, &viewport); |
| |
| m_gpuContext->PSSetShader(m_tessellatePixelShader.Get(), NULL, 0); |
| |
| m_gpuContext->OMSetRenderTargets(1, m_tessTextureRTV.GetAddressOf(), NULL); |
| |
| m_gpuContext->DrawIndexedInstanced(std::size(pls::kTessSpanIndices), |
| desc.tessVertexSpanCount, |
| 0, |
| 0, |
| desc.firstTessVertexSpan); |
| |
| if (m_d3dCapabilities.isIntel) |
| { |
| // FIXME! Intel needs this flush! Driver bug? Find a lighter workaround? |
| m_gpuContext->Flush(); |
| } |
| } |
| |
| // Set up and clear the PLS textures. |
| bool renderDirectToRasterPipeline = |
| pls::ShadersEmitColorToRasterPipeline(desc.interlockMode, desc.combinedShaderFeatures); |
| switch (desc.colorLoadAction) |
| { |
| case pls::LoadAction::clear: |
| if (renderDirectToRasterPipeline) |
| { |
| float clearColor4f[4]; |
| UnpackColorToRGBA32F(desc.clearColor, clearColor4f); |
| m_gpuContext->ClearRenderTargetView(renderTarget->targetRTV(), clearColor4f); |
| } |
| else if (m_d3dCapabilities.supportsTypedUAVLoadStore) |
| { |
| float clearColor4f[4]; |
| UnpackColorToRGBA32F(desc.clearColor, clearColor4f); |
| m_gpuContext->ClearUnorderedAccessViewFloat(renderTarget->targetUAV(), |
| clearColor4f); |
| } |
| else |
| { |
| UINT clearColorui[4] = {pls::SwizzleRiveColorToRGBA(desc.clearColor)}; /* Swizzled color goes in element 0; elements 1-3 are zero. */ |
| m_gpuContext->ClearUnorderedAccessViewUint(renderTarget->targetUAV(), clearColorui); |
| } |
| break; |
| case pls::LoadAction::preserveRenderTarget: |
| if (!renderDirectToRasterPipeline && !renderTarget->targetTextureSupportsUAV()) |
| { |
| // We're rendering to an offscreen UAV and preserving the target. Copy the target |
| // texture over. |
| blit_sub_rect(m_gpuContext.Get(), |
| renderTarget->offscreenTexture(), |
| renderTarget->targetTexture(), |
| desc.renderTargetUpdateBounds); |
| } |
| break; |
| case pls::LoadAction::dontCare: |
| break; |
| } |
| { |
| UINT coverageClear[4]{desc.coverageClearValue}; /* Clear value goes in element 0; elements 1-3 are zero. */ |
| m_gpuContext->ClearUnorderedAccessViewUint(renderTarget->coverageUAV(), coverageClear); |
| } |
| if (desc.combinedShaderFeatures & pls::ShaderFeatures::ENABLE_CLIPPING) |
| { |
| constexpr static UINT kZero[4]{}; |
| m_gpuContext->ClearUnorderedAccessViewUint(renderTarget->clipUAV(), kZero); |
| } |
| |
| // Execute the DrawList. Vertex slots 0-2 hold the patch, triangle and image-rect vertex buffers. |
| ID3D11Buffer* vertexBuffers[3] = { |
| m_patchVertexBuffer.Get(), |
| desc.hasTriangleVertices ? submitted_buffer(triangleBufferRing()) : NULL, |
| m_imageRectVertexBuffer.Get()}; |
| UINT vertexStrides[3] = {sizeof(pls::PatchVertex), |
| sizeof(pls::TriangleVertex), |
| sizeof(pls::ImageRectVertex)}; |
| UINT vertexOffsets[3] = {0, 0, 0}; |
| static_assert(kPatchVertexDataSlot == 0); |
| static_assert(kTriangleVertexDataSlot == 1); |
| static_assert(kImageRectVertexDataSlot == 2); |
| m_gpuContext->IASetVertexBuffers(0, 3, vertexBuffers, vertexStrides, vertexOffsets); |
| |
| D3D11_VIEWPORT viewport = {0, |
| 0, |
| static_cast<float>(renderTarget->width()), |
| static_cast<float>(renderTarget->height()), |
| 0, |
| 1}; |
| m_gpuContext->RSSetViewports(1, &viewport); |
| |
| m_gpuContext->PSSetConstantBuffers(IMAGE_DRAW_UNIFORM_BUFFER_IDX, |
| 1, |
| m_imageDrawUniforms.GetAddressOf()); |
| |
| ID3D11RenderTargetView* targetRTV = |
| renderDirectToRasterPipeline ? renderTarget->targetRTV() : NULL; |
| ID3D11UnorderedAccessView* plsUAVs[] = { |
| renderDirectToRasterPipeline ? NULL : renderTarget->targetUAV(), |
| renderTarget->coverageUAV(), |
| renderTarget->clipUAV(), |
| desc.interlockMode == pls::InterlockMode::rasterOrdering |
| ? renderTarget->originalDstColorUAV() |
| : NULL, // Atomic mode doesn't use the originalDstColor. |
| }; |
| static_assert(FRAMEBUFFER_PLANE_IDX == 0); |
| static_assert(COVERAGE_PLANE_IDX == 1); |
| static_assert(CLIP_PLANE_IDX == 2); |
| static_assert(ORIGINAL_DST_COLOR_PLANE_IDX == 3); |
| UINT numUsedUAVs = plsUAVs[ORIGINAL_DST_COLOR_PLANE_IDX] != nullptr ? std::size(plsUAVs) |
| : std::size(plsUAVs) - 1; /* Omit the trailing slot when it is null. */ |
| m_gpuContext->OMSetRenderTargetsAndUnorderedAccessViews( |
| renderDirectToRasterPipeline ? 1 : 0, |
| &targetRTV, |
| NULL, |
| renderDirectToRasterPipeline ? 1 : 0, /* UAVStartSlot: UAVs begin at slot 1 when an RTV is bound. */ |
| renderDirectToRasterPipeline ? numUsedUAVs - 1 : numUsedUAVs, |
| renderDirectToRasterPipeline ? plsUAVs + 1 : plsUAVs, /* Skip the (null) framebuffer entry in that case. */ |
| NULL); |
| |
| if (renderDirectToRasterPipeline) |
| { |
| // When rendering directly to the target RTV, we use the built-in blend hardware for opacity |
| // and antialiasing. |
| m_gpuContext->OMSetBlendState(m_srcOverBlendState.Get(), NULL, 0xffffffff); |
| } |
| |
| // Set these last, when the tess and grad textures are no longer bound as render targets. |
| m_gpuContext->VSSetShaderResources(TESS_VERTEX_TEXTURE_IDX, 1, m_tessTextureSRV.GetAddressOf()); |
| m_gpuContext->PSSetShaderResources(GRAD_TEXTURE_IDX, 1, m_gradTextureSRV.GetAddressOf()); |
| |
| const char* const imageDrawUniformData = heap_buffer_contents(imageDrawUniformBufferRing()); |
| |
| bool renderPassHasCoalescedResolveAndTransfer = |
| desc.interlockMode == pls::InterlockMode::atomics && !renderDirectToRasterPipeline && |
| !renderTarget->targetTextureSupportsUAV(); |
| |
| for (const DrawBatch& batch : *desc.drawList) |
| { |
| if (batch.elementCount == 0) |
| { |
| continue; |
| } |
| |
| DrawType drawType = batch.drawType; |
| auto shaderFeatures = desc.interlockMode == pls::InterlockMode::atomics |
| ? desc.combinedShaderFeatures |
| : batch.shaderFeatures; /* Atomic mode uses the flush-wide feature set for every batch. */ |
| auto pixelShaderMiscFlags = |
| drawType == pls::DrawType::plsAtomicResolve && renderPassHasCoalescedResolveAndTransfer |
| ? pls::ShaderMiscFlags::coalescedResolveAndTransfer |
| : pls::ShaderMiscFlags::none; |
| setPipelineLayoutAndShaders(drawType, |
| shaderFeatures, |
| desc.interlockMode, |
| pixelShaderMiscFlags); |
| |
| if (auto imageTextureD3D = static_cast<const PLSTextureD3DImpl*>(batch.imageTexture)) /* Skipped when the batch has no image texture (null). */ |
| { |
| m_gpuContext->PSSetShaderResources(IMAGE_TEXTURE_IDX, |
| 1, |
| imageTextureD3D->srvAddressOf()); |
| } |
| |
| switch (drawType) |
| { |
| case DrawType::midpointFanPatches: |
| case DrawType::outerCurvePatches: |
| { |
| m_gpuContext->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); |
| m_gpuContext->IASetIndexBuffer(m_patchIndexBuffer.Get(), DXGI_FORMAT_R16_UINT, 0); |
| m_gpuContext->RSSetState(m_backCulledRasterState[desc.wireframe].Get()); |
| DrawUniforms drawUniforms(batch.baseElement); |
| m_gpuContext->UpdateSubresource(m_drawUniforms.Get(), 0, NULL, &drawUniforms, 0, 0); |
| m_gpuContext->DrawIndexedInstanced(PatchIndexCount(drawType), |
| batch.elementCount, |
| PatchBaseIndex(drawType), |
| 0, |
| batch.baseElement); |
| break; |
| } |
| case DrawType::interiorTriangulation: |
| { |
| m_gpuContext->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); |
| m_gpuContext->RSSetState(m_backCulledRasterState[desc.wireframe].Get()); |
| m_gpuContext->Draw(batch.elementCount, batch.baseElement); |
| break; |
| } |
| case DrawType::imageRect: |
| m_gpuContext->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); |
| m_gpuContext->IASetIndexBuffer(m_imageRectIndexBuffer.Get(), |
| DXGI_FORMAT_R16_UINT, |
| 0); |
| m_gpuContext->RSSetState(m_doubleSidedRasterState[desc.wireframe].Get()); |
| m_gpuContext->UpdateSubresource(m_imageDrawUniforms.Get(), |
| 0, |
| NULL, |
| imageDrawUniformData + batch.imageDrawDataOffset, |
| 0, |
| 0); |
| m_gpuContext->DrawIndexed(std::size(pls::kImageRectIndices), 0, 0); |
| break; |
| case DrawType::imageMesh: |
| { |
| LITE_RTTI_CAST_OR_BREAK(vertexBuffer, /* NOTE(review): presumably breaks out of the switch if the cast fails; confirm against the macro. */ |
| const RenderBufferD3DImpl*, |
| batch.vertexBuffer); |
| LITE_RTTI_CAST_OR_BREAK(uvBuffer, const RenderBufferD3DImpl*, batch.uvBuffer); |
| LITE_RTTI_CAST_OR_BREAK(indexBuffer, const RenderBufferD3DImpl*, batch.indexBuffer); |
| ID3D11Buffer* imageMeshBuffers[] = {vertexBuffer->buffer(), uvBuffer->buffer()}; |
| UINT imageMeshStrides[] = {sizeof(Vec2D), sizeof(Vec2D)}; |
| UINT imageMeshOffsets[] = {0, 0}; |
| m_gpuContext->IASetVertexBuffers(kImageMeshVertexDataSlot, |
| 2, |
| imageMeshBuffers, |
| imageMeshStrides, |
| imageMeshOffsets); |
| static_assert(kImageMeshUVDataSlot == kImageMeshVertexDataSlot + 1); |
| m_gpuContext->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); |
| m_gpuContext->IASetIndexBuffer(indexBuffer->buffer(), DXGI_FORMAT_R16_UINT, 0); |
| m_gpuContext->RSSetState(m_doubleSidedRasterState[desc.wireframe].Get()); |
| m_gpuContext->UpdateSubresource(m_imageDrawUniforms.Get(), |
| 0, |
| NULL, |
| imageDrawUniformData + batch.imageDrawDataOffset, |
| 0, |
| 0); |
| m_gpuContext->DrawIndexed(batch.elementCount, batch.baseElement, 0); |
| break; |
| } |
| case DrawType::plsAtomicResolve: |
| assert(desc.interlockMode == pls::InterlockMode::atomics); |
| m_gpuContext->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); |
| m_gpuContext->RSSetState(m_backCulledRasterState[0].Get()); |
| if (renderPassHasCoalescedResolveAndTransfer) |
| { |
| // Bind the actual target texture as the render target for the PLS resolve, so |
| // we don't have to copy to it after the render pass. |
| // (And since we're changing the render target, this also better be the final |
| // batch of the render pass.) |
| assert(&batch == &desc.drawList->tail()); |
| assert(!renderDirectToRasterPipeline); |
| assert(!renderTarget->targetTextureSupportsUAV()); |
| ID3D11RenderTargetView* resolveRTV = renderTarget->targetRTV(); |
| ID3D11UnorderedAccessView* resolveUAVs[] = { |
| renderTarget->coverageUAV(), |
| renderTarget->clipUAV(), |
| renderTarget->targetUAV(), // Bind the target UAV (for reading) to a |
| // different slot for the resolve because D3D |
| // doesn't let us use slot 0 when there's a |
| // render target. |
| }; |
| static_assert(COVERAGE_PLANE_IDX == 1); |
| static_assert(CLIP_PLANE_IDX == 2); |
| static_assert(COALESCED_OFFSCREEN_FRAMEBUFFER_PLANE_IDX == 3); |
| m_gpuContext->OMSetRenderTargetsAndUnorderedAccessViews(1, |
| &resolveRTV, |
| NULL, |
| 1, |
| std::size(resolveUAVs), |
| resolveUAVs, |
| NULL); |
| } |
| m_gpuContext->Draw(4, 0); /* One 4-vertex triangle strip. */ |
| break; |
| case DrawType::plsAtomicInitialize: |
| case DrawType::stencilClipReset: |
| RIVE_UNREACHABLE(); |
| } |
| } |
| |
| if (desc.interlockMode == pls::InterlockMode::rasterOrdering && |
| !renderTarget->targetTextureSupportsUAV()) |
| { |
| // We rendered to an offscreen UAV and did not resolve to the renderTarget. Copy back to the |
| // main target. |
| assert(!renderDirectToRasterPipeline); |
| assert(!renderPassHasCoalescedResolveAndTransfer); |
| blit_sub_rect(m_gpuContext.Get(), |
| renderTarget->targetTexture(), |
| renderTarget->offscreenTexture(), |
| desc.renderTargetUpdateBounds); |
| } |
| } |
| } // namespace rive::pls |