Add a conditional define for HLSL minimum 16-bit precision types (min16int/min16uint), gated on a new D3D11 capability check, and use the make_half* constructor macros instead of C-style casts in the shaders.
diff --git a/include/rive/pls/d3d/pls_render_context_d3d_impl.hpp b/include/rive/pls/d3d/pls_render_context_d3d_impl.hpp
index d39a2dc..70a9e4a 100644
--- a/include/rive/pls/d3d/pls_render_context_d3d_impl.hpp
+++ b/include/rive/pls/d3d/pls_render_context_d3d_impl.hpp
@@ -81,6 +81,7 @@
 {
     bool supportsRasterizerOrderedViews = false;
     bool supportsTypedUAVLoadStore = false; // Can we load/store all UAV formats used by Rive?
+    bool supportsMin16Precision = false; // Can we use minimum 16-bit precision types (e.g. min16int)?
     bool isIntel = false;
 };
diff --git a/renderer/d3d/pls_render_context_d3d_impl.cpp b/renderer/d3d/pls_render_context_d3d_impl.cpp
index 7f8f20b..571d3be 100644
--- a/renderer/d3d/pls_render_context_d3d_impl.cpp
+++ b/renderer/d3d/pls_render_context_d3d_impl.cpp
@@ -8,6 +8,7 @@
 #include "shaders/constants.glsl"

 #include <D3DCompiler.h>
+#include <d3d11_3.h>
 #include <sstream>

 #include "shaders/out/generated/advanced_blend.glsl.hpp"
@@ -80,30 +81,45 @@
     D3DCapabilities d3dCapabilities;
     D3D11_FEATURE_DATA_D3D11_OPTIONS2 d3d11Options2;

-    if (gpu->GetFeatureLevel() >= D3D_FEATURE_LEVEL_11_1 &&
-        SUCCEEDED(gpu->CheckFeatureSupport(D3D11_FEATURE_D3D11_OPTIONS2,
-                                           &d3d11Options2,
-                                           sizeof(D3D11_FEATURE_DATA_D3D11_OPTIONS2))))
+    if (gpu->GetFeatureLevel() >= D3D_FEATURE_LEVEL_11_1)
     {
-        d3dCapabilities.supportsRasterizerOrderedViews = d3d11Options2.ROVsSupported;
-        if (d3d11Options2.TypedUAVLoadAdditionalFormats)
+        if (SUCCEEDED(gpu->CheckFeatureSupport(D3D11_FEATURE_D3D11_OPTIONS2,
+                                               &d3d11Options2,
+                                               sizeof(D3D11_FEATURE_DATA_D3D11_OPTIONS2))))
         {
-            // TypedUAVLoadAdditionalFormats is true. Now check if we can both load and
-            // store all formats used by Rive (currently only RGBA8):
-            // https://learn.microsoft.com/en-us/windows/win32/direct3d11/typed-unordered-access-view-loads.
-            D3D11_FEATURE_DATA_FORMAT_SUPPORT2 d3d11Format2{};
-            d3d11Format2.InFormat = DXGI_FORMAT_R8G8B8A8_UNORM;
-            if (SUCCEEDED(gpu->CheckFeatureSupport(D3D11_FEATURE_FORMAT_SUPPORT2,
-                                                   &d3d11Format2,
-                                                   sizeof(d3d11Format2))))
+            d3dCapabilities.supportsRasterizerOrderedViews = d3d11Options2.ROVsSupported;
+            if (d3d11Options2.TypedUAVLoadAdditionalFormats)
             {
-                constexpr UINT loadStoreFlags =
-                    D3D11_FORMAT_SUPPORT2_UAV_TYPED_LOAD | D3D11_FORMAT_SUPPORT2_UAV_TYPED_STORE;
-                d3dCapabilities.supportsTypedUAVLoadStore =
-                    (d3d11Format2.OutFormatSupport2 & loadStoreFlags) == loadStoreFlags;
+                // TypedUAVLoadAdditionalFormats is true. Now check if we can both load and
+                // store all formats used by Rive (currently only RGBA8):
+                // https://learn.microsoft.com/en-us/windows/win32/direct3d11/typed-unordered-access-view-loads.
+                D3D11_FEATURE_DATA_FORMAT_SUPPORT2 d3d11Format2{};
+                d3d11Format2.InFormat = DXGI_FORMAT_R8G8B8A8_UNORM;
+                if (SUCCEEDED(gpu->CheckFeatureSupport(D3D11_FEATURE_FORMAT_SUPPORT2,
+                                                       &d3d11Format2,
+                                                       sizeof(d3d11Format2))))
+                {
+                    constexpr UINT loadStoreFlags =
+                        D3D11_FORMAT_SUPPORT2_UAV_TYPED_LOAD | D3D11_FORMAT_SUPPORT2_UAV_TYPED_STORE;
+                    d3dCapabilities.supportsTypedUAVLoadStore =
+                        (d3d11Format2.OutFormatSupport2 & loadStoreFlags) == loadStoreFlags;
+                }
             }
         }
+
+        // Check if we can use HLSL minimum precision types (e.g. min16int).
+        D3D11_FEATURE_DATA_SHADER_MIN_PRECISION_SUPPORT d3d11MinPrecisionSupport;
+        if (SUCCEEDED(gpu->CheckFeatureSupport(D3D11_FEATURE_SHADER_MIN_PRECISION_SUPPORT,
+                                               &d3d11MinPrecisionSupport,
+                                               sizeof(d3d11MinPrecisionSupport))))
+        {
+            const UINT allStageMinPrecision =
+                (d3d11MinPrecisionSupport.AllOtherShaderStagesMinPrecision & d3d11MinPrecisionSupport.PixelShaderMinPrecision);

+            d3dCapabilities.supportsMin16Precision = (allStageMinPrecision & D3D11_SHADER_MIN_PRECISION_16_BIT) != 0;
+        }
     }
+
     if (contextOptions.disableRasterizerOrderedViews)
     {
         d3dCapabilities.supportsRasterizerOrderedViews = false;
@@ -112,6 +128,7 @@
     {
         d3dCapabilities.supportsTypedUAVLoadStore = false;
     }
+
     d3dCapabilities.isIntel = contextOptions.isIntel;

     auto plsContextImpl = std::unique_ptr<PLSRenderContextD3DImpl>(
@@ -937,6 +954,10 @@
     {
         s << "#define " << GLSL_ENABLE_TYPED_UAV_LOAD_STORE << '\n';
     }
+    if (m_d3dCapabilities.supportsMin16Precision)
+    {
+        s << "#define " << GLSL_ENABLE_MIN_16_PRECISION << '\n';
+    }
     if (pixelShaderMiscFlags & pls::ShaderMiscFlags::coalescedResolveAndTransfer)
     {
         s << "#define " << GLSL_COALESCED_PLS_RESOLVE_AND_TRANSFER << '\n';
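Reviewer note: the new capability probe can be exercised on its own. The sketch below is not part of the patch (device creation and error handling are pared down); it runs the same D3D11_FEATURE_SHADER_MIN_PRECISION_SUPPORT query and applies the same rule as the code above — both the pixel-shader mask and the all-other-stages mask must advertise 16-bit support.

    // Standalone C++ sketch: query HLSL minimum-precision support on the default adapter.
    #include <d3d11_3.h>
    #include <cstdio>
    #pragma comment(lib, "d3d11.lib")

    int main()
    {
        ID3D11Device* device = nullptr;
        if (FAILED(D3D11CreateDevice(nullptr, D3D_DRIVER_TYPE_HARDWARE, nullptr, 0,
                                     nullptr, 0, D3D11_SDK_VERSION, &device, nullptr, nullptr)))
        {
            return 1;
        }

        D3D11_FEATURE_DATA_SHADER_MIN_PRECISION_SUPPORT minPrecision{};
        if (SUCCEEDED(device->CheckFeatureSupport(D3D11_FEATURE_SHADER_MIN_PRECISION_SUPPORT,
                                                  &minPrecision,
                                                  sizeof(minPrecision))))
        {
            // Rive uses min16 types in every stage, so require 16-bit support in both masks.
            const UINT allStages = minPrecision.AllOtherShaderStagesMinPrecision &
                                   minPrecision.PixelShaderMinPrecision;
            std::printf("min16 precision: %s\n",
                        (allStages & D3D11_SHADER_MIN_PRECISION_16_BIT) ? "supported" : "unsupported");
        }
        device->Release();
        return 0;
    }

Per the D3D11 docs, a zero mask means the driver only runs at full 32-bit precision; a nonzero mask means it may evaluate those stages at the reduced precision, so min16 types act as a performance hint rather than a guarantee.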
diff --git a/renderer/shaders/advanced_blend.glsl b/renderer/shaders/advanced_blend.glsl
index bd4fc18..7b33974 100644
--- a/renderer/shaders/advanced_blend.glsl
+++ b/renderer/shaders/advanced_blend.glsl
@@ -172,14 +172,14 @@
         case BLEND_MODE_COLORDODGE:
             // ES3 spec, 4.5.1 Range and Precision: dividing a non-zero by 0 results in the
             // appropriately signed IEEE Inf.
-            f = (half)mix(min(dst.rgb / (1. - src.rgb), make_half3(1, 1, 1)),
+            f = mix(min(dst.rgb / (1. - src.rgb), make_half3(1, 1, 1)),
                     make_half3(0, 0, 0),
                     lessThanEqual(dst.rgb, make_half3(0, 0, 0)));
             break;
         case BLEND_MODE_COLORBURN:
             // ES3 spec, 4.5.1 Range and Precision: dividing a non-zero by 0 results in the
             // appropriately signed IEEE Inf.
-            f = (half)mix(1. - min((1. - dst.rgb) / src.rgb, 1.),
+            f = mix(1. - min((1. - dst.rgb) / src.rgb, 1.),
                     make_half3(1, 1, 1),
                     greaterThanEqual(dst.rgb, make_half3(1, 1, 1)));
             break;
@@ -204,7 +204,7 @@
                     f[i] = dst[i] + (2. * src[i] - 1.) * dst[i] *
                                         ((16. * dst[i] - 12.) * dst[i] + 3.);
                 else
-                    f[i] = dst[i] + (2. * src[i] - 1.) * ((half)sqrt(dst[i]) - dst[i]);
+                    f[i] = dst[i] + (2. * src[i] - 1.) * make_half(sqrt(dst[i]) - dst[i]);
             }
             break;
         }
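For anyone untangling the guarded mix() in the COLORDODGE case above: per channel the factor is 0 wherever dst <= 0, and min(dst / (1 - src), 1) elsewhere, leaning on the cited ES3 rule that a nonzero value divided by 0 produces IEEE Inf (which the min() then clamps). A scalar C++ restatement, illustrative only and not part of the patch:

    #include <algorithm>

    // Per-channel color-dodge factor, mirroring the shader's guarded mix().
    static float colorDodgeFactor(float src, float dst)
    {
        if (dst <= 0.f)
            return 0.f; // the lessThanEqual() selector arm of the mix()
        // 1 - src may be 0 here; IEEE division yields +Inf, and min() clamps it to 1.
        return std::min(dst / (1.f - src), 1.f);
    }

The COLORBURN case is the mirror image: 1 wherever dst >= 1, and 1 - min((1 - dst) / src, 1) elsewhere.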
diff --git a/renderer/shaders/atomic_draw.glsl b/renderer/shaders/atomic_draw.glsl
index c721121..bc5df67 100644
--- a/renderer/shaders/atomic_draw.glsl
+++ b/renderer/shaders/atomic_draw.glsl
@@ -129,7 +129,7 @@
         if (aaRadiusX >= .5)
         {
             vertexPosition.x = .5;
-            v_edgeCoverage *= (half)(.5 / aaRadiusX);
+            v_edgeCoverage *= make_half(.5 / aaRadiusX);
         }
         else
         {
@@ -139,7 +139,7 @@
         if (aaRadiusY >= .5)
         {
             vertexPosition.y = .5;
-            v_edgeCoverage *= (half)(.5 / aaRadiusY);
+            v_edgeCoverage *= make_half(.5 / aaRadiusY);
         }
         else
         {
@@ -303,7 +303,7 @@
 half from_fixed(uint x)
 {
-    return (half)(float(x) * FIXED_COVERAGE_INVERSE_FACTOR +
+    return make_half(float(x) * FIXED_COVERAGE_INVERSE_FACTOR +
                   (-FIXED_COVERAGE_ZERO * FIXED_COVERAGE_INVERSE_FACTOR));
 }
@@ -316,7 +316,7 @@
     half coverage = abs(coverageCount);
 #ifdef @ENABLE_EVEN_ODD
     if ((paintData.x & PAINT_FLAG_EVEN_ODD) != 0u)
-        coverage = 1. - (half)abs(fract(coverage * .5) * 2. + -1.);
+        coverage = 1. - make_half(abs(fract(coverage * .5) * 2. + -1.));
 #endif // ENABLE_EVEN_ODD
     coverage = min(coverage, make_half(1)); // This also caps stroke coverage, which can be >1.
 #ifdef @ENABLE_CLIPPING
@@ -574,7 +574,7 @@
     // get resolved later like other draws because the @imageTexture binding is liable to change,
     // and furthermore in the case of imageMeshes, we can't calculate UV coordinates based on
     // fragment position.
-    half4 imageColor = (half4)TEXTURE_SAMPLE(@imageTexture, imageSampler, v_texCoord);
+    half4 imageColor = make_half4(TEXTURE_SAMPLE(@imageTexture, imageSampler, v_texCoord));
     half meshCoverage = 1.;
 #ifdef @DRAW_IMAGE_RECT
     meshCoverage = min(v_edgeCoverage, meshCoverage);
@@ -611,7 +611,7 @@
         meshCoverage = min(meshCoverage, clipCoverage);
     }
 #endif // ENABLE_CLIPPING
-    imageColor.a *= meshCoverage * (half)imageDrawUniforms.opacity;
+    imageColor.a *= meshCoverage * make_half(imageDrawUniforms.opacity);

 #ifdef @ENABLE_ADVANCED_BLEND
     if (lastColor.a != .0 || imageColor.a != .0)
diff --git a/renderer/shaders/draw_path.glsl b/renderer/shaders/draw_path.glsl
index bad28e3..93d7ed8 100644
--- a/renderer/shaders/draw_path.glsl
+++ b/renderer/shaders/draw_path.glsl
@@ -360,7 +360,7 @@
     half coverage = abs(coverageCount);
 #ifdef @ENABLE_EVEN_ODD
     if (v_pathID < .0 /*even-odd*/)
-        coverage = 1. - (half)abs(fract(coverage * .5) * 2. + -1.);
+        coverage = 1. - make_half(abs(fract(coverage * .5) * 2. + -1.));
 #endif
     coverage = min(coverage, make_half(1)); // This also caps stroke coverage, which can be >1.
diff --git a/renderer/shaders/draw_path_common.glsl b/renderer/shaders/draw_path_common.glsl
index 807455f..9114cfe 100644
--- a/renderer/shaders/draw_path_common.glsl
+++ b/renderer/shaders/draw_path_common.glsl
@@ -291,7 +291,7 @@
     float2x2 M = make_float2x2(uintBitsToFloat(STORAGE_BUFFER_LOAD4(@pathBuffer, o_pathID * 2u)));
     uint4 pathData = STORAGE_BUFFER_LOAD4(@pathBuffer, o_pathID * 2u + 1u);
     float2 translate = uintBitsToFloat(pathData.xy);
-    o_windingWeight = half(floatBitsToInt(triangleVertex.z) >> 16) * (half)sign(determinant(M));
+    o_windingWeight = half(floatBitsToInt(triangleVertex.z) >> 16) * make_half(sign(determinant(M)));
     return MUL(M, triangleVertex.xy) + translate;
 }
 #endif // @DRAW_INTERIOR_TRIANGLES
diff --git a/renderer/shaders/hlsl.glsl b/renderer/shaders/hlsl.glsl
index eeea827..6154689 100644
--- a/renderer/shaders/hlsl.glsl
+++ b/renderer/shaders/hlsl.glsl
@@ -43,10 +43,26 @@
 #endif
 $typedef float3 packed_float3;

-#define make_half half
-#define make_half2 half2
-#define make_half3 half3
-#define make_half4 half4
+#define make_half $half
+#define make_half2 $half2
+#define make_half3 $half3
+#define make_half4 $half4
+
+#ifdef @ENABLE_MIN_16_PRECISION
+
+$typedef $min16int short;
+#define make_short $min16int
+#define make_short2 $min16int2
+#define make_short3 $min16int3
+#define make_short4 $min16int4
+
+$typedef $min16uint ushort;
+#define make_ushort $min16uint
+#define make_ushort2 $min16uint2
+#define make_ushort3 $min16uint3
+#define make_ushort4 $min16uint4
+
+#else

 $typedef $int short;
 #define make_short $int
@@ -60,6 +76,8 @@
 #define make_ushort3 $uint3
 #define make_ushort4 $uint4

+#endif
+
 #define make_half3x4 $half3x4

 #define INLINE $inline
@@ -263,7 +281,6 @@
 #define floatBitsToInt $asint
 #define floatBitsToUint $asuint
 #define fract $frac
-#define mix $lerp
 #define inversesqrt $rsqrt
 #define notEqual(A, B) ((A) != (B))
 #define lessThanEqual(A, B) ((A) <= (B))
@@ -324,3 +341,14 @@
     float2x2 adjoint = float2x2(m[1][1], -m[0][1], -m[1][0], m[0][0]);
     return adjoint * (1. / determinant(m));
 }
+
+INLINE float mix(float x, float y, float s) { return $lerp(x, y, s); }
+INLINE float2 mix(float2 x, float2 y, float2 s) { return $lerp(x, y, s); }
+INLINE float3 mix(float3 x, float3 y, float3 s) { return $lerp(x, y, s); }
+INLINE float4 mix(float4 x, float4 y, float4 s) { return $lerp(x, y, s); }
+
+// Use manual implementations here since $lerp has no overloads for the half types.
+INLINE half mix(half x, half y, half s) { return x + s * (y - x); }
+INLINE half2 mix(half2 x, half2 y, half2 s) { return x + s * (y - x); }
+INLINE half3 mix(half3 x, half3 y, half3 s) { return x + s * (y - x); }
+INLINE half4 mix(half4 x, half4 y, half4 s) { return x + s * (y - x); }
\ No newline at end of file
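A final note on the new mix() overloads: they all compute the standard lerp identity x + s * (y - x), so behavior is unchanged from the removed "#define mix $lerp" in the float cases. A tiny C++ sanity check of the identity, illustrative only:

    #include <cassert>

    // mix(x, y, s) = x + s * (y - x), the form used by the half overloads above.
    static float mixf(float x, float y, float s) { return x + s * (y - x); }

    int main()
    {
        assert(mixf(2.f, 6.f, .25f) == 3.f); // 2 + 0.25 * (6 - 2)
        assert(mixf(2.f, 6.f, 0.f) == 2.f);  // s = 0 returns x exactly
        assert(mixf(2.f, 6.f, 1.f) == 6.f);  // s = 1 is exact here because 6 - 2 rounds exactly
        return 0;
    }

One caveat on this form: x + s * (y - x) reproduces y at s = 1 only when y - x rounds exactly; the alternative x * (1 - s) + y * s is endpoint-exact at the cost of an extra multiply.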