Slightly improve cubic chop precision in stroke tessellation shaders

Also adds GrGLSLVertexGeoBuilder::appendRawFunctionDefinition(). This
allows us to define functions in vertex and geometry shaders whose names
don't get mangled.

Bug: skia:10419
Change-Id: I90319b54bcbbb7000c7809cb6ce8d1969a3bd8c5
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/327456
Reviewed-by: Brian Salomon <bsalomon@google.com>
Commit-Queue: Chris Dalton <csmartdalton@google.com>
diff --git a/src/gpu/glsl/GrGLSLVertexGeoBuilder.h b/src/gpu/glsl/GrGLSLVertexGeoBuilder.h
index e5f6630..882f22a 100644
--- a/src/gpu/glsl/GrGLSLVertexGeoBuilder.h
+++ b/src/gpu/glsl/GrGLSLVertexGeoBuilder.h
@@ -15,6 +15,14 @@
  * geometry for the rasterizer.
  */
 class GrGLSLVertexGeoBuilder : public GrGLSLShaderBuilder {
+public:
+    // Copies the given text verbatim to the function definitions section. Does not mangle the name.
+    // 'functionDefinition' should be a fully valid SkSL function, complete with return type, name,
+    // arguments, braces, and a body.
+    void insertFunction(const char* functionDefinition) {
+        this->functions().append(functionDefinition);
+    }
+
 protected:
     GrGLSLVertexGeoBuilder(GrGLSLProgramBuilder* program) : INHERITED(program) {}
 
diff --git a/src/gpu/tessellate/GrStrokeTessellateShader.cpp b/src/gpu/tessellate/GrStrokeTessellateShader.cpp
index e026342..dab581a 100644
--- a/src/gpu/tessellate/GrStrokeTessellateShader.cpp
+++ b/src/gpu/tessellate/GrStrokeTessellateShader.cpp
@@ -76,6 +76,15 @@
         v->declareGlobal(GrShaderVar("vsTans01", kFloat4_GrSLType, TypeModifier::Out));
         v->declareGlobal(GrShaderVar("vsTans23", kFloat4_GrSLType, TypeModifier::Out));
         v->declareGlobal(GrShaderVar("vsPrevJoinTangent", kFloat2_GrSLType, TypeModifier::Out));
+
+        // Unlike mix(), this does not return b when t==1. But it otherwise seems to get better
+        // precision than "a*(1 - t) + b*t" for things like chopping cubics on exact cusp points.
+        // The responsibility falls on the caller to ensure t != 1 before calling.
+        v->insertFunction(R"(
+        float4 unchecked_mix(float4 a, float4 b, float4 t) {
+            return fma(b - a, t, a);
+        })");
+
         v->codeAppendf(R"(
         // Unpack the control points.
         float4x2 P = float4x2(inputPts01, inputPts23);
@@ -206,13 +215,13 @@
         }
 
         // Chop the curve at chopT[0] and chopT[1].
-        float4 ab = mix(P[0].xyxy, P[1].xyxy, chopT.sstt);
-        float4 bc = mix(P[1].xyxy, P[2].xyxy, chopT.sstt);
-        float4 cd = mix(P[2].xyxy, P[3].xyxy, chopT.sstt);
-        float4 abc = mix(ab, bc, chopT.sstt);
-        float4 bcd = mix(bc, cd, chopT.sstt);
-        float4 abcd = mix(abc, bcd, chopT.sstt);
-        float4 middle = mix(abc, bcd, chopT.ttss);
+        float4 ab = unchecked_mix(P[0].xyxy, P[1].xyxy, chopT.sstt);
+        float4 bc = unchecked_mix(P[1].xyxy, P[2].xyxy, chopT.sstt);
+        float4 cd = unchecked_mix(P[2].xyxy, P[3].xyxy, chopT.sstt);
+        float4 abc = unchecked_mix(ab, bc, chopT.sstt);
+        float4 bcd = unchecked_mix(bc, cd, chopT.sstt);
+        float4 abcd = unchecked_mix(abc, bcd, chopT.sstt);
+        float4 middle = unchecked_mix(abc, bcd, chopT.ttss);
 
         // Find tangents at the chop points if an inner tangent wasn't specified.
         if (innerTangents[0] == float2(0)) {
@@ -522,6 +531,13 @@
 
     uniform vec4 sk_RTAdjust;
 
+    // Unlike mix(), this does not return b when t==1. But it otherwise seems to get better
+    // precision than "a*(1 - t) + b*t" for things like chopping cubics on exact cusp points.
+    // We override this result anyway when t==1 so it shouldn't be a problem.
+    vec2 unchecked_mix(vec2 a, vec2 b, float t) {
+        return fma(b - a, vec2(t), a);
+    }
+
     void main() {
         // Our patch is composed of exactly "numTotalCombinedSegments + 1" stroke-width edges that
         // run orthogonal to the curve and make a strip of "numTotalCombinedSegments" quads.
@@ -666,12 +682,12 @@
         float T = max(parametricT, radialT);
 
         // Evaluate the cubic at T. Use De Casteljau's for its accuracy and stability.
-        vec2 ab = mix(P[0], P[1], T);
-        vec2 bc = mix(P[1], P[2], T);
-        vec2 cd = mix(P[2], P[3], T);
-        vec2 abc = mix(ab, bc, T);
-        vec2 bcd = mix(bc, cd, T);
-        vec2 position = mix(abc, bcd, T);
+        vec2 ab = unchecked_mix(P[0], P[1], T);
+        vec2 bc = unchecked_mix(P[1], P[2], T);
+        vec2 cd = unchecked_mix(P[2], P[3], T);
+        vec2 abc = unchecked_mix(ab, bc, T);
+        vec2 bcd = unchecked_mix(bc, cd, T);
+        vec2 position = unchecked_mix(abc, bcd, T);
 
         // If we went with T=parametricT, then update the tangent. Otherwise leave it at the radial
         // tangent found previously. (In the event that parametricT == radialT, we keep the radial