renderer/src/shaders/tessellate.glsl - external/github.com/rive-app/rive-cpp - Git at Google

 /*
  * Copyright 2020 Google LLC.
  *
  * Use of this source code is governed by a BSD-style license that can be
  * found in the LICENSE file.
  *
  * Initial import from
  * skia:src/gpu/ganesh/tessellate/GrStrokeTessellationShader.cpp
  *
  * Copyright 2022 Rive
  */

 #define MAX_PARAMETRIC_SEGMENTS_LOG2 10 // Max 1024 segments.

 #ifdef @VERTEX
 ATTR_BLOCK_BEGIN(Attrs)
 ATTR(
     0,
     float4,
     @a_p0p1_); // End in '_' because D3D interprets the '1' as a semantic index.
 ATTR(1, float4, @a_p2p3_);
 ATTR(2, float4, @a_joinTan_and_ys); // [joinTangent, y, reflectionY]
 #ifdef SPLIT_UINT4_ATTRIBUTES
 ATTR(3, uint, @a_x0x1);
 ATTR(4, uint, @a_reflectionX0X1);
 ATTR(5, uint, @a_segmentCounts);
 ATTR(6, uint, @a_contourIDWithFlags);
 #else
 ATTR(3,
      uint4,
      @a_args); // [x0x1, reflectionX0X1, segmentCounts, contourIDWithFlags]
 #endif
 ATTR_BLOCK_END
 #endif

 VARYING_BLOCK_BEGIN
 NO_PERSPECTIVE VARYING(0, float4, v_p0p1);
 NO_PERSPECTIVE VARYING(1, float4, v_p2p3);
 NO_PERSPECTIVE VARYING(
     2,
     float4,
     v_args); // [vertexIdx, totalVertexCount, joinSegmentCount,
              //  parametricSegmentCount, radsPerPolarSegment]
 NO_PERSPECTIVE VARYING(3,
                        float3,
                        v_joinArgs); // [joinTangent, radsPerJoinSegment]
 FLAT VARYING(4, uint, v_contourIDWithFlags);
 VARYING_BLOCK_END

 // Tangent of the curve at T=0 and T=1.
 INLINE float2x2 find_tangents(float2 p0, float2 p1, float2 p2, float2 p3)
 {
     float2x2 t;
     t[0] = (any(notEqual(p0, p1)) ? p1 : any(notEqual(p1, p2)) ? p2 : p3) - p0;
     t[1] = p3 - (any(notEqual(p3, p2)) ? p2 : any(notEqual(p2, p1)) ? p1 : p0);
     return t;
 }

 #ifdef @VERTEX
 VERTEX_TEXTURE_BLOCK_BEGIN
 VERTEX_TEXTURE_BLOCK_END

 VERTEX_STORAGE_BUFFER_BLOCK_BEGIN
 STORAGE_BUFFER_U32x4(PATH_BUFFER_IDX, PathBuffer, @pathBuffer);
 STORAGE_BUFFER_U32x4(CONTOUR_BUFFER_IDX, ContourBuffer, @contourBuffer);
 VERTEX_STORAGE_BUFFER_BLOCK_END

 INLINE float cosine_between_vectors(float2 a, float2 b)
 {
     // FIXME(crbug.com/800804,skbug.com/11268): This can overflow if we don't
     // normalize exponents.
     float ab_cosTheta = dot(a, b);
     float ab_pow2 = dot(a, a) * dot(b, b);
     return (ab_pow2 == .0) ? 1.
                            : clamp(ab_cosTheta * inversesqrt(ab_pow2), -1., 1.);
 }

 VERTEX_MAIN(@tessellateVertexMain, Attrs, attrs, _vertexID, _instanceID)
 {
     // Each instance repeats twice. Once for normal patch(es) and once for
     // reflection(s).
     ATTR_UNPACK(_instanceID, attrs, @a_p0p1_, float4);
     ATTR_UNPACK(_instanceID, attrs, @a_p2p3_, float4);
     ATTR_UNPACK(_instanceID, attrs, @a_joinTan_and_ys, float4);
 #ifdef SPLIT_UINT4_ATTRIBUTES
     ATTR_UNPACK(_instanceID, attrs, @a_x0x1, uint);
     ATTR_UNPACK(_instanceID, attrs, @a_reflectionX0X1, uint);
     ATTR_UNPACK(_instanceID, attrs, @a_segmentCounts, uint);
     ATTR_UNPACK(_instanceID, attrs, @a_contourIDWithFlags, uint);

     uint4 @a_args = uint4(@a_x0x1,
                           @a_reflectionX0X1,
                           @a_segmentCounts,
                           @a_contourIDWithFlags);
 #else
     ATTR_UNPACK(_instanceID, attrs, @a_args, uint4);
 #endif

     VARYING_INIT(v_p0p1, float4);
     VARYING_INIT(v_p2p3, float4);
     VARYING_INIT(v_args, float4);
     VARYING_INIT(v_joinArgs, float3);
     VARYING_INIT(v_contourIDWithFlags, uint);

     float2 p0 = @a_p0p1_.xy;
     float2 p1 = @a_p0p1_.zw;
     float2 p2 = @a_p2p3_.xy;
     float2 p3 = @a_p2p3_.zw;
     // Each instance has two spans, potentially for both a forward copy and and
     // reflection. (If the second span isn't needed, the client will have placed
     // it offscreen.)
     bool isFirstSpan = _vertexID < 4;
     float y = isFirstSpan ? @a_joinTan_and_ys.z : @a_joinTan_and_ys.w;
     int x0x1 = int(isFirstSpan ? @a_args.x : @a_args.y);
 #ifdef GLSL
     int x1up = x0x1 << 16;
     if (@a_args.z == 0xffffffffu)
     {
         // Pixel 8 with ARM Mali-G715 throws away "x0x1 << 16 >> 16". We need
         // this in order to sign-extend the bottom 16 bits of x0x1. Create a
         // branch that we know won't be taken, in order to convince the compiler
         // not to throw this operation away. NOTE: we could use
         // bitfieldExtract(), but it isn't available on ES 3.0.
         --x1up;
     }
     float x0 = float(x1up >> 16);
 #else
     float x0 = float(x0x1 << 16 >> 16);
 #endif
     float x1 = float(x0x1 >> 16);
     float2 coord = float2((_vertexID & 1) == 0 ? x0 : x1,
                           (_vertexID & 2) == 0 ? y + 1. : y);

     uint parametricSegmentCount = @a_args.z & 0x3ffu;
     uint polarSegmentCount = (@a_args.z >> 10) & 0x3ffu;
     uint joinSegmentCount = @a_args.z >> 20;
     uint contourIDWithFlags = @a_args.w;
     if (x1 < x0) // Reflections are drawn right to left.
     {
         contourIDWithFlags |= MIRRORED_CONTOUR_CONTOUR_FLAG;
     }
     if ((x1 - x0) * uniforms.tessInverseViewportY < .0)
     {
         // Make sure we always emit clockwise triangles. Swap the top and bottom
         // vertices.
         coord.y = 2. * y + 1. - coord.y;
     }
     if ((contourIDWithFlags & CULL_EXCESS_TESSELLATION_SEGMENTS_CONTOUR_FLAG) !=
         0u)
     {
         // This span may have more tessellation vertices allocated to it than
         // necessary (e.g., outerCurve patches all have a fixed patch size,
         // regardless of how many segments the curve actually needs). Re-run
         // Wang's formula to figure out how many segments we actually need, and
         // make any excess segments degenerate by co-locating their vertices at
         // T=0.
         uint pathID = STORAGE_BUFFER_LOAD4(@contourBuffer,
                                            contour_data_idx(contourIDWithFlags))
                           .z;
         float2x2 mat = make_float2x2(
             uintBitsToFloat(STORAGE_BUFFER_LOAD4(@pathBuffer, pathID * 4u)));
         float2 d0 = MUL(mat, -2. * p1 + p2 + p0);

         float2 d1 = MUL(mat, -2. * p2 + p3 + p1);
         float m = max(dot(d0, d0), dot(d1, d1));
         float n = max(ceil(sqrt(.75 * 4. * sqrt(m))), 1.);
         parametricSegmentCount = min(uint(n), parametricSegmentCount);
     }
     // Polar and parametric segments share the same beginning and ending
     // vertices, so the merged *vertex* count is equal to the sum of polar and
     // parametric *segment* counts.
     uint totalVertexCount =
         parametricSegmentCount + polarSegmentCount + joinSegmentCount - 1u;

     float2x2 tangents = find_tangents(p0, p1, p2, p3);
     float theta = acos(cosine_between_vectors(tangents[0], tangents[1]));
     float radsPerPolarSegment = theta / float(polarSegmentCount);
     // Adjust sign of radsPerPolarSegment to match the direction the curve
     // turns. NOTE: Since the curve is not allowed to inflect, we can just check
     // F'(.5) x F''(.5). NOTE: F'(.5) x F''(.5) has the same sign as (p2 - p0) x
     // (p3 - p1).
     float turn = determinant(float2x2(p2 - p0, p3 - p1));
     if (turn ==
         .0) // This is the case for joins and cusps where points are co-located.
         turn = determinant(tangents);
     if (turn < .0)
         radsPerPolarSegment = -radsPerPolarSegment;

     v_p0p1 = float4(p0, p1);
     v_p2p3 = float4(p2, p3);
     v_args = float4(float(totalVertexCount) - abs(x1 - coord.x), // vertexIdx
                     float(totalVertexCount), // totalVertexCount
                     (joinSegmentCount << 10) | parametricSegmentCount,
                     radsPerPolarSegment);
     if (joinSegmentCount > 1u)
     {
         float2x2 joinTangents = float2x2(tangents[1], @a_joinTan_and_ys.xy);
         float joinTheta =
             acos(cosine_between_vectors(joinTangents[0], joinTangents[1]));
         float joinSpan = float(joinSegmentCount);
         if ((contourIDWithFlags &
              (JOIN_TYPE_MASK | EMULATED_STROKE_CAP_CONTOUR_FLAG)) ==
             (ROUND_JOIN_CONTOUR_FLAG | EMULATED_STROKE_CAP_CONTOUR_FLAG))
         {
             // Round caps emulated as joins need to emit vertices at T=0 and
             // T=1, unlike normal round joins. The fragment shader will handle
             // most of this, but here we need to adjust radsPerJoinSegment to
             // account for the fact that this join will be rotating around two
             // more segments.
             joinSpan -= 2.;
         }
         float radsPerJoinSegment = joinTheta / joinSpan;
         if (determinant(joinTangents) < .0)
             radsPerJoinSegment = -radsPerJoinSegment;
         v_joinArgs.xy = @a_joinTan_and_ys.xy;
         v_joinArgs.z = radsPerJoinSegment;
     }
     v_contourIDWithFlags = contourIDWithFlags;

     float4 pos;
     pos.x = coord.x * (2. / TESS_TEXTURE_WIDTH) - 1.;
     pos.y = coord.y * uniforms.tessInverseViewportY -
             sign(uniforms.tessInverseViewportY);
     pos.zw = float2(0, 1);

     VARYING_PACK(v_p0p1);
     VARYING_PACK(v_p2p3);
     VARYING_PACK(v_args);
     VARYING_PACK(v_joinArgs);
     VARYING_PACK(v_contourIDWithFlags);
     EMIT_VERTEX(pos);
 }
 #endif

 #ifdef @FRAGMENT
 FRAG_DATA_MAIN(uint4, @tessellateFragmentMain)
 {
     VARYING_UNPACK(v_p0p1, float4);
     VARYING_UNPACK(v_p2p3, float4);
     VARYING_UNPACK(v_args, float4);
     VARYING_UNPACK(v_joinArgs, float3);
     VARYING_UNPACK(v_contourIDWithFlags, uint);

     float2 p0 = v_p0p1.xy;
     float2 p1 = v_p0p1.zw;
     float2 p2 = v_p2p3.xy;
     float2 p3 = v_p2p3.zw;
     float2x2 tangents = find_tangents(p0, p1, p2, p3);
     // Colocate any padding vertices at T=0.
     float vertexIdx = max(floor(v_args.x), .0);
     float totalVertexCount = v_args.y;
     uint joinSegmentCount_and_parametricSegmentCount = uint(v_args.z);
     float parametricSegmentCount =
         float(joinSegmentCount_and_parametricSegmentCount & 0x3ffu);
     float joinSegmentCount =
         float(joinSegmentCount_and_parametricSegmentCount >> 10);
     float radsPerPolarSegment = v_args.w;
     uint contourIDWithFlags = v_contourIDWithFlags;

     // mergedVertexID/mergedSegmentCount are relative to the sub-section of the
     // instance this vertex belongs to (either the curve section that consists
     // of merged polar and parametric segments, or the join section composed of
     // just polar segments).
     //
     // Begin with the assumption that we belong to the curve section.
     float mergedSegmentCount = totalVertexCount - joinSegmentCount;
     float mergedVertexID = vertexIdx;
     half2 featherCorrection = make_half2(.0, .0);
     int featherVertexIndex0;
     if (mergedVertexID <= mergedSegmentCount)
     {
         // We do belong to the curve section. Clear out any stroke join flags.
         contourIDWithFlags &= ~JOIN_TYPE_MASK;
     }
     else
     {
         // We actually belong to the join section following the curve. Construct
         // a point-cubic with rotation.
         p0 = p1 = p2 = p3;
         tangents = float2x2(tangents[1], v_joinArgs.xy /*joinTangent*/);
         parametricSegmentCount = 1.;
         mergedVertexID -= mergedSegmentCount;
         mergedSegmentCount = joinSegmentCount;
         radsPerPolarSegment = v_joinArgs.z; // radsPerJoinSegment.
         if ((contourIDWithFlags & JOIN_TYPE_MASK) > ROUND_JOIN_CONTOUR_FLAG)
         {
             // Miter or bevel join vertices snap to either tangents[0] or
             // tangents[1], and get adjusted in the shader that follows.
             if (mergedVertexID < 2.5) // With 5 join segments, this branch will
                                       // see IDs: 1, 2, 3, 4.
                 contourIDWithFlags |= JOIN_TANGENT_0_CONTOUR_FLAG;
             if (mergedVertexID > 1.5 && mergedVertexID < 3.5)
                 contourIDWithFlags |= JOIN_TANGENT_INNER_CONTOUR_FLAG;
         }
         else if ((contourIDWithFlags & EMULATED_STROKE_CAP_CONTOUR_FLAG) != 0u)
         {
             // Round caps emulated as joins need to emit vertices at T=0 and
             // T=1, unlike normal round joins. Preserve the same number of
             // vertices (the CPU should have given us two extra, knowing that we
             // are an emulated cap, and the vertex shader should have already
             // accounted for this in radsPerJoinSegment), but adjust our
             // stepping parameters so we begin at T=0 and end at T=1.
             mergedSegmentCount -= 2.;
             --mergedVertexID;
         }
         else if ((contourIDWithFlags & JOIN_TYPE_MASK) ==
                  FEATHER_JOIN_CONTOUR_FLAG)
         {
             featherVertexIndex0 = -int(mergedVertexID);
             --mergedVertexID; // Duplicate the vertex at T=1 on the join also.

             // radsPerPolarSegment was calculated under the assumption that all
             // joinSegmentCount vertices were meant for the angle between tan0
             // and tan1. However, feather joins always have a rotation of PI.
             float rads = radsPerPolarSegment * joinSegmentCount;
             float joinVertexCount = joinSegmentCount - 1.;
             float joinNonEmptySegmentCount = joinVertexCount - 1. - 3.;
             // Feather joins draw backwards segments across the angle outside
             // the join, in order to erase some of the coverage that got
             // written.
             float backwardSegmentCount =
                 clamp(round(abs(rads) / PI * joinNonEmptySegmentCount),
                       1.,
                       joinNonEmptySegmentCount - 1.);
             // Forward segments are in the normal join angle.
             float forwardSegmentCount =
                 joinNonEmptySegmentCount - backwardSegmentCount;
             featherCorrection.y = cast_float_to_half(abs(rads) / PI);
             if (mergedVertexID <= forwardSegmentCount)
             {
                 // We're a backwards segment of the feather join.
                 tangents[1] = -tangents[1];
                 radsPerPolarSegment =
                     -(PI * sign(rads) - rads) / forwardSegmentCount;
                 mergedSegmentCount = forwardSegmentCount;
                 featherCorrection.x = 1.;
             }
             else if (mergedVertexID == forwardSegmentCount + 1.)
             {
                 // There's a discontinuous jump between the backwards and
                 // forward segments. Duplicate the final backwards vertex with
                 // zero height.
                 tangents[0] = tangents[1] = -tangents[1];
                 mergedVertexID = .0;
                 mergedSegmentCount = 1.;
                 contourIDWithFlags |= ZERO_FEATHER_OUTSET_CONTOUR_FLAG;
             }
             else if (mergedVertexID == forwardSegmentCount + 2.)
             {
                 // There's a discontinuous jump between the backwards and
                 // forward segments. Duplicate the first forward vertex with
                 // zero height.
                 tangents[1] = tangents[0];
                 mergedVertexID = .0;
                 mergedSegmentCount = 1.;
                 contourIDWithFlags |= ZERO_FEATHER_OUTSET_CONTOUR_FLAG;
             }
             else
             {
                 // We're a forward segment of the feather join.
                 mergedVertexID -= forwardSegmentCount + 3.;
                 mergedSegmentCount = backwardSegmentCount;
                 radsPerPolarSegment = rads / backwardSegmentCount;
                 featherCorrection.y =
                     cast_float_to_half(1. - featherCorrection.y);
                 featherCorrection.x = 1.;
             }
         }
         contourIDWithFlags |= radsPerPolarSegment < .0
                                   ? LEFT_JOIN_CONTOUR_FLAG
                                   : RIGHT_JOIN_CONTOUR_FLAG;
     }

     float2 tessCoord;
     float theta = .0;
     if (mergedVertexID == .0 || mergedVertexID == mergedSegmentCount ||
         (contourIDWithFlags & JOIN_TYPE_MASK) > ROUND_JOIN_CONTOUR_FLAG)
     {
         // Tessellated vertices at the beginning and end of the strip use exact
         // endpoints and tangents. This ensures crack-free seaming between
         // instances.
         bool isTan0 = mergedVertexID < mergedSegmentCount * .5;
         tessCoord = isTan0 ? p0 : p3;
         theta = atan2(isTan0 ? tangents[0] : tangents[1]);
         // Never do feather correction on the ends; the falloff is linear here
         // plus we need to blend smoothly with the edges.
         featherCorrection.x = .0;
     }
     else if ((contourIDWithFlags & RETROFITTED_TRIANGLE_CONTOUR_FLAG) != 0u)
     {
         // This cubic should actually be drawn as the single, non-AA triangle:
         // [p0, p1, p3]. This is used to squeeze in more rare triangles, like
         // "grout" triangles from self intersections on interior triangulation,
         // where it wouldn't be worth it to put them in their own dedicated draw
         // call.
         tessCoord = p1;
     }
     else
     {
         float T, polarT;
         if (parametricSegmentCount == mergedSegmentCount)
         {
             // There are no polar vertices. This is (probably) a fill. Vertices
             // are spaced evenly in parametric space.
             T = mergedVertexID / parametricSegmentCount;
             polarT = .0; // Set polarT != T to ensure we calculate the
                          // parametric tangent later.
         }
         else
         {
             // Compute the location and tangent direction of the tessellated
             // stroke vertex with the integral id "mergedVertexID", where
             // mergedVertexID is the sorted-order index of parametric and polar
             // vertices. Start by finding the tangent function's power basis
             // coefficients. These define a tangent direction (scaled by some
             // uniform value) as:
             //
             //                                                 |T^2|
             //     Tangent_Direction(T) = dx,dy = |A  2B  C| * |T  |
             //                                    |.   .  .|   |1  |
             float2 A, B, C = p1 - p0;
             float2 D = p3 - p0;
             float2 E = p2 - p1;
             B = E - C;
             A = -3. * E + D;
             // FIXME(crbug.com/800804,skbug.com/11268): Consider normalizing the
             // exponents in A,B,C at this point in order to prevent fp32
             // overflow.

             // Now find the coefficients that give a tangent direction from a
             // parametric vertex ID:
             //
             //  Tangent_Direction(parametricVertexID) = dx,dy =
             //
             //                     |parametricVertexID^2|
             //      |A  B_  C_| *  |parametricVertexID  |
             //      |.   .   .|    |1                   |
             //
             float2 B_ = B * (parametricSegmentCount * 2.);
             float2 C_ = C * (parametricSegmentCount * parametricSegmentCount);

             // Run a binary search to determine the highest parametric vertex
             // that is located on or before the mergedVertexID. A merged ID is
             // determined by the sum of complete parametric and polar segments
             // behind it. i.e., find the highest parametric vertex where:
             //
             //    parametricVertexID + floor(numPolarSegmentsAtParametricT) <=
             //    mergedVertexID
             //
             float lastParametricVertexID = .0;
             float maxParametricVertexID =
                 min(parametricSegmentCount - 1., mergedVertexID);
             // FIXME(crbug.com/800804,skbug.com/11268): This normalize() can
             // overflow.
             float2 tan0norm = normalize(tangents[0]);
             float negAbsRadsPerSegment = -abs(radsPerPolarSegment);
             float maxRotation0 =
                 (1. + mergedVertexID) * abs(radsPerPolarSegment);
             for (int p = MAX_PARAMETRIC_SEGMENTS_LOG2 - 1; p >= 0; --p)
             {
                 // Test the parametric vertex at lastParametricVertexID + 2^p.
                 float testParametricID =
                     lastParametricVertexID + exp2(float(p));
                 if (testParametricID <= maxParametricVertexID)
                 {
                     float2 testTan = testParametricID * A + B_;
                     testTan = testParametricID * testTan + C_;
                     float cosRotation = dot(normalize(testTan), tan0norm);
                     float maxRotation =
                         testParametricID * negAbsRadsPerSegment + maxRotation0;
                     maxRotation = min(maxRotation, PI);
                     // Is rotation <= maxRotation? (i.e., is the number of
                     // complete polar segments behind testT, + testParametricID
                     // <= mergedVertexID?)
                     if (cosRotation >= cos(maxRotation))
                         lastParametricVertexID = testParametricID;
                 }
             }

             // Find the T value of the parametric vertex at
             // lastParametricVertexID.
             float parametricT = lastParametricVertexID / parametricSegmentCount;

             // Now that we've identified the highest parametric vertex on or
             // before the mergedVertexID, the highest polar vertex is easy:
             float lastPolarVertexID = mergedVertexID - lastParametricVertexID;

             // Find the angle of tan0, or the angle between tan0norm and the
             // positive x axis.
             float theta0 = acos(clamp(tan0norm.x, -1., 1.));
             theta0 = tan0norm.y >= .0 ? theta0 : -theta0;

             // Find the tangent vector on the vertex at lastPolarVertexID.
             theta = lastPolarVertexID * radsPerPolarSegment + theta0;
             float2 norm = float2(sin(theta), -cos(theta));

             // Find the T value where the tangent is orthogonal to norm. This is
             // a quadratic:
             //
             //     dot(norm, Tangent_Direction(T)) == 0
             //
             //                         |T^2|
             //     norm * |A  2B  C| * |T  | == 0
             //            |.   .  .|   |1  |
             //
             float a = dot(norm, A), b_over_2 = dot(norm, B), c = dot(norm, C);
             float discr_over_4 = max(b_over_2 * b_over_2 - a * c, .0);
             float q = sqrt(discr_over_4);
             if (b_over_2 > .0)
                 q = -q;
             q -= b_over_2;

             // Roots are q/a and c/q. Since each curve section does not inflect
             // or rotate more than 180 degrees, there can only be one tangent
             // orthogonal to "norm" inside 0..1. Pick the root nearest .5.
             float _5qa = -.5 * q * a;
             float2 root = (abs(q * q + _5qa) < abs(a * c + _5qa))
                               ? float2(q, a)
                               : float2(c, q);
             polarT = (root.t != .0) ? root.s / root.t : .0;
             polarT = clamp(polarT, .0, 1.);

             // The root finder above can become unstable when lastPolarVertexID
             // == 0 (e.g., if there are roots at exatly 0 and 1 both). polarT
             // should always == 0 in this case.
             if (lastPolarVertexID == .0)
                 polarT = .0;

             // Now that we've identified the T values of the last parametric and
             // polar vertices, our final T value for mergedVertexID is whichever
             // is larger.
             T = max(parametricT, polarT);

             if (featherCorrection.x == 1.)
             {
                 // This is a total hack to give feather joins nonlinear falloff
                 // on the bisector while keeping it linear on the ends.
                 // TODO: This needs a more methodical approach.
                 featherCorrection.x =
                     sqrt(sin(lastPolarVertexID / mergedSegmentCount * PI));
             }
         }

         // Evaluate the cubic at T. Use De Casteljau's for its accuracy and
         // stability.
         float2 ab = unchecked_mix(p0, p1, T);
         float2 bc = unchecked_mix(p1, p2, T);
         float2 cd = unchecked_mix(p2, p3, T);
         float2 abc = unchecked_mix(ab, bc, T);
         float2 bcd = unchecked_mix(bc, cd, T);
         tessCoord = unchecked_mix(abc, bcd, T);

         // If we went with T=parametricT, then update theta. Otherwise leave it
         // at the polar theta found previously. (In the event that parametricT
         // == polarT, we keep the polar theta.)
         if (T != polarT)
             theta = atan2(bcd - abc);
     }

     uint4 tessData =
         uint4(floatBitsToUint(float3(tessCoord, theta)), contourIDWithFlags);
     if ((contourIDWithFlags & JOIN_TYPE_MASK) == FEATHER_JOIN_CONTOUR_FLAG)
     {
         // Feathered corners need an extra dimming factor. Luckily, the corner
         // spokes are all centered on the same point, so we can store a backset
         // to that point instead of the point itself, and thus make room for the
         // dimming factor.
         tessData.x = uint(featherVertexIndex0);
         // The coverage dimming factor is "y^(x+1)" on the outer edge of the
         // feather (y <= 1, x >= 0), and just "y^0" at the center. We provide x
         // and y, and the vertex shader works out the rest.
         tessData.y = packHalf2x16(featherCorrection);
     }
     EMIT_FRAG_DATA(tessData);
 }
 #endif
	/*
	* Copyright 2020 Google LLC.
	*
	* Use of this source code is governed by a BSD-style license that can be
	* found in the LICENSE file.
	*
	* Initial import from
	* skia:src/gpu/ganesh/tessellate/GrStrokeTessellationShader.cpp
	*
	* Copyright 2022 Rive
	*/

	#define MAX_PARAMETRIC_SEGMENTS_LOG2 10 // Max 1024 segments.

	#ifdef @VERTEX
	ATTR_BLOCK_BEGIN(Attrs)
	ATTR(
	0,
	float4,
	@a_p0p1_); // End in '_' because D3D interprets the '1' as a semantic index.
	ATTR(1, float4, @a_p2p3_);
	ATTR(2, float4, @a_joinTan_and_ys); // [joinTangent, y, reflectionY]
	#ifdef SPLIT_UINT4_ATTRIBUTES
	ATTR(3, uint, @a_x0x1);
	ATTR(4, uint, @a_reflectionX0X1);
	ATTR(5, uint, @a_segmentCounts);
	ATTR(6, uint, @a_contourIDWithFlags);
	#else
	ATTR(3,
	uint4,
	@a_args); // [x0x1, reflectionX0X1, segmentCounts, contourIDWithFlags]
	#endif
	ATTR_BLOCK_END
	#endif

	VARYING_BLOCK_BEGIN
	NO_PERSPECTIVE VARYING(0, float4, v_p0p1);
	NO_PERSPECTIVE VARYING(1, float4, v_p2p3);
	NO_PERSPECTIVE VARYING(
	2,
	float4,
	v_args); // [vertexIdx, totalVertexCount, joinSegmentCount,
	// parametricSegmentCount, radsPerPolarSegment]
	NO_PERSPECTIVE VARYING(3,
	float3,
	v_joinArgs); // [joinTangent, radsPerJoinSegment]
	FLAT VARYING(4, uint, v_contourIDWithFlags);
	VARYING_BLOCK_END

	// Tangent of the curve at T=0 and T=1.
	INLINE float2x2 find_tangents(float2 p0, float2 p1, float2 p2, float2 p3)
	{
	float2x2 t;
	t[0] = (any(notEqual(p0, p1)) ? p1 : any(notEqual(p1, p2)) ? p2 : p3) - p0;
	t[1] = p3 - (any(notEqual(p3, p2)) ? p2 : any(notEqual(p2, p1)) ? p1 : p0);
	return t;
	}

	#ifdef @VERTEX
	VERTEX_TEXTURE_BLOCK_BEGIN
	VERTEX_TEXTURE_BLOCK_END

	VERTEX_STORAGE_BUFFER_BLOCK_BEGIN
	STORAGE_BUFFER_U32x4(PATH_BUFFER_IDX, PathBuffer, @pathBuffer);
	STORAGE_BUFFER_U32x4(CONTOUR_BUFFER_IDX, ContourBuffer, @contourBuffer);
	VERTEX_STORAGE_BUFFER_BLOCK_END

	INLINE float cosine_between_vectors(float2 a, float2 b)
	{
	// FIXME(crbug.com/800804,skbug.com/11268): This can overflow if we don't
	// normalize exponents.
	float ab_cosTheta = dot(a, b);
	float ab_pow2 = dot(a, a) * dot(b, b);
	return (ab_pow2 == .0) ? 1.
	: clamp(ab_cosTheta * inversesqrt(ab_pow2), -1., 1.);
	}

	VERTEX_MAIN(@tessellateVertexMain, Attrs, attrs, _vertexID, _instanceID)
	{
	// Each instance repeats twice. Once for normal patch(es) and once for
	// reflection(s).
	ATTR_UNPACK(_instanceID, attrs, @a_p0p1_, float4);
	ATTR_UNPACK(_instanceID, attrs, @a_p2p3_, float4);
	ATTR_UNPACK(_instanceID, attrs, @a_joinTan_and_ys, float4);
	#ifdef SPLIT_UINT4_ATTRIBUTES
	ATTR_UNPACK(_instanceID, attrs, @a_x0x1, uint);
	ATTR_UNPACK(_instanceID, attrs, @a_reflectionX0X1, uint);
	ATTR_UNPACK(_instanceID, attrs, @a_segmentCounts, uint);
	ATTR_UNPACK(_instanceID, attrs, @a_contourIDWithFlags, uint);

	uint4 @a_args = uint4(@a_x0x1,
	@a_reflectionX0X1,
	@a_segmentCounts,
	@a_contourIDWithFlags);
	#else
	ATTR_UNPACK(_instanceID, attrs, @a_args, uint4);
	#endif

	VARYING_INIT(v_p0p1, float4);
	VARYING_INIT(v_p2p3, float4);
	VARYING_INIT(v_args, float4);
	VARYING_INIT(v_joinArgs, float3);
	VARYING_INIT(v_contourIDWithFlags, uint);

	float2 p0 = @a_p0p1_.xy;
	float2 p1 = @a_p0p1_.zw;
	float2 p2 = @a_p2p3_.xy;
	float2 p3 = @a_p2p3_.zw;
	// Each instance has two spans, potentially for both a forward copy and and
	// reflection. (If the second span isn't needed, the client will have placed
	// it offscreen.)
	bool isFirstSpan = _vertexID < 4;
	float y = isFirstSpan ? @a_joinTan_and_ys.z : @a_joinTan_and_ys.w;
	int x0x1 = int(isFirstSpan ? @a_args.x : @a_args.y);
	#ifdef GLSL
	int x1up = x0x1 << 16;
	if (@a_args.z == 0xffffffffu)
	{
	// Pixel 8 with ARM Mali-G715 throws away "x0x1 << 16 >> 16". We need
	// this in order to sign-extend the bottom 16 bits of x0x1. Create a
	// branch that we know won't be taken, in order to convince the compiler
	// not to throw this operation away. NOTE: we could use
	// bitfieldExtract(), but it isn't available on ES 3.0.
	--x1up;
	}
	float x0 = float(x1up >> 16);
	#else
	float x0 = float(x0x1 << 16 >> 16);
	#endif
	float x1 = float(x0x1 >> 16);
	float2 coord = float2((_vertexID & 1) == 0 ? x0 : x1,
	(_vertexID & 2) == 0 ? y + 1. : y);

	uint parametricSegmentCount = @a_args.z & 0x3ffu;
	uint polarSegmentCount = (@a_args.z >> 10) & 0x3ffu;
	uint joinSegmentCount = @a_args.z >> 20;
	uint contourIDWithFlags = @a_args.w;
	if (x1 < x0) // Reflections are drawn right to left.
	{
	contourIDWithFlags \|= MIRRORED_CONTOUR_CONTOUR_FLAG;
	}
	if ((x1 - x0) * uniforms.tessInverseViewportY < .0)
	{
	// Make sure we always emit clockwise triangles. Swap the top and bottom
	// vertices.
	coord.y = 2. * y + 1. - coord.y;
	}
	if ((contourIDWithFlags & CULL_EXCESS_TESSELLATION_SEGMENTS_CONTOUR_FLAG) !=
	0u)
	{
	// This span may have more tessellation vertices allocated to it than
	// necessary (e.g., outerCurve patches all have a fixed patch size,
	// regardless of how many segments the curve actually needs). Re-run
	// Wang's formula to figure out how many segments we actually need, and
	// make any excess segments degenerate by co-locating their vertices at
	// T=0.
	uint pathID = STORAGE_BUFFER_LOAD4(@contourBuffer,
	contour_data_idx(contourIDWithFlags))
	.z;
	float2x2 mat = make_float2x2(
	uintBitsToFloat(STORAGE_BUFFER_LOAD4(@pathBuffer, pathID * 4u)));
	float2 d0 = MUL(mat, -2. * p1 + p2 + p0);

	float2 d1 = MUL(mat, -2. * p2 + p3 + p1);
	float m = max(dot(d0, d0), dot(d1, d1));
	float n = max(ceil(sqrt(.75 * 4. * sqrt(m))), 1.);
	parametricSegmentCount = min(uint(n), parametricSegmentCount);
	}
	// Polar and parametric segments share the same beginning and ending
	// vertices, so the merged vertex count is equal to the sum of polar and
	// parametric segment counts.
	uint totalVertexCount =
	parametricSegmentCount + polarSegmentCount + joinSegmentCount - 1u;

	float2x2 tangents = find_tangents(p0, p1, p2, p3);
	float theta = acos(cosine_between_vectors(tangents[0], tangents[1]));
	float radsPerPolarSegment = theta / float(polarSegmentCount);
	// Adjust sign of radsPerPolarSegment to match the direction the curve
	// turns. NOTE: Since the curve is not allowed to inflect, we can just check
	// F'(.5) x F''(.5). NOTE: F'(.5) x F''(.5) has the same sign as (p2 - p0) x
	// (p3 - p1).
	float turn = determinant(float2x2(p2 - p0, p3 - p1));
	if (turn ==
	.0) // This is the case for joins and cusps where points are co-located.
	turn = determinant(tangents);
	if (turn < .0)
	radsPerPolarSegment = -radsPerPolarSegment;

	v_p0p1 = float4(p0, p1);
	v_p2p3 = float4(p2, p3);
	v_args = float4(float(totalVertexCount) - abs(x1 - coord.x), // vertexIdx
	float(totalVertexCount), // totalVertexCount
	(joinSegmentCount << 10) \| parametricSegmentCount,
	radsPerPolarSegment);
	if (joinSegmentCount > 1u)
	{
	float2x2 joinTangents = float2x2(tangents[1], @a_joinTan_and_ys.xy);
	float joinTheta =
	acos(cosine_between_vectors(joinTangents[0], joinTangents[1]));
	float joinSpan = float(joinSegmentCount);
	if ((contourIDWithFlags &
	(JOIN_TYPE_MASK \| EMULATED_STROKE_CAP_CONTOUR_FLAG)) ==
	(ROUND_JOIN_CONTOUR_FLAG \| EMULATED_STROKE_CAP_CONTOUR_FLAG))
	{
	// Round caps emulated as joins need to emit vertices at T=0 and
	// T=1, unlike normal round joins. The fragment shader will handle
	// most of this, but here we need to adjust radsPerJoinSegment to
	// account for the fact that this join will be rotating around two
	// more segments.
	joinSpan -= 2.;
	}
	float radsPerJoinSegment = joinTheta / joinSpan;
	if (determinant(joinTangents) < .0)
	radsPerJoinSegment = -radsPerJoinSegment;
	v_joinArgs.xy = @a_joinTan_and_ys.xy;
	v_joinArgs.z = radsPerJoinSegment;
	}
	v_contourIDWithFlags = contourIDWithFlags;

	float4 pos;
	pos.x = coord.x * (2. / TESS_TEXTURE_WIDTH) - 1.;
	pos.y = coord.y * uniforms.tessInverseViewportY -
	sign(uniforms.tessInverseViewportY);
	pos.zw = float2(0, 1);

	VARYING_PACK(v_p0p1);
	VARYING_PACK(v_p2p3);
	VARYING_PACK(v_args);
	VARYING_PACK(v_joinArgs);
	VARYING_PACK(v_contourIDWithFlags);
	EMIT_VERTEX(pos);
	}
	#endif

	#ifdef @FRAGMENT
	FRAG_DATA_MAIN(uint4, @tessellateFragmentMain)
	{
	VARYING_UNPACK(v_p0p1, float4);
	VARYING_UNPACK(v_p2p3, float4);
	VARYING_UNPACK(v_args, float4);
	VARYING_UNPACK(v_joinArgs, float3);
	VARYING_UNPACK(v_contourIDWithFlags, uint);

	float2 p0 = v_p0p1.xy;
	float2 p1 = v_p0p1.zw;
	float2 p2 = v_p2p3.xy;
	float2 p3 = v_p2p3.zw;
	float2x2 tangents = find_tangents(p0, p1, p2, p3);
	// Colocate any padding vertices at T=0.
	float vertexIdx = max(floor(v_args.x), .0);
	float totalVertexCount = v_args.y;
	uint joinSegmentCount_and_parametricSegmentCount = uint(v_args.z);
	float parametricSegmentCount =
	float(joinSegmentCount_and_parametricSegmentCount & 0x3ffu);
	float joinSegmentCount =
	float(joinSegmentCount_and_parametricSegmentCount >> 10);
	float radsPerPolarSegment = v_args.w;
	uint contourIDWithFlags = v_contourIDWithFlags;

	// mergedVertexID/mergedSegmentCount are relative to the sub-section of the
	// instance this vertex belongs to (either the curve section that consists
	// of merged polar and parametric segments, or the join section composed of
	// just polar segments).
	//
	// Begin with the assumption that we belong to the curve section.
	float mergedSegmentCount = totalVertexCount - joinSegmentCount;
	float mergedVertexID = vertexIdx;
	half2 featherCorrection = make_half2(.0, .0);
	int featherVertexIndex0;
	if (mergedVertexID <= mergedSegmentCount)
	{
	// We do belong to the curve section. Clear out any stroke join flags.
	contourIDWithFlags &= ~JOIN_TYPE_MASK;
	}
	else
	{
	// We actually belong to the join section following the curve. Construct
	// a point-cubic with rotation.
	p0 = p1 = p2 = p3;
	tangents = float2x2(tangents[1], v_joinArgs.xy /joinTangent/);
	parametricSegmentCount = 1.;
	mergedVertexID -= mergedSegmentCount;
	mergedSegmentCount = joinSegmentCount;
	radsPerPolarSegment = v_joinArgs.z; // radsPerJoinSegment.
	if ((contourIDWithFlags & JOIN_TYPE_MASK) > ROUND_JOIN_CONTOUR_FLAG)
	{
	// Miter or bevel join vertices snap to either tangents[0] or
	// tangents[1], and get adjusted in the shader that follows.
	if (mergedVertexID < 2.5) // With 5 join segments, this branch will
	// see IDs: 1, 2, 3, 4.
	contourIDWithFlags \|= JOIN_TANGENT_0_CONTOUR_FLAG;
	if (mergedVertexID > 1.5 && mergedVertexID < 3.5)
	contourIDWithFlags \|= JOIN_TANGENT_INNER_CONTOUR_FLAG;
	}
	else if ((contourIDWithFlags & EMULATED_STROKE_CAP_CONTOUR_FLAG) != 0u)
	{
	// Round caps emulated as joins need to emit vertices at T=0 and
	// T=1, unlike normal round joins. Preserve the same number of
	// vertices (the CPU should have given us two extra, knowing that we
	// are an emulated cap, and the vertex shader should have already
	// accounted for this in radsPerJoinSegment), but adjust our
	// stepping parameters so we begin at T=0 and end at T=1.
	mergedSegmentCount -= 2.;
	--mergedVertexID;
	}
	else if ((contourIDWithFlags & JOIN_TYPE_MASK) ==
	FEATHER_JOIN_CONTOUR_FLAG)
	{
	featherVertexIndex0 = -int(mergedVertexID);
	--mergedVertexID; // Duplicate the vertex at T=1 on the join also.

	// radsPerPolarSegment was calculated under the assumption that all
	// joinSegmentCount vertices were meant for the angle between tan0
	// and tan1. However, feather joins always have a rotation of PI.
	float rads = radsPerPolarSegment * joinSegmentCount;
	float joinVertexCount = joinSegmentCount - 1.;
	float joinNonEmptySegmentCount = joinVertexCount - 1. - 3.;
	// Feather joins draw backwards segments across the angle outside
	// the join, in order to erase some of the coverage that got
	// written.
	float backwardSegmentCount =
	clamp(round(abs(rads) / PI * joinNonEmptySegmentCount),
	1.,
	joinNonEmptySegmentCount - 1.);
	// Forward segments are in the normal join angle.
	float forwardSegmentCount =
	joinNonEmptySegmentCount - backwardSegmentCount;
	featherCorrection.y = cast_float_to_half(abs(rads) / PI);
	if (mergedVertexID <= forwardSegmentCount)
	{
	// We're a backwards segment of the feather join.
	tangents[1] = -tangents[1];
	radsPerPolarSegment =
	-(PI * sign(rads) - rads) / forwardSegmentCount;
	mergedSegmentCount = forwardSegmentCount;
	featherCorrection.x = 1.;
	}
	else if (mergedVertexID == forwardSegmentCount + 1.)
	{
	// There's a discontinuous jump between the backwards and
	// forward segments. Duplicate the final backwards vertex with
	// zero height.
	tangents[0] = tangents[1] = -tangents[1];
	mergedVertexID = .0;
	mergedSegmentCount = 1.;
	contourIDWithFlags \|= ZERO_FEATHER_OUTSET_CONTOUR_FLAG;
	}
	else if (mergedVertexID == forwardSegmentCount + 2.)
	{
	// There's a discontinuous jump between the backwards and
	// forward segments. Duplicate the first forward vertex with
	// zero height.
	tangents[1] = tangents[0];
	mergedVertexID = .0;
	mergedSegmentCount = 1.;
	contourIDWithFlags \|= ZERO_FEATHER_OUTSET_CONTOUR_FLAG;
	}
	else
	{
	// We're a forward segment of the feather join.
	mergedVertexID -= forwardSegmentCount + 3.;
	mergedSegmentCount = backwardSegmentCount;
	radsPerPolarSegment = rads / backwardSegmentCount;
	featherCorrection.y =
	cast_float_to_half(1. - featherCorrection.y);
	featherCorrection.x = 1.;
	}
	}
	contourIDWithFlags \|= radsPerPolarSegment < .0
	? LEFT_JOIN_CONTOUR_FLAG
	: RIGHT_JOIN_CONTOUR_FLAG;
	}

	float2 tessCoord;
	float theta = .0;
	if (mergedVertexID == .0 \|\| mergedVertexID == mergedSegmentCount \|\|
	(contourIDWithFlags & JOIN_TYPE_MASK) > ROUND_JOIN_CONTOUR_FLAG)
	{
	// Tessellated vertices at the beginning and end of the strip use exact
	// endpoints and tangents. This ensures crack-free seaming between
	// instances.
	bool isTan0 = mergedVertexID < mergedSegmentCount * .5;
	tessCoord = isTan0 ? p0 : p3;
	theta = atan2(isTan0 ? tangents[0] : tangents[1]);
	// Never do feather correction on the ends; the falloff is linear here
	// plus we need to blend smoothly with the edges.
	featherCorrection.x = .0;
	}
	else if ((contourIDWithFlags & RETROFITTED_TRIANGLE_CONTOUR_FLAG) != 0u)
	{
	// This cubic should actually be drawn as the single, non-AA triangle:
	// [p0, p1, p3]. This is used to squeeze in more rare triangles, like
	// "grout" triangles from self intersections on interior triangulation,
	// where it wouldn't be worth it to put them in their own dedicated draw
	// call.
	tessCoord = p1;
	}
	else
	{
	float T, polarT;
	if (parametricSegmentCount == mergedSegmentCount)
	{
	// There are no polar vertices. This is (probably) a fill. Vertices
	// are spaced evenly in parametric space.
	T = mergedVertexID / parametricSegmentCount;
	polarT = .0; // Set polarT != T to ensure we calculate the
	// parametric tangent later.
	}
	else
	{
	// Compute the location and tangent direction of the tessellated
	// stroke vertex with the integral id "mergedVertexID", where
	// mergedVertexID is the sorted-order index of parametric and polar
	// vertices. Start by finding the tangent function's power basis
	// coefficients. These define a tangent direction (scaled by some
	// uniform value) as:
	//
	// \|T^2\|
	// Tangent_Direction(T) = dx,dy = \|A 2B C\| * \|T \|
	// \|. . .\| \|1 \|
	float2 A, B, C = p1 - p0;
	float2 D = p3 - p0;
	float2 E = p2 - p1;
	B = E - C;
	A = -3. * E + D;
	// FIXME(crbug.com/800804,skbug.com/11268): Consider normalizing the
	// exponents in A,B,C at this point in order to prevent fp32
	// overflow.

	// Now find the coefficients that give a tangent direction from a
	// parametric vertex ID:
	//
	// Tangent_Direction(parametricVertexID) = dx,dy =
	//
	// \|parametricVertexID^2\|
	// \|A B_ C_\| * \|parametricVertexID \|
	// \|. . .\| \|1 \|
	//
	float2 B_ = B * (parametricSegmentCount * 2.);
	float2 C_ = C * (parametricSegmentCount * parametricSegmentCount);

	// Run a binary search to determine the highest parametric vertex
	// that is located on or before the mergedVertexID. A merged ID is
	// determined by the sum of complete parametric and polar segments
	// behind it. i.e., find the highest parametric vertex where:
	//
	// parametricVertexID + floor(numPolarSegmentsAtParametricT) <=
	// mergedVertexID
	//
	float lastParametricVertexID = .0;
	float maxParametricVertexID =
	min(parametricSegmentCount - 1., mergedVertexID);
	// FIXME(crbug.com/800804,skbug.com/11268): This normalize() can
	// overflow.
	float2 tan0norm = normalize(tangents[0]);
	float negAbsRadsPerSegment = -abs(radsPerPolarSegment);
	float maxRotation0 =
	(1. + mergedVertexID) * abs(radsPerPolarSegment);
	for (int p = MAX_PARAMETRIC_SEGMENTS_LOG2 - 1; p >= 0; --p)
	{
	// Test the parametric vertex at lastParametricVertexID + 2^p.
	float testParametricID =
	lastParametricVertexID + exp2(float(p));
	if (testParametricID <= maxParametricVertexID)
	{
	float2 testTan = testParametricID * A + B_;
	testTan = testParametricID * testTan + C_;
	float cosRotation = dot(normalize(testTan), tan0norm);
	float maxRotation =
	testParametricID * negAbsRadsPerSegment + maxRotation0;
	maxRotation = min(maxRotation, PI);
	// Is rotation <= maxRotation? (i.e., is the number of
	// complete polar segments behind testT, + testParametricID
	// <= mergedVertexID?)
	if (cosRotation >= cos(maxRotation))
	lastParametricVertexID = testParametricID;
	}
	}

	// Find the T value of the parametric vertex at
	// lastParametricVertexID.
	float parametricT = lastParametricVertexID / parametricSegmentCount;

	// Now that we've identified the highest parametric vertex on or
	// before the mergedVertexID, the highest polar vertex is easy:
	float lastPolarVertexID = mergedVertexID - lastParametricVertexID;

	// Find the angle of tan0, or the angle between tan0norm and the
	// positive x axis.
	float theta0 = acos(clamp(tan0norm.x, -1., 1.));
	theta0 = tan0norm.y >= .0 ? theta0 : -theta0;

	// Find the tangent vector on the vertex at lastPolarVertexID.
	theta = lastPolarVertexID * radsPerPolarSegment + theta0;
	float2 norm = float2(sin(theta), -cos(theta));

	// Find the T value where the tangent is orthogonal to norm. This is
	// a quadratic:
	//
	// dot(norm, Tangent_Direction(T)) == 0
	//
	// \|T^2\|
	// norm * \|A 2B C\| * \|T \| == 0
	// \|. . .\| \|1 \|
	//
	float a = dot(norm, A), b_over_2 = dot(norm, B), c = dot(norm, C);
	float discr_over_4 = max(b_over_2 * b_over_2 - a * c, .0);
	float q = sqrt(discr_over_4);
	if (b_over_2 > .0)
	q = -q;
	q -= b_over_2;

	// Roots are q/a and c/q. Since each curve section does not inflect
	// or rotate more than 180 degrees, there can only be one tangent
	// orthogonal to "norm" inside 0..1. Pick the root nearest .5.
	float _5qa = -.5 * q * a;
	float2 root = (abs(q * q + _5qa) < abs(a * c + _5qa))
	? float2(q, a)
	: float2(c, q);
	polarT = (root.t != .0) ? root.s / root.t : .0;
	polarT = clamp(polarT, .0, 1.);

	// The root finder above can become unstable when lastPolarVertexID
	// == 0 (e.g., if there are roots at exatly 0 and 1 both). polarT
	// should always == 0 in this case.
	if (lastPolarVertexID == .0)
	polarT = .0;

	// Now that we've identified the T values of the last parametric and
	// polar vertices, our final T value for mergedVertexID is whichever
	// is larger.
	T = max(parametricT, polarT);

	if (featherCorrection.x == 1.)
	{
	// This is a total hack to give feather joins nonlinear falloff
	// on the bisector while keeping it linear on the ends.
	// TODO: This needs a more methodical approach.
	featherCorrection.x =
	sqrt(sin(lastPolarVertexID / mergedSegmentCount * PI));
	}
	}

	// Evaluate the cubic at T. Use De Casteljau's for its accuracy and
	// stability.
	float2 ab = unchecked_mix(p0, p1, T);
	float2 bc = unchecked_mix(p1, p2, T);
	float2 cd = unchecked_mix(p2, p3, T);
	float2 abc = unchecked_mix(ab, bc, T);
	float2 bcd = unchecked_mix(bc, cd, T);
	tessCoord = unchecked_mix(abc, bcd, T);

	// If we went with T=parametricT, then update theta. Otherwise leave it
	// at the polar theta found previously. (In the event that parametricT
	// == polarT, we keep the polar theta.)
	if (T != polarT)
	theta = atan2(bcd - abc);
	}

	uint4 tessData =
	uint4(floatBitsToUint(float3(tessCoord, theta)), contourIDWithFlags);
	if ((contourIDWithFlags & JOIN_TYPE_MASK) == FEATHER_JOIN_CONTOUR_FLAG)
	{
	// Feathered corners need an extra dimming factor. Luckily, the corner
	// spokes are all centered on the same point, so we can store a backset
	// to that point instead of the point itself, and thus make room for the
	// dimming factor.
	tessData.x = uint(featherVertexIndex0);
	// The coverage dimming factor is "y^(x+1)" on the outer edge of the
	// feather (y <= 1, x >= 0), and just "y^0" at the center. We provide x
	// and y, and the vertex shader works out the rest.
	tessData.y = packHalf2x16(featherCorrection);
	}
	EMIT_FRAG_DATA(tessData);
	}
	#endif