Use SIMD to update hw tessellation tolerances
Bug: chromium:1172543
Change-Id: I223566197d1f2fd5fea07302f48ab89f50a36187
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/374840
Reviewed-by: John Stiles <johnstiles@google.com>
diff --git a/bench/TessellateBench.cpp b/bench/TessellateBench.cpp
index 0737aa7..c0cb7dd 100644
--- a/bench/TessellateBench.cpp
+++ b/bench/TessellateBench.cpp
@@ -271,9 +271,10 @@
class GrStrokeHardwareTessellator::TestingOnly_Benchmark : public Benchmark {
public:
- TestingOnly_Benchmark(MakePathStrokesFn MakePathStrokesFn, float matrixScale,
- const char* suffix)
+ TestingOnly_Benchmark(MakePathStrokesFn MakePathStrokesFn, ShaderFlags shaderFlags,
+ float matrixScale, const char* suffix)
: fMakePathStrokesFn(MakePathStrokesFn)
+ , fShaderFlags(shaderFlags)
, fMatrixScale(matrixScale) {
fName.printf("tessellate_GrStrokeHardwareTessellator_prepare%s", suffix);
}
@@ -301,7 +302,7 @@
void onDraw(int loops, SkCanvas*) final {
SkMatrix matrix = SkMatrix::Scale(fMatrixScale, fMatrixScale);
for (int i = 0; i < loops; ++i) {
- GrStrokeHardwareTessellator tessellator(ShaderFlags::kNone, fPathStrokes.data(),
+ GrStrokeHardwareTessellator tessellator(fShaderFlags, fPathStrokes.data(),
fTotalVerbCount, *fTarget->caps().shaderCaps());
tessellator.prepare(fTarget.get(), matrix);
fTarget->resetAllocator();
@@ -310,6 +311,7 @@
SkString fName;
MakePathStrokesFn fMakePathStrokesFn;
+ const ShaderFlags fShaderFlags;
float fMatrixScale;
std::unique_ptr<GrMockOpTarget> fTarget;
std::vector<PathStrokeList> fPathStrokes;
@@ -317,12 +319,22 @@
int fTotalVerbCount = 0;
};
-DEF_BENCH( return new GrStrokeHardwareTessellator::TestingOnly_Benchmark(make_simple_cubic_path, 1,
- ""); )
-DEF_BENCH( return new GrStrokeHardwareTessellator::TestingOnly_Benchmark(make_simple_cubic_path, 5,
- "_one_chop"); )
-DEF_BENCH( return new GrStrokeHardwareTessellator::TestingOnly_Benchmark(make_motionmark_paths, 1,
- "_motionmark"); )
+DEF_BENCH(
+ return new GrStrokeHardwareTessellator::TestingOnly_Benchmark(make_simple_cubic_path,
+ ShaderFlags::kNone, 1, "");
+)
+
+DEF_BENCH(
+ return new GrStrokeHardwareTessellator::TestingOnly_Benchmark(make_simple_cubic_path,
+ ShaderFlags::kNone, 5,
+ "_one_chop");
+)
+
+DEF_BENCH(
+ return new GrStrokeHardwareTessellator::TestingOnly_Benchmark(make_motionmark_paths,
+ ShaderFlags::kDynamicStroke, 1,
+ "_motionmark");
+)
class GrStrokeIndirectTessellator::Benchmark : public ::Benchmark {
protected:
diff --git a/src/gpu/GrVx.h b/src/gpu/GrVx.h
index 560f15a..7323607 100644
--- a/src/gpu/GrVx.h
+++ b/src/gpu/GrVx.h
@@ -66,7 +66,7 @@
//
// NOTE: This function deviates immediately from pi and 0 outside -1 and 1. (The derivatives are
// infinite at -1 and 1). So the input must still be clamped between -1 and 1.
-#define GRVX_FAST_ACOS_MAX_ERROR SkDegreesToRadians(.96f)
+#define GRVX_APPROX_ACOS_MAX_ERROR SkDegreesToRadians(.96f)
template<int N> SK_ALWAYS_INLINE vec<N> approx_acos(vec<N> x) {
constexpr static float a = -0.939115566365855f;
constexpr static float b = 0.9217841528914573f;
diff --git a/src/gpu/tessellate/GrStrokeHardwareTessellator.cpp b/src/gpu/tessellate/GrStrokeHardwareTessellator.cpp
index 79d4592..83dddfa 100644
--- a/src/gpu/tessellate/GrStrokeHardwareTessellator.cpp
+++ b/src/gpu/tessellate/GrStrokeHardwareTessellator.cpp
@@ -9,6 +9,7 @@
#include "src/core/SkPathPriv.h"
#include "src/gpu/GrRecordingContextPriv.h"
+#include "src/gpu/GrVx.h"
#include "src/gpu/geometry/GrPathUtils.h"
#include "src/gpu/tessellate/GrWangsFormula.h"
@@ -35,14 +36,8 @@
return numParametricSegments + numRadialSegments - 1;
}
-static float num_parametric_segments(float numCombinedSegments, float numRadialSegments) {
- // numCombinedSegments = numParametricSegments + numRadialSegments - 1.
- // (See num_combined_segments()).
- return std::max(numCombinedSegments + 1 - numRadialSegments, 0.f);
-}
-
-static float pow4(float x) {
- float xx = x*x;
+static grvx::float2 pow4(grvx::float2 x) {
+ auto xx = x*x;
return xx*xx;
}
@@ -58,7 +53,7 @@
kBowtie = SkPaint::kLast_Join + 1 // Double sided round join.
};
- PatchWriter(ShaderFlags shaderFlags, GrMeshDrawOp::Target* target,
+ PatchWriter(ShaderFlags shaderFlags, GrMeshDrawOp::Target* target, float matrixMaxScale,
SkTArray<PatchChunk>* patchChunks, int totalCombinedVerbCnt)
: fShaderFlags(shaderFlags)
, fTarget(target)
@@ -66,7 +61,8 @@
, fPatchStride(GrStrokeTessellateShader::PatchStride(fShaderFlags))
// Subtract 2 because the tessellation shader chops every cubic at two locations, and
// each chop has the potential to introduce an extra segment.
- , fMaxTessellationSegments(target->caps().shaderCaps()->maxTessellationSegments() - 2) {
+ , fMaxTessellationSegments(target->caps().shaderCaps()->maxTessellationSegments() - 2)
+ , fParametricIntolerance(Tolerances::CalcParametricIntolerance(matrixMaxScale)) {
// Pre-allocate at least enough vertex space for 1 in 4 strokes to chop, and for 8 caps.
int strokePreallocCount = totalCombinedVerbCnt * 5/4;
int capPreallocCount = 8;
@@ -83,7 +79,7 @@
// This is the intolerance value, adjusted for the view matrix, to use with Wang's formulas when
// determining how many parametric segments a curve will require.
float parametricIntolerance() const {
- return fTolerances.fParametricIntolerance;
+ return fParametricIntolerance;
}
// Will a line and worst-case previous join both fit in a single patch together?
bool lineFitsInPatch_withJoin() {
@@ -92,65 +88,61 @@
// Will a stroke with the given number of parametric segments and a worst-case rotation of 180
// degrees fit in a single patch?
bool stroke180FitsInPatch(float numParametricSegments_pow4) {
- return numParametricSegments_pow4 <= fMaxParametricSegments180_pow4;
+ return numParametricSegments_pow4 <= fMaxParametricSegments_pow4[0];
}
// Will a worst-case 180-degree stroke with the given number of parametric segments, and a
// worst-case join fit in a single patch together?
bool stroke180FitsInPatch_withJoin(float numParametricSegments_pow4) {
- return numParametricSegments_pow4 <= fMaxParametricSegments180_pow4_withJoin;
+ return numParametricSegments_pow4 <= fMaxParametricSegments_pow4_withJoin[0];
}
// Will a stroke with the given number of parametric segments and a worst-case rotation of 360
// degrees fit in a single patch?
bool stroke360FitsInPatch(float numParametricSegments_pow4) {
- return numParametricSegments_pow4 <= fMaxParametricSegments360_pow4;
+ return numParametricSegments_pow4 <= fMaxParametricSegments_pow4[1];
}
// Will a worst-case 360-degree stroke with the given number of parametric segments, and a
// worst-case join fit in a single patch together?
bool stroke360FitsInPatch_withJoin(float numParametricSegments_pow4) {
- return numParametricSegments_pow4 <= fMaxParametricSegments360_pow4_withJoin;
+ return numParametricSegments_pow4 <= fMaxParametricSegments_pow4_withJoin[1];
}
- void updateTolerances(Tolerances tolerances, SkPaint::Join joinType) {
+ void updateTolerances(float numRadialSegmentsPerRadian, SkPaint::Join joinType) {
+ using grvx::float2;
+
+ fNumRadialSegmentsPerRadian = numRadialSegmentsPerRadian;
+
// Calculate the worst-case numbers of parametric segments our hardware can support for the
// current stroke radius, in the event that there are also enough radial segments to rotate
// 180 and 360 degrees respectively. These are used for "quick accepts" that allow us to
// send almost all curves directly to the hardware without having to chop.
- float numRadialSegments180 = std::max(std::ceil(
- SK_ScalarPI * tolerances.fNumRadialSegmentsPerRadian), 1.f);
- float maxParametricSegments180 = num_parametric_segments(fMaxTessellationSegments,
- numRadialSegments180);
- fMaxParametricSegments180_pow4 = pow4(maxParametricSegments180);
+ float2 numRadialSegments_180_360 = skvx::max(skvx::ceil(
+ float2{SK_ScalarPI, 2*SK_ScalarPI} * fNumRadialSegmentsPerRadian), 1);
+ // numEdges = numSegments + 1. See num_combined_segments().
+ float maxTotalEdges = fMaxTessellationSegments + 1;
+ // numParametricSegments = numTotalEdges - numRadialSegments. See num_combined_segments().
+ float2 maxParametricSegments = skvx::max(maxTotalEdges - numRadialSegments_180_360, 0);
+ float2 maxParametricSegments_pow4 = pow4(maxParametricSegments);
+ maxParametricSegments_pow4.store(fMaxParametricSegments_pow4);
- float numRadialSegments360 = std::max(std::ceil(
- 2*SK_ScalarPI * tolerances.fNumRadialSegmentsPerRadian), 1.f);
- float maxParametricSegments360 = num_parametric_segments(fMaxTessellationSegments,
- numRadialSegments360);
- fMaxParametricSegments360_pow4 = pow4(maxParametricSegments360);
-
- // Now calculate the worst-case numbers of parametric segments if we are to integrate a join
- // into the same patch as the curve.
- float maxNumSegmentsInJoin;
+ // Find the worst-case number of segments a join needs if we integrate it into the same
+ // patch as the curve.
+ float numRadialSegments180 = numRadialSegments_180_360[0];
+ float worstCaseNumSegmentsInJoin;
switch (joinType) {
- case SkPaint::kBevel_Join:
- maxNumSegmentsInJoin = 1;
- break;
- case SkPaint::kMiter_Join:
- maxNumSegmentsInJoin = 2;
- break;
- case SkPaint::kRound_Join:
- // 180-degree round join.
- maxNumSegmentsInJoin = numRadialSegments180;
- break;
+ case SkPaint::kBevel_Join: worstCaseNumSegmentsInJoin = 1; break;
+ case SkPaint::kMiter_Join: worstCaseNumSegmentsInJoin = 2; break;
+ case SkPaint::kRound_Join: worstCaseNumSegmentsInJoin = numRadialSegments180; break;
}
- // Subtract an extra 1 off the end because when we integrate a join, the tessellator has to
- // add a redundant edge between the join and curve.
- fMaxParametricSegments180_pow4_withJoin = pow4(std::max(
- maxParametricSegments180 - maxNumSegmentsInJoin - 1, 0.f));
- fMaxParametricSegments360_pow4_withJoin = pow4(std::max(
- maxParametricSegments360 - maxNumSegmentsInJoin - 1, 0.f));
- fMaxCombinedSegments_withJoin = fMaxTessellationSegments - maxNumSegmentsInJoin - 1;
+
+ // Now calculate the worst-case numbers of parametric segments if we also want to combine a
+ // join with the patch. Subtract an extra 1 off the end because when we integrate a join,
+ // the tessellator has to add a redundant edge between the join and curve.
+ float2 maxParametricSegments_pow4_withJoin = pow4(skvx::max(
+ maxParametricSegments - worstCaseNumSegmentsInJoin - 1, 0));
+ maxParametricSegments_pow4_withJoin.store(fMaxParametricSegments_pow4_withJoin);
+
+ fMaxCombinedSegments_withJoin = fMaxTessellationSegments - worstCaseNumSegmentsInJoin - 1;
fSoloRoundJoinAlwaysFitsInPatch = (numRadialSegments180 <= fMaxTessellationSegments);
- fTolerances = tolerances;
fStrokeJoinType = JoinType(joinType);
}
@@ -389,7 +381,7 @@
}
float numParametricSegments_pow4 =
- GrWangsFormula::quadratic_pow4(fTolerances.fParametricIntolerance, p);
+ GrWangsFormula::quadratic_pow4(fParametricIntolerance, p);
if (this->stroke180FitsInPatch(numParametricSegments_pow4) || maxDepth == 0) {
this->internalPatchTo(prevJoinType,
this->stroke180FitsInPatch_withJoin(numParametricSegments_pow4),
@@ -399,8 +391,7 @@
// We still might have enough tessellation segments to render the curve. Check again with
// the actual rotation.
- float numRadialSegments =
- SkMeasureQuadRotation(p) * fTolerances.fNumRadialSegmentsPerRadian;
+ float numRadialSegments = SkMeasureQuadRotation(p) * fNumRadialSegmentsPerRadian;
numRadialSegments = std::max(std::ceil(numRadialSegments), 1.f);
float numParametricSegments = GrWangsFormula::root4(numParametricSegments_pow4);
numParametricSegments = std::max(std::ceil(numParametricSegments), 1.f);
@@ -454,8 +445,7 @@
return;
}
- float numParametricSegments_pow4 =
- GrWangsFormula::cubic_pow4(fTolerances.fParametricIntolerance, p);
+ float numParametricSegments_pow4 = GrWangsFormula::cubic_pow4(fParametricIntolerance, p);
if (this->stroke180FitsInPatch(numParametricSegments_pow4) || maxDepth == 0) {
this->internalPatchTo(prevJoinType,
this->stroke180FitsInPatch_withJoin(numParametricSegments_pow4),
@@ -465,8 +455,7 @@
// We still might have enough tessellation segments to render the curve. Check again with
// its actual rotation.
- float numRadialSegments =
- SkMeasureNonInflectCubicRotation(p) * fTolerances.fNumRadialSegmentsPerRadian;
+ float numRadialSegments = SkMeasureNonInflectCubicRotation(p) * fNumRadialSegmentsPerRadian;
numRadialSegments = std::max(std::ceil(numRadialSegments), 1.f);
float numParametricSegments = GrWangsFormula::root4(numParametricSegments_pow4);
numParametricSegments = std::max(std::ceil(numParametricSegments), 1.f);
@@ -553,7 +542,7 @@
SkVector tan0 = junctionPoint - fLastControlPoint;
SkVector tan1 = nextControlPoint - junctionPoint;
float rotation = SkMeasureAngleBetweenVectors(tan0, tan1);
- float numRadialSegments = rotation * fTolerances.fNumRadialSegmentsPerRadian;
+ float numRadialSegments = rotation * fNumRadialSegmentsPerRadian;
if (numRadialSegments > fMaxTessellationSegments) {
// This is a round join that requires more segments than the tessellator supports.
// Split it and recurse.
@@ -659,19 +648,29 @@
// The maximum number of tessellation segments the hardware can emit for a single patch.
const int fMaxTessellationSegments;
- // These values contain worst-case numbers of parametric segments, raised to the 4th power, that
+ // This is the intolerance value, adjusted for the view matrix, to use with Wang's formulas when
+ // determining how many parametric segments a curve will require.
+ const float fParametricIntolerance;
+
+ // Number of radial segments required for each radian of rotation in order to look smooth with
+ // the current stroke radius.
+ float fNumRadialSegmentsPerRadian;
+
+ // These arrays contain worst-case numbers of parametric segments, raised to the 4th power, that
// our hardware can support for the current stroke radius. They assume curve rotations of 180
// and 360 degrees respectively. These are used for "quick accepts" that allow us to send almost
// all curves directly to the hardware without having to chop. We raise to the 4th power because
// the "pow4" variants of Wang's formula are the quickest to evaluate.
- GrStrokeTessellateShader::Tolerances fTolerances;
- JoinType fStrokeJoinType;
- float fMaxParametricSegments180_pow4;
- float fMaxParametricSegments360_pow4;
- float fMaxParametricSegments180_pow4_withJoin;
- float fMaxParametricSegments360_pow4_withJoin;
+ float fMaxParametricSegments_pow4[2]; // Values for strokes that rotate 180 and 360 degrees.
+ float fMaxParametricSegments_pow4_withJoin[2]; // For strokes that rotate 180 and 360 degrees.
+
+ // Maximum number of segments we can allocate for a stroke if we are stuffing it in a patch
+ // together with a worst-case join.
float fMaxCombinedSegments_withJoin;
+
+ // Additional info on the current stroke radius/join type.
bool fSoloRoundJoinAlwaysFitsInPatch;
+ JoinType fStrokeJoinType;
// Variables related to the patch chunk that we are currently writing out during prepareBuffers.
int fCurrChunkPatchCount = 0;
@@ -691,7 +690,42 @@
GrVertexColor fDynamicColor;
};
-} // namespace
+// Calculates and buffers up future values for "numRadialSegmentsPerRadian" using SIMD.
+class alignas(sizeof(grvx::float4)) RadialSegmentsPerRadianBuffer {
+public:
+ using PathStrokeList = GrStrokeTessellator::PathStrokeList;
+
+ RadialSegmentsPerRadianBuffer(float parametricIntolerance)
+ : fParametricIntolerance(parametricIntolerance) {
+ }
+
+ float fetchNext(PathStrokeList* head) {
+ // GrStrokeTessellateOp::onCombineIfPossible does not allow hairlines to become dynamic. If
+ // this changes, we will need to call Tolerances::GetLocalStrokeWidth() for each stroke.
+ SkASSERT(!head->fStroke.isHairlineStyle());
+ if (fBufferIdx == 4) {
+ // We ran out of values. Peek ahead and buffer up 4 more.
+ PathStrokeList* peekAhead = head;
+ int i = 0;
+ do {
+ fStrokeWidths[i++] = peekAhead->fStroke.getWidth();
+ } while ((peekAhead = peekAhead->fNext) && i < 4);
+ Tolerances::ApproxNumRadialSegmentsPerRadian(fParametricIntolerance,
+ fStrokeWidths).store(
+ fNumRadialSegmentsPerRadian);
+ fBufferIdx = 0;
+ }
+ SkASSERT(0 <= fBufferIdx && fBufferIdx < 4);
+ SkASSERT(fStrokeWidths[fBufferIdx] == head->fStroke.getWidth());
+ return fNumRadialSegmentsPerRadian[fBufferIdx++];
+ }
+
+private:
+ grvx::float4 fStrokeWidths{}; // Must be first for alignment purposes.
+ float fNumRadialSegmentsPerRadian[4];
+ const float fParametricIntolerance;
+ int fBufferIdx = 4; // Initialize the buffer as "empty".
+};
SK_ALWAYS_INLINE static bool conic_has_cusp(const SkPoint p[3]) {
SkVector a = p[1] - p[0];
@@ -736,27 +770,40 @@
(!(skvx::all(p0 == p1) || skvx::all(p2 == p3)) || (a == 0 && b == 0 && c == 0));
}
+} // namespace
+
void GrStrokeHardwareTessellator::prepare(GrMeshDrawOp::Target* target,
const SkMatrix& viewMatrix) {
using JoinType = PatchWriter::JoinType;
- std::array<float, 2> matrixScales;
- if (!viewMatrix.getMinMaxScales(matrixScales.data())) {
- matrixScales.fill(1);
+ std::array<float, 2> matrixMinMaxScales;
+ if (!viewMatrix.getMinMaxScales(matrixMinMaxScales.data())) {
+ matrixMinMaxScales.fill(1);
}
- PatchWriter patchWriter(fShaderFlags, target, &fPatchChunks, fTotalCombinedVerbCnt);
- const SkStrokeRec* strokeForTolerances = nullptr;
+ PatchWriter patchWriter(fShaderFlags, target, matrixMinMaxScales[1], &fPatchChunks,
+ fTotalCombinedVerbCnt);
+ if (!(fShaderFlags & ShaderFlags::kDynamicStroke)) {
+ // Strokes are static. Calculate tolerances once.
+ const SkStrokeRec& stroke = fPathStrokeList->fStroke;
+ float localStrokeWidth = Tolerances::GetLocalStrokeWidth(matrixMinMaxScales.data(),
+ stroke.getWidth());
+ float numRadialSegmentsPerRadian = Tolerances::CalcNumRadialSegmentsPerRadian(
+ patchWriter.parametricIntolerance(), localStrokeWidth);
+ patchWriter.updateTolerances(numRadialSegmentsPerRadian, stroke.getJoin());
+ }
+
+ // Fast SIMD queue that buffers up values for "numRadialSegmentsPerRadian". Only used when we
+ // have dynamic strokes.
+ RadialSegmentsPerRadianBuffer radialSegmentsPerRadianBuffer(
+ patchWriter.parametricIntolerance());
for (PathStrokeList* pathStroke = fPathStrokeList; pathStroke; pathStroke = pathStroke->fNext) {
const SkStrokeRec& stroke = pathStroke->fStroke;
- if (!strokeForTolerances || strokeForTolerances->getWidth() != stroke.getWidth() ||
- strokeForTolerances->getCap() != stroke.getCap()) {
- auto tolerances = Tolerances::MakePreTransform(matrixScales.data(), stroke.getWidth());
- patchWriter.updateTolerances(tolerances, stroke.getJoin());
- strokeForTolerances = &stroke;
- }
if (fShaderFlags & ShaderFlags::kDynamicStroke) {
+ // Strokes are dynamic. Update tolerances with every new stroke.
+ patchWriter.updateTolerances(radialSegmentsPerRadianBuffer.fetchNext(pathStroke),
+ stroke.getJoin());
patchWriter.updateDynamicStroke(stroke);
}
if (fShaderFlags & ShaderFlags::kDynamicColor) {
diff --git a/src/gpu/tessellate/GrStrokeIndirectTessellator.cpp b/src/gpu/tessellate/GrStrokeIndirectTessellator.cpp
index 3e44604..4dd5c2f 100644
--- a/src/gpu/tessellate/GrStrokeIndirectTessellator.cpp
+++ b/src/gpu/tessellate/GrStrokeIndirectTessellator.cpp
@@ -75,8 +75,8 @@
void updateTolerances(float strokeWidth, bool isRoundJoin) {
this->flush();
- fTolerances = GrStrokeTessellateShader::Tolerances::MakePreTransform(
- fMatrixMinMaxScales.data(), strokeWidth);
+ fTolerances = GrStrokeTessellateShader::Tolerances::Make(fMatrixMinMaxScales.data(),
+ strokeWidth);
fResolveLevelForCircles = SkTPin<float>(
sk_float_nextlog2(fTolerances.fNumRadialSegmentsPerRadian * SK_ScalarPI),
1, kMaxResolveLevel);
diff --git a/src/gpu/tessellate/GrStrokeTessellateOp.cpp b/src/gpu/tessellate/GrStrokeTessellateOp.cpp
index a278b0b..16db49b 100644
--- a/src/gpu/tessellate/GrStrokeTessellateOp.cpp
+++ b/src/gpu/tessellate/GrStrokeTessellateOp.cpp
@@ -92,6 +92,9 @@
!DynamicStroke::StrokesHaveEqualDynamicState(this->headStroke(), op->headStroke())) {
// The paths have different stroke properties. We will need to enable dynamic stroke if we
// still decide to combine them.
+ if (this->headStroke().isHairlineStyle()) {
+ return CombineResult::kCannotCombine; // Dynamic hairlines aren't supported.
+ }
combinedFlags |= ShaderFlags::kDynamicStroke;
}
if (!(combinedFlags & ShaderFlags::kDynamicColor) && this->headColor() != op->headColor()) {
diff --git a/src/gpu/tessellate/GrStrokeTessellateShader.cpp b/src/gpu/tessellate/GrStrokeTessellateShader.cpp
index e147714..866313b 100644
--- a/src/gpu/tessellate/GrStrokeTessellateShader.cpp
+++ b/src/gpu/tessellate/GrStrokeTessellateShader.cpp
@@ -421,11 +421,12 @@
if (!shader.hasDynamicStroke()) {
Tolerances tolerances;
if (!stroke.isHairlineStyle()) {
- tolerances.set(shader.viewMatrix().getMaxScale(), stroke.getWidth());
+ tolerances = Tolerances::MakeNonHairline(shader.viewMatrix().getMaxScale(),
+ stroke.getWidth());
} else {
// In the hairline case we transform prior to tessellation. Set up tolerances for an
// identity viewMatrix and a strokeWidth of 1.
- tolerances.set(1, 1);
+ tolerances = Tolerances::MakeNonHairline(1, 1);
}
float strokeRadius = (stroke.isHairlineStyle()) ? .5f : stroke.getWidth() * .5;
pdman.set4f(fTessArgsUniform,
@@ -1262,11 +1263,12 @@
// Set up the tessellation control uniforms.
Tolerances tolerances;
if (!stroke.isHairlineStyle()) {
- tolerances.set(shader.viewMatrix().getMaxScale(), stroke.getWidth());
+ tolerances = Tolerances::MakeNonHairline(shader.viewMatrix().getMaxScale(),
+ stroke.getWidth());
} else {
// In the hairline case we transform prior to tessellation. Set up tolerances for an
// identity viewMatrix and a strokeWidth of 1.
- tolerances.set(1, 1);
+ tolerances = Tolerances::MakeNonHairline(1, 1);
}
float strokeRadius = (stroke.isHairlineStyle()) ? .5f : stroke.getWidth() * .5;
pdman.set4f(fTessControlArgsUniform,
diff --git a/src/gpu/tessellate/GrStrokeTessellateShader.h b/src/gpu/tessellate/GrStrokeTessellateShader.h
index d491d1d..da125bf 100644
--- a/src/gpu/tessellate/GrStrokeTessellateShader.h
+++ b/src/gpu/tessellate/GrStrokeTessellateShader.h
@@ -11,6 +11,7 @@
#include "src/gpu/tessellate/GrPathShader.h"
#include "include/core/SkStrokeRec.h"
+#include "src/gpu/GrVx.h"
#include "src/gpu/tessellate/GrTessellationPathRenderer.h"
#include <array>
@@ -69,17 +70,35 @@
// These tolerances decide the number of parametric and radial segments the tessellator will
// linearize curves into. These decisions are made in (pre-viewMatrix) local path space.
struct Tolerances {
- // See fParametricIntolerance.
+ // Decides the number of parametric segments the tessellator adds for each curve. (Uniform
+ // steps in parametric space.) The tessellator will add enough parametric segments so that,
+ // once transformed into device space, they never deviate by more than
+ // 1/GrTessellationPathRenderer::kLinearizationIntolerance pixels from the true curve.
constexpr static float CalcParametricIntolerance(float matrixMaxScale) {
return matrixMaxScale * GrTessellationPathRenderer::kLinearizationIntolerance;
}
- // Returns the equivalent tolerances in (pre-viewMatrix) local path space that the
- // tessellator will use when rendering this stroke.
- static Tolerances MakePreTransform(const float matrixMinMaxScales[2], float strokeWidth) {
- float matrixMaxScale = matrixMinMaxScales[1];
+ // Decides the number of radial segments the tessellator adds for each curve. (Uniform steps
+ // in tangent angle.) The tessellator will add this number of radial segments for each
+ // radian of rotation in local path space.
+ static float CalcNumRadialSegmentsPerRadian(float parametricIntolerance,
+ float strokeWidth) {
+ return .5f / acosf(std::max(1 - 2 / (parametricIntolerance * strokeWidth), -1.f));
+ }
+ template<int N> static grvx::vec<N> ApproxNumRadialSegmentsPerRadian(
+ float parametricIntolerance, grvx::vec<N> strokeWidths) {
+ grvx::vec<N> cosTheta = skvx::max(1 - 2 / (parametricIntolerance * strokeWidths), -1);
+ // Subtract GRVX_APPROX_ACOS_MAX_ERROR so we never account for too few segments.
+ return .5f / (grvx::approx_acos(cosTheta) - GRVX_APPROX_ACOS_MAX_ERROR);
+ }
+ // Returns the equivalent stroke width in (pre-viewMatrix) local path space that the
+ // tessellator will use when rendering this stroke. This only differs from the actual stroke
+ // width for hairlines.
+ static float GetLocalStrokeWidth(const float matrixMinMaxScales[2], float strokeWidth) {
+ SkASSERT(strokeWidth >= 0);
float localStrokeWidth = strokeWidth;
- if (localStrokeWidth == 0) {
+ if (localStrokeWidth == 0) { // Is the stroke a hairline?
float matrixMinScale = matrixMinMaxScales[0];
+ float matrixMaxScale = matrixMinMaxScales[1];
// If the stroke is hairline then the tessellator will operate in post-transform
// space instead. But for the sake of CPU methods that need to conservatively
// approximate the number of segments to emit, we use
@@ -90,26 +109,25 @@
// of segments to emit.)
approxScale = std::max(matrixMinScale, matrixMaxScale * .25f);
localStrokeWidth = 1/approxScale;
+ if (localStrokeWidth == 0) {
+ // Never return zero from this method, because zero means "hairline". Substitute
+ // SK_ScalarNearlyZero in that case; otherwise keep whatever we calculated above.
+ localStrokeWidth = SK_ScalarNearlyZero;
+ }
}
- return GrStrokeTessellateShader::Tolerances(matrixMaxScale, localStrokeWidth);
+ return localStrokeWidth;
}
- Tolerances() = default;
- Tolerances(float matrixMaxScale, float strokeWidth) {
- this->set(matrixMaxScale, strokeWidth);
+ static Tolerances Make(const float matrixMinMaxScales[2], float strokeWidth) {
+ return MakeNonHairline(matrixMinMaxScales[1],
+ GetLocalStrokeWidth(matrixMinMaxScales, strokeWidth));
}
- void set(float matrixMaxScale, float strokeWidth) {
- fParametricIntolerance = CalcParametricIntolerance(matrixMaxScale);
- fNumRadialSegmentsPerRadian =
- .5f / acosf(std::max(1 - 2/(fParametricIntolerance * strokeWidth), -1.f));
+ static Tolerances MakeNonHairline(float matrixMaxScale, float strokeWidth) {
+ SkASSERT(strokeWidth > 0);
+ float parametricIntolerance = CalcParametricIntolerance(matrixMaxScale);
+ return {parametricIntolerance,
+ CalcNumRadialSegmentsPerRadian(parametricIntolerance, strokeWidth)};
}
- // Decides the number of parametric segments the tessellator adds for each curve. (Uniform
- // steps in parametric space.) The tessellator will add enough parametric segments so that,
- // once transformed into device space, they never deviate by more than
- // 1/GrTessellationPathRenderer::kLinearizationIntolerance pixels from the true curve.
float fParametricIntolerance;
- // Decides the number of radial segments the tessellator adds for each curve. (Uniform steps
- // in tangent angle.) The tessellator will add this number of radial segments for each
- // radian of rotation in local path space.
float fNumRadialSegmentsPerRadian;
};
diff --git a/tests/GrVxTest.cpp b/tests/GrVxTest.cpp
index f7e05a9..829fcef 100644
--- a/tests/GrVxTest.cpp
+++ b/tests/GrVxTest.cpp
@@ -43,7 +43,7 @@
static bool check_approx_acos(skiatest::Reporter* r, float x, float approx_acos_x) {
float acosf_x = acosf(x);
float error = acosf_x - approx_acos_x;
- if (!(fabsf(error) <= GRVX_FAST_ACOS_MAX_ERROR)) {
+ if (!(fabsf(error) <= GRVX_APPROX_ACOS_MAX_ERROR)) {
ERRORF(r, "Larger-than-expected error from grvx::approx_acos\n"
" x= %f\n"
" approx_acos_x= %f (%f degrees\n"
@@ -52,7 +52,7 @@
" tolerance= %f (%f degrees)\n\n",
x, approx_acos_x, SkRadiansToDegrees(approx_acos_x), acosf_x,
SkRadiansToDegrees(acosf_x), error, SkRadiansToDegrees(error),
- GRVX_FAST_ACOS_MAX_ERROR, SkRadiansToDegrees(GRVX_FAST_ACOS_MAX_ERROR));
+ GRVX_APPROX_ACOS_MAX_ERROR, SkRadiansToDegrees(GRVX_APPROX_ACOS_MAX_ERROR));
return false;
}
return true;
@@ -146,7 +146,7 @@
float approxTheta) {
float expectedTheta = precise_angle_between_vectors(a, b);
float error = expectedTheta - approxTheta;
- if (!(fabsf(error) <= GRVX_FAST_ACOS_MAX_ERROR + SK_ScalarNearlyZero)) {
+ if (!(fabsf(error) <= GRVX_APPROX_ACOS_MAX_ERROR + SK_ScalarNearlyZero)) {
int expAx = SkFloat2Bits(a.fX) >> 23 & 0xff;
int expAy = SkFloat2Bits(a.fY) >> 23 & 0xff;
int expBx = SkFloat2Bits(b.fX) >> 23 & 0xff;
@@ -162,8 +162,8 @@
" tolerance= %f (%f degrees)\n\n",
a.fX, a.fY, b.fX, b.fY, expAx, expAy, expBx, expBy, approxTheta,
SkRadiansToDegrees(approxTheta), expectedTheta, SkRadiansToDegrees(expectedTheta),
- error, SkRadiansToDegrees(error), GRVX_FAST_ACOS_MAX_ERROR,
- SkRadiansToDegrees(GRVX_FAST_ACOS_MAX_ERROR));
+ error, SkRadiansToDegrees(error), GRVX_APPROX_ACOS_MAX_ERROR,
+ SkRadiansToDegrees(GRVX_APPROX_ACOS_MAX_ERROR));
return false;
}
return true;
diff --git a/tests/StrokeIndirectTest.cpp b/tests/StrokeIndirectTest.cpp
index 9b67b66..e2fa51b 100644
--- a/tests/StrokeIndirectTest.cpp
+++ b/tests/StrokeIndirectTest.cpp
@@ -16,6 +16,8 @@
#include "src/gpu/tessellate/GrTessellationPathRenderer.h"
#include "src/gpu/tessellate/GrWangsFormula.h"
+using Tolerances = GrStrokeTessellateShader::Tolerances;
+
static sk_sp<GrDirectContext> make_mock_context() {
GrMockOptions mockOptions;
mockOptions.fDrawInstancedSupport = true;
@@ -270,7 +272,7 @@
const SkMatrix& viewMatrix,
const SkPath& path,
const SkStrokeRec& stroke) {
- GrStrokeTessellateShader::Tolerances tolerances(viewMatrix.getMaxScale(), stroke.getWidth());
+ auto tolerances = Tolerances::MakeNonHairline(viewMatrix.getMaxScale(), stroke.getWidth());
int8_t resolveLevelForCircles = SkTPin<float>(
sk_float_nextlog2(tolerances.fNumRadialSegmentsPerRadian * SK_ScalarPI),
1, kMaxResolveLevel);
@@ -439,7 +441,7 @@
};
auto instance = static_cast<const IndirectInstance*>(target->peekStaticVertexData());
auto* indirect = static_cast<const GrDrawIndirectCommand*>(target->peekStaticIndirectData());
- GrStrokeTessellateShader::Tolerances tolerances(viewMatrix.getMaxScale(), stroke.getWidth());
+ auto tolerances = Tolerances::MakeNonHairline(viewMatrix.getMaxScale(), stroke.getWidth());
float tolerance = test_tolerance(stroke.getJoin());
for (int i = 0; i < fChainedDrawIndirectCount; ++i) {
int numExtraEdgesInJoin = (stroke.getJoin() == SkPaint::kMiter_Join) ? 4 : 3;