Optimize stroke tessellation patch allocation
This speeds up the microbench by ~20% (620us -> 502us).
Bug: chromium:1172543
Change-Id: I926db1d8eec9bbf053cc3379053424f79aea3445
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/373763
Reviewed-by: John Stiles <johnstiles@google.com>
Commit-Queue: Chris Dalton <csmartdalton@google.com>
diff --git a/src/gpu/tessellate/GrStrokeHardwareTessellator.cpp b/src/gpu/tessellate/GrStrokeHardwareTessellator.cpp
index 23623d0..aeed0d2 100644
--- a/src/gpu/tessellate/GrStrokeHardwareTessellator.cpp
+++ b/src/gpu/tessellate/GrStrokeHardwareTessellator.cpp
@@ -70,12 +70,14 @@
// Pre-allocate at least enough vertex space for 1 in 4 strokes to chop, and for 8 caps.
int strokePreallocCount = totalCombinedVerbCnt * 5/4;
int capPreallocCount = 8;
- this->allocPatchChunkAtLeast(strokePreallocCount + capPreallocCount);
+ fNextChunkMinPatchAllocCount = strokePreallocCount + capPreallocCount;
}
~PatchWriter() {
- fTarget->putBackVertices(fCurrChunkPatchCapacity - fPatchChunks->back().fPatchCount,
- fPatchStride);
+ if (!fPatchChunks->empty()) {
+ fTarget->putBackVertices(fCurrChunkPatchCapacity - fCurrChunkPatchCount, fPatchStride);
+ fPatchChunks->back().fPatchCount = fCurrChunkPatchCount;
+ }
}
// This is the intolerance value, adjusted for the view matrix, to use with Wang's formulas when
@@ -586,7 +588,7 @@
fLastControlPoint = nextControlPoint;
}
- void writeDynamicAttribs() {
+ SK_ALWAYS_INLINE void writeDynamicAttribs() {
if (fShaderFlags & ShaderFlags::kDynamicStroke) {
fPatchWriter.write(fDynamicStroke);
}
@@ -595,29 +597,34 @@
}
}
- bool allocPatch() {
- if (fPatchChunks->back().fPatchCount >= fCurrChunkPatchCapacity) {
- // The current chunk is full. Time to allocate a new one. (And no need to put back
- // vertices; the buffer is full.)
- this->allocPatchChunkAtLeast(fCurrChunkMinPatchAllocCount * 2);
- }
- if (!fPatchWriter.isValid()) {
- SkDebugf("WARNING: Failed to allocate vertex buffer for tessellated stroke.");
+ SK_ALWAYS_INLINE bool allocPatch() {
+ if (fCurrChunkPatchCount == fCurrChunkPatchCapacity && !this->allocPatchChunk()) {
return false;
}
- SkASSERT(fPatchChunks->back().fPatchCount <= fCurrChunkPatchCapacity);
- ++fPatchChunks->back().fPatchCount;
+ SkASSERT(fCurrChunkPatchCount < fCurrChunkPatchCapacity);
+ ++fCurrChunkPatchCount;
return true;
}
- void allocPatchChunkAtLeast(int minPatchAllocCount) {
- SkASSERT(fTarget);
+ bool allocPatchChunk() {
+ if (!fPatchChunks->empty()) {
+ fPatchChunks->back().fPatchCount = fCurrChunkPatchCount;
+ // No need to put back vertices; the buffer is full.
+ }
+ fCurrChunkPatchCount = 0;
PatchChunk* chunk = &fPatchChunks->push_back();
- fPatchWriter = {fTarget->makeVertexSpaceAtLeast(fPatchStride, minPatchAllocCount,
- minPatchAllocCount, &chunk->fPatchBuffer,
- &chunk->fBasePatch,
+ fPatchWriter = {fTarget->makeVertexSpaceAtLeast(fPatchStride, fNextChunkMinPatchAllocCount,
+ fNextChunkMinPatchAllocCount,
+ &chunk->fPatchBuffer, &chunk->fBasePatch,
&fCurrChunkPatchCapacity)};
- fCurrChunkMinPatchAllocCount = minPatchAllocCount;
+ if (!fPatchWriter.isValid()) {
+ SkDebugf("WARNING: Failed to allocate vertex buffer for tessellated stroke.\n");
+ fPatchChunks->pop_back();
+ fCurrChunkPatchCapacity = 0;
+ return false;
+ }
+ fNextChunkMinPatchAllocCount *= 2;
+ return true;
}
const ShaderFlags fShaderFlags;
@@ -645,8 +652,9 @@
bool fSoloRoundJoinAlwaysFitsInPatch;
// Variables related to the patch chunk that we are currently writing out during prepareBuffers.
- int fCurrChunkPatchCapacity;
- int fCurrChunkMinPatchAllocCount;
+ int fCurrChunkPatchCount = 0;
+ int fCurrChunkPatchCapacity = 0;
+ int fNextChunkMinPatchAllocCount;
GrVertexWriter fPatchWriter;
// Variables related to the specific contour that we are currently iterating during