| /* |
| * Copyright 2026 Google LLC |
| * |
| * Use of this source code is governed by a BSD-style license that can be |
| * found in the LICENSE file. |
| */ |
| |
| #ifndef skgpu_graphite_Tiler_DEFINED |
| #define skgpu_graphite_Tiler_DEFINED |
| |
| #include "include/private/SkAssert.h" |
| #include "include/private/SkTDArray.h" |
| #include "src/gpu/graphite/sparse_strips/Polyline.h" |
| #include "src/gpu/graphite/sparse_strips/SparseStripsTypes.h" |
| |
| #include <cmath> |
| #include <cstdint> |
| #include <limits> |
| #include <string> |
| |
| namespace skgpu::graphite { |
| |
| // Helper struct for the intersection bits. These will be consolidated into a header in the future. |
| struct IntersectionBits { |
| // TODO (thomsmit): reorder these into more orthodox LRTB |
| // |
| // Generally speaking, these record which edges a line "touches" within its tile, and whether |
| // the tile contributes to coarse winding. However, due to tie-breaking for corner cases and |
| // differing inclusive/exclusive rules for lines ending exactly on tile boundaries, these do not |
| // always represent literal touches. Similarly, while coarse Winding and a Top edge touch are |
| // related, they are not always identical. Ultimately, these bits are a driver for |
| // rasterization, not an intuitive description of the line. |
| static constexpr uint32_t T = 0b00001; |
| static constexpr uint32_t B = 0b00010; |
| static constexpr uint32_t L = 0b00100; |
| static constexpr uint32_t R = 0b01000; |
| static constexpr uint32_t W = 0b10000; |
| |
| static constexpr uint32_t BOT_SHIFT = 1; |
| static constexpr uint32_t LEFT_SHIFT = 2; |
| static constexpr uint32_t RIGHT_SHIFT = 3; |
| static constexpr uint32_t WINDING_SHIFT = 4; |
| static constexpr uint32_t INT_MASK_SHIFT = 5; |
| |
| static constexpr uint32_t INTERSECTION_MASK = W | R | L | B | T; |
| static constexpr uint32_t MAX_LINES_PER_PATH = 1 << (32 - INT_MASK_SHIFT); |
| |
| static std::string maskToString(uint32_t mask) { |
| std::string s; |
| if (mask & W) s += "W"; |
| if (mask & T) s += (s.empty() ? "T" : " | T"); |
| if (mask & B) s += (s.empty() ? "B" : " | B"); |
| if (mask & L) s += (s.empty() ? "L" : " | L"); |
| if (mask & R) s += (s.empty() ? "R" : " | R"); |
| if (mask == 0) return "empty"; |
| return s; |
| } |
| }; |
| |
| // A sparse strips tile, contains: |
| // 1) The top left corner of the tile in device space, in tile sized units. |
| // 2) The coarse winding (W) of the tile |
| // 3) The intersection mask (R | L | T | B) of the tile |
| // 4) The index of their parent line in the array backing polyline |
| struct Tile { |
| uint16_t x; |
| uint16_t y; |
| /// Contains the parent line's index and the intersection/winding mask. |
| /// MSB LSB |
| /// 31------------------------------------------------------5|4|3|2|1|0| |
| /// | Parent Line Index (27 bits) |W|R|L|B|T| |
| uint32_t fPackedLineIdxIntersectionMask; |
| |
| Tile() : x(0), y(0), fPackedLineIdxIntersectionMask(0) {} |
| |
| Tile(uint16_t x, uint16_t y, uint32_t lineIdx, uint32_t intersectionMask) |
| : x(x) |
| , y(y) |
| , fPackedLineIdxIntersectionMask((lineIdx << IntersectionBits::INT_MASK_SHIFT) | |
| intersectionMask) { |
| SkASSERT(intersectionMask < (1 << IntersectionBits::INT_MASK_SHIFT)); |
| } |
| |
| // When sorting we place y in the highest bits followed by x, so that tiles are grouped by row, |
| // left to right. Note, lineIdx is placed in the lower bits of the sorting key so that tiles |
| // from the same parent line at the same geometric location are adjacent and have better cache |
| // locality when processed in makeStrips. |
| // |
| // TODO (thomsmit): try holding as u64 and converting for member access instead of converting |
| // for sorting. |
| SK_ALWAYS_INLINE uint64_t toBits() const { |
| return (static_cast<uint64_t>(y) << 48) | (static_cast<uint64_t>(x) << 32) | |
| static_cast<uint64_t>(fPackedLineIdxIntersectionMask); |
| } |
| |
| bool operator<(const Tile& other) const { return toBits() < other.toBits(); } |
| |
| bool operator==(const Tile& other) const { return toBits() == other.toBits(); } |
| |
| uint32_t lineIdx() const { |
| return fPackedLineIdxIntersectionMask >> IntersectionBits::INT_MASK_SHIFT; |
| } |
| uint32_t intersectionMask() const { |
| return fPackedLineIdxIntersectionMask & IntersectionBits::INTERSECTION_MASK; |
| } |
| }; |
| |
| // Make sure the struct is a size convenient to move around, as tiles will be sorted. |
| static_assert(sizeof(Tile) == 8); |
| |
| // Container that holds the tiles and manages their state (sorting). Templated to expose the tile |
| // size as statically tunable parameter which can be tested within the same compilation unit |
| template <uint16_t kTileWidth, uint16_t kTileHeight> |
| class Tiles : public IntersectionBits { |
| public: |
| Tiles() {} |
| |
| void reset() { |
| fTileBuf.clear(); |
| } |
| |
| void sortTiles(int32_t offset) { |
| std::sort(fTileBuf.begin() + offset, fTileBuf.end()); |
| } |
| |
| void sortTiles() { |
| sortTiles(0); |
| } |
| |
| const SkTDArray<Tile>& getTiles() const { return fTileBuf; } |
| |
| // This function has two purposes: |
| // 1) The viewport is divided into tiles, whose dimensions are given by the template |
| // parameters. For each line produced by flattening---contained inside Polyline---find which |
| // tiles this line intersects. Tiles act as a "coarse rasterization stage," which elides |
| // carrying a scanline for each MSAA subsample point. (Without this technique, we would |
| // require 64 scanlines for an 8 tall tile with 8xMSAA). Tile edge touches are vertical |
| // exclusive, horizontal inclusive. This is because: A) In the vertical case, inclusivity |
| // would cause the coarse winding to be double counted or negated by the single-point tile |
| // produced by the touch, and the tile produced by the succeeding line B) In the horizontal |
| // case, the inclusivity is necessary because the tile produced by the succeeding line will |
| // not consider itself left touching, and so the single-point tile is necessary to carry |
| // over the left-edge winding. |
| // |
| // 2) To enable parallel rasterization, we need to establish a source of truth for the line |
| // intersection points on tiles, such that adjacent tiles agree where intersections occur. |
| // While calculating exact intersection coordinates here is feasible, it is (relatively) |
| // computationally expensive, so instead, we defer the heavy math to the GPU/rasterizer and |
| // produce a lightweight intersection bitmask. This mask unambiguously defines which edges |
| // of a tile a line segment touches: |
| // |
| // Bit representation: |
| // Bit: 4 | 3 | 2 | 1 | 0 |
| // Val: W | R | L | B | T |
| // |
| // - W (Winding): Tracks whether the line touched the top edge of the tile. |
| // - R/L/B/T: Right, Left, Bottom, and Top edge intersections. |
| void makeTilesMSAA(const Polyline& polyline, uint16_t viewportWidth, uint16_t viewportHeight) { |
| SkASSERT(polyline.count() <= MAX_LINES_PER_PATH); |
| |
| if (viewportWidth == 0 || viewportHeight == 0) { |
| return; |
| } |
| |
| // Will never underflow |
| uint16_t tileColumns = DivCeil(viewportWidth, kTileWidth) - 1; |
| uint16_t tileRows = DivCeil(viewportHeight, kTileHeight); |
| |
| constexpr float invW = 1.0f / static_cast<float>(kTileWidth); |
| constexpr float invH = 1.0f / static_cast<float>(kTileHeight); |
| |
| for (auto it = polyline.begin(); it != polyline.end(); ++it) { |
| auto [line, lineIdx] = *it; |
| |
| // map line into tile units |
| float p0X = line.p0.fX * invW; |
| float p0Y = line.p0.fY * invH; |
| float p1X = line.p1.fX * invW; |
| float p1Y = line.p1.fY * invH; |
| |
| float lineLeftX, lineRightX; |
| if (p0X < p1X) { |
| lineLeftX = p0X; |
| lineRightX = p1X; |
| } else { |
| lineLeftX = p1X; |
| lineRightX = p0X; |
| } |
| |
| // If the leftmost point of this line is right of the viewport, cull it. Although we |
| // cull path verbs right of the viewport in the flattening stage, a right edge crossing |
| // path verb may still be flattened into lines, some of which may be completely outside |
| // of the viewport. |
| if (lineLeftX >= static_cast<float>(tileColumns + 1)) { |
| // Note, lineLeftX > tileColumns is NOT equiavalent here, and so the + 1 cannot be |
| // dropped |
| continue; |
| } |
| |
| float lineTopY, lineTopX, lineBottomY, lineBottomX; |
| if (p0Y < p1Y) { |
| lineTopY = p0Y; |
| lineTopX = p0X; |
| lineBottomY = p1Y; |
| lineBottomX = p1X; |
| } else { |
| lineTopY = p1Y; |
| lineTopX = p1X; |
| lineBottomY = p0Y; |
| lineBottomX = p0X; |
| } |
| |
| uint16_t yTopTiles = std::min(f32ToU16Sat(lineTopY), tileRows); |
| float lineBottomYCeil = std::ceil(lineBottomY); |
| uint16_t yBottomTiles = std::min(f32ToU16Sat(lineBottomYCeil), tileRows); |
| |
| // If yTopTiles == yBottomTiles, then the line is either completely above or below the |
| // viewport OR it is perfectly horizontal and aligned to the tile grid, contributing no |
| // winding. In either case, it should be culled. |
| if (yTopTiles >= yBottomTiles) { |
| // Technically, the `>` part of the `>=` is unnecessary due to clamping, but this |
| // gives stronger signal. |
| continue; |
| } |
| |
| int32_t p0TileX = static_cast<int32_t>(std::floor(lineTopX)); |
| int32_t p0TileY = static_cast<int32_t>(std::floor(lineTopY)); |
| int32_t p1TileX = static_cast<int32_t>(std::floor(lineBottomX)); |
| int32_t p1TileY; |
| if (lineBottomY == lineBottomYCeil) { |
| p1TileY = static_cast<int32_t>(lineBottomY) - 1; |
| } else { |
| p1TileY = static_cast<int32_t>(std::floor(lineBottomY)); |
| } |
| |
| // Each line processed falls into 1 of three categories: |
| // 1) The line produces a single tile. |
| // 2) The line is perfectly vertical. |
| // 3) The line is neither 1 or 2 (general case) |
| bool notSameTile = (p0TileY != p1TileY) || (p0TileX != p1TileX); |
| if (notSameTile) { |
| if (lineLeftX == lineRightX) { // Vertical line case |
| uint16_t x = std::min(f32ToU16Sat(lineLeftX), tileColumns); |
| |
| // Process the Top Row (if visible on screen) |
| uint16_t yStart = yTopTiles; |
| bool isStartCulled = (lineTopY < 0.0f); |
| if (!isStartCulled) { |
| uint32_t winding = (static_cast<float>(yTopTiles) >= lineTopY) ? W : 0; |
| uint32_t intersectionMask = B | winding; |
| fTileBuf.push_back(Tile(x, yStart, lineIdx, intersectionMask)); |
| yStart++; |
| } |
| |
| // Process all "fully crossed" tiles (W | T | B). |
| int32_t yEndIdx = std::min(p1TileY, static_cast<int32_t>(tileRows)); |
| for (int32_t yIdx = yStart; yIdx < yEndIdx; ++yIdx) { |
| uint32_t intersectionMask = W | T | B; |
| fTileBuf.push_back( |
| Tile(x, static_cast<uint16_t>(yIdx), lineIdx, intersectionMask)); |
| } |
| |
| // Process the terminal tile (W | T), if it exists. We only emit this if the |
| // line actually terminates on the screen, and if we haven't already |
| // processed/culled it via `yStart`. |
| if (p1TileY >= yStart && p1TileY < static_cast<int32_t>(tileRows)) { |
| uint32_t intersectionMask = W | T; |
| fTileBuf.push_back( |
| Tile(x, static_cast<uint16_t>(p1TileY), lineIdx, intersectionMask)); |
| } |
| } else { // General case |
| float dx = p1X - p0X; |
| float dy = p1Y - p0Y; |
| float xSlope = dx / dy; |
| |
| // Package for the helper functions, with inlining this should be zero cost. |
| // Changing to a class member regresses perf by ~10% on TilerSortBench |
| LineContext ctx { |
| static_cast<uint32_t>(lineIdx), |
| lineTopX, lineTopY, |
| lineBottomX, lineBottomY, |
| xSlope, |
| lineLeftX, lineRightX, |
| p0TileX, p0TileY, |
| p1TileX, p1TileY, |
| tileColumns |
| }; |
| |
| if (lineBottomX > lineTopX) { // cannot be equal at this point |
| runLoops</*kXDir=*/true>(ctx, lineTopY, lineBottomY, yTopTiles, tileRows); |
| } else { |
| runLoops</*kXDir=*/false>(ctx, lineTopY, lineBottomY, yTopTiles, tileRows); |
| } |
| } |
| } else { // Single tile case |
| uint16_t xClamped = std::min(f32ToU16Sat(lineLeftX), tileColumns); |
| uint32_t winding = static_cast<float>(yTopTiles) >= lineTopY ? W : 0; |
| fTileBuf.push_back(Tile(xClamped, yTopTiles, lineIdx, winding)); |
| } |
| } |
| } |
| |
| private: |
| // For now, only support square tiles. |
| static_assert(kTileWidth == kTileHeight); |
| |
| struct LineContext { |
| uint32_t lineIdx; |
| float topX, topY; |
| float bottomX, bottomY; |
| float xSlope; |
| float lineLeftX, lineRightX; |
| int32_t p0TileX, p0TileY; |
| int32_t p1TileX, p1TileY; |
| uint16_t tileColumns; |
| }; |
| |
| SkTDArray<Tile> fTileBuf; |
| |
| static SK_ALWAYS_INLINE uint16_t DivCeil(uint16_t a, uint16_t b) { |
| return (a + b - 1) / b; |
| } |
| |
| static SK_ALWAYS_INLINE uint16_t f32ToU16Sat(float v) { |
| // std::clamp will catch +/- inf here, but not NaN. However we should never get NaN here. |
| SkASSERT(!std::isnan(v)); |
| return static_cast<uint16_t>(std::clamp(v, 0.0f, 65535.0f)); |
| } |
| |
| template<bool kXDir> |
| SK_ALWAYS_INLINE void pushEdge(const LineContext& ctx, |
| uint16_t xIdx, |
| uint16_t y, |
| float rowTopX, |
| float rowBottomX, |
| int32_t canonicalStart, |
| uint16_t canonicalEnd, |
| uint32_t windingInput, |
| bool checkStart, |
| bool checkEnd) { |
| // Determine whether this tile is the true start or end of the line within this horizontal |
| // row. We need to account for clamping because the line may start or end off-screen (e.g., |
| // X = -5), but we clamp X to the viewport. |
| uint32_t uncRowStart = static_cast<uint32_t>(static_cast<int32_t>(xIdx) == canonicalStart); |
| uint32_t uncRowEnd = static_cast<uint32_t>(xIdx == canonicalEnd); |
| |
| // Relativize the start/end based on line direction |
| uint32_t canonicalRowStart = kXDir ? uncRowStart : uncRowEnd; |
| uint32_t canonicalRowEnd = kXDir ? uncRowEnd : uncRowStart; |
| |
| // Mask out the Top/Bottom bits if this tile contains the line endpoints. |
| uint32_t notStartTile = 1; |
| if (checkStart) { |
| notStartTile ^= static_cast<uint32_t>((static_cast<int32_t>(xIdx) == ctx.p0TileX) && |
| (static_cast<int32_t>(y) == ctx.p0TileY)); |
| } |
| |
| uint32_t notEndTile = 1; |
| if (checkEnd) { |
| notEndTile ^= static_cast<uint32_t>((static_cast<int32_t>(xIdx) == ctx.p1TileX) && |
| (static_cast<int32_t>(y) == ctx.p1TileY)); |
| } |
| |
| uint32_t mask = windingInput; |
| // If this tile is the start of the row, the line must have entered through the Top edge. |
| // (Unless it's the tile start). |
| mask |= canonicalRowStart & notStartTile; |
| // If this tile is the end of the row, the line must have exited through the Bottom edge. |
| // (Unless it's the global end tile). |
| mask |= (canonicalRowEnd & notEndTile) << BOT_SHIFT; |
| |
| // If a tile is NOT the start of the row, it must have been entered horizontally. If it is |
| // NOT the end, it must be exited horizontally. Base L/R on the direction of the line. |
| if constexpr (kXDir) { |
| mask |= (1 ^ canonicalRowStart) << LEFT_SHIFT; |
| mask |= (1 ^ canonicalRowEnd) << RIGHT_SHIFT; |
| } else { |
| mask |= (1 ^ canonicalRowStart) << RIGHT_SHIFT; |
| mask |= (1 ^ canonicalRowEnd) << LEFT_SHIFT; |
| } |
| |
| // Corner handling |
| float xLeftF = static_cast<float>(xIdx); |
| float xRightF = static_cast<float>(xIdx + 1); |
| uint32_t trc = static_cast<uint32_t>(rowTopX == xRightF) & notStartTile; |
| uint32_t tlc = static_cast<uint32_t>(rowTopX == xLeftF) & notStartTile; |
| uint32_t brc = static_cast<uint32_t>(rowBottomX == xRightF) & notEndTile; |
| uint32_t blc = static_cast<uint32_t>(rowBottomX == xLeftF) & notEndTile; |
| |
| // If the line hits the exact Top-Left corner, but it is NOT the canonical start of the row, |
| // we must treat it as a Left intersection to properly bridge the mask to the adjacent tile. |
| uint32_t tieBreak = tlc & (canonicalRowStart ^ 1); |
| |
| // Force corners into into purely horizontal intersections. This makes the downstream |
| // intersection calculation logic simpler. |
| mask |= (tieBreak | blc) << LEFT_SHIFT; |
| mask |= (trc | brc) << RIGHT_SHIFT; |
| mask &= ~(tieBreak | trc); |
| mask &= ~((blc | brc) << BOT_SHIFT); |
| |
| fTileBuf.push_back(Tile(xIdx, y, ctx.lineIdx, mask)); |
| } |
| |
| template<bool kXDir> |
| SK_ALWAYS_INLINE void processRow(const LineContext& ctx, |
| uint16_t yIdx, |
| float rowTopX, |
| float rowBottomX, |
| uint32_t wMask, |
| bool checkStart, |
| bool checkEnd) { |
| float lx = std::fmin(rowTopX, rowBottomX); |
| float rx = std::fmax(rowTopX, rowBottomX); |
| |
| // Convert floating-point boundaries into discrete integer tile indices. Note: |
| // `canonicalXStart` preserves the true start (even if negative) before clamping, which is |
| // required by `pushEdge` to tie-break in some cases. |
| int32_t canonicalXStart = static_cast<int32_t>(std::floor(lx)); |
| uint16_t canonicalXEnd = f32ToU16Sat(rx); |
| uint16_t xStart = f32ToU16Sat(lx); |
| // Clamp the end of the row to the right viewport if necessary. |
| uint16_t xEndVal = std::min(canonicalXEnd, ctx.tileColumns); |
| |
| if (xStart <= xEndVal) { |
| // Process the Leftmost Tile of the row. If this is the *only* tile in the row, |
| // `isSingle`, is both the row start and row end so the Winding (W) bit is passed |
| // regardless of direction. |
| bool isSingle = xStart == xEndVal; |
| uint32_t wLeft = (kXDir || isSingle ? W : 0) & wMask; |
| pushEdge<kXDir>(ctx, xStart, yIdx, rowTopX, rowBottomX, canonicalXStart, canonicalXEnd, |
| wLeft, checkStart, checkEnd); |
| |
| // Process all captive "Middle" tiles in this row. These tiles never have vertical |
| // crossings, and for the purpose of the intersection mask, are identical as they always |
| // recieve [R | L]. Bulk append with `inner count.` |
| int innerCount = static_cast<int>(xEndVal) - static_cast<int>(xStart) - 1; |
| if (innerCount > 0) { |
| uint32_t innerMask = R | L; |
| Tile* outTiles = fTileBuf.append(innerCount); |
| for (int i = 0; i < innerCount; ++i) { |
| outTiles[i] = Tile(xStart + 1 + i, yIdx, ctx.lineIdx, innerMask); |
| } |
| } |
| |
| // Process the Rightmost Tile of the row. Emitted only if the row spans more than one |
| // tile (i.e., we haven't already processed this exact tile as the Leftmost Tile). |
| if (xStart < xEndVal) { |
| uint32_t wRight = (kXDir ? 0 : W) & wMask; |
| pushEdge<kXDir>(ctx, xEndVal, yIdx, rowTopX, rowBottomX, canonicalXStart, |
| canonicalXEnd, wRight, checkStart, checkEnd); |
| } |
| } |
| } |
| |
| template<bool kXDir> |
| SK_ALWAYS_INLINE void runLoops(const LineContext& ctx, |
| float lineTopY, |
| float lineBottomY, |
| uint16_t yTopTiles, |
| uint16_t tileRows) { |
| // Process the Top Row (if visible on screen) |
| uint16_t yStart = yTopTiles; |
| bool isStartCulled = (lineTopY < 0.0f); |
| if (!isStartCulled) { |
| float y = static_cast<float>(yStart); |
| float rowBottomY = std::min(y + 1.0f, lineBottomY); |
| // Catch perfectly horizontal lines and/or prevent floating point drift |
| float rowBottomX = (rowBottomY == ctx.bottomY) |
| ? ctx.bottomX |
| : ctx.topX + (rowBottomY - ctx.topY) * ctx.xSlope; |
| uint32_t mask = y >= lineTopY ? W : 0; |
| // The top row might ALSO be the bottom row, so checkEnd = true |
| processRow<kXDir>(ctx, yStart, ctx.topX, rowBottomX, mask, |
| /*checkStart=*/true, /*checkEnd=*/true); |
| yStart++; |
| } |
| |
| // Process all "Middle" fully crossed rows; the tiles cannot be the start or the end |
| int32_t yEndIdx = std::min(ctx.p1TileY, static_cast<int32_t>(tileRows)); |
| for (int32_t yIdx = yStart; yIdx < yEndIdx; ++yIdx) { |
| float y = static_cast<float>(yIdx); |
| // Although this seems like duplicate calculation, finding the intersections |
| // independently allows the entire loop to auto-vectorize, and is well worth it. |
| float rowTopX = ctx.topX + (y - ctx.topY) * ctx.xSlope; |
| float rowBottomX = ctx.topX + (y + 1.0f - ctx.topY) * ctx.xSlope; |
| processRow<kXDir>(ctx, yIdx, rowTopX, rowBottomX, 0xffffffff, |
| /*checkStart=*/false, /*checkEnd=*/false); |
| } |
| |
| // Process the Terminal Row, if it exists. I.e. if it's on-screen AND wasn't already |
| // processed as the Top Row. |
| if (ctx.p1TileY >= yStart && ctx.p1TileY < static_cast<int32_t>(tileRows)) { |
| float y = static_cast<float>(ctx.p1TileY); |
| // No guard is necessary here against horizontal lines, as a horizontal line would |
| // have been processed as a starting row. |
| float rowTopX = ctx.topX + (y - ctx.topY) * ctx.xSlope; |
| // No need to check start (we are past it), but must check end. |
| processRow<kXDir>(ctx, ctx.p1TileY, rowTopX, ctx.bottomX, 0xffffffff, |
| /*checkStart=*/false, /*checkEnd=*/true); |
| } |
| } |
| }; |
| |
| } // namespace skgpu::graphite |
| |
| #endif // skgpu_graphite_Tiler_DEFINED |