blob: 8fe439a002b8e9206a18b93916440f3c6548bc2b [file] [log] [blame]
/*
* Copyright 2024 Rive
*/
#include "intersection_board.hpp"
#include "rive/math/math_types.hpp"
#if !SIMD_NATIVE_GVEC && \
(defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64))
// MSVC doesn't get codegen for the inner loop. Provide direct SSE intrinsics.
#include <emmintrin.h>
#define FALLBACK_ON_SSE2_INTRINSICS
#else
#endif
namespace rive::gpu
{
void IntersectionTile::reset(int left,
int top,
int16_t baselineGroupIndex,
uint16_t baselineOverlapBits)
{
// Since we mask non-intersecting groupIndices to zero, the "mask and max"
// algorithm is only correct for positive values. (baselineGroupIndex is
// only signed because SSE doesn't have an unsigned max instruction.)
assert(baselineGroupIndex >= 0);
m_topLeft = {left, top, left, top};
m_baselineGroupIndex = baselineGroupIndex;
m_maxGroupIndex = baselineGroupIndex;
m_baselineOverlapBits = baselineOverlapBits;
m_overlapBitsForMaxGroup = baselineOverlapBits;
m_edges.clear();
m_groupIndices.clear();
m_overlapBits.clear();
m_rectangleCount = 0;
}
template <GroupingType Type>
void IntersectionTile::addRectangle(int4 ltrb,
int16_t groupIndex,
uint16_t currentRectangleOverlapBits)
{
assert(simd::all(ltrb.xy < ltrb.zw)); // Ensure ltrb isn't zero or negative.
assert(groupIndex >= 0);
if constexpr (Type == GroupingType::overlapAllowed)
{
if (m_overlapBits.size() != m_groupIndices.size())
{
// We did not previously have any overlap bits and now we're about
// to - initialize the array with zeros (everything already in the
// intersection tile has zeros as its overlap bits, or else the
// boardContainsNonzeroOverlapBits flag would have already been
// set).
assert(m_overlapBits.empty());
m_overlapBits.resize(m_groupIndices.size());
}
}
else
{
static_assert(Type == GroupingType::disjoint);
// Shouldn't pass any overlap bits in if we indicated they weren't
// relevant
assert(currentRectangleOverlapBits == 0);
}
#if !defined(NDEBUG)
// Ensure this rectangle preserves the integrity of our list.
{
auto r = findMaxIntersectingGroupIndex<Type>(ltrb, {});
auto maxGroup = simd::reduce_max(r.maxGroupIndices);
if constexpr (Type == GroupingType::overlapAllowed)
{
// If overlapping is allowed, this rectangle can be in the same
// group as another it overlaps, but cannot be below it.
assert(groupIndex >= maxGroup);
assert(groupIndex >= m_baselineGroupIndex);
}
else
{
// If no overlapping is allowed, this rectangle cannot be at or
// below a group it overlaps.
assert(groupIndex > maxGroup);
assert(groupIndex > m_baselineGroupIndex);
}
}
#endif
// Translate ltrb to our tile and negate the right and bottom sides.
ltrb -= m_topLeft;
// right = TILE_DIM - right, bottom = TILE_DIM- bottom
ltrb.zw = TILE_DIM - ltrb.zw;
// Ensure ltrb isn't completely outside the tile.
assert(simd::all(ltrb < TILE_DIM));
ltrb = simd::max(ltrb, int4(0)); // Clamp ltrb to our tile.
if constexpr (Type == GroupingType::overlapAllowed)
{
if (groupIndex == m_baselineGroupIndex &&
(currentRectangleOverlapBits | m_baselineOverlapBits) ==
m_baselineOverlapBits)
{
// Nothing needs to be done here, this is going into the baseline
// group and it adds no new overlap bits that are not already in the
// baseline.
return;
}
}
if (simd::all(ltrb == 0))
{
// ltrb covers the entire tile
if constexpr (Type == GroupingType::overlapAllowed)
{
// This rectangle is allowed to be at or higher than the max group
// (rather than strictly above it, in the no-overlap case)
assert(groupIndex >= m_maxGroupIndex);
if (groupIndex == m_maxGroupIndex)
{
// This rectangle is within the existing max group, so do a
// heavier-handed update (some rectangles in the max group
// may still need to stay, so we can't do a reset)
updateBaselineToMaxGroupIndex<Type>(
currentRectangleOverlapBits);
return;
}
}
else
{
// If no overlapping is allowed, a full-tile rectangle had
// better be above the current max group.
assert(groupIndex > m_maxGroupIndex);
}
// Setting a new baseline, so reset everything, update the group index
// and baseline overlap
reset(m_topLeft.x,
m_topLeft.y,
groupIndex,
currentRectangleOverlapBits);
return;
}
// Append a chunk if needed, to make room for this new rectangle.
uint32_t subIdx = m_rectangleCount % CHUNK_SIZE;
if (subIdx == 0)
{
// Push back maximally negative rectangles so they always fail
// intersection tests.
assert(m_edges.size() * CHUNK_SIZE == m_rectangleCount);
m_edges.push_back(int8x32(TILE_DIM + TILE_EDGE_BIAS));
// Uninitialized since the corresponding rectangles never pass an
// intersection test.
assert(m_groupIndices.size() * CHUNK_SIZE == m_rectangleCount);
m_groupIndices.emplace_back();
if constexpr (Type == GroupingType::overlapAllowed)
{
assert(m_overlapBits.size() * CHUNK_SIZE == m_rectangleCount);
m_overlapBits.emplace_back();
}
}
// m_edges is a list of 8 rectangles encoded as
// [L, T, TILE_DIM - R, TILE_DIM - B], relative to m_topLeft. The data is
// also transposed: [L0..L7, T0..T7, -R0..R7, -B0..B7]. Bias ltrb so
// we can use int8_t. SSE doesn't have an unsigned byte compare.
int4 biased = ltrb + TILE_EDGE_BIAS;
m_edges.back()[subIdx] = biased.x;
m_edges.back()[subIdx + 1 * CHUNK_SIZE] = biased.y;
m_edges.back()[subIdx + 2 * CHUNK_SIZE] =
biased.z; // Already converted to "TILE_DIM - right" above.
m_edges.back()[subIdx + 3 * CHUNK_SIZE] =
biased.w; // Already converted to "TILE_DIM - bottom" above.
m_groupIndices.back()[subIdx] = groupIndex;
if constexpr (Type == GroupingType::overlapAllowed)
{
m_overlapBits.back()[subIdx] = currentRectangleOverlapBits;
if (groupIndex > m_maxGroupIndex)
{
// There's a new maximum group, so set it and restart its overlap
// bits.
m_maxGroupIndex = groupIndex;
m_overlapBitsForMaxGroup = currentRectangleOverlapBits;
}
else if (groupIndex == m_maxGroupIndex)
{
// This is going into the existing max group so add its overlap bits
// to the max group's tracked overlap bits
m_overlapBitsForMaxGroup |= currentRectangleOverlapBits;
}
}
else
{
// If we aren't tracking overlap bits then simply ensuring the max group
// index is correct is sufficient.
m_maxGroupIndex = std::max(m_maxGroupIndex, groupIndex);
}
++m_rectangleCount;
}
#ifdef WITH_RIVE_TOOLS
template <GroupingType Type>
void IntersectionTile::testingOnly_addRectangleAndValidate(
int4 ltrb,
int16_t groupIndex,
uint16_t currentRectangleOverlapBits)
{
addRectangle<Type>(ltrb, groupIndex, currentRectangleOverlapBits);
// Baseline should not be higher than the inserted rectangle,
// max group should not be lower.
assert(m_baselineGroupIndex <= groupIndex);
assert(m_maxGroupIndex >= groupIndex);
// Should be no baseline overlap bits if there's no baseline
// (ditto for max group)
assert(m_baselineGroupIndex != 0 || m_baselineOverlapBits == 0);
assert(m_maxGroupIndex != 0 || m_overlapBitsForMaxGroup == 0);
uint16_t foundOverlapBitsForMaxGroup;
if constexpr (Type == GroupingType::overlapAllowed)
{
foundOverlapBitsForMaxGroup = (m_baselineGroupIndex == m_maxGroupIndex)
? m_baselineOverlapBits
: 0u;
}
else
{
// Ignore this value (not using std::ignore here because it
// would require initializing the value first, which would
// mean the compiler won't catch a mistake where this gets
// used elsewhere when it shouldn't be)
(void)foundOverlapBitsForMaxGroup;
// If overlap isn't allowed this should never be allocated
assert(m_overlapBits.empty());
}
// Exhaustively check to ensure that the overlap bits and max
// group indices match our tracked values.
auto maxGroupIndexFound = m_baselineGroupIndex;
for (auto i = 0u; i < m_rectangleCount; i++)
{
auto outerI = i / CHUNK_SIZE;
auto innerI = i % CHUNK_SIZE;
assert(m_groupIndices[outerI][innerI] >= m_baselineGroupIndex);
assert(m_groupIndices[outerI][innerI] <= m_maxGroupIndex);
if constexpr (Type == GroupingType::overlapAllowed)
{
if (m_groupIndices[outerI][innerI] == m_maxGroupIndex)
{
foundOverlapBitsForMaxGroup |= m_overlapBits[outerI][innerI];
}
}
maxGroupIndexFound =
std::max(maxGroupIndexFound, m_groupIndices[outerI][innerI]);
}
if constexpr (Type == GroupingType::overlapAllowed)
{
assert(foundOverlapBitsForMaxGroup == m_overlapBitsForMaxGroup);
if (m_baselineGroupIndex == m_maxGroupIndex)
{
// max group is allowed to have more set bits than the
// baseline (as there might be non-full-tile rectangles
// at that level), but there should never be bits in
// baseline that aren't part of the max group set if
// their group indices match.
assert((m_overlapBitsForMaxGroup | m_baselineOverlapBits) ==
m_overlapBitsForMaxGroup);
}
}
else
{
assert(m_overlapBitsForMaxGroup == 0);
assert(m_baselineOverlapBits == 0);
}
assert(maxGroupIndexFound == m_maxGroupIndex);
}
#endif
template <GroupingType Type>
void IntersectionTile::updateBaselineToMaxGroupIndex(
uint16_t additionalBaselineOverlapBits)
{
// This should only be called when overlap testing is enabled. The template
// argument is really just here to make it hard to accidentally call this
// from a place where it will be wrong
static_assert(Type == GroupingType::overlapAllowed);
// We've added a full-tile rectangle at m_maxGroupIndex, which means we can
// update the baseline and its overlap bits.
if ((additionalBaselineOverlapBits | m_overlapBitsForMaxGroup) ==
additionalBaselineOverlapBits ||
m_rectangleCount == 0)
{
// This rectangle both covers the whole tile and either it contains all
// of the existing overlap bits in this group, or there are no
// existing rectangles (the max group is the already just the baseline),
// which means we can do a simple reset, as we don't need to keep any
// rectangles - even ones that are in this max group.
reset(m_topLeft.x,
m_topLeft.y,
m_maxGroupIndex,
m_overlapBitsForMaxGroup | additionalBaselineOverlapBits);
return;
}
m_overlapBitsForMaxGroup |= additionalBaselineOverlapBits;
if (m_maxGroupIndex == m_baselineGroupIndex)
{
if (m_overlapBitsForMaxGroup == m_baselineOverlapBits)
{
// This rectangle does not change the baseline or its bits in
// any way, so we're done early.
return;
}
m_baselineOverlapBits |= additionalBaselineOverlapBits;
}
else
{
m_baselineGroupIndex = m_maxGroupIndex;
m_baselineOverlapBits = additionalBaselineOverlapBits;
}
// Do a pass through the array and zero out the group indices of any
// rectangles that should no longer remain in the list (this makes the test
// in the compaction loop more efficient).
{
#if !defined(FALLBACK_ON_SSE2_INTRINSICS)
auto baselineGroupIndexVector = int16x8(m_baselineGroupIndex);
auto baselineOverlapBitsVector = uint16x8(m_baselineOverlapBits);
#else
auto baselineGroupIndexVector = _mm_set1_epi16(m_baselineGroupIndex);
auto baselineOverlapBitsVector = _mm_set1_epi16(m_baselineOverlapBits);
#endif
for (size_t i = 0; i < m_groupIndices.size(); i++)
{
#if !defined(FALLBACK_ON_SSE2_INTRINSICS)
// Groups under the baseline will go away
auto mask = m_groupIndices[i] >= baselineGroupIndexVector;
// Of those that remained, only keep the ones where the overlap bits
// have bits that aren't in the baseline's bits.
mask &= ((baselineOverlapBitsVector | m_overlapBits[i]) !=
baselineOverlapBitsVector);
m_groupIndices[i] &= mask;
#else
// The SSE2 code has to build the mask backwards from the above
// code, as there is neither a ">=" or "!=" operator.
auto groupIndices = math::bit_cast<__m128i>(m_groupIndices[i]);
auto overlapBits = math::bit_cast<__m128i>(m_overlapBits[i]);
auto mask = _mm_cmpgt_epi16(baselineGroupIndexVector, groupIndices);
mask = _mm_or_si128(
mask,
_mm_cmpeq_epi16(
baselineOverlapBitsVector,
_mm_or_si128(baselineOverlapBitsVector, overlapBits)));
// Conveniently, there is a (~mask & groupIndices) single operation
// to allow us to use the inverted mask directly.
groupIndices = _mm_andnot_si128(mask, groupIndices);
m_groupIndices[i] = math::bit_cast<int16x8>(groupIndices);
#endif
}
}
// Now, remove all of the rectangles that are no longer above the line.
// (Thankfully, ordering doesn't matter, so we don't need the partitioning
// to be stable)
// Start by scaning the first index forward until we find a rectangle that
// doesn't need to stay (or we hit the end)
size_t firstIdx = 0u;
while (m_groupIndices[firstIdx / CHUNK_SIZE][firstIdx % CHUNK_SIZE] != 0)
{
firstIdx++;
if (firstIdx == m_rectangleCount)
{
// Every rectangle stays! We don't have to do any additional
// work.
return;
}
}
size_t lastIdx = m_rectangleCount - 1;
while (lastIdx > firstIdx &&
m_groupIndices[lastIdx / CHUNK_SIZE][lastIdx % CHUNK_SIZE] == 0)
{
--lastIdx;
}
while (firstIdx < lastIdx)
{
// Now first index points to something that should go and last index
// points to something that should stay, and they're out of order. Move
// the latter element to the beginning to put it into place.
{
size_t firstOuterIdx = firstIdx / CHUNK_SIZE;
size_t firstInnerIdx = firstIdx % CHUNK_SIZE;
size_t lastOuterIdx = lastIdx / CHUNK_SIZE;
size_t lastInnerIdx = lastIdx % CHUNK_SIZE;
// Put the one that wants to stay at the first index.
m_edges[firstOuterIdx][firstInnerIdx + 0] =
m_edges[lastOuterIdx][lastInnerIdx + 0];
m_edges[firstOuterIdx][firstInnerIdx + CHUNK_SIZE] =
m_edges[lastOuterIdx][lastInnerIdx + CHUNK_SIZE];
m_edges[firstOuterIdx][firstInnerIdx + 2 * CHUNK_SIZE] =
m_edges[lastOuterIdx][lastInnerIdx + 2 * CHUNK_SIZE];
m_edges[firstOuterIdx][firstInnerIdx + 3 * CHUNK_SIZE] =
m_edges[lastOuterIdx][lastInnerIdx + 3 * CHUNK_SIZE];
m_groupIndices[firstOuterIdx][firstInnerIdx] =
m_groupIndices[lastOuterIdx][lastInnerIdx];
m_overlapBits[firstOuterIdx][firstInnerIdx] =
m_overlapBits[lastOuterIdx][lastInnerIdx];
// Update the group index at the last index to be a 0, which now
// marks it as a "this doesn't need to stay" rectangle. This allows
// it to act as a sentinel value for the loop where we increment
// firstRectangleIndex so it can avoid an extra bounds check.
m_groupIndices[lastOuterIdx][lastInnerIdx] = 0;
}
// Because of that zero, we're now guaranteed that there
// is a rectangle that should be removed to the right of the first index
// and one that should stay to the left of the last index (minimally,
// the ones we just adjusted), so these inner loops do not need to
// bounds check.
do
{
++firstIdx;
assert(firstIdx < m_rectangleCount);
} while (m_groupIndices[firstIdx / CHUNK_SIZE][firstIdx % CHUNK_SIZE] !=
0);
do
{
--lastIdx;
// NOTE: This assert seems backwards, but it's an unsigned variable
// and will wrap past zero, and thus go *very* positive.
assert(lastIdx < m_rectangleCount);
} while (m_groupIndices[lastIdx / CHUNK_SIZE][lastIdx % CHUNK_SIZE] ==
0);
}
// firstRectangleIndex now points to the first rectangle that needs to be
// removed, which means it is also our new rectangle count.
m_rectangleCount = firstIdx;
auto vectorElementCount = (m_rectangleCount + CHUNK_SIZE - 1) / CHUNK_SIZE;
m_edges.resize(vectorElementCount);
m_groupIndices.resize(vectorElementCount);
m_overlapBits.resize(vectorElementCount);
if (vectorElementCount > 0 && (m_rectangleCount % CHUNK_SIZE) != 0)
{
auto& lastEdges = m_edges.back();
auto& lastGroupIndices = m_groupIndices.back();
auto& lastOverlapBits = m_overlapBits.back();
constexpr auto DEFAULT_EDGE_EXTENT = TILE_DIM + TILE_EDGE_BIAS;
// We have a chunk with potentially some boxes that had entries that
// shouldn't anymore, so clear them.
for (auto i = (m_rectangleCount % CHUNK_SIZE); i < CHUNK_SIZE; i++)
{
lastEdges[i + 0 * CHUNK_SIZE] = DEFAULT_EDGE_EXTENT;
lastEdges[i + 1 * CHUNK_SIZE] = DEFAULT_EDGE_EXTENT;
lastEdges[i + 2 * CHUNK_SIZE] = DEFAULT_EDGE_EXTENT;
lastEdges[i + 3 * CHUNK_SIZE] = DEFAULT_EDGE_EXTENT;
lastGroupIndices[i] = 0;
lastOverlapBits[i] = 0;
}
}
}
template <GroupingType Type>
IntersectionTile::FindResult IntersectionTile::findMaxIntersectingGroupIndex(
int4 ltrb,
FindResult running) const
{
assert(simd::all(ltrb.xy < ltrb.zw)); // Ensure ltrb isn't zero or negative.
// Since we mask non-intersecting groupIndices to zero, the "mask and max"
// algorithm is only correct for positive values. (running.maxGroupIndices
// is only signed because SSE pre 4.1 doesn't have an unsigned max
// instruction.)
assert(simd::all(running.maxGroupIndices >= 0));
assert(m_baselineGroupIndex >= 0);
assert(m_maxGroupIndex >= m_baselineGroupIndex);
// Translate ltrb to our tile and negate the left and top sides.
ltrb -= m_topLeft;
// left = TILE_DIM - left, top = TILE_DIM - top
ltrb.xy = TILE_DIM - ltrb.xy;
assert(
simd::all(ltrb > 0)); // Ensure ltrb isn't completely outside the tile.
ltrb = simd::min(ltrb, int4(TILE_DIM)); // Clamp ltrb to our tile.
if (simd::all(ltrb == TILE_DIM))
{
// ltrb covers the tile -- we know it intersects with every rectangle.
if constexpr (Type == GroupingType::overlapAllowed)
{
auto currentMax = running.maxGroupIndices[0];
if (currentMax < m_maxGroupIndex)
{
// The first element's max group index is changing, so restart
// its overlap bits with the new set.
running.maxGroupIndices[0] = m_maxGroupIndex;
running.overlapBits[0] = m_overlapBitsForMaxGroup;
}
else if (currentMax == m_maxGroupIndex)
{
// The group index of this element is the same, so just add in
// any new overlap bits that might exist.
running.overlapBits[0] |= m_overlapBitsForMaxGroup;
}
}
else
{
static_assert(Type == GroupingType::disjoint);
running.maxGroupIndices[0] =
std::max(running.maxGroupIndices[0], m_maxGroupIndex);
}
return running;
}
// Intersection test: l0 < r1 &&
// t0 < b1 &&
// r0 > l1 &&
// b0 > t1
//
// Or, to make them all use the same operator: +l0 < +r1 &&
// +t0 < +b1 &&
// -r0 < -l1 &&
// -b0 < -t1
//
// m_edges are already encoded like the left column, so encode "ltrb" like
// the right.
//
// Bias ltrb so we can use int8_t. SSE (pre-4.1) doesn't have an unsigned
// byte compare.
int4 biased = ltrb + TILE_EDGE_BIAS;
int8x8 r = biased.z;
int8x8 b = biased.w;
int8x8 _l = biased.x; // Already converted to "TILE_DIM - left" above.
int8x8 _t = biased.y; // Already converted to "TILE_DIM - top" above.
assert(m_edges.size() == m_groupIndices.size());
if constexpr (Type == GroupingType::overlapAllowed)
{
assert(m_edges.size() == m_overlapBits.size());
}
#if !defined(FALLBACK_ON_SSE2_INTRINSICS)
auto edges = m_edges.begin();
auto groupIndices = m_groupIndices.begin();
const uint16x8* groupOverlapBits = nullptr;
// We only care about groupOverlapBits if this rectangle overlap testing is
// enabled *and* there are non-zero overlap bits in the mix
if constexpr (Type == GroupingType::overlapAllowed)
{
groupOverlapBits = m_overlapBits.data();
}
PUSH_DISABLE_CLANG_SIMD_ABI_WARNING()
int8x32 complement = simd::join(r, b, _l, _t);
for (; edges != m_edges.end(); ++edges, ++groupIndices)
{
// Test 32 edges!
auto edgeMasks = *edges < complement;
// Since the transposed L,T,R,B rows are a each 64-bit vectors,
// "and-reducing" them returns the intersection test (l0 < r1 && t0 < b1
// && r0 > l1 && b0 > t1) in each byte.
int64_t isectMask =
simd::reduce_and(math::bit_cast<int64x4>(edgeMasks));
// Each element of isectMasks8 is 0xff if we intersect with the
// corresponding rectangle, otherwise 0.
int8x8 isectMasks8 = math::bit_cast<int8x8>(isectMask);
// Widen isectMasks8 to 16 bits per mask, where each element of
// isectMasks16 is 0xffff if we intersect with the rectangle, otherwise
// 0.
int16x8 isectMasks16 =
math::bit_cast<int16x8>(simd::zip(isectMasks8, isectMasks8));
// Mask out any groupIndices we don't intersect with so they don't
// participate in the test for maximum groupIndex.
int16x8 maskedGroupIndices = isectMasks16 & *groupIndices;
// Doing this as a compile-time check to not have to do the test in the
// loop at runtime.
if constexpr (Type == GroupingType::overlapAllowed)
{
// Clear any bits from the running overlap where the new index is
// covering it (i.e. there's a new closest group index)
auto keepRunningBits =
(maskedGroupIndices <= running.maxGroupIndices);
running.overlapBits &= keepRunningBits;
// Keep any bits from the current group where the running index is
// not closer (i.e. where current is not covered).
// Note: this will additionally keep bits where there has not yet
// been an intersection (index is 0), and the current rectangle was
// not intersected. This is fine - these bits will get cleared by
// the above code when a rectangle intersection finally *does*
// happen, so we can save on an additional mask with isectMask16.
auto keepCurrentBits =
(running.maxGroupIndices <= maskedGroupIndices);
auto maskedOverlapBits = *groupOverlapBits & keepCurrentBits;
// Combine the two sets, ultimately this will add bits from
// overlapping rectangles that are in the new-highest draw group and
// clear any bits that are not.
running.overlapBits |= maskedOverlapBits;
++groupOverlapBits;
}
running.maxGroupIndices =
simd::max(maskedGroupIndices, running.maxGroupIndices);
}
POP_DISABLE_CLANG_SIMD_ABI_WARNING()
#else
// MSVC doesn't get good codegen for the above loop. Provide direct SSE
// intrinsics.
const __m128i* edgeData = reinterpret_cast<const __m128i*>(m_edges.data());
const __m128i* groupIndices =
reinterpret_cast<const __m128i*>(m_groupIndices.data());
const __m128i* groupOverlapBits = nullptr;
if constexpr (Type == GroupingType::overlapAllowed)
{
groupOverlapBits =
reinterpret_cast<const __m128i*>(m_overlapBits.data());
}
__m128i complementLO = math::bit_cast<__m128i>(simd::join(r, b));
__m128i complementHI = math::bit_cast<__m128i>(simd::join(_l, _t));
__m128i localMaxGroupIndices =
math::bit_cast<__m128i>(running.maxGroupIndices);
auto localOverlapBits = math::bit_cast<__m128i>(running.overlapBits);
for (size_t i = 0; i < m_groupIndices.size(); ++i)
{
__m128i edgesLO = edgeData[i * 2];
__m128i edgesHI = edgeData[i * 2 + 1];
// Test 32 edges!
__m128i edgeMasksLO = _mm_cmpgt_epi8(complementLO, edgesLO);
__m128i edgeMasksHI = _mm_cmpgt_epi8(complementHI, edgesHI);
// AND L & R masks (bits 0:63) and T & B masks (bits 63:127).
__m128i partialIsectMasks = _mm_and_si128(edgeMasksLO, edgeMasksHI);
// Widen partial edge masks from 8 bits to 16.
__m128i partialIsectMasksTB16 =
_mm_unpackhi_epi8(partialIsectMasks, partialIsectMasks);
__m128i partialIsectMasksLR16 =
_mm_unpacklo_epi8(partialIsectMasks, partialIsectMasks);
// AND LR masks with TB masks for a full LTRB intersection mask.
__m128i isectMasks16 =
_mm_and_si128(partialIsectMasksLR16, partialIsectMasksTB16);
// Mask out the groupIndices that don't intersect.
__m128i maskedGroupIndices =
_mm_and_si128(isectMasks16, groupIndices[i]);
// Doing this as a compile-time check to not have to do the test in the
// loop at runtime.
if constexpr (Type == GroupingType::overlapAllowed)
{
// Clear any bits from the running overlap where the new index is
// covering it (i.e. there's a new closest group index)
// NOTE: that masks and the "and"s are reversed from the non-SSE2
// code above: SSE2 does not have integral >= or <= operations, so
// the formulation here uses "!(a > b)" as equivalent to (a <= b),
// thankfully using no additional instructions thanks to the
// existence of "andnot" instructions.
auto clearRunningBits =
_mm_cmpgt_epi16(maskedGroupIndices, localMaxGroupIndices);
// andnot is (~a & b) so clearRunningBits needs to come first as
// it's what we intend to negate
localOverlapBits =
_mm_andnot_si128(clearRunningBits, localOverlapBits);
// Keep any bits from the current group where the running index is
// not closer (i.e. where current is not covered).
// Note: this will additionally keep bits where there has not yet
// been an intersection (index is 0), and the current rectangle was
// not intersected. This is fine - these bits will get cleared by
// the above code when a rectangle intersection finally *does*
// happen, so we can save on an additional mask with isectMask16.
auto clearCurrentBits =
_mm_cmpgt_epi16(localMaxGroupIndices, maskedGroupIndices);
auto maskedOverlapBits =
_mm_andnot_si128(clearCurrentBits, groupOverlapBits[i]);
// Combine the two sets, ultimately this will add bits from
// overlapping rectangles that are in the new-highest draw group and
// clear any bits that are not.
localOverlapBits =
_mm_or_si128(localOverlapBits, maskedOverlapBits);
}
// Accumulate max intersecting groupIndices.
localMaxGroupIndices =
_mm_max_epi16(maskedGroupIndices, localMaxGroupIndices);
}
running.maxGroupIndices = math::bit_cast<int16x8>(localMaxGroupIndices);
if constexpr (Type == GroupingType::overlapAllowed)
{
running.overlapBits = math::bit_cast<uint16x8>(localOverlapBits);
}
#endif // !FALLBACK_ON_SSE2_INTRINSICS
// Ensure we never drop below our baseline index.
if constexpr (Type == GroupingType::overlapAllowed)
{
if (running.maxGroupIndices[0] < m_baselineGroupIndex)
{
// We're bumping this entry up so the overlap bits need to
// be cleared.
running.maxGroupIndices[0] = m_baselineGroupIndex;
running.overlapBits[0] = m_baselineOverlapBits;
}
else if (running.maxGroupIndices[0] == m_baselineGroupIndex)
{
running.overlapBits[0] |= m_baselineOverlapBits;
}
}
else
{
running.maxGroupIndices[0] =
std::max(running.maxGroupIndices[0], m_baselineGroupIndex);
}
return running;
}
void IntersectionBoard::resizeAndReset(uint32_t viewportWidth,
uint32_t viewportHeight)
{
m_viewportSize =
int2{static_cast<int>(viewportWidth), static_cast<int>(viewportHeight)};
// Divide the board into TILE_DIM x TILE_DIM tiles.
int2 dims = (m_viewportSize + TILE_DIM - 1) / TILE_DIM;
m_cols = dims.x;
m_rows = dims.y;
if (m_tiles.size() < m_cols * m_rows)
{
m_tiles.resize(m_cols * m_rows);
}
auto tileIter = m_tiles.begin();
for (int y = 0; y < m_rows; ++y)
{
for (int x = 0; x < m_cols; ++x)
{
tileIter->reset(x * TILE_DIM, y * TILE_DIM);
++tileIter;
}
}
}
int16_t IntersectionBoard::addRectangle(int4 ltrb,
uint16_t currentRectangleOverlapBits,
uint16_t disallowOverlapMask,
int16_t layerCount)
{
switch (m_groupingType)
{
case GroupingType::overlapAllowed:
return addRectangle<GroupingType::overlapAllowed>(
ltrb,
currentRectangleOverlapBits,
disallowOverlapMask,
layerCount);
case GroupingType::disjoint:
return addRectangle<GroupingType::disjoint>(
ltrb,
currentRectangleOverlapBits,
disallowOverlapMask,
layerCount);
}
RIVE_UNREACHABLE();
}
template <GroupingType Type>
int16_t IntersectionBoard::addRectangle(int4 ltrb,
uint16_t currentRectangleOverlapBits,
uint16_t disallowedOverlapBitsMask,
int16_t layerCount)
{
if constexpr (Type == GroupingType::overlapAllowed)
{
assert(
layerCount == 1 &&
"rectangles that are allowed to overlap should only be a single layer");
}
// Discard empty, negative, or offscreen rectangles.
if (simd::any(ltrb.xy >= m_viewportSize || ltrb.zw <= 0 ||
ltrb.xy >= ltrb.zw))
{
return 0;
}
// Clamp ltrb to the viewport to avoid integer overflows.
ltrb.xy = simd::max(ltrb.xy, int2(0));
ltrb.zw = simd::min(ltrb.zw, m_viewportSize);
// Find the tiled row and column that each corner of the rectangle falls on.
int4 span = (ltrb - int4{0, 0, 1, 1}) / TILE_DIM;
span = simd::clamp(span, int4(0), int4{m_cols, m_rows, m_cols, m_rows} - 1);
assert(simd::all(span.xy <= span.zw));
// Accumulate the max groupIndex from each tile the rectangle touches.
IntersectionTile::FindResult results;
for (int y = span.y; y <= span.w; ++y)
{
auto tileIter = m_tiles.begin() + y * m_cols + span.x;
for (int x = span.x; x <= span.z; ++x)
{
results =
tileIter->findMaxIntersectingGroupIndex<Type>(ltrb, results);
++tileIter;
}
}
// Find the absolute max group index this rectangle intersects with.
int16_t bottomGroupIndex = simd::reduce_max(results.maxGroupIndices);
// It is the caller's responsibility to not insert more rectangles than can
// fit in a signed 16-bit integer.
assert(bottomGroupIndex <=
std::numeric_limits<int16_t>::max() - layerCount);
if constexpr (Type == GroupingType::overlapAllowed)
{
// Clear bits of anything that wasn't in our max draw group, they don't
// matter (there's already a barrier between them)
auto overlapMask = (results.maxGroupIndices == bottomGroupIndex);
results.overlapBits &= overlapMask;
// Now doing this reduce will give just the bits related to things we
// overlapped.
int16_t allOverlapBits = simd::reduce_or(results.overlapBits);
if ((allOverlapBits & disallowedOverlapBitsMask) != 0)
{
// Bits were set in any overlapping rectangle that prevent an
// overlap here, so we cannot stay in the same group.
bottomGroupIndex++;
}
else
{
bottomGroupIndex = std::max(bottomGroupIndex, int16_t(1));
}
}
else
{
// With no overlapping allowed, always start in the next group up from
// the lowest one we intersected (or 1, if no intersection occurred)
bottomGroupIndex++;
}
// Add the rectangle and its newly-found groupIndex to each tile it touches.
int16_t topGroupIndex = bottomGroupIndex + layerCount - 1;
for (int y = span.y; y <= span.w; ++y)
{
auto tileIter = m_tiles.begin() + y * m_cols + span.x;
for (int x = span.x; x <= span.z; ++x)
{
tileIter->addRectangle<Type>(ltrb,
topGroupIndex,
currentRectangleOverlapBits);
++tileIter;
}
}
return bottomGroupIndex;
}
// Explicitly instantiate the public template methods to ensure they get linked
template IntersectionTile::FindResult IntersectionTile::
findMaxIntersectingGroupIndex<GroupingType::disjoint>(
int4 ltrb,
FindResult running) const;
template IntersectionTile::FindResult IntersectionTile::
findMaxIntersectingGroupIndex<GroupingType::overlapAllowed>(
int4 ltrb,
FindResult running) const;
template void IntersectionTile::addRectangle<GroupingType::disjoint>(
int4 ltrb,
int16_t groupIndex,
uint16_t overlapBits);
template void IntersectionTile::addRectangle<GroupingType::overlapAllowed>(
int4 ltrb,
int16_t groupIndex,
uint16_t overlapBits);
#ifdef WITH_RIVE_TOOLS
template void IntersectionTile::testingOnly_addRectangleAndValidate<
GroupingType::disjoint>(int4 ltrb,
int16_t groupIndex,
uint16_t overlapBits);
template void IntersectionTile::testingOnly_addRectangleAndValidate<
GroupingType::overlapAllowed>(int4 ltrb,
int16_t groupIndex,
uint16_t overlapBits);
#endif
} // namespace rive::gpu