| /* |
| * Copyright 2024 Rive |
| */ |
| |
| #include "intersection_board.hpp" |
| |
| #include "rive/math/math_types.hpp" |
| |
| #if !SIMD_NATIVE_GVEC && (defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64)) |
| // MSVC doesn't get codegen for the inner loop. Provide direct SSE intrinsics. |
| #include <emmintrin.h> |
| #define FALLBACK_ON_SSE2_INTRINSICS |
| #else |
| #endif |
| |
| namespace rive::pls |
| { |
| void IntersectionTile::reset(int left, int top, int16_t baselineGroupIndex) |
| { |
| // Since we mask non-intersecting groupIndices to zero, the "mask and max" algorithm is only |
| // correct for positive values. (baselineGroupIndex is only signed because SSE doesn't have an |
| // unsigned max instruction.) |
| assert(baselineGroupIndex >= 0); |
| m_topLeft = {left, top, left, top}; |
| m_baselineGroupIndex = baselineGroupIndex; |
| m_maxGroupIndex = baselineGroupIndex; |
| m_edges.clear(); |
| m_groupIndices.clear(); |
| m_rectangleCount = 0; |
| } |
| |
| void IntersectionTile::addRectangle(int4 ltrb, int16_t groupIndex) |
| { |
| assert(simd::all(ltrb.xy < ltrb.zw)); // Ensure ltrb isn't zero or negative. |
| // Ensure this rectangle preserves the integrity of our list. |
| assert(groupIndex > simd::reduce_max(findMaxIntersectingGroupIndex(ltrb, 0))); |
| assert(groupIndex > m_baselineGroupIndex); |
| assert(groupIndex >= 0); |
| |
| // Translate ltrb to our tile and negate the right and bottom sides. |
| ltrb -= m_topLeft; |
| ltrb.zw = 255 - ltrb.zw; // right = 255 - right, bottom = 255 - bottom |
| assert(simd::all(ltrb < 255)); // Ensure ltrb isn't completely outside the tile. |
| ltrb = simd::max(ltrb, int4(0)); // Clamp ltrb to our tile. |
| |
| if (simd::all(ltrb == 0)) |
| { |
| // ltrb covers the entire tile -- reset to a new baseline. |
| assert(groupIndex > m_maxGroupIndex); |
| reset(m_topLeft.x, m_topLeft.y, groupIndex); |
| return; |
| } |
| |
| // Append a chunk if needed, to make room for this new rectangle. |
| uint32_t subIdx = m_rectangleCount % kChunkSize; |
| if (subIdx == 0) |
| { |
| // Push back maximally negative rectangles so they always fail intersection tests. |
| assert(m_edges.size() * kChunkSize == m_rectangleCount); |
| m_edges.push_back(int8x32(std::numeric_limits<int8_t>::max())); |
| |
| // Uninitialized since the corresponding rectangles never pass an intersection test. |
| assert(m_groupIndices.size() * kChunkSize == m_rectangleCount); |
| m_groupIndices.emplace_back(); |
| } |
| |
| // m_edges is a list of 8 rectangles encoded as [L, T, 255 - R, 255 - B], relative to m_topLeft. |
| // The data is also transposed: [L0..L7, T0..T7, -R0..R7, -B0..B7]. |
| // Bias ltrb by -128 so we can use int8_t. SSE doesn't have an unsigned byte compare. |
| int4 biased = ltrb + std::numeric_limits<int8_t>::min(); |
| m_edges.back()[subIdx] = biased.x; |
| m_edges.back()[subIdx + 8] = biased.y; |
| m_edges.back()[subIdx + 16] = biased.z; // Already converted to "255 - right" above. |
| m_edges.back()[subIdx + 24] = biased.w; // Already converted to "255 - bottom" above. |
| |
| m_groupIndices.back()[subIdx] = groupIndex; |
| |
| m_maxGroupIndex = std::max(groupIndex, m_maxGroupIndex); |
| ++m_rectangleCount; |
| } |
| |
| int16x8 IntersectionTile::findMaxIntersectingGroupIndex(int4 ltrb, |
| int16x8 runningMaxGroupIndices) const |
| { |
| assert(simd::all(ltrb.xy < ltrb.zw)); // Ensure ltrb isn't zero or negative. |
| |
| // Since we mask non-intersecting groupIndices to zero, the "mask and max" algorithm is only |
| // correct for positive values. (runningMaxGroupIndices is only signed because SSE doesn't have |
| // an unsigned max instruction.) |
| assert(simd::all(runningMaxGroupIndices >= 0)); |
| assert(m_baselineGroupIndex >= 0); |
| assert(m_maxGroupIndex >= m_baselineGroupIndex); |
| |
| // Translate ltrb to our tile and negate the left and top sides. |
| ltrb -= m_topLeft; |
| ltrb.xy = 255 - ltrb.xy; // left = 255 - left, top = 255 - top |
| assert(simd::all(ltrb > 0)); // Ensure ltrb isn't completely outside the tile. |
| ltrb = simd::min(ltrb, int4(255)); // Clamp ltrb to our tile. |
| |
| if (simd::all(ltrb == 255)) |
| { |
| // ltrb covers the entire -- we know it intersects with every rectangle. |
| runningMaxGroupIndices[0] = std::max(runningMaxGroupIndices[0], m_maxGroupIndex); |
| return runningMaxGroupIndices; |
| } |
| |
| // Intersection test: l0 < r1 && |
| // t0 < b1 && |
| // r0 > l1 && |
| // b0 > t1 |
| // |
| // Or, to make them all use the same operator: +l0 < +r1 && |
| // +t0 < +b1 && |
| // -r0 < -l1 && |
| // -b0 < -t1 |
| // |
| // m_edges are already encoded like the left column, so encode "ltrb" like the right. |
| // |
| // Bias ltrb by -128 so we can use int8_t. SSE doesn't have an unsigned byte compare. |
| int4 biased = ltrb + std::numeric_limits<int8_t>::min(); |
| int8x8 r = biased.z; |
| int8x8 b = biased.w; |
| int8x8 _l = biased.x; // Already converted to "255 - left" above. |
| int8x8 _t = biased.y; // Already converted to "255 - top" above. |
| |
| #if !defined(FALLBACK_ON_SSE2_INTRINSICS) |
| auto edges = m_edges.begin(); |
| auto groupIndices = m_groupIndices.begin(); |
| assert(m_edges.size() == m_groupIndices.size()); |
| int8x32 complement = simd::join(r, b, _l, _t); |
| for (; edges != m_edges.end(); ++edges, ++groupIndices) |
| { |
| // Test 32 edges! |
| auto edgeMasks = *edges < complement; |
| // Since the transposed L,T,R,B rows are a each 64-bit vectors, "and-reducing" them returns |
| // the intersection test (l0 < r1 && t0 < b1 && r0 > l1 && b0 > t1) in each byte. |
| int64_t isectMask = simd::reduce_and(math::bit_cast<int64x4>(edgeMasks)); |
| // Each element of isectMasks8 is 0xff if we intersect with the corresponding rectangle, |
| // otherwise 0. |
| int8x8 isectMasks8 = math::bit_cast<int8x8>(isectMask); |
| // Widen isectMasks8 to 16 bits per mask, where each element of isectMasks16 is 0xffff if we |
| // intersect with the rectangle, otherwise 0. |
| int16x8 isectMasks16 = math::bit_cast<int16x8>(simd::zip(isectMasks8, isectMasks8)); |
| // Mask out any groupIndices we don't intersect with so they don't participate in the test |
| // for maximum groupIndex. |
| int16x8 maskedGroupIndices = isectMasks16 & *groupIndices; |
| runningMaxGroupIndices = simd::max(maskedGroupIndices, runningMaxGroupIndices); |
| } |
| #else |
| // MSVC doesn't get good codegen for the above loop. Provide direct SSE intrinsics. |
| const __m128i* edgeData = reinterpret_cast<const __m128i*>(m_edges.data()); |
| const __m128i* groupIndices = reinterpret_cast<const __m128i*>(m_groupIndices.data()); |
| __m128i complementLO = math::bit_cast<__m128i>(simd::join(r, b)); |
| __m128i complementHI = math::bit_cast<__m128i>(simd::join(_l, _t)); |
| __m128i localMaxGroupIndices = math::bit_cast<__m128i>(runningMaxGroupIndices); |
| for (size_t i = 0; i < m_groupIndices.size(); ++i) |
| { |
| __m128i edgesLO = edgeData[i * 2]; |
| __m128i edgesHI = edgeData[i * 2 + 1]; |
| // Test 32 edges! |
| __m128i edgeMasksLO = _mm_cmpgt_epi8(complementLO, edgesLO); |
| __m128i edgeMasksHI = _mm_cmpgt_epi8(complementHI, edgesHI); |
| // AND L & R masks (bits 0:63) and T & B masks (bits 63:127). |
| __m128i partialIsectMasks = _mm_and_si128(edgeMasksLO, edgeMasksHI); |
| // Widen partial edge masks from 8 bits to 16. |
| __m128i partialIsectMasksTB16 = _mm_unpackhi_epi8(partialIsectMasks, partialIsectMasks); |
| __m128i partialIsectMasksLR16 = _mm_unpacklo_epi8(partialIsectMasks, partialIsectMasks); |
| // AND LR masks with TB masks for a full LTRB intersection mask. |
| __m128i isectMasks16 = _mm_and_si128(partialIsectMasksLR16, partialIsectMasksTB16); |
| // Mask out the groupIndices that don't intersect. |
| __m128i intersectingGroupIndices = _mm_and_si128(isectMasks16, groupIndices[i]); |
| // Accumulate max intersecting groupIndices. |
| localMaxGroupIndices = _mm_max_epi16(intersectingGroupIndices, localMaxGroupIndices); |
| } |
| runningMaxGroupIndices = math::bit_cast<int16x8>(localMaxGroupIndices); |
| #endif // !FALLBACK_ON_SSE2_INTRINSICS |
| |
| // Ensure we never drop below our baseline index. |
| runningMaxGroupIndices[0] = std::max(runningMaxGroupIndices[0], m_baselineGroupIndex); |
| return runningMaxGroupIndices; |
| } |
| |
| void IntersectionBoard::resizeAndReset(uint32_t viewportWidth, uint32_t viewportHeight) |
| { |
| m_viewportSize = int2{static_cast<int>(viewportWidth), static_cast<int>(viewportHeight)}; |
| |
| // Divide the board into 255x255 tiles. |
| int2 dims = (m_viewportSize + 254) / 255; |
| m_cols = dims.x; |
| m_rows = dims.y; |
| if (m_tiles.size() < m_cols * m_rows) |
| { |
| m_tiles.resize(m_cols * m_rows); |
| } |
| auto tileIter = m_tiles.begin(); |
| for (int y = 0; y < m_rows; ++y) |
| { |
| for (int x = 0; x < m_cols; ++x) |
| { |
| tileIter->reset(x * 255, y * 255); |
| ++tileIter; |
| } |
| } |
| } |
| |
| int16_t IntersectionBoard::addRectangle(int4 ltrb) |
| { |
| // Discard empty, negative, or offscreen rectangles. |
| if (simd::any(ltrb.xy >= m_viewportSize || ltrb.zw <= 0 || ltrb.xy >= ltrb.zw)) |
| { |
| return 0; |
| } |
| |
| // Clamp ltrb to the viewport to avoid integer overflows. |
| ltrb.xy = simd::max(ltrb.xy, int2(0)); |
| ltrb.zw = simd::min(ltrb.zw, m_viewportSize); |
| |
| // Find the tiled row and column that each corner of the rectangle falls on. |
| int4 span = (ltrb - int4{0, 0, 1, 1}) / 255; |
| span = simd::clamp(span, int4(0), int4{m_cols, m_rows, m_cols, m_rows} - 1); |
| assert(simd::all(span.xy <= span.zw)); |
| |
| // Accumulate the max groupIndex from each tile the rectangle touches. |
| int16x8 maxGroupIndices = 0; |
| for (int y = span.y; y <= span.w; ++y) |
| { |
| auto tileIter = m_tiles.begin() + y * m_cols + span.x; |
| for (int x = span.x; x <= span.z; ++x) |
| { |
| maxGroupIndices = tileIter->findMaxIntersectingGroupIndex(ltrb, maxGroupIndices); |
| ++tileIter; |
| } |
| } |
| |
| // Find the absolute max group index this rectangle intersects with. |
| int16_t maxGroupIndex = simd::reduce_max(maxGroupIndices); |
| // It is the caller's responsibility to not insert more rectangles than can fit in a signed |
| // 16-bit integer. |
| assert(maxGroupIndex < std::numeric_limits<int16_t>::max()); |
| |
| // Add the rectangle and its newly-found groupIndex to each tile it touches. |
| int16_t nextGroupIndex = maxGroupIndex + 1; |
| for (int y = span.y; y <= span.w; ++y) |
| { |
| auto tileIter = m_tiles.begin() + y * m_cols + span.x; |
| for (int x = span.x; x <= span.z; ++x) |
| { |
| tileIter->addRectangle(ltrb, nextGroupIndex); |
| ++tileIter; |
| } |
| } |
| |
| return nextGroupIndex; |
| } |
| } // namespace rive::pls |