src/gpu/graphite/sparse_strips/MSAA_LUT.h - skia - Git at Google

 /*
  * Copyright 2026 Google LLC
  *
  * Use of this source code is governed by a BSD-style license that can be
  * found in the LICENSE file.
  */
 #ifndef skgpu_graphite_sparse_strips_MSAA_LUT_DEFINED
 #define skgpu_graphite_sparse_strips_MSAA_LUT_DEFINED

 #include "include/private/base/SkTDArray.h"

 #include <array>
 #include <cmath>
 #include <cstdint>
 #include <type_traits>

 namespace skgpu::graphite {

 /*
  * Naively, calculating MSAA coverage in software is O(n) per each sample location. Instead we use a
  * variation of a lookup technique introduced by Laine and Karras, adapted to 2D by Li et al., and
  * refined by the Vello project:
  * https://dl.acm.org/doi/abs/10.1111/j.1467-8659.2010.01728.x,
  * https://dl.acm.org/doi/10.1145/2980179.2982434)
  * https://github.com/linebender/vello/blob/main/vello_shaders/shader/fine.wgsl
  * https://github.com/linebender/vello/pull/64
  *
  * * To precompute the MSAA coverage of a line segment slicing through a 1x1 pixel, in Li et al. the
  *   line is parameterized by its normal vector (angle) and perpendicular distance from the center.
  *   While effective, this approach has drawbacks: angle and distance do not map linearly to the
  *   Cartesian pixel grid, leading to higher quantization errors (especially near corners), and
  *   computing the normal requires expensive square root operations (e.g., std::hypot) during
  *   rendering.
  * * Instead, we use a continuous parameterization introduced by the Vello project. We
  *   map the infinite range of standard slopes (m) and y-intercepts (b) into two new variables, s
  *   (normalized slope) and t (normalized translation), strictly bounded between 0.0 and 1.0.
  *
  * ----------------------------------
  * Squashing the Slope (m -> s)
  * ----------------------------------
  * * We squash the infinite positive slope m in [0, infinity) using the curve:
  *   s = 1 / (m + 1)
  * * This maps the extremes while avoiding division-by-zero risks:
  *   A perfectly vertical line (m = infinity) maps to s = 0.0
  *   A 45-degree line (m = 1) maps to s = 0.5
  *   A perfectly horizontal line (m = 0) maps to s = 1.0
  * * To use this in the half-plane equation (m*x - y + b >= 0), we solve for m:
  *   m = (1 - s) / s
  *   Substituting and multiplying by s gives:
  *   x(1 - s) - y*s + b*s >= 0
  *
  * ----------------------------------
  * Squashing the Translation (b -> t)
  * ----------------------------------
  * * To ensure our spatial offset stays between 0.0 and 1.0, we scale the y-intercept against our
  *   new slope parameter:
  *   t = (b + m) / (m + 1)
  * * We can rewrite t entirely in terms of s:
  *   t = (b + m) * s  =>  t = b*s + m*s
  * * Since m*s = 1 - s, we get:
  *   t = b*s + 1 - s
  * * Solving for b*s gives:
  *   b*s = s + t - 1
  *
  * ----------------------------------
  * Final Equation
  * ----------------------------------
  * * Substitute our new definition of b*s back into the half-plane equation:
  *   x(1 - s) - y*s + (s + t - 1) >= 0
  * * To group this cleanly, we add zero (+ t*s - t*s):
  *   x(1 - s) - y*s + s + t - 1 + t*s - t*s >= 0
  * * Rearranging to group by (1 - s) and s:
  *   x(1 - s) - (1 - s - t + t*s) - y*s + t*s >= 0
  * * Factoring the middle group into (1 - t)(1 - s), and factoring -s from the end:
  *   x(1 - s) - (1 - t)(1 - s) - (y - t)s >= 0
  * * Factoring out (1 - s) yields the final parameterization:
  *   (x - (1 - t))(1 - s) - (y - t)s >= 0
  *
  * ----------------------------------
  * LUT Generation
  * ----------------------------------
  * * Because s and t are strictly bounded [0.0, 1.0], we can quantize them into a discrete 2D grid.
  *   The grid has `kWidth` columns representing the translation t, and `kHeight / 2` rows
  *   representing the slope s.
  * * Since our mathematical derivation assumes a positive slope (m >= 0), we partition the LUT into
  *   two halves. The bottom half (v >= kHeight / 2) stores masks for positive slopes. The top half
  *   stores masks for negative slopes.
  * * For negative slopes, we reuse the exact same mathematical equation but geometrically flip the
  *   Y-axis of our sub-pixel sample points (y = 1.0 - y).
  * * To minimize maximum quantization error, we extract the continuous s and t values from the exact
  *   center of each grid cell (e.g., `(u + 0.5) / kWidth`).
  * * For each cell, we evaluate the half-plane equation against all N sub-pixel sample coordinates.
  *   If the result is >= 0.0, the sample is covered, and we set the corresponding bit in our integer
  *   mask using a bitwise OR (`mask |= (1 << k)`).
  *
  * * The actual positions of the subsample points use the D3D11 standard multisample pattern:
  *   https://learn.microsoft.com/en-us/windows/win32/api/d3d11/ne-d3d11-d3d11_standard_multisample_quality_levels
  */

 template <typename T> class MSAA_LUT {
 public:
     static constexpr int32_t kWidth       = 64;
     static constexpr int32_t kHeight      = 64;
     static constexpr int32_t kHalfHeight  = kHeight / 2;
     static constexpr size_t  kSampleCount = sizeof(T) * 8;

     static constexpr std::array<uint8_t, kSampleCount> kPattern = []() {
         if constexpr (std::is_same_v<T, uint8_t>) {
             return std::array<uint8_t, 8>{0, 5, 3, 7, 1, 4, 6, 2};
         } else if constexpr (std::is_same_v<T, uint16_t>) {
             return std::array<uint8_t, 16>{1, 8, 4, 11, 15, 7, 3, 12, 0, 9, 5, 13, 2, 10, 6, 14};
         } else {
             SkUNREACHABLE;
         }
     }();

     static SkTDArray<T> Make() {
         constexpr float scale = 1.0f / static_cast<float>(kSampleCount);
         std::array<float, kSampleCount> subX;
         std::array<float, kSampleCount> subY;

         for (size_t k = 0; k < kSampleCount; ++k) {
             subX[k] = (static_cast<float>(kPattern[k]) + 0.5f) * scale;
             subY[k] = (static_cast<float>(k) + 0.5f) * scale;
         }

         SkTDArray<T> lut;
         lut.reserve(kWidth * kHeight);

         for (int32_t i = 0; i < kWidth * kHeight; ++i) {
             int32_t u = i % kWidth;
             int32_t v = i / kWidth;

             bool isPos = v >= kHalfHeight;

             // Extract continuous parameters from the center of the grid cells
             float t = (static_cast<float>(u) + 0.5f) / static_cast<float>(kWidth);
             float s = (static_cast<float>(v % kHalfHeight) + 0.5f) /
                        static_cast<float>(kHalfHeight);

             T mask = 0;
             for (size_t k = 0; k < kSampleCount; ++k) {
                 float x = subX[k];
                 float y = subY[k];

                 if (!isPos) {
                     y = 1.0f - y;
                 }

                 float val = (x - (1.0f - t)) * (1.0f - s) - (y - t) * s;

                 if (val >= 0.0f) {
                     mask |= (static_cast<T>(1) << k);
                 }
             }
             lut.push_back(mask);
         }
         return lut;
     }
 };

 template <typename T>
 SkTDArray<T> GenerateMSAALUT() {
     if constexpr (std::is_same_v<T, uint8_t> || std::is_same_v<T, uint16_t>) {
         return MSAA_LUT<T>::Make();
     } else {
         SkUNREACHABLE;
     }
 }

 }  // namespace skgpu::graphite

 #endif  // skgpu_graphite_sparse_strips_MSAA_LUT_DEFINED
	/*
	* Copyright 2026 Google LLC
	*
	* Use of this source code is governed by a BSD-style license that can be
	* found in the LICENSE file.
	*/
	#ifndef skgpu_graphite_sparse_strips_MSAA_LUT_DEFINED
	#define skgpu_graphite_sparse_strips_MSAA_LUT_DEFINED

	#include "include/private/base/SkTDArray.h"

	#include <array>
	#include <cmath>
	#include <cstdint>
	#include <type_traits>

	namespace skgpu::graphite {

	/*
	* Naively, calculating MSAA coverage in software is O(n) per each sample location. Instead we use a
	* variation of a lookup technique introduced by Laine and Karras, adapted to 2D by Li et al., and
	* refined by the Vello project:
	* https://dl.acm.org/doi/abs/10.1111/j.1467-8659.2010.01728.x,
	* https://dl.acm.org/doi/10.1145/2980179.2982434)
	* https://github.com/linebender/vello/blob/main/vello_shaders/shader/fine.wgsl
	* https://github.com/linebender/vello/pull/64
	*
	* * To precompute the MSAA coverage of a line segment slicing through a 1x1 pixel, in Li et al. the
	* line is parameterized by its normal vector (angle) and perpendicular distance from the center.
	* While effective, this approach has drawbacks: angle and distance do not map linearly to the
	* Cartesian pixel grid, leading to higher quantization errors (especially near corners), and
	* computing the normal requires expensive square root operations (e.g., std::hypot) during
	* rendering.
	* * Instead, we use a continuous parameterization introduced by the Vello project. We
	* map the infinite range of standard slopes (m) and y-intercepts (b) into two new variables, s
	* (normalized slope) and t (normalized translation), strictly bounded between 0.0 and 1.0.
	*
	* ----------------------------------
	* Squashing the Slope (m -> s)
	* ----------------------------------
	* * We squash the infinite positive slope m in [0, infinity) using the curve:
	* s = 1 / (m + 1)
	* * This maps the extremes while avoiding division-by-zero risks:
	* A perfectly vertical line (m = infinity) maps to s = 0.0
	* A 45-degree line (m = 1) maps to s = 0.5
	* A perfectly horizontal line (m = 0) maps to s = 1.0
	* * To use this in the half-plane equation (m*x - y + b >= 0), we solve for m:
	* m = (1 - s) / s
	* Substituting and multiplying by s gives:
	* x(1 - s) - ys + bs >= 0
	*
	* ----------------------------------
	* Squashing the Translation (b -> t)
	* ----------------------------------
	* * To ensure our spatial offset stays between 0.0 and 1.0, we scale the y-intercept against our
	* new slope parameter:
	* t = (b + m) / (m + 1)
	* * We can rewrite t entirely in terms of s:
	* t = (b + m) * s => t = bs + ms
	* * Since m*s = 1 - s, we get:
	* t = b*s + 1 - s
	* * Solving for b*s gives:
	* b*s = s + t - 1
	*
	* ----------------------------------
	* Final Equation
	* ----------------------------------
	* * Substitute our new definition of b*s back into the half-plane equation:
	* x(1 - s) - y*s + (s + t - 1) >= 0
	* * To group this cleanly, we add zero (+ ts - ts):
	* x(1 - s) - ys + s + t - 1 + ts - t*s >= 0
	* * Rearranging to group by (1 - s) and s:
	* x(1 - s) - (1 - s - t + ts) - ys + t*s >= 0
	* * Factoring the middle group into (1 - t)(1 - s), and factoring -s from the end:
	* x(1 - s) - (1 - t)(1 - s) - (y - t)s >= 0
	* * Factoring out (1 - s) yields the final parameterization:
	* (x - (1 - t))(1 - s) - (y - t)s >= 0
	*
	* ----------------------------------
	* LUT Generation
	* ----------------------------------
	* * Because s and t are strictly bounded [0.0, 1.0], we can quantize them into a discrete 2D grid.
	* The grid has `kWidth` columns representing the translation t, and `kHeight / 2` rows
	* representing the slope s.
	* * Since our mathematical derivation assumes a positive slope (m >= 0), we partition the LUT into
	* two halves. The bottom half (v >= kHeight / 2) stores masks for positive slopes. The top half
	* stores masks for negative slopes.
	* * For negative slopes, we reuse the exact same mathematical equation but geometrically flip the
	* Y-axis of our sub-pixel sample points (y = 1.0 - y).
	* * To minimize maximum quantization error, we extract the continuous s and t values from the exact
	* center of each grid cell (e.g., `(u + 0.5) / kWidth`).
	* * For each cell, we evaluate the half-plane equation against all N sub-pixel sample coordinates.
	* If the result is >= 0.0, the sample is covered, and we set the corresponding bit in our integer
	* mask using a bitwise OR (`mask \|= (1 << k)`).
	*
	* * The actual positions of the subsample points use the D3D11 standard multisample pattern:
	* https://learn.microsoft.com/en-us/windows/win32/api/d3d11/ne-d3d11-d3d11_standard_multisample_quality_levels
	*/

	template <typename T> class MSAA_LUT {
	public:
	static constexpr int32_t kWidth = 64;
	static constexpr int32_t kHeight = 64;
	static constexpr int32_t kHalfHeight = kHeight / 2;
	static constexpr size_t kSampleCount = sizeof(T) * 8;

	static constexpr std::array<uint8_t, kSampleCount> kPattern = []() {
	if constexpr (std::is_same_v<T, uint8_t>) {
	return std::array<uint8_t, 8>{0, 5, 3, 7, 1, 4, 6, 2};
	} else if constexpr (std::is_same_v<T, uint16_t>) {
	return std::array<uint8_t, 16>{1, 8, 4, 11, 15, 7, 3, 12, 0, 9, 5, 13, 2, 10, 6, 14};
	} else {
	SkUNREACHABLE;
	}
	}();

	static SkTDArray<T> Make() {
	constexpr float scale = 1.0f / static_cast<float>(kSampleCount);
	std::array<float, kSampleCount> subX;
	std::array<float, kSampleCount> subY;

	for (size_t k = 0; k < kSampleCount; ++k) {
	subX[k] = (static_cast<float>(kPattern[k]) + 0.5f) * scale;
	subY[k] = (static_cast<float>(k) + 0.5f) * scale;
	}

	SkTDArray<T> lut;
	lut.reserve(kWidth * kHeight);

	for (int32_t i = 0; i < kWidth * kHeight; ++i) {
	int32_t u = i % kWidth;
	int32_t v = i / kWidth;

	bool isPos = v >= kHalfHeight;

	// Extract continuous parameters from the center of the grid cells
	float t = (static_cast<float>(u) + 0.5f) / static_cast<float>(kWidth);
	float s = (static_cast<float>(v % kHalfHeight) + 0.5f) /
	static_cast<float>(kHalfHeight);

	T mask = 0;
	for (size_t k = 0; k < kSampleCount; ++k) {
	float x = subX[k];
	float y = subY[k];

	if (!isPos) {
	y = 1.0f - y;
	}

	float val = (x - (1.0f - t)) * (1.0f - s) - (y - t) * s;

	if (val >= 0.0f) {
	mask \|= (static_cast<T>(1) << k);
	}
	}
	lut.push_back(mask);
	}
	return lut;
	}
	};

	template <typename T>
	SkTDArray<T> GenerateMSAALUT() {
	if constexpr (std::is_same_v<T, uint8_t> \|\| std::is_same_v<T, uint16_t>) {
	return MSAA_LUT<T>::Make();
	} else {
	SkUNREACHABLE;
	}
	}

	} // namespace skgpu::graphite

	#endif // skgpu_graphite_sparse_strips_MSAA_LUT_DEFINED