blob: ef42e4c2458fb4bf71da7b39044144b870d7922a [file] [log] [blame]
/*
* Copyright 2024 Google LLC
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "src/core/SkBlurEngine.h"
#include "include/core/SkAlphaType.h"
#include "include/core/SkBlendMode.h"
#include "include/core/SkClipOp.h"
#include "include/core/SkColorSpace.h"
#include "include/core/SkImageInfo.h"
#include "include/core/SkM44.h"
#include "include/core/SkMatrix.h"
#include "include/core/SkPaint.h"
#include "include/core/SkRect.h"
#include "include/core/SkSamplingOptions.h"
#include "include/core/SkScalar.h"
#include "include/core/SkShader.h"
#include "include/effects/SkRuntimeEffect.h"
#include "include/private/base/SkAssert.h"
#include "include/private/base/SkMath.h"
#include "include/private/base/SkTo.h"
#include "src/core/SkDevice.h"
#include "src/core/SkKnownRuntimeEffects.h"
#include "src/core/SkSpecialImage.h"
#include <array>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <utility>
// Fills 'kernel' with a normalized 2D Gaussian that is KernelWidth(radius.width()) columns by
// KernelWidth(radius.height()) rows, stored row-major (index = row * columns + column). Every
// entry of 'kernel' past the last weight is set to zero.
//
// 'radius' must match SkBlurEngine::SigmaToRadius() of each 'sigma' component and 'kernel' must
// have room for all of the weights; both requirements are only checked by assertion.
void SkShaderBlurAlgorithm::Compute2DBlurKernel(SkSize sigma,
                                                SkISize radius,
                                                SkSpan<float> kernel) {
    // The caller needed the radius to size 'kernel' in the first place, so it is passed in rather
    // than recomputed. Just confirm that it agrees with sigma.
    SkASSERT(SkBlurEngine::SigmaToRadius(sigma.width()) == radius.width() &&
             SkBlurEngine::SigmaToRadius(sigma.height()) == radius.height());

    // Large sigmas are expected to have been downscaled by the caller so that every weight fits.
    const int cols = KernelWidth(radius.width());
    const int rows = KernelWidth(radius.height());
    const size_t weightCount = SkTo<size_t>(sk_64_mul(cols, rows));
    SkASSERT(weightCount <= kernel.size());

    // Whenever an axis has a non-trivial radius, 2*sigma^2 on that axis must not be near zero.
    const float twoVarX = 2.0f * sigma.width() * sigma.width();
    const float twoVarY = 2.0f * sigma.height() * sigma.height();
    SkASSERT((radius.width() == 0 || !SkScalarNearlyZero(twoVarX)) &&
             (radius.height() == 0 || !SkScalarNearlyZero(twoVarY)));

    // An axis with a zero radius uses a scale of 1, which collapses the math below to a 1D
    // Gaussian along the other axis (or to a single weight of 1.0 when both radii are zero).
    const float invTwoVarX = radius.width() > 0 ? 1.0f / twoVarX : 1.f;
    const float invTwoVarY = radius.height() > 0 ? 1.0f / twoVarY : 1.f;

    float total = 0.0f;
    for (int col = 0; col < cols; ++col) {
        const float dx = static_cast<float>(col - radius.width());
        const float xExponent = dx * dx * invTwoVarX;
        for (int row = 0; row < rows; ++row) {
            const float dy = static_cast<float>(row - radius.height());
            // The Gaussian's constant factor is omitted because the weights are renormalized
            // below anyway.
            const float weight = std::exp(-(xExponent + dy * dy * invTwoVarY));
            kernel[row * cols + col] = weight;
            total += weight;
        }
    }

    // Rescale so that the weights sum to one.
    const float normalize = 1.0f / total;
    for (size_t i = 0; i < weightCount; ++i) {
        kernel[i] *= normalize;
    }

    // Clear whatever part of 'kernel' is not occupied by weights.
    for (size_t i = weightCount; i < kernel.size(); ++i) {
        kernel[i] = 0.0f;
    }
}
// Overload that fills an array of SkV4 by viewing its storage as kMaxSamples consecutive floats
// and forwarding to the SkSpan<float> overload.
void SkShaderBlurAlgorithm::Compute2DBlurKernel(SkSize sigma,
                                                SkISize radii,
                                                std::array<SkV4, kMaxSamples/4>& kernel) {
    // Treating the SkV4 array as a flat float array requires matching alignment and total size.
    static_assert(alignof(float) == alignof(SkV4));
    static_assert(sizeof(kernel) == sizeof(std::array<float, kMaxSamples>));

    Compute2DBlurKernel(sigma, radii, SkSpan<float>{kernel[0].ptr(), kMaxSamples});
}
// Writes the (x, y) sample offset of every tap of a 2D kernel with the given 'radius' into
// 'offsets', row by row, with two (x, y) pairs packed per SkV4. Pairs beyond the kernel's area
// repeat the last valid offset.
void SkShaderBlurAlgorithm::Compute2DBlurOffsets(SkISize radius,
                                                 std::array<SkV4, kMaxSamples/2>& offsets) {
    const int tapCount = KernelWidth(radius.width()) * KernelWidth(radius.height());
    SkASSERT(tapCount <= kMaxSamples);

    // Flat view of the output: pair 'n' occupies floats [2n] (x) and [2n + 1] (y).
    SkSpan<float> xy{offsets[0].ptr(), kMaxSamples*2};

    int next = 0;
    for (int dy = -radius.height(); dy <= radius.height(); ++dy) {
        for (int dx = -radius.width(); dx <= radius.width(); ++dx, ++next) {
            xy[2*next]     = static_cast<float>(dx);
            xy[2*next + 1] = static_cast<float>(dy);
        }
    }
    SkASSERT(next == tapCount);

    // Pad the unused pairs with a copy of the final valid offset.
    const float padX = xy[2*(tapCount - 1)];
    const float padY = xy[2*(tapCount - 1) + 1];
    while (next < kMaxSamples) {
        xy[2*next]     = padX;
        xy[2*next + 1] = padY;
        ++next;
    }
}
// Builds the uniform data for a 1D blur of the given 'sigma' and 'radius'. The full Gaussian
// kernel from Compute1DBlurKernel() is folded into LinearKernelWidth(radius) taps, where each tap
// merges up to two adjacent texels into one linearly-filtered sample. The taps are written to
// 'offsetsAndKernel' as interleaved (offset, weight) pairs, two pairs per SkV4. Entries past the
// last valid tap get a weight of 0 and repeat the last valid offset.
void SkShaderBlurAlgorithm::Compute1DBlurLinearKernel(
float sigma,
int radius,
std::array<SkV4, kMaxSamples/2>& offsetsAndKernel) {
// Preconditions, checked by assertion only: sigma is within the supported range, 'radius' is the
// radius derived from 'sigma', and the folded kernel fits in kMaxSamples taps.
SkASSERT(sigma <= kMaxLinearSigma);
SkASSERT(radius == SkBlurEngine::SigmaToRadius(sigma));
SkASSERT(LinearKernelWidth(radius) <= kMaxSamples);
// Given 2 adjacent gaussian points, they are blended as: Wi * Ci + Wj * Cj.
// The GPU will mix Ci and Cj as Ci * (1 - x) + Cj * x during sampling.
// Compute W', x such that W' * (Ci * (1 - x) + Cj * x) = Wi * Ci + Wj * Cj.
// Solving W' * x = Wj, W' * (1 - x) = Wi:
// W' = Wi + Wj
// x = Wj / (Wi + Wj)
auto get_new_weight = [](float* new_w, float* offset, float wi, float wj) {
*new_w = wi + wj;
*offset = wj / (wi + wj);
};
// Create a temporary standard kernel. The scratch array is sized for the kernel width of a
// radius of (kMaxSamples - 1); the assert below checks that the requested radius fits.
static constexpr int kMaxKernelWidth = KernelWidth(kMaxSamples - 1);
SkASSERT(KernelWidth(radius) <= kMaxKernelWidth);
std::array<float, kMaxKernelWidth> fullKernel;
Compute1DBlurKernel(sigma, radius, SkSpan<float>{fullKernel.data(), KernelWidth(radius)});
// Folded weights and offsets, kept in separate arrays until they are interleaved at the end.
std::array<float, kMaxSamples> kernel;
std::array<float, kMaxSamples> offsets;
// Note that halfsize isn't just size / 2, but radius + 1. This is the size of the output array.
int halfSize = LinearKernelWidth(radius);
// Output index of the centre tap (for odd radii, the upper of the two centre taps).
int halfRadius = halfSize / 2;
// Output index that receives the mirror of the next tap computed for the upper half.
int lowIndex = halfRadius - 1;
// Compute1DBlurKernel produces a full 2N + 1 kernel. Since the kernel can be mirrored,
// compute only the upper half and mirror to the lower half.
// 'index' walks 'fullKernel', starting at its centre texel.
int index = radius;
if (radius & 1) {
// If N is odd, then use two samples.
// The centre texel gets sampled twice, so halve its influence for each sample.
// We essentially sample like this:
// Texel edges
// v    v    v    v
// |    |    |    |
// \-----^---/         Lower sample
//      \---^-----/    Upper sample
get_new_weight(&kernel[halfRadius],
&offsets[halfRadius],
fullKernel[index] * 0.5f,
fullKernel[index + 1]);
kernel[lowIndex] = kernel[halfRadius];
offsets[lowIndex] = -offsets[halfRadius];
index++;
lowIndex--;
} else {
// If N is even, then there are an even number of texels on either side of the centre texel.
// Sample the centre texel directly.
kernel[halfRadius] = fullKernel[index];
offsets[halfRadius] = 0.0f;
}
// Advance to the first texel not yet consumed: past the centre texel when the radius is even, or
// past the texel that was paired with the centre when the radius is odd.
index++;
// Every other pair gets one sample.
for (int i = halfRadius + 1; i < halfSize; index += 2, i++, lowIndex--) {
get_new_weight(&kernel[i], &offsets[i], fullKernel[index], fullKernel[index + 1]);
// get_new_weight() produced the fractional position between the two texels; add the first
// texel's distance from the centre so the offset is relative to the kernel's centre.
offsets[i] += static_cast<float>(index - radius);
// Mirror to lower half.
kernel[lowIndex] = kernel[i];
offsets[lowIndex] = -offsets[i];
}
// Zero out remaining values in the kernel
memset(kernel.data() + halfSize, 0, sizeof(float)*(kMaxSamples - halfSize));
// But copy the last valid offset into the remaining offsets, to increase the chance that
// over-iteration in a fragment shader will have a cache hit.
for (int i = halfSize; i < kMaxSamples; ++i) {
offsets[i] = offsets[halfSize - 1];
}
// Interleave into the output array to match the 1D SkSL effect. Each SkV4 holds two taps as
// (offset, weight, offset, weight).
for (int i = 0; i < kMaxSamples / 2; ++i) {
offsetsAndKernel[i] = SkV4{offsets[2*i], kernel[2*i], offsets[2*i+1], kernel[2*i+1]};
}
}
// Maps a kernel width in [2, 28] to one of six consecutive stable keys that start at 'baseKey'.
// Several widths share a key, so a kernel may use an effect that is sized for more samples than
// it strictly needs. Any other width hits SkUNREACHABLE.
static SkKnownRuntimeEffects::StableKey to_stablekey(int kernelWidth, uint32_t baseKey) {
    SkASSERT(kernelWidth >= 2 && kernelWidth <= SkShaderBlurAlgorithm::kMaxSamples);

    using StableKey = SkKnownRuntimeEffects::StableKey;
    const auto keyForBucket = [baseKey](uint32_t bucket) {
        return static_cast<StableKey>(baseKey + bucket);
    };

    switch (kernelWidth) {
        // Widths up to 20 are batched in groups of four (a width of 1 can't happen, so it has
        // no case).
        case 2: case 3: case 4:
            return keyForBucket(0);
        case 5: case 6: case 7: case 8:
            return keyForBucket(1);
        case 9: case 10: case 11: case 12:
            return keyForBucket(2);
        case 13: case 14: case 15: case 16:
            return keyForBucket(3);
        case 17: case 18: case 19: case 20:
            return keyForBucket(4);

        // Larger kernels are batched as a group of eight, so up to 7 samples may be wasted.
        case 21: case 22: case 23: case 24:
        case 25: case 26: case 27: case 28:
            return keyForBucket(5);

        default:
            SkUNREACHABLE;
    }
}
// Returns the known runtime effect for a 1D linear blur of the given 'radius'. The effect is
// selected by the linear kernel width, counted from the k1DBlurBase stable key.
const SkRuntimeEffect* SkShaderBlurAlgorithm::GetLinearBlur1DEffect(int radius) {
    const uint32_t baseKey = static_cast<uint32_t>(SkKnownRuntimeEffects::StableKey::k1DBlurBase);
    const SkKnownRuntimeEffects::StableKey key = to_stablekey(LinearKernelWidth(radius), baseKey);
    return GetKnownRuntimeEffect(key);
}
// Returns the known runtime effect for a single-pass 2D blur with the given 'radii'. The effect
// is selected by the kernel's total sample count, counted from the k2DBlurBase stable key.
const SkRuntimeEffect* SkShaderBlurAlgorithm::GetBlur2DEffect(const SkISize& radii) {
    const uint32_t baseKey = static_cast<uint32_t>(SkKnownRuntimeEffects::StableKey::k2DBlurBase);
    const int sampleCount = KernelWidth(radii.width()) * KernelWidth(radii.height());
    const SkKnownRuntimeEffects::StableKey key = to_stablekey(sampleCount, baseKey);
    return GetKnownRuntimeEffect(key);
}
// Fills a newly made premultiplied device, sized to 'dstRect', with 'blurEffect' and returns a
// snapshot of it. The device is translated so that 'dstRect' lands at its origin. Returns null if
// the device could not be created.
sk_sp<SkSpecialImage> SkShaderBlurAlgorithm::renderBlur(sk_sp<SkShader> blurEffect,
                                                        const SkIRect& dstRect,
                                                        SkColorType colorType,
                                                        sk_sp<SkColorSpace> colorSpace) const {
    const SkISize outputSize = dstRect.size();
    SkImageInfo outputInfo = SkImageInfo::Make(outputSize,
                                               colorType,
                                               kPremul_SkAlphaType,
                                               std::move(colorSpace));
    sk_sp<SkDevice> device = this->makeDevice(outputInfo);
    if (!device) {
        return nullptr;
    }

    // TODO(b/294102201): This is very much like AutoSurface in SkImageFilterTypes.cpp
    const SkIRect deviceBounds = SkIRect::MakeSize(outputSize);
    device->clipRect(SkRect::Make(deviceBounds), SkClipOp::kIntersect, /*aa=*/false);
    device->setLocalToDevice(SkM44::Translate(-dstRect.left(), -dstRect.top()));

    // kSrc so that the shader's output replaces the device contents rather than blending with
    // them.
    SkPaint fill;
    fill.setBlendMode(SkBlendMode::kSrc);
    fill.setShader(std::move(blurEffect));
    device->drawPaint(fill);

    return device->snapSpecial(deviceBounds);
}
// Blurs 'input' in a single pass with a 2D kernel: computes the kernel weights and sample
// offsets for 'sigma'/'radii', binds them and the tiled 'srcRect' subset of 'input' to the 2D
// blur effect, and renders the result covering 'dstRect'.
sk_sp<SkSpecialImage> SkShaderBlurAlgorithm::evalBlur2D(SkSize sigma,
                                                        SkISize radii,
                                                        sk_sp<SkSpecialImage> input,
                                                        const SkIRect& srcRect,
                                                        SkTileMode tileMode,
                                                        const SkIRect& dstRect) const {
    std::array<SkV4, kMaxSamples/4> weights;
    Compute2DBlurKernel(sigma, radii, weights);

    std::array<SkV4, kMaxSamples/2> sampleOffsets;
    Compute2DBlurOffsets(radii, sampleOffsets);

    // TODO(b/294102201): This is very much like FilterResult::asShader()...
    // The subset's shader is translated so that it is positioned at srcRect's top-left corner.
    sk_sp<SkShader> source =
            input->makeSubset(srcRect)->asShader(tileMode,
                                                 SkFilterMode::kNearest,
                                                 SkMatrix::Translate(srcRect.left(),
                                                                     srcRect.top()));

    SkRuntimeShaderBuilder builder{sk_ref_sp(GetBlur2DEffect(radii))};
    builder.uniform("kernel") = weights;
    builder.uniform("offsets") = sampleOffsets;
    builder.child("child") = std::move(source);

    return this->renderBlur(builder.makeShader(),
                            dstRect,
                            input->colorType(),
                            input->colorInfo().refColorSpace());
}
// Blurs 'input' along the axis 'dir' with a 1D linear kernel: computes the interleaved offsets
// and weights for 'sigma'/'radius', binds them, 'dir', and the tiled 'srcRect' subset of 'input'
// to the 1D blur effect, and renders the result covering 'dstRect'.
sk_sp<SkSpecialImage> SkShaderBlurAlgorithm::evalBlur1D(float sigma,
                                                        int radius,
                                                        SkV2 dir,
                                                        sk_sp<SkSpecialImage> input,
                                                        SkIRect srcRect,
                                                        SkTileMode tileMode,
                                                        SkIRect dstRect) const {
    std::array<SkV4, kMaxSamples/2> taps;
    Compute1DBlurLinearKernel(sigma, radius, taps);

    // TODO(b/294102201): This is very much like FilterResult::asShader()...
    // Linear filtering is requested for the source since the kernel's offsets are fractional.
    sk_sp<SkShader> source =
            input->makeSubset(srcRect)->asShader(tileMode,
                                                 SkFilterMode::kLinear,
                                                 SkMatrix::Translate(srcRect.left(),
                                                                     srcRect.top()));

    SkRuntimeShaderBuilder builder{sk_ref_sp(GetLinearBlur1DEffect(radius))};
    builder.uniform("offsetsAndKernel") = taps;
    builder.uniform("dir") = dir;
    builder.child("child") = std::move(source);

    return this->renderBlur(builder.makeShader(),
                            dstRect,
                            input->colorType(),
                            input->colorInfo().refColorSpace());
}
// Blurs the 'srcRect' region of 'src' by 'sigma', tiled with 'tileMode', and returns an image
// covering 'dstRect'. A single 2D pass is used when both radii are non-zero and the whole kernel
// fits in kMaxSamples; otherwise each axis with a non-zero radius gets its own 1D pass. If both
// radii are zero, 'src' is returned as-is. Returns null if the X pass's output bounds are empty
// or if the X pass fails to produce an image.
sk_sp<SkSpecialImage> SkShaderBlurAlgorithm::blur(SkSize sigma,
                                                  sk_sp<SkSpecialImage> src,
                                                  const SkIRect& srcRect,
                                                  SkTileMode tileMode,
                                                  const SkIRect& dstRect) const {
    SkASSERT(sigma.width() <= kMaxLinearSigma && sigma.height() <= kMaxLinearSigma);

    const int rx = SkBlurEngine::SigmaToRadius(sigma.width());
    const int ry = SkBlurEngine::SigmaToRadius(sigma.height());
    const int area2D = KernelWidth(rx) * KernelWidth(ry);
    const bool blursX = rx > 0;
    const bool blursY = ry > 0;

    // Prefer a single-pass 2D kernel when it fits and the blur isn't effectively 1D already.
    if (area2D <= kMaxSamples && blursX && blursY) {
        return this->evalBlur2D(sigma, {rx, ry}, std::move(src), srcRect, tileMode, dstRect);
    }

    // Otherwise run one 1D pass per axis. These rects describe the Y pass; they start as the
    // caller's rects and are replaced if an X pass runs first.
    SkIRect ySrcRect = srcRect;
    SkIRect yDstRect = dstRect;
    if (blursX) {
        SkIRect xDstRect = dstRect;
        if (blursY) {
            // The Y pass that follows reads 'ry' extra rows above and below dstRect, so the X
            // pass has to produce them, limited to the outset source bounds.
            xDstRect.outset(0, ry);
            if (!xDstRect.intersect(srcRect.makeOutset(rx, ry))) {
                return nullptr;
            }
        }

        src = this->evalBlur1D(sigma.width(),
                               rx,
                               /*dir=*/{1.f, 0.f},
                               std::move(src),
                               srcRect,
                               tileMode,
                               xDstRect);
        if (!src) {
            return nullptr;
        }

        // The intermediate image's origin is xDstRect's top-left corner, so express the Y pass's
        // rects relative to that origin.
        ySrcRect = SkIRect::MakeWH(src->width(), src->height());
        yDstRect = dstRect.makeOffset(-xDstRect.left(), -xDstRect.top());
    }

    if (!blursY) {
        return src;
    }
    return this->evalBlur1D(sigma.height(),
                            ry,
                            /*dir=*/{0.f, 1.f},
                            std::move(src),
                            ySrcRect,
                            tileMode,
                            yDstRect);
}