blob: cf3f6d000c1b06372eef21d7a95290cdb1ae4b4f [file] [log] [blame]
/*
* Copyright 2013 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "src/core/SkGpuBlurUtils.h"
#include "include/core/SkBitmap.h"
#include "include/core/SkRect.h"
#include "src/core/SkMathPriv.h"
#if SK_SUPPORT_GPU
#include "include/core/SkColorSpace.h"
#include "include/gpu/GrRecordingContext.h"
#include "src/gpu/ganesh/GrCaps.h"
#include "src/gpu/ganesh/GrRecordingContextPriv.h"
#include "src/gpu/ganesh/SkGr.h"
#include "src/gpu/ganesh/effects/GrGaussianConvolutionFragmentProcessor.h"
#include "src/gpu/ganesh/effects/GrMatrixConvolutionEffect.h"
#include "src/gpu/ganesh/effects/GrTextureEffect.h"
#if SK_GPU_V1
#include "src/gpu/ganesh/v1/SurfaceDrawContext_v1.h"
// Shorthand for the 1D convolution axis selector (Direction::kX / Direction::kY) used by the
// helpers in this file.
using Direction = GrGaussianConvolutionFragmentProcessor::Direction;
/**
 * Writes a normalized 2D Gaussian into 'kernel', which must hold 'width' * 'height' floats.
 * The weight for column x / row y is stored at kernel[y * width + x]; the Gaussian is centered
 * on (width / 2, height / 2) and uses 'sigmaX' / 'sigmaY' as the per-axis standard deviations.
 * Both sigmas must be non-degenerate (asserted).
 */
static void fill_in_2D_gaussian_kernel(float* kernel, int width, int height,
                                       SkScalar sigmaX, SkScalar sigmaY) {
    // SkGpuBlurUtils::GaussianBlur() is expected to have already routed the degenerate cases
    // (1D blur on X or Y, or the identity) elsewhere.
    SkASSERT(!SkGpuBlurUtils::IsEffectivelyZeroSigma(sigmaX) &&
             !SkGpuBlurUtils::IsEffectivelyZeroSigma(sigmaY));

    const float twoVarX = 2.0f * SkScalarToFloat(SkScalarSquare(sigmaX));
    const float twoVarY = 2.0f * SkScalarToFloat(SkScalarSquare(sigmaY));
    SkASSERT(!SkScalarNearlyZero(twoVarX) && !SkScalarNearlyZero(twoVarY));

    const float invTwoVarX = 1.0f / twoVarX;
    const float invTwoVarY = 1.0f / twoVarY;
    const int centerX = width / 2;
    const int centerY = height / 2;

    float total = 0.0f;
    for (int col = 0; col < width; ++col) {
        const float dx = static_cast<float>(col - centerX);
        const float xExponent = dx * dx * invTwoVarX;
        for (int row = 0; row < height; ++row) {
            const float dy = static_cast<float>(row - centerY);
            // The 1/sqrt(2*pi*sigma^2) factor is intentionally omitted: the kernel is
            // renormalized below, which cancels any constant scale.
            const float weight = sk_float_exp(-(xExponent + dy * dy * invTwoVarY));
            kernel[row * width + col] = weight;
            total += weight;
        }
    }

    // Scale so that all weights sum to one.
    const float normalize = 1.0f / total;
    const int count = width * height;
    for (int idx = 0; idx < count; ++idx) {
        kernel[idx] *= normalize;
    }
}
/**
 * Fills 'dstRect' of 'sfc' with a 1D Gaussian convolution of 'srcView' along 'direction'.
 * The sampled source rect is 'dstRect' translated by 'dstToSrcOffset'; 'mode' and 'srcSubset'
 * are applied to the source coordinates. 'radius' must be non-zero and 'sigma' non-degenerate.
 */
static void convolve_gaussian_1d(skgpu::SurfaceFillContext* sfc,
                                 GrSurfaceProxyView srcView,
                                 const SkIRect srcSubset,
                                 SkIVector dstToSrcOffset,
                                 const SkIRect& dstRect,
                                 SkAlphaType srcAlphaType,
                                 Direction direction,
                                 int radius,
                                 float sigma,
                                 SkTileMode mode) {
    SkASSERT(radius && !SkGpuBlurUtils::IsEffectivelyZeroSigma(sigma));

    auto sampledRect = dstRect.makeOffset(dstToSrcOffset);
    auto wrapMode = SkTileModeToWrapMode(mode);

    // NOTE: A GrMatrixConvolutionEffect with one dimension set to 1 and a precomputed kernel
    // could do this too, but there's value in keeping the shader simpler.
    // TODO(michaelludwig): Is this true? If not, is the shader key simplicity worth it to have
    // two convolution effects?
    std::unique_ptr<GrFragmentProcessor> blurFP =
            GrGaussianConvolutionFragmentProcessor::Make(std::move(srcView),
                                                         srcAlphaType,
                                                         direction,
                                                         radius,
                                                         sigma,
                                                         wrapMode,
                                                         srcSubset,
                                                         &sampledRect,
                                                         *sfc->caps());
    sfc->fillRectToRectWithFP(sampledRect, dstRect, std::move(blurFP));
}
/**
 * Performs a non-separable 2D Gaussian blur of 'srcView' in a single draw using a matrix
 * convolution whose kernel fits in a uniform array. Returns a new SurfaceDrawContext of size
 * dstBounds.size() containing the 'dstBounds' region of the blurred source, or nullptr if the
 * context could not be created. Both radii must be non-zero and both sigmas non-degenerate.
 */
static std::unique_ptr<skgpu::v1::SurfaceDrawContext> convolve_gaussian_2d(
        GrRecordingContext* rContext,
        GrSurfaceProxyView srcView,
        GrColorType srcColorType,
        const SkIRect& srcBounds,
        const SkIRect& dstBounds,
        int radiusX,
        int radiusY,
        SkScalar sigmaX,
        SkScalar sigmaY,
        SkTileMode mode,
        sk_sp<SkColorSpace> finalCS,
        SkBackingFit dstFit) {
    SkASSERT(radiusX && radiusY);
    SkASSERT(!SkGpuBlurUtils::IsEffectivelyZeroSigma(sigmaX) &&
             !SkGpuBlurUtils::IsEffectivelyZeroSigma(sigmaY));

    // The sdc is made with default SkSurfaceProps. Gaussian blurs will soon use a
    // SurfaceFillContext, at which point the SkSurfaceProps won't exist anymore.
    auto target = skgpu::v1::SurfaceDrawContext::Make(
            rContext, srcColorType, std::move(finalCS), dstFit, dstBounds.size(), SkSurfaceProps(),
            1, GrMipmapped::kNo, srcView.proxy()->isProtected(), srcView.origin());
    if (!target) {
        return nullptr;
    }

    const SkISize kernelDims = SkISize::Make(SkGpuBlurUtils::KernelWidth(radiusX),
                                             SkGpuBlurUtils::KernelWidth(radiusY));
    // GaussianBlur() should have downsampled the request until the 2D kernel fits in a
    // uniform array.
    SkASSERT(kernelDims.area() <= GrMatrixConvolutionEffect::kMaxUniformSize);

    float weights[GrMatrixConvolutionEffect::kMaxUniformSize];
    fill_in_2D_gaussian_kernel(weights, kernelDims.width(), kernelDims.height(), sigmaX, sigmaY);

    const SkIPoint kernelCenter = SkIPoint::Make(radiusX, radiusY);
    auto wrapMode = SkTileModeToWrapMode(mode);
    auto convFP = GrMatrixConvolutionEffect::Make(rContext, std::move(srcView), srcBounds,
                                                  kernelDims, weights, 1.0f, 0.0f, kernelCenter,
                                                  wrapMode, true, *target->caps());

    GrPaint paint;
    paint.setColorFragmentProcessor(std::move(convFP));
    paint.setPorterDuffXPFactory(SkBlendMode::kSrc);

    // 'dstBounds' lives in 'srcView' proxy space: it is the blurred area of the source that is
    // captured at {0, 0} in the new context. So its size is the rect drawn and 'dstBounds'
    // itself is the local rect.
    target->fillRectToRect(nullptr, std::move(paint), GrAA::kNo, SkMatrix::I(),
                           SkRect::Make(dstBounds.size()), SkRect::Make(dstBounds));
    return target;
}
/**
 * Runs a single 1D Gaussian pass (along 'direction') over the 'srcBounds' region of 'srcView',
 * tiled with 'mode', and returns a new SurfaceDrawContext of size dstBounds.size() that holds
 * the 'dstBounds' portion of the result. Returns nullptr if the destination context cannot be
 * created.
 *
 * For kDecal and kClamp, when HW tiling will not be used, the destination is split into up to
 * five regions (top, bottom, left, right, mid) that are cleared or convolved separately.
 * 'radius' must be positive and 'sigma' non-degenerate (asserted).
 */
static std::unique_ptr<skgpu::v1::SurfaceDrawContext> convolve_gaussian(
GrRecordingContext* rContext,
GrSurfaceProxyView srcView,
GrColorType srcColorType,
SkAlphaType srcAlphaType,
SkIRect srcBounds,
SkIRect dstBounds,
Direction direction,
int radius,
float sigma,
SkTileMode mode,
sk_sp<SkColorSpace> finalCS,
SkBackingFit fit) {
using namespace SkGpuBlurUtils;
SkASSERT(radius > 0 && !SkGpuBlurUtils::IsEffectivelyZeroSigma(sigma));
// Logically we're creating an infinite blur of 'srcBounds' of 'srcView' with 'mode' tiling
// and then capturing the 'dstBounds' portion in a new RTC where the top left of 'dstBounds' is
// at {0, 0} in the new RTC.
//
// Create the sdc with default SkSurfaceProps. Gaussian blurs will soon use a
// SurfaceFillContext, at which point the SkSurfaceProps won't exist anymore.
auto dstSDC = skgpu::v1::SurfaceDrawContext::Make(
rContext, srcColorType, std::move(finalCS), fit, dstBounds.size(), SkSurfaceProps(), 1,
GrMipmapped::kNo, srcView.proxy()->isProtected(), srcView.origin());
if (!dstSDC) {
return nullptr;
}
// This represents the translation from 'dstSDC' coords to 'srcView' coords.
auto rtcToSrcOffset = dstBounds.topLeft();
auto srcBackingBounds = SkIRect::MakeSize(srcView.proxy()->backingStoreDimensions());
// We've implemented splitting the dst bounds up into areas that do and do not need to
// use shader based tiling but only for some modes...
bool canSplit = mode == SkTileMode::kDecal || mode == SkTileMode::kClamp;
// ...but it's not worth doing the splitting if we'll get HW tiling instead of shader tiling.
bool canHWTile =
srcBounds.contains(srcBackingBounds) &&
!rContext->priv().caps()->reducedShaderMode() && // this mode always uses shader tiling
!(mode == SkTileMode::kDecal && !rContext->priv().caps()->clampToBorderSupport());
if (!canSplit || canHWTile) {
// Single draw covering the whole destination; 'srcView' is consumed here.
auto dstRect = SkIRect::MakeSize(dstBounds.size());
convolve_gaussian_1d(dstSDC.get(), std::move(srcView), srcBounds,
rtcToSrcOffset, dstRect, srcAlphaType, direction, radius, sigma, mode);
return dstSDC;
}
// 'left' and 'right' are the sub rects of 'srcBounds' where 'mode' must be enforced.
// 'mid' is the area where we can ignore the mode because the kernel does not reach to the
// edge of 'srcBounds'.
SkIRect mid, left, right;
// 'top' and 'bottom' are areas of 'dstBounds' that are entirely above/below 'srcBounds'.
// These are areas that we can simply clear in the dst in kDecal mode. If 'srcBounds'
// straddles the top edge of 'dstBounds' then 'top' will be inverted and we will skip
// processing for the rect. Similar for 'bottom'. The positional/directional labels above refer
// to the Direction::kX case and one should think of these as 'left' and 'right' for
// Direction::kY.
SkIRect top, bottom;
if (Direction::kX == direction) {
top = {dstBounds.left(), dstBounds.top() , dstBounds.right(), srcBounds.top() };
bottom = {dstBounds.left(), srcBounds.bottom(), dstBounds.right(), dstBounds.bottom()};
// Inset for sub-rect of 'srcBounds' where the x-dir kernel doesn't reach the edges, clipped
// vertically to dstBounds.
int midA = std::max(srcBounds.top() , dstBounds.top() );
int midB = std::min(srcBounds.bottom(), dstBounds.bottom());
mid = {srcBounds.left() + radius, midA, srcBounds.right() - radius, midB};
if (mid.isEmpty()) {
// There is no middle where the bounds can be ignored. Make the left span the whole
// width of dst and we will not draw mid or right.
left = {dstBounds.left(), mid.top(), dstBounds.right(), mid.bottom()};
} else {
left = {dstBounds.left(), mid.top(), mid.left() , mid.bottom()};
right = {mid.right(), mid.top(), dstBounds.right(), mid.bottom()};
}
} else {
// This is the same as the x direction code if you turn your head 90 degrees CCW. Swap x and
// y and swap top/bottom with left/right.
top = {dstBounds.left(), dstBounds.top(), srcBounds.left() , dstBounds.bottom()};
bottom = {srcBounds.right(), dstBounds.top(), dstBounds.right(), dstBounds.bottom()};
int midA = std::max(srcBounds.left() , dstBounds.left() );
int midB = std::min(srcBounds.right(), dstBounds.right());
mid = {midA, srcBounds.top() + radius, midB, srcBounds.bottom() - radius};
if (mid.isEmpty()) {
// No middle: 'left' spans the full height of dst and mid/right are not drawn.
left = {mid.left(), dstBounds.top(), mid.right(), dstBounds.bottom()};
} else {
left = {mid.left(), dstBounds.top(), mid.right(), mid.top() };
right = {mid.left(), mid.bottom() , mid.right(), dstBounds.bottom()};
}
}
// Convolves one region. 'rect' is given in 'srcView' coords; 'srcView' is passed by copy since
// this may be invoked several times.
auto convolve = [&](SkIRect rect) {
// Transform rect into the render target's coord system.
rect.offset(-rtcToSrcOffset);
convolve_gaussian_1d(dstSDC.get(), srcView, srcBounds, rtcToSrcOffset, rect,
srcAlphaType, direction, radius, sigma, mode);
};
// Clears one region (given in 'srcView' coords) to transparent.
auto clear = [&](SkIRect rect) {
// Transform rect into the render target's coord system.
rect.offset(-rtcToSrcOffset);
dstSDC->clearAtLeast(rect, SK_PMColor4fTRANSPARENT);
};
// Doing mid separately will cause two draws to occur (left and right batch together). At
// small sizes of mid it is worse to issue more draws than to just execute the slightly
// more complicated shader that implements the tile mode across mid. This threshold is
// very arbitrary right now. It is believed that a 21x44 mid on a Moto G4 is a significant
// regression compared to doing one draw but it has not been locally evaluated or tuned.
// The optimal cutoff is likely to vary by GPU.
if (!mid.isEmpty() && mid.width()*mid.height() < 256*256) {
// Collapse left/mid/right into a single rect stored in 'left'.
left.join(mid);
left.join(right);
mid = SkIRect::MakeEmpty();
right = SkIRect::MakeEmpty();
// It's unknown whether for kDecal it'd be better to expand the draw rather than a draw and
// up to two clears.
if (mode == SkTileMode::kClamp) {
left.join(top);
left.join(bottom);
top = SkIRect::MakeEmpty();
bottom = SkIRect::MakeEmpty();
}
}
if (!top.isEmpty()) {
if (mode == SkTileMode::kDecal) {
clear(top);
} else {
convolve(top);
}
}
if (!bottom.isEmpty()) {
if (mode == SkTileMode::kDecal) {
clear(bottom);
} else {
convolve(bottom);
}
}
if (mid.isEmpty()) {
convolve(left);
} else {
convolve(left);
convolve(right);
convolve(mid);
}
return dstSDC;
}
/**
 * Scales the 'srcBounds' region of 'src' up into a new SurfaceDrawContext of size 'dstSize'
 * using linear filtering. 'src' is an intermediate image, so no proxy offset from the original
 * input has to be accounted for. Returns nullptr if 'src' is not texturable or the destination
 * context cannot be created.
 */
static std::unique_ptr<skgpu::v1::SurfaceDrawContext> reexpand(
        GrRecordingContext* rContext,
        std::unique_ptr<skgpu::SurfaceContext> src,
        const SkRect& srcBounds,
        SkISize dstSize,
        sk_sp<SkColorSpace> colorSpace,
        SkBackingFit fit) {
    GrSurfaceProxyView view = src->readSurfaceView();
    if (!view.asTextureProxy()) {
        return nullptr;
    }

    // Grab what we need from 'src' and then release it; only the view is used from here on.
    const GrColorType colorType = src->colorInfo().colorType();
    const SkAlphaType alphaType = src->colorInfo().alphaType();
    src.reset();

    // The sdc is made with default SkSurfaceProps. Gaussian blurs will soon use a
    // SurfaceFillContext, at which point the SkSurfaceProps won't exist anymore.
    auto expanded = skgpu::v1::SurfaceDrawContext::Make(
            rContext, colorType, std::move(colorSpace), fit, dstSize, SkSurfaceProps(), 1,
            GrMipmapped::kNo, view.proxy()->isProtected(), view.origin());
    if (!expanded) {
        return nullptr;
    }

    auto textureFP = GrTextureEffect::MakeSubset(std::move(view), alphaType, SkMatrix::I(),
                                                 GrSamplerState::Filter::kLinear, srcBounds,
                                                 srcBounds, *rContext->priv().caps());

    GrPaint paint;
    paint.setColorFragmentProcessor(std::move(textureFP));
    paint.setPorterDuffXPFactory(SkBlendMode::kSrc);

    // Stretch 'srcBounds' over the entire destination.
    expanded->fillRectToRect(nullptr, std::move(paint), GrAA::kNo, SkMatrix::I(),
                             SkRect::Make(dstSize), srcBounds);
    return expanded;
}
/**
 * Separable Gaussian blur: an optional X pass (when 'radiusX' > 0) followed by an optional Y pass
 * (when 'radiusY' != 0), each performed by convolve_gaussian(). At least one radius must be
 * non-zero (asserted). Returns the SurfaceDrawContext produced by the last pass that ran, or
 * nullptr if a pass fails or, in kDecal mode, if the X-pass bounds lie outside the source's reach.
 */
static std::unique_ptr<skgpu::v1::SurfaceDrawContext> two_pass_gaussian(
GrRecordingContext* rContext,
GrSurfaceProxyView srcView,
GrColorType srcColorType,
SkAlphaType srcAlphaType,
sk_sp<SkColorSpace> colorSpace,
SkIRect srcBounds,
SkIRect dstBounds,
float sigmaX,
float sigmaY,
int radiusX,
int radiusY,
SkTileMode mode,
SkBackingFit fit) {
SkASSERT(radiusX || radiusY);
std::unique_ptr<skgpu::v1::SurfaceDrawContext> dstSDC;
if (radiusX > 0) {
// The X-pass result is only an intermediate when a Y pass follows, so an approximate fit is
// used in that case; otherwise honor the caller's 'fit'.
SkBackingFit xFit = radiusY > 0 ? SkBackingFit::kApprox : fit;
// Expand the dstBounds vertically to produce necessary content for the y-pass. Then we will
// clip these in a tile-mode dependent way to ensure the tile-mode gets implemented
// correctly. However, if we're not going to do a y-pass then we must use the original
// dstBounds without clipping to produce the correct output size.
SkIRect xPassDstBounds = dstBounds;
if (radiusY) {
xPassDstBounds.outset(0, radiusY);
if (mode == SkTileMode::kRepeat || mode == SkTileMode::kMirror) {
int srcH = srcBounds.height();
int srcTop = srcBounds.top();
if (mode == SkTileMode::kMirror) {
// For mirror the period used below is twice the source height, starting one source
// height above the source's top.
srcTop -= srcH;
srcH *= 2;
}
float floatH = srcH;
// First row above the dst rect where we should restart the tile mode.
int n = sk_float_floor2int_no_saturate((xPassDstBounds.top() - srcTop)/floatH);
int topClip = srcTop + n*srcH;
// First row below the dst rect where we should restart the tile mode.
n = sk_float_ceil2int_no_saturate(
(xPassDstBounds.bottom() - srcBounds.bottom())/floatH);
int bottomClip = srcBounds.bottom() + n*srcH;
xPassDstBounds.fTop = std::max(xPassDstBounds.top(), topClip);
xPassDstBounds.fBottom = std::min(xPassDstBounds.bottom(), bottomClip);
} else {
// kClamp / kDecal: restrict the X-pass rows to the rows of 'srcBounds'.
if (xPassDstBounds.fBottom <= srcBounds.top()) {
// Entirely above the source: nothing for decal; for clamp keep only the top source row.
if (mode == SkTileMode::kDecal) {
return nullptr;
}
xPassDstBounds.fTop = srcBounds.top();
xPassDstBounds.fBottom = xPassDstBounds.fTop + 1;
} else if (xPassDstBounds.fTop >= srcBounds.bottom()) {
// Entirely below the source: nothing for decal; for clamp keep only the bottom source row.
if (mode == SkTileMode::kDecal) {
return nullptr;
}
xPassDstBounds.fBottom = srcBounds.bottom();
xPassDstBounds.fTop = xPassDstBounds.fBottom - 1;
} else {
xPassDstBounds.fTop = std::max(xPassDstBounds.fTop, srcBounds.top());
xPassDstBounds.fBottom = std::min(xPassDstBounds.fBottom, srcBounds.bottom());
}
// Columns farther than 'radiusX' from the source are beyond the reach of the X kernel.
int leftSrcEdge = srcBounds.fLeft - radiusX ;
int rightSrcEdge = srcBounds.fRight + radiusX;
if (mode == SkTileMode::kClamp) {
// In clamp the column just outside the src bounds has the same value as the
// column just inside, unlike decal.
leftSrcEdge += 1;
rightSrcEdge -= 1;
}
if (xPassDstBounds.fRight <= leftSrcEdge) {
if (mode == SkTileMode::kDecal) {
return nullptr;
}
xPassDstBounds.fLeft = xPassDstBounds.fRight - 1;
} else {
xPassDstBounds.fLeft = std::max(xPassDstBounds.fLeft, leftSrcEdge);
}
if (xPassDstBounds.fLeft >= rightSrcEdge) {
if (mode == SkTileMode::kDecal) {
return nullptr;
}
xPassDstBounds.fRight = xPassDstBounds.fLeft + 1;
} else {
xPassDstBounds.fRight = std::min(xPassDstBounds.fRight, rightSrcEdge);
}
}
}
dstSDC = convolve_gaussian(
rContext, std::move(srcView), srcColorType, srcAlphaType, srcBounds, xPassDstBounds,
Direction::kX, radiusX, sigmaX, mode, colorSpace, xFit);
if (!dstSDC) {
return nullptr;
}
// The X-pass output becomes the source of the Y pass. Re-express 'dstBounds' relative to the
// X-pass output's origin and make the new source bounds cover that whole output.
srcView = dstSDC->readSurfaceView();
SkIVector newDstBoundsOffset = dstBounds.topLeft() - xPassDstBounds.topLeft();
dstBounds = SkIRect::MakeSize(dstBounds.size()).makeOffset(newDstBoundsOffset);
srcBounds = SkIRect::MakeSize(xPassDstBounds.size());
}
if (!radiusY) {
// X-only blur: the X-pass result is the final result.
return dstSDC;
}
return convolve_gaussian(rContext, std::move(srcView), srcColorType, srcAlphaType, srcBounds,
dstBounds, Direction::kY, radiusY, sigmaY, mode, colorSpace, fit);
}
#endif // SK_GPU_V1
namespace SkGpuBlurUtils {
#if SK_GPU_V1
/**
 * Blurs the 'srcBounds' region of 'srcView' (tiled with 'mode') with a Gaussian of standard
 * deviation 'sigmaX' / 'sigmaY' and returns a new SurfaceDrawContext of size dstBounds.size()
 * containing the 'dstBounds' portion of the result.
 *
 * Paths taken, in order:
 *  - no blur needed in either axis: a single draw that only applies the tile mode;
 *  - both sigmas <= kMaxSigma: either one non-separable 2D convolution (small kernels) or a
 *    separable two-pass blur;
 *  - otherwise: downscale the source so the sigmas fit, recurse on the smaller image, and scale
 *    the blurred result back up.
 *
 * Returns nullptr if 'srcView' is not texturable, 'dstBounds' exceeds the max render target size,
 * a decal blur cannot reach the source, or any intermediate surface/draw cannot be created.
 */
std::unique_ptr<skgpu::v1::SurfaceDrawContext> GaussianBlur(GrRecordingContext* rContext,
                                                            GrSurfaceProxyView srcView,
                                                            GrColorType srcColorType,
                                                            SkAlphaType srcAlphaType,
                                                            sk_sp<SkColorSpace> colorSpace,
                                                            SkIRect dstBounds,
                                                            SkIRect srcBounds,
                                                            float sigmaX,
                                                            float sigmaY,
                                                            SkTileMode mode,
                                                            SkBackingFit fit) {
    SkASSERT(rContext);
    TRACE_EVENT2("skia.gpu", "GaussianBlur", "sigmaX", sigmaX, "sigmaY", sigmaY);

    if (!srcView.asTextureProxy()) {
        return nullptr;
    }

    int maxRenderTargetSize = rContext->priv().caps()->maxRenderTargetSize();
    if (dstBounds.width() > maxRenderTargetSize || dstBounds.height() > maxRenderTargetSize) {
        return nullptr;
    }

    int radiusX = SigmaRadius(sigmaX);
    int radiusY = SigmaRadius(sigmaY);
    // Attempt to reduce the srcBounds in order to detect that we can set the sigmas to zero or
    // to reduce the amount of work to rescale the source if sigmas are large. TODO: Could consider
    // how to minimize the required source bounds for repeat/mirror modes.
    if (mode == SkTileMode::kClamp || mode == SkTileMode::kDecal) {
        SkIRect reach = dstBounds.makeOutset(radiusX, radiusY);
        SkIRect intersection;
        if (!intersection.intersect(reach, srcBounds)) {
            if (mode == SkTileMode::kDecal) {
                // The kernel never touches the source, so there is nothing to produce.
                return nullptr;
            } else {
                // Clamp: only the nearest edge row/column of the source can contribute.
                if (reach.fLeft >= srcBounds.fRight) {
                    srcBounds.fLeft = srcBounds.fRight - 1;
                } else if (reach.fRight <= srcBounds.fLeft) {
                    srcBounds.fRight = srcBounds.fLeft + 1;
                }
                if (reach.fTop >= srcBounds.fBottom) {
                    srcBounds.fTop = srcBounds.fBottom - 1;
                } else if (reach.fBottom <= srcBounds.fTop) {
                    srcBounds.fBottom = srcBounds.fTop + 1;
                }
            }
        } else {
            srcBounds = intersection;
        }
    }

    if (mode != SkTileMode::kDecal) {
        // All non-decal tile modes are equivalent for one pixel width/height src and amount to a
        // single color value repeated at each column/row. Applying the normalized kernel to that
        // column/row yields that same color. So no blurring is necessary.
        if (srcBounds.width() == 1) {
            sigmaX = 0.f;
            radiusX = 0;
        }
        if (srcBounds.height() == 1) {
            sigmaY = 0.f;
            radiusY = 0;
        }
    }

    // If we determined that there is no blurring necessary in either direction then just do a
    // draw that applies the tile mode.
    if (!radiusX && !radiusY) {
        // Create the sdc with default SkSurfaceProps. Gaussian blurs will soon use a
        // SurfaceFillContext, at which point the SkSurfaceProps won't exist anymore.
        auto result = skgpu::v1::SurfaceDrawContext::Make(rContext,
                                                          srcColorType,
                                                          std::move(colorSpace),
                                                          fit,
                                                          dstBounds.size(),
                                                          SkSurfaceProps(),
                                                          1,
                                                          GrMipmapped::kNo,
                                                          srcView.proxy()->isProtected(),
                                                          srcView.origin());
        if (!result) {
            return nullptr;
        }
        GrSamplerState sampler(SkTileModeToWrapMode(mode), GrSamplerState::Filter::kNearest);
        auto fp = GrTextureEffect::MakeSubset(std::move(srcView),
                                              srcAlphaType,
                                              SkMatrix::I(),
                                              sampler,
                                              SkRect::Make(srcBounds),
                                              SkRect::Make(dstBounds),
                                              *rContext->priv().caps());
        result->fillRectToRectWithFP(dstBounds, SkIRect::MakeSize(dstBounds.size()), std::move(fp));
        return result;
    }

    if (sigmaX <= kMaxSigma && sigmaY <= kMaxSigma) {
        SkASSERT(radiusX <= GrGaussianConvolutionFragmentProcessor::kMaxKernelRadius);
        SkASSERT(radiusY <= GrGaussianConvolutionFragmentProcessor::kMaxKernelRadius);
        // For really small blurs (certainly no wider than 5x5 on desktop GPUs) it is faster to just
        // launch a single non separable kernel vs two launches.
        const int kernelSize = (2 * radiusX + 1) * (2 * radiusY + 1);
        if (radiusX > 0 && radiusY > 0 &&
            kernelSize <= GrMatrixConvolutionEffect::kMaxUniformSize &&
            !rContext->priv().caps()->reducedShaderMode()) {
            // Apply the proxy offset to src bounds and offset directly
            return convolve_gaussian_2d(rContext, std::move(srcView), srcColorType, srcBounds,
                                        dstBounds, radiusX, radiusY, sigmaX, sigmaY, mode,
                                        std::move(colorSpace), fit);
        }
        // This will automatically degenerate into a single pass of X or Y if only one of the
        // radii are non-zero.
        return two_pass_gaussian(rContext, std::move(srcView), srcColorType, srcAlphaType,
                                 std::move(colorSpace), srcBounds, dstBounds, sigmaX, sigmaY,
                                 radiusX, radiusY, mode, fit);
    }

    // At least one sigma is too large: downscale, blur the smaller image, then scale back up.
    GrColorInfo colorInfo(srcColorType, srcAlphaType, colorSpace);
    auto srcCtx = rContext->priv().makeSC(srcView, colorInfo);
    SkASSERT(srcCtx);
    if (!srcCtx) {
        // Asserts compile out in release builds; fail the blur rather than dereference null.
        return nullptr;
    }

    float scaleX = sigmaX > kMaxSigma ? kMaxSigma/sigmaX : 1.f;
    float scaleY = sigmaY > kMaxSigma ? kMaxSigma/sigmaY : 1.f;
    // We round down here so that when we recalculate sigmas we know they will be below
    // kMaxSigma (but clamp to 1 so we don't have an empty texture).
    SkISize rescaledSize = {std::max(sk_float_floor2int(srcBounds.width() *scaleX), 1),
                            std::max(sk_float_floor2int(srcBounds.height()*scaleY), 1)};
    // Compute the sigmas using the actual scale factors used once we integerized the
    // rescaledSize.
    scaleX = static_cast<float>(rescaledSize.width()) /srcBounds.width();
    scaleY = static_cast<float>(rescaledSize.height())/srcBounds.height();
    sigmaX *= scaleX;
    sigmaY *= scaleY;

    // When we are in clamp mode any artifacts in the edge pixels due to downscaling may be
    // exacerbated because of the tile mode. The particularly egregious case is when the original
    // image has transparent black around the edges and the downscaling pulls in some non-zero
    // values from the interior. Ultimately it'd be better for performance if the calling code could
    // give us extra context around the blur to account for this. We don't currently have a good way
    // to communicate this up stack. So we leave a 1 pixel border around the rescaled src bounds.
    // We populate the top 1 pixel tall row of this border by rescaling the top row of the original
    // source bounds into it. Because this is only rescaling in x (i.e. rescaling a 1 pixel high
    // row into a shorter but still 1 pixel high row) we won't read any interior values. And similar
    // for the other three borders. We'll adjust the source/dest bounds rescaled blur so that this
    // border of extra pixels is used as the edge pixels for clamp mode but the dest bounds
    // corresponds only to the pixels inside the border (the normally rescaled pixels inside this
    // border).
    // Moreover, if we clamped the rescaled size to 1 column or row then we still have a sigma
    // that is greater than kMaxSigma. By using a pad and making the src 3 wide/tall instead of
    // 1 we can recurse again and do another downscale. Since mirror and repeat modes are trivial
    // for a single col/row we only add padding based on sigma exceeding kMaxSigma for decal.
    int padX = mode == SkTileMode::kClamp ||
               (mode == SkTileMode::kDecal && sigmaX > kMaxSigma) ? 1 : 0;
    int padY = mode == SkTileMode::kClamp ||
               (mode == SkTileMode::kDecal && sigmaY > kMaxSigma) ? 1 : 0;

    // Create the sdc with default SkSurfaceProps. Gaussian blurs will soon use a
    // SurfaceFillContext, at which point the SkSurfaceProps won't exist anymore.
    auto rescaledSDC = skgpu::v1::SurfaceDrawContext::Make(
            srcCtx->recordingContext(),
            colorInfo.colorType(),
            colorInfo.refColorSpace(),
            SkBackingFit::kApprox,
            {rescaledSize.width() + 2*padX, rescaledSize.height() + 2*padY},
            SkSurfaceProps(),
            1,
            GrMipmapped::kNo,
            srcCtx->asSurfaceProxy()->isProtected(),
            srcCtx->origin());
    if (!rescaledSDC) {
        return nullptr;
    }
    if ((padX || padY) && mode == SkTileMode::kDecal) {
        // Decal padding must read as transparent.
        rescaledSDC->clear(SkPMColor4f{0, 0, 0, 0});
    }
    if (!srcCtx->rescaleInto(rescaledSDC.get(),
                             SkIRect::MakeSize(rescaledSize).makeOffset(padX, padY),
                             srcBounds,
                             SkSurface::RescaleGamma::kSrc,
                             SkSurface::RescaleMode::kRepeatedLinear)) {
        return nullptr;
    }

    if (mode == SkTileMode::kClamp) {
        SkASSERT(padX == 1 && padY == 1);
        // Rather than run a potentially multi-pass rescaler on single rows/columns we just do a
        // single bilerp draw. If we find this quality unacceptable we should think more about how
        // to rescale these with better quality but without 4 separate multi-pass downscales.
        auto cheapDownscale = [&](SkIRect dstRect, SkIRect srcRect) {
            rescaledSDC->drawTexture(nullptr,
                                     srcCtx->readSurfaceView(),
                                     srcAlphaType,
                                     GrSamplerState::Filter::kLinear,
                                     GrSamplerState::MipmapMode::kNone,
                                     SkBlendMode::kSrc,
                                     SK_PMColor4fWHITE,
                                     SkRect::Make(srcRect),
                                     SkRect::Make(dstRect),
                                     GrQuadAAFlags::kNone,
                                     SkCanvas::SrcRectConstraint::kFast_SrcRectConstraint,
                                     SkMatrix::I(),
                                     nullptr);
        };
        auto [dw, dh] = rescaledSize;
        // These are the src rows and columns from the source that we will scale into the dst
        // padding. They are kept as ints since they only feed integer rects.
        const int sLCol = srcBounds.left();
        const int sTRow = srcBounds.top();
        const int sRCol = srcBounds.right() - 1;
        const int sBRow = srcBounds.bottom() - 1;

        const int sx = srcBounds.left();
        const int sy = srcBounds.top();
        const int sw = srcBounds.width();
        const int sh = srcBounds.height();

        // Downscale the edges from the original source. These draws should batch together (and with
        // the above interior rescaling when it is a single pass).
        cheapDownscale(SkIRect::MakeXYWH(     0,      1,  1, dh),
                       SkIRect::MakeXYWH( sLCol,     sy,  1, sh));
        cheapDownscale(SkIRect::MakeXYWH(     1,      0, dw,  1),
                       SkIRect::MakeXYWH(    sx,  sTRow, sw,  1));
        cheapDownscale(SkIRect::MakeXYWH(dw + 1,      1,  1, dh),
                       SkIRect::MakeXYWH( sRCol,     sy,  1, sh));
        cheapDownscale(SkIRect::MakeXYWH(     1, dh + 1, dw,  1),
                       SkIRect::MakeXYWH(    sx,  sBRow, sw,  1));

        // Copy the corners from the original source. These would batch with the edges except that
        // at time of writing we recognize these can use kNearest and downgrade the filter. So they
        // batch with each other but not the edge draws.
        cheapDownscale(SkIRect::MakeXYWH(     0,      0, 1, 1),
                       SkIRect::MakeXYWH( sLCol,  sTRow, 1, 1));
        cheapDownscale(SkIRect::MakeXYWH(dw + 1,      0, 1, 1),
                       SkIRect::MakeXYWH( sRCol,  sTRow, 1, 1));
        cheapDownscale(SkIRect::MakeXYWH(dw + 1, dh + 1, 1, 1),
                       SkIRect::MakeXYWH( sRCol,  sBRow, 1, 1));
        cheapDownscale(SkIRect::MakeXYWH(     0, dh + 1, 1, 1),
                       SkIRect::MakeXYWH( sLCol,  sBRow, 1, 1));
    }
    srcView = rescaledSDC->readSurfaceView();
    // Drop the contexts so we don't hold the proxies longer than necessary.
    rescaledSDC.reset();
    srcCtx.reset();

    // Compute the dst bounds in the scaled down space. First move the origin to be at the top
    // left since we trimmed off everything above and to the left of the original src bounds during
    // the rescale.
    SkRect scaledDstBounds = SkRect::Make(dstBounds.makeOffset(-srcBounds.topLeft()));
    scaledDstBounds.fLeft   *= scaleX;
    scaledDstBounds.fTop    *= scaleY;
    scaledDstBounds.fRight  *= scaleX;
    scaledDstBounds.fBottom *= scaleY;
    // Account for padding in our rescaled src, if any.
    scaledDstBounds.offset(padX, padY);
    // Turn the scaled down dst bounds into an integer pixel rect.
    auto scaledDstBoundsI = scaledDstBounds.roundOut();

    SkIRect scaledSrcBounds = SkIRect::MakeSize(srcView.dimensions());
    // Recurse on the downscaled image with the rescaled sigmas.
    auto sdc = GaussianBlur(rContext,
                            std::move(srcView),
                            srcColorType,
                            srcAlphaType,
                            colorSpace,
                            scaledDstBoundsI,
                            scaledSrcBounds,
                            sigmaX,
                            sigmaY,
                            mode,
                            fit);
    if (!sdc) {
        return nullptr;
    }
    // We rounded out the integer scaled dst bounds. Select the fractional dst bounds from the
    // integer dimension blurred result when we scale back up.
    scaledDstBounds.offset(-scaledDstBoundsI.left(), -scaledDstBoundsI.top());
    return reexpand(rContext, std::move(sdc), scaledDstBounds, dstBounds.size(),
                    std::move(colorSpace), fit);
}
#endif // SK_GPU_V1
/**
 * Computes the parameters needed to draw a blurred round rect as a nine-patch.
 *
 * On success returns true and writes:
 *  - 'rrectToDraw': a small round rect (with corner radii rounded up to integers) to render and
 *    blur into a mask;
 *  - 'widthHeight': the size of that mask, including the blur margin on every side;
 *  - 'rectXs' / 'rectYs': the four division coordinates of the nine-patch in source space;
 *  - 'texXs' / 'texYs': the matching four division coordinates in mask space.
 * Returns false (outputs untouched) when the device round rect is too small, relative to its
 * corner radii and the blur radius, to be nine-patched.
 */
bool ComputeBlurredRRectParams(const SkRRect& srcRRect, const SkRRect& devRRect,
                               SkScalar sigma, SkScalar xformedSigma,
                               SkRRect* rrectToDraw,
                               SkISize* widthHeight,
                               SkScalar rectXs[kBlurRRectMaxDivisions],
                               SkScalar rectYs[kBlurRRectMaxDivisions],
                               SkScalar texXs[kBlurRRectMaxDivisions],
                               SkScalar texYs[kBlurRRectMaxDivisions]) {
    const unsigned int devBlurRadius = 3*SkScalarCeilToInt(xformedSigma-1/6.0f);
    const SkScalar srcBlurRadius = 3.0f * sigma;

    const SkRect& devBounds = devRRect.getBounds();
    const SkVector devUL = devRRect.radii(SkRRect::kUpperLeft_Corner);
    const SkVector devUR = devRRect.radii(SkRRect::kUpperRight_Corner);
    const SkVector devLR = devRRect.radii(SkRRect::kLowerRight_Corner);
    const SkVector devLL = devRRect.radii(SkRRect::kLowerLeft_Corner);

    // Largest corner extent along each edge, rounded up to whole device pixels.
    auto ceilOfLarger = [](SkScalar a, SkScalar b) {
        return SkScalarCeilToInt(std::max<SkScalar>(a, b));
    };
    const int devLeft  = ceilOfLarger(devUL.fX, devLL.fX);
    const int devTop   = ceilOfLarger(devUL.fY, devUR.fY);
    const int devRight = ceilOfLarger(devUR.fX, devLR.fX);
    const int devBot   = ceilOfLarger(devLL.fY, devLR.fY);

    // This is a conservative check for nine-patchability
    const bool tooNarrow = devBounds.fLeft + devLeft + devBlurRadius >=
                           devBounds.fRight - devRight - devBlurRadius;
    const bool tooShort = devBounds.fTop + devTop + devBlurRadius >=
                          devBounds.fBottom - devBot - devBlurRadius;
    if (tooNarrow || tooShort) {
        return false;
    }

    const SkVector srcUL = srcRRect.radii(SkRRect::kUpperLeft_Corner);
    const SkVector srcUR = srcRRect.radii(SkRRect::kUpperRight_Corner);
    const SkVector srcLR = srcRRect.radii(SkRRect::kLowerRight_Corner);
    const SkVector srcLL = srcRRect.radii(SkRRect::kLowerLeft_Corner);

    const SkScalar srcLeft  = std::max<SkScalar>(srcUL.fX, srcLL.fX);
    const SkScalar srcTop   = std::max<SkScalar>(srcUL.fY, srcUR.fY);
    const SkScalar srcRight = std::max<SkScalar>(srcUR.fX, srcLR.fX);
    const SkScalar srcBot   = std::max<SkScalar>(srcLL.fY, srcLR.fY);

    // The reduced round rect keeps the corners plus a blur radius of straight edge on either
    // side and a single pixel of stretchable center.
    const int reducedW = 2*devBlurRadius + devLeft + devRight + 1;
    const int reducedH = 2*devBlurRadius + devTop + devBot + 1;
    widthHeight->fWidth = reducedW + 2 * devBlurRadius;
    widthHeight->fHeight = reducedH + 2 * devBlurRadius;

    const SkRect srcOutset = srcRRect.getBounds().makeOutset(srcBlurRadius, srcBlurRadius);

    // Source-space divisions.
    rectXs[0] = srcOutset.fLeft;
    rectXs[1] = srcOutset.fLeft + 2*srcBlurRadius + srcLeft;
    rectXs[2] = srcOutset.fRight - 2*srcBlurRadius - srcRight;
    rectXs[3] = srcOutset.fRight;

    rectYs[0] = srcOutset.fTop;
    rectYs[1] = srcOutset.fTop + 2*srcBlurRadius + srcTop;
    rectYs[2] = srcOutset.fBottom - 2*srcBlurRadius - srcBot;
    rectYs[3] = srcOutset.fBottom;

    // Mask-space divisions.
    texXs[0] = 0.0f;
    texXs[1] = 2.0f*devBlurRadius + devLeft;
    texXs[2] = 2.0f*devBlurRadius + devLeft + 1;
    texXs[3] = SkIntToScalar(widthHeight->fWidth);

    texYs[0] = 0.0f;
    texYs[1] = 2.0f*devBlurRadius + devTop;
    texYs[2] = 2.0f*devBlurRadius + devTop + 1;
    texYs[3] = SkIntToScalar(widthHeight->fHeight);

    const SkRect reducedRect = SkRect::MakeXYWH(SkIntToScalar(devBlurRadius),
                                                SkIntToScalar(devBlurRadius),
                                                SkIntToScalar(reducedW),
                                                SkIntToScalar(reducedH));

    // Corner order matches the original: UL, UR, LR, LL.
    const SkVector* devCorners[4] = {&devUL, &devUR, &devLR, &devLL};
    SkVector ceiledRadii[4];
    for (int c = 0; c < 4; ++c) {
        ceiledRadii[c] = { SkScalarCeilToScalar(devCorners[c]->fX),
                           SkScalarCeilToScalar(devCorners[c]->fY) };
    }

    rrectToDraw->setRectRadii(reducedRect, ceiledRadii);
    return true;
}
// TODO: it seems like there should be some synergy with SkBlurMask::ComputeBlurProfile
// TODO: maybe cache this on the cpu side?
/**
 * Builds a 1-row A8 lookup table holding the integral of a normal distribution across a
 * six-sigma range centered at zero, running from 255 at the first texel down to 0 at the last.
 *
 * Returns the table width. If 'table' is null only the width that would be used is returned.
 * Returns 0 if pixel allocation fails.
 */
int CreateIntegralTable(float sixSigma, SkBitmap* table) {
    // Use two texels per dst pixel so that the linear interpolation performed at lookup time
    // doesn't introduce noticeable artifacts.
    const int minWidth = 2 * sk_float_ceil2int(sixSigma);
    // Round up to a power of two (with a floor of 32) so profiles get reused across sigmas.
    const int width = std::max(SkNextPow2(minWidth), 32);

    if (!table) {
        return width;
    }
    if (!table->tryAllocPixels(SkImageInfo::MakeA8(width, 1))) {
        return 0;
    }

    // The bitmap has a single row, so every texel is addressed off the row start.
    uint8_t* row = table->getAddr8(0, 0);
    const int last = width - 1;
    const float invWidth = 1.f / width;

    // The end texels are pinned to exactly full and zero coverage.
    row[0] = 255;
    for (int texel = 1; texel < last; ++texel) {
        float x = (texel + 0.5f) * invWidth;
        x = (-6 * x + 3) * SK_ScalarRoot2Over2;
        const float integral = 0.5f * (std::erf(x) + 1.f);
        row[texel] = SkToU8(sk_float_round2int(255.f * integral));
    }
    row[last] = 0;

    table->setImmutable();
    return table->width();
}
/**
 * Fills 'kernel' with KernelWidth(radius) normalized Gaussian weights for 'sigma', centered on
 * index 'radius'. 'radius' must equal SigmaRadius(sigma) (asserted). When sigma is effectively
 * zero the kernel is the single identity weight 1.
 */
void Compute1DGaussianKernel(float* kernel, float sigma, int radius) {
    SkASSERT(radius == SigmaRadius(sigma));

    if (SkGpuBlurUtils::IsEffectivelyZeroSigma(sigma)) {
        // Calling SigmaRadius() produces 1, just computing ceil(sigma)*3 produces 3
        SkASSERT(KernelWidth(radius) == 1);
        std::fill_n(kernel, 1, 0.f);
        kernel[0] = 1.f;
        return;
    }

    // If this fails, kEffectivelyZeroSigma isn't big enough to prevent precision issues
    SkASSERT(!SkScalarNearlyZero(2.f * sigma * sigma));

    const float invTwoVariance = 1.0f / (2.f * sigma * sigma);
    const int tapCount = KernelWidth(radius);

    float total = 0.0f;
    for (int tap = 0; tap < tapCount; ++tap) {
        const float distance = static_cast<float>(tap - radius);
        // The 1/sqrt(2*pi*sigma^2) factor is left out because the renormalization below
        // cancels any constant scale.
        kernel[tap] = sk_float_exp(-distance * distance * invTwoVariance);
        total += kernel[tap];
    }

    // Rescale so the weights sum to one.
    const float normalize = 1.0f / total;
    for (int tap = 0; tap < tapCount; ++tap) {
        kernel[tap] *= normalize;
    }
}
/**
 * Computes a Gaussian kernel that exploits bilinear filtering: adjacent pairs of texel weights
 * are merged into a single sample. Writes LinearKernelWidth(radius) entries to both 'kernel'
 * (combined weights) and 'offset' (signed sample positions relative to the center).
 *
 * Two adjacent taps contribute Wi * Ci + Wj * Cj. The GPU's linear filter yields
 * Ci * (1 - x) + Cj * x, so we need W' and x with W' * (1 - x) = Wi and W' * x = Wj, i.e.
 *     W' = Wi + Wj
 *     x  = Wj / (Wi + Wj)
 */
void Compute1DLinearGaussianKernel(float* kernel, float* offset, float sigma, int radius) {
    // Stores the merged weight and fractional offset for taps (wi, wj) at output slot 'slot'.
    auto merge_pair = [kernel, offset](int slot, float wi, float wj) {
        kernel[slot] = wi + wj;
        offset[slot] = wj / (wi + wj);
    };

    // Start from the ordinary full-width (2N + 1) kernel.
    const int fullWidth = KernelWidth(radius);
    std::unique_ptr<float[]> fullKernel(new float[fullWidth]);
    Compute1DGaussianKernel(fullKernel.get(), sigma, radius);

    // The output has radius + 1 entries (not fullWidth / 2).
    const int outCount = LinearKernelWidth(radius);
    const int outCenter = outCount / 2;

    // The kernel is symmetric: build the upper half walking 'src' up through the full kernel
    // and mirror each result into the lower half at 'mirror'.
    int mirror = outCenter - 1;
    int src = radius;

    if ((radius & 1) != 0) {
        // Odd N: use two samples around the center. The center texel is covered by both, so
        // each sample takes half of its weight:
        //    Texel edges
        //    v    v    v    v
        //    |    |    |    |
        //    \-----^---/      Lower sample
        //         \---^-----/ Upper sample
        merge_pair(outCenter, fullKernel[src] * 0.5f, fullKernel[src + 1]);
        kernel[mirror] = kernel[outCenter];
        offset[mirror] = -offset[outCenter];
        ++src;
        --mirror;
    } else {
        // Even N: an even number of texels sits on each side, so the center texel is sampled
        // directly.
        kernel[outCenter] = fullKernel[src];
        offset[outCenter] = 0.0f;
    }
    ++src;

    // All remaining texels are consumed two at a time, one sample per pair.
    int out = outCenter + 1;
    while (out < outCount) {
        merge_pair(out, fullKernel[src], fullKernel[src + 1]);
        offset[out] += static_cast<float>(src - radius);
        // Reflect into the lower half.
        kernel[mirror] = kernel[out];
        offset[mirror] = -offset[out];

        src += 2;
        ++out;
        --mirror;
    }
}
} // namespace SkGpuBlurUtils
#endif