blob: e39aa2669237e0cd906da03ecdd27885eef318ef [file] [log] [blame]
/*
* Copyright 2017 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "SkArenaAlloc.h"
#include "SkGaussFilter.h"
#include "SkMalloc.h"
#include "SkMaskBlurFilter.h"
#include "SkNx.h"
#include "SkSafeMath.h"
#include <cmath>
#include <climits>
// Pi; used by PlanGauss when converting a Gaussian sigma into a box-filter window size.
static constexpr double kPi = 3.14159265358979323846264338327950288;
// Threshold used by make_plan: sigmas below this value get a PlanBox, sigmas at or above it
// get a PlanGauss.
static constexpr double kSmallSigma = 2.0;
// Abstract one-dimensional blur over a run of 8-bit mask values. Concrete scanners are created
// through PlanningInterface::makeBlurScan.
class BlurScanInterface {
public:
    virtual ~BlurScanInterface() = default;

    // Blurs a single run. Source values start at src and are stepped by srcStride until srcEnd;
    // results start at dst and are stepped by dstStride, with dstEnd marking the end of the
    // destination run.
    virtual void blur(const uint8_t* src, int srcStride, const uint8_t* srcEnd,
                      uint8_t* dst, int dstStride, uint8_t* dstEnd) const = 0;

    // Reports whether blur4Transpose is implemented. The default is "no".
    virtual bool canBlur4() { return false; }

    // Optional variant of blur() for implementations whose canBlur4() returns true. The default
    // implementation must never be reached and aborts.
    virtual void blur4Transpose(
            const uint8_t* src, int srcStride, const uint8_t* srcEnd,
            uint8_t* dst, int dstStride, uint8_t* dstEnd) const {
        SK_ABORT("This should not be called.");
    }
};
// Describes how a blur of a given sigma will be carried out and manufactures the scanner that
// performs it.
class PlanningInterface {
public:
    virtual ~PlanningInterface() = default;

    // Number of uint32_t elements of scratch space that makeBlurScan's `buffer` must provide.
    virtual size_t bufferSize() const = 0;

    // Number of pixels the blurred result extends past each end of the source run.
    virtual int border() const = 0;

    // False when the plan leaves the mask unchanged, so no scanner is needed.
    virtual bool needsBlur() const = 0;

    // Creates, inside `alloc`, a scanner for source runs of `width` values that uses `buffer`
    // as its scratch space.
    virtual BlurScanInterface* makeBlurScan(
            SkArenaAlloc* alloc, int width, uint32_t* buffer) const = 0;
};
// The "do nothing" plan: no scratch space, no border and no blur. make_plan selects it when
// sigma is too small to have an effect.
class None final : public PlanningInterface {
public:
    None() = default;

    size_t bufferSize() const override { return 0; }

    int border() const override { return 0; }

    bool needsBlur() const override { return false; }

    // needsBlur() is false, so asking this plan for a scanner is a programming error.
    BlurScanInterface* makeBlurScan(
            SkArenaAlloc* alloc, int width, uint32_t* buffer) const override {
        SK_ABORT("Should never be called.");
        return nullptr;
    }
};
// Plan that approximates the blur with a box filter of fractional radius. The fractional radius
// is realized by blending two box windows: an "outer" window whose radius is rounded up and an
// "inner" window that is one pixel shorter on each side.
class PlanBox final : public PlanningInterface {
public:
    // Derives the two window weights (scaled by 2^24) and the sliding window length from sigma.
    explicit PlanBox(double sigma) {
        // Calculate the radius from sigma. Taken from the old code until something better is
        // figured out.
        auto possibleRadius = 1.5 * sigma - 0.5;
        auto radius = std::max(std::numeric_limits<double>::epsilon(), possibleRadius);
        auto outerRadius = std::ceil(radius);
        auto outerWindow = 2 * outerRadius + 1;
        auto outerFactor = (1 - (outerRadius - radius)) / outerWindow;
        fOuterWeight = static_cast<uint32_t>(round(outerFactor * (1ull << 24)));
        auto innerRadius = outerRadius - 1;
        auto innerWindow = 2 * innerRadius + 1;
        auto innerFactor = (1 - (radius - innerRadius)) / innerWindow;
        fInnerWeight = static_cast<uint32_t>(round(innerFactor * (1ull << 24)));
        // Sliding window is defined by the relationship between the outer and inner windows.
        // In the single window case, you add the element on the right, and subtract the element
        // on the left. But, because two windows are used, this relationship is more
        // complicated; an element is added from the right of the outer window, and subtracted
        // from the left of the inner window. Because innerWindow = outerWindow - 2, the
        // distance between the left and right in the two window case is outerWindow - 1.
        fSlidingWindow = static_cast<int>(outerWindow - 1);
    }

    // Scratch size in uint32_t elements: one Sk4u per sliding window slot (used only by
    // Box::blur4Transpose).
    size_t bufferSize() const override {
        return fSlidingWindow * (sizeof(Sk4u) / sizeof(uint32_t));
    }

    // Remember that sliding window = window - 1. Therefore, radius = sliding window / 2.
    int border() const override { return fSlidingWindow / 2; }

    bool needsBlur() const override { return true; }

    // Creates a Box scanner for source runs of `width` values; `buffer` is reinterpreted as
    // fSlidingWindow Sk4u slots.
    BlurScanInterface* makeBlurScan(
            SkArenaAlloc* alloc, int width, uint32_t* buffer) const override {
        // The relation between the slidingWindow and the width dictates two operating modes.
        // * width >= slidingWindow - both sides of the window are contained in the image while
        //   scanning. Therefore, we assume that slidingWindow zeros are consumed on the
        //   trailing edge of the window. After this count, then both edges are traversing the
        //   image.
        // * slidingWindow > width - both sides of the window are off the image while scanning
        //   the middle. The front edge of the window can only travel width until it falls off
        //   the image. At this point, both edges of the window are off the image consuming
        //   zeros and therefore, the destination value does not change. The scan produces
        //   unchanged values until the trailing edge of the window enters the image. This count
        //   is slidingWindow - width.
        const bool windowFitsInImage = width >= fSlidingWindow;
        const int noChangeCount = windowFitsInImage ? 0 : fSlidingWindow - width;
        const int trailingEdgeZeroCount = windowFitsInImage ? fSlidingWindow : width;

        Sk4u* sk4uBuffer = reinterpret_cast<Sk4u*>(buffer);
        return alloc->make<Box>(fOuterWeight, fInnerWeight, noChangeCount, trailingEdgeZeroCount,
                                sk4uBuffer, sk4uBuffer + fSlidingWindow);
    }

private:
    // Scanner that applies the two-window box blur. Each run is produced in up to four phases:
    // a leading ramp, an optional flat stretch, the steady-state slide, and a trailing ramp
    // that is generated backwards from the end of the run.
    class Box final : public BlurScanInterface {
    public:
        Box(uint32_t outerWeight, uint32_t innerWeight,
            int noChangeCount, int trailingEdgeZeroCount,
            Sk4u* buffer, Sk4u* bufferEnd)
            : fOuterWeight{outerWeight}
            , fInnerWeight{innerWeight}
            , fNoChangeCount{noChangeCount}
            , fTrailingEdgeZeroCount{trailingEdgeZeroCount}
            , fBuffer{buffer}
            , fBufferEnd{bufferEnd} { }

        // Scalar scan of one run. The trailing edge is re-read from src, so the scratch buffer
        // is not used here.
        void blur(const uint8_t* src, int srcStride, const uint8_t* srcEnd,
                  uint8_t* dst, int dstStride, uint8_t* dstEnd) const override {
            auto rightOuter = src;
            auto dstCursor = dst;

            // Blend the two running sums with their weights and round to an 8-bit result.
            auto interpolateSums = [this](uint32_t outerSum, uint32_t innerSum) {
                return SkTo<uint8_t>(
                        (fOuterWeight * outerSum + fInnerWeight * innerSum + kHalf) >> 24);
            };

            uint32_t outerSum = 0;
            uint32_t innerSum = 0;

            // Leading ramp: the window's trailing edge is still consuming zeros, so values are
            // only added.
            for (int i = 0; i < fTrailingEdgeZeroCount; i++) {
                innerSum = outerSum;
                outerSum += *rightOuter;
                *dstCursor = interpolateSums(outerSum, innerSum);

                rightOuter += srcStride;
                dstCursor += dstStride;
            }

            // slidingWindow > width: both window edges are off the image, the value repeats.
            for (int i = 0; i < fNoChangeCount; i++) {
                *dstCursor = interpolateSums(outerSum, innerSum);
                dstCursor += dstStride;
            }

            // width > slidingWindow: both window edges travel across the image.
            auto leftInner = src;
            while (rightOuter < srcEnd) {
                innerSum = outerSum - *leftInner;
                outerSum += *rightOuter;
                *dstCursor = interpolateSums(outerSum, innerSum);
                outerSum -= *leftInner;

                rightOuter += srcStride;
                leftInner += srcStride;
                dstCursor += dstStride;
            }

            // Trailing ramp: restart the sums and walk backwards from the ends of src and dst.
            auto leftOuter = srcEnd;
            dstCursor = dstEnd;
            outerSum = 0;
            for (int i = 0; i < fTrailingEdgeZeroCount; i++) {
                leftOuter -= srcStride;
                dstCursor -= dstStride;

                innerSum = outerSum;
                outerSum += *leftOuter;
                *dstCursor = interpolateSums(outerSum, innerSum);
            }
        }

        bool canBlur4() override { return true; }

        // NB this is a transposing scan. The next src is src+1, and the next down is
        // src+srcStride.
        //
        // Four source runs (srcStride apart) are processed together in the lanes of an Sk4u,
        // and each step stores four result bytes at dstCursor. The values that entered the
        // window are kept in the ring buffer [fBuffer, fBufferEnd) so that they can be
        // subtracted again when they leave it.
        void blur4Transpose(
                const uint8_t* src, int srcStride, const uint8_t* srcEnd,
                uint8_t* dst, int dstStride, uint8_t* dstEnd) const override {
            auto rightOuter = src;
            auto dstCursor = dst;

            Sk4u* const bufferStart = fBuffer;
            Sk4u* bufferCursor = bufferStart;
            Sk4u* const bufferEnd = fBufferEnd;

            const Sk4u outerWeight(SkTo<uint32_t>(fOuterWeight));
            const Sk4u innerWeight(SkTo<uint32_t>(fInnerWeight));

            // Gather one value from each of the four runs.
            auto load = [](const uint8_t* cursor, int stride) -> Sk4u {
                return Sk4u(cursor[0*stride], cursor[1*stride], cursor[2*stride], cursor[3*stride]);
            };

            // Same blend as in blur(), applied to all four lanes.
            auto interpolateSums = [&] (const Sk4u& outerSum, const Sk4u& innerSum) {
                return
                    SkNx_cast<uint8_t>(
                        (outerSum * outerWeight + innerSum * innerWeight + kHalf) >> 24);
            };

            Sk4u outerSum = 0;
            Sk4u innerSum = 0;

            // Leading ramp: values are only added, and remembered in the ring buffer.
            for (int i = 0; i < fTrailingEdgeZeroCount; i++) {
                innerSum = outerSum;

                Sk4u leadingEdge = load(rightOuter, srcStride);
                outerSum += leadingEdge;
                Sk4b blurred = interpolateSums(outerSum, innerSum);
                blurred.store(dstCursor);

                leadingEdge.store(bufferCursor);
                bufferCursor = (bufferCursor + 1) < bufferEnd ? bufferCursor + 1 : bufferStart;

                rightOuter += 1;
                dstCursor += dstStride;
            }

            // slidingWindow > width: both window edges are off the image, the value repeats.
            for (int i = 0; i < fNoChangeCount; i++) {
                Sk4b blurred = interpolateSums(outerSum, innerSum);
                blurred.store(dstCursor);
                dstCursor += dstStride;
            }

            // width > slidingWindow: the trailing edge is taken from the ring buffer, whose
            // slot is then reused for the new leading edge.
            while (rightOuter < srcEnd) {
                Sk4u trailEdge = Sk4u::Load(bufferCursor);
                Sk4u leadingEdge = load(rightOuter, srcStride);
                innerSum = outerSum - trailEdge;
                outerSum += leadingEdge;

                Sk4b blurred = interpolateSums(outerSum, innerSum);
                blurred.store(dstCursor);

                outerSum -= trailEdge;
                leadingEdge.store(bufferCursor);
                bufferCursor = (bufferCursor + 1) < bufferEnd ? bufferCursor + 1 : bufferStart;

                rightOuter += 1;
                dstCursor += dstStride;
            }

            // Trailing ramp: restart the sums and walk backwards from the ends of src and dst.
            auto leftOuter = srcEnd;
            dstCursor = dstEnd;
            outerSum = 0;
            for (int i = 0; i < fTrailingEdgeZeroCount; i++) {
                leftOuter -= 1;
                dstCursor -= dstStride;

                innerSum = outerSum;
                outerSum += load(leftOuter, srcStride);
                Sk4b blurred = interpolateSums(outerSum, innerSum);
                blurred.store(dstCursor);
            }
        }

    private:
        // One half in the 2^24 scaling of the weights; added before the shift to round.
        static constexpr uint32_t kHalf = static_cast<uint32_t>(1) << 23;

        const uint32_t fOuterWeight;
        const uint32_t fInnerWeight;
        const int fNoChangeCount;
        const int fTrailingEdgeZeroCount;
        Sk4u* const fBuffer;
        Sk4u* const fBufferEnd;
    };

    uint32_t fOuterWeight;
    uint32_t fInnerWeight;
    int fSlidingWindow;
};
// Plan that approximates a Gaussian by three successive box filters, evaluated in a single
// sweep with three running sums and three ring buffers.
class PlanGauss final : public PlanningInterface {
public:
    // Derives the box window from sigma, then the ring buffer sizes, the border, and the
    // final scale factor (scaled by 2^32).
    explicit PlanGauss(double sigma) {
        auto possibleWindow = static_cast<int>(floor(sigma * 3 * sqrt(2 * kPi) / 4 + 0.5));
        auto window = std::max(1, possibleWindow);

        fPass0Size = window - 1;
        fPass1Size = window - 1;
        fPass2Size = (window & 1) == 1 ? window - 1 : window;

        // Calculating the border is tricky. I will go through the odd case which is simpler, and
        // then through the even case. Given a stack of filters seven wide for the odd case of
        // three passes.
        //
        //        S
        //     aaaAaaa
        //     bbbBbbb
        //     cccCccc
        //        D
        //
        // The furthest changed pixel is when the filters are in the following configuration.
        //
        //                 S
        //           aaaAaaa
        //        bbbBbbb
        //     cccCccc
        //        D
        //
        // The A pixel is calculated using the value S, the B uses A, and the C uses B, and
        // finally D is C. So, with a window size of seven the border is nine. In general, the
        // border is 3*((window - 1)/2).
        //
        // For even cases the filter stack is more complicated. The spec specifies two passes
        // of even filters and a final pass of odd filters. A stack for a width of six looks like
        // this.
        //
        //        S
        //     aaaAaa
        //      bbBbbb
        //     cccCccc
        //        D
        //
        // The furthest pixel looks like this.
        //
        //                S
        //           aaaAaa
        //         bbBbbb
        //     cccCccc
        //        D
        //
        // For a window of size six, the border value is eight (2 + 3 + 3). In general the
        // border is 3 * (window/2) - 1.
        fBorder = (window & 1) == 1 ? 3 * ((window - 1) / 2) : 3 * (window / 2) - 1;
        fSlidingWindow = 2 * fBorder + 1;

        // If the window is odd then the divisor is just window ^ 3 otherwise,
        // it is window * window * (window + 1) = window ^ 2 + window ^ 3;
        auto window2 = window * window;
        auto window3 = window2 * window;
        auto divisor = (window & 1) == 1 ? window3 : window3 + window2;

        fWeight = static_cast<uint64_t>(round(1.0 / divisor * (1ull << 32)));
    }

    // Scratch size in uint32_t elements: the three ring buffers laid out back to back.
    size_t bufferSize() const override { return fPass0Size + fPass1Size + fPass2Size; }

    int border() const override { return fBorder; }

    bool needsBlur() const override { return true; }

    // Creates a Gauss scanner for source runs of `width` values; `buffer` is split into the
    // three consecutive ring buffers.
    BlurScanInterface* makeBlurScan(
            SkArenaAlloc* alloc, int width, uint32_t* buffer) const override {
        uint32_t* buffer0, *buffer0End, *buffer1, *buffer1End, *buffer2, *buffer2End;
        buffer0 = buffer;
        buffer0End = buffer1 = buffer0 + fPass0Size;
        buffer1End = buffer2 = buffer1 + fPass1Size;
        buffer2End = buffer2 + fPass2Size;

        // When the whole window is wider than the source, the scanner has to emit this many
        // extra values (fed with zeros) after the source has been consumed.
        int noChangeCount = fSlidingWindow > width ? fSlidingWindow - width : 0;

        return alloc->make<Gauss>(
                fWeight, noChangeCount,
                buffer0, buffer0End,
                buffer1, buffer1End,
                buffer2, buffer2End);
    }

public:
    // Scanner for the three-pass box approximation.
    class Gauss final : public BlurScanInterface {
    public:
        Gauss(uint64_t weight, int noChangeCount,
              uint32_t* buffer0, uint32_t* buffer0End,
              uint32_t* buffer1, uint32_t* buffer1End,
              uint32_t* buffer2, uint32_t* buffer2End)
            : fWeight{weight}
            , fNoChangeCount{noChangeCount}
            , fBuffer0{buffer0}
            , fBuffer0End{buffer0End}
            , fBuffer1{buffer1}
            , fBuffer1End{buffer1End}
            , fBuffer2{buffer2}
            , fBuffer2End{buffer2End}
        { }

        // Blurs one run: first forwards over the source (plus fNoChangeCount zero inputs),
        // then, with the state reset, backwards from the end to fill the remaining destination
        // values.
        void blur(const uint8_t* src, int srcStride, const uint8_t* srcEnd,
                  uint8_t* dst, int dstStride, uint8_t* dstEnd) const override {
            auto buffer0Cursor = fBuffer0;
            auto buffer1Cursor = fBuffer1;
            auto buffer2Cursor = fBuffer2;

            // The three ring buffers are contiguous, so one memset clears them all.
            std::memset(fBuffer0, 0x00, (fBuffer2End - fBuffer0) * sizeof(*fBuffer0));

            uint32_t sum0 = 0;
            uint32_t sum1 = 0;
            uint32_t sum2 = 0;

            // One filter step: feed leadingEdge into the cascade of running sums, write the
            // scaled result to *out, then retire the oldest entry of each ring buffer and
            // record the new one in its place.
            auto processValue = [&](uint32_t leadingEdge, uint8_t* out) {
                sum0 += leadingEdge;
                sum1 += sum0;
                sum2 += sum1;

                *out = this->finalScale(sum2);

                sum2 -= *buffer2Cursor;
                *buffer2Cursor = sum1;
                buffer2Cursor = (buffer2Cursor + 1) < fBuffer2End ? buffer2Cursor + 1 : fBuffer2;

                sum1 -= *buffer1Cursor;
                *buffer1Cursor = sum0;
                buffer1Cursor = (buffer1Cursor + 1) < fBuffer1End ? buffer1Cursor + 1 : fBuffer1;

                sum0 -= *buffer0Cursor;
                *buffer0Cursor = leadingEdge;
                buffer0Cursor = (buffer0Cursor + 1) < fBuffer0End ? buffer0Cursor + 1 : fBuffer0;
            };

            // Consume the source generating pixels.
            for (auto srcCursor = src;
                 srcCursor < srcEnd; dst += dstStride, srcCursor += srcStride) {
                processValue(*srcCursor, dst);
            }

            // The leading edge is off the right side of the mask.
            for (int i = 0; i < fNoChangeCount; i++) {
                processValue(0, dst);
                dst += dstStride;
            }

            // Starting from the right, fill in the rest of the buffer.
            std::memset(fBuffer0, 0, (fBuffer2End - fBuffer0) * sizeof(*fBuffer0));

            sum0 = sum1 = sum2 = 0;

            uint8_t* dstCursor = dstEnd;
            const uint8_t* srcCursor = srcEnd;
            while (dstCursor > dst) {
                dstCursor -= dstStride;
                srcCursor -= srcStride;
                processValue(*srcCursor, dstCursor);
            }
        }

    private:
        // One half in the 2^32 scaling of fWeight; added before the shift to round.
        static constexpr uint64_t kHalf = static_cast<uint64_t>(1) << 31;

        // Converts the third running sum to an 8-bit mask value by multiplying with the
        // reciprocal of the divisor.
        uint8_t finalScale(uint32_t sum) const {
            return SkTo<uint8_t>((fWeight * sum + kHalf) >> 32);
        }

        uint64_t fWeight;
        int fNoChangeCount;
        uint32_t* fBuffer0;
        uint32_t* fBuffer0End;
        uint32_t* fBuffer1;
        uint32_t* fBuffer1End;
        uint32_t* fBuffer2;
        uint32_t* fBuffer2End;
    };

    uint64_t fWeight;
    int fBorder;
    int fSlidingWindow;
    int fPass0Size;
    int fPass1Size;
    int fPass2Size;
};
// Chooses the blur strategy for sigma and allocates the matching plan in `alloc`:
//   3 * sigma <= 1       -> None (nothing to do),
//   sigma < kSmallSigma  -> PlanBox,
//   otherwise            -> PlanGauss.
static PlanningInterface* make_plan(SkArenaAlloc* alloc, double sigma) {
    if (3 * sigma <= 1) {
        return alloc->make<None>();
    }
    if (sigma < kSmallSigma) {
        return alloc->make<PlanBox>(sigma);
    }
    return alloc->make<PlanGauss>(sigma);
}
// NB 136 is the largest sigma that will not cause a buffer full of 255 mask values to overflow
// using the Gauss filter. It also limits the size of buffers used to hold intermediate values.
// Explanation of maximums:
// sum0 = window * 255
// sum1 = window * sum0 -> window * window * 255
// sum2 = window * sum1 -> window * window * window * 255 -> window^3 * 255
//
// The value window^3 * 255 must fit in a uint32_t. So,
// window^3 * 255 < 2^32, which holds for window = 255.
//
// window = floor(sigma * 3 * sqrt(2 * kPi) / 4 + 0.5)
// For window <= 255, the largest value for sigma is 136.
// Stores the horizontal and vertical sigmas, each pinned to the range [0, 136]. Negative
// sigmas are a caller error and are caught by the asserts.
SkMaskBlurFilter::SkMaskBlurFilter(double sigmaW, double sigmaH)
        : fSigmaW{SkTPin(sigmaW, 0.0, 136.0)}
        , fSigmaH{SkTPin(sigmaH, 0.0, 136.0)} {
    SkASSERT(sigmaW >= 0);
    SkASSERT(sigmaH >= 0);
}
// True when both sigmas are so small (3 * sigma <= 1) that neither direction is blurred; this
// is the same threshold make_plan uses to select the None plan.
bool SkMaskBlurFilter::hasNoBlur() const {
    const bool noBlurInW = 3 * fSigmaW <= 1;
    const bool noBlurInH = 3 * fSigmaH <= 1;
    return noBlurInW && noBlurInH;
}
// Builds the destination mask for a blur of `src` with the given radii: an A8 mask whose
// bounds are src's bounds grown by radiusX on the left and right and by radiusY on the top and
// bottom, with a row stride equal to its width. Pixel storage is allocated only when src has
// an image and the SkSafeMath size computations are still valid; otherwise fImage stays
// nullptr.
static SkMask prepare_destination(int radiusX, int radiusY, const SkMask& src) {
    SkSafeMath safe;
    SkMask dst;

    // dstW = srcW + 2 * radiusX;
    const size_t growX = safe.add(radiusX, radiusX);
    const size_t dstW = safe.add(src.fBounds.width(), growX);
    // dstH = srcH + 2 * radiusY;
    const size_t growY = safe.add(radiusY, radiusY);
    const size_t dstH = safe.add(src.fBounds.height(), growY);

    // Size the bounds, then move them so the source sits radiusX/radiusY inside them.
    dst.fBounds.set(0, 0, SkTo<int>(dstW), SkTo<int>(dstH));
    dst.fBounds.offset(src.fBounds.x(), src.fBounds.y());
    dst.fBounds.offset(-radiusX, -radiusY);

    dst.fImage = nullptr;
    dst.fRowBytes = SkTo<uint32_t>(dstW);
    dst.fFormat = SkMask::kA8_Format;

    const size_t toAlloc = safe.mul(dstW, dstH);
    if (safe && src.fImage != nullptr) {
        dst.fImage = SkMask::AllocImage(toAlloc);
    }
    return dst;
}
#if !defined(SK_USE_LEGACY_INTERP_BLUR)
// Zero lane value. The blur_x_radius_N functions use it as the filler in their shifted vector
// literals.
static constexpr uint16_t _____ = 0u;
// One half in the 8.8 fixed-point encoding produced by load() (0x80 / 0x100). Accumulators are
// seeded with it so that the truncating shift in store() rounds instead.
static constexpr uint16_t kHalf = 0x80u;
// Loads mask bytes from `from` into an Sk8h in 8.8 fixed point. When width is below 8, only
// the first `width` bytes are read and the remaining lanes are zero; otherwise eight bytes are
// read directly from `from`.
static SK_ALWAYS_INLINE Sk8h load(const uint8_t* from, int width) {
    uint8_t padded[8];
    if (width < 8) {
        // Stage the partial run in a zero-filled buffer so the 8-byte load stays in bounds.
        sk_bzero(padded, sizeof(padded));
        for (int lane = 0; lane < width; lane++) {
            padded[lane] = from[lane];
        }
        from = padded;
    }
    // Convert from 0-255 to 8.8 encoding.
    return SkNx_cast<uint16_t>(Sk8b::Load(from)) << 8;
}
// Converts the 8.8 fixed-point lanes of v back to bytes (dropping the fraction) and writes
// them to `to`: all eight when width is 8, otherwise only the first `width`.
static SK_ALWAYS_INLINE void store(uint8_t* to, const Sk8h& v, int width) {
    Sk8b bytes = SkNx_cast<uint8_t>(v >> 8);
    if (width == 8) {
        bytes.store(to);
        return;
    }
    // Partial store: go through a scratch array so nothing beyond `width` bytes is written.
    uint8_t scratch[8];
    bytes.store(scratch);
    for (int lane = 0; lane < width; lane++) {
        to[lane] = scratch[lane];
    }
}
// In all the blur_x_radius_N and blur_y_radius_N functions the gaussian values are encoded
// in 0.16 format; none of the values is greater than one. The incoming mask values are in 8.8
// format. The resulting multiply has an 8.24 format, but the mulhi truncates the lower 16 bits,
// resulting in an 8.8 format.
//
// The blur_x_radius_N functions below blur along a row of pixels using a kernel with radius N.
// This system is set up to minimize the number of multiplies needed.
//
// Explanation:
// Blurring a specific mask value is given by the following equation where D_n is the resulting
// mask value and S_n is the source value. The example below is for a filter with a radius of 1
// and a width of 3 (radius == (width-1)/2). The indexes for the source and destination are
// aligned. The filter is given by G_n where n is the symmetric filter value.
//
// D[n] = S[n-1]*G[1] + S[n]*G[0] + S[n+1]*G[1].
//
// We can start the source index at an offset relative to the destination separated by the
// radius. This results in a non-traditional restating of the above filter.
//
// D[n] = S[n]*G[1] + S[n+1]*G[0] + S[n+2]*G[1]
//
// If we look at three specific consecutive destinations the following equations result:
//
// D[5] = S[5]*G[1] + S[6]*G[0] + S[7]*G[1]
// D[6] = S[6]*G[1] + S[7]*G[0] + S[8]*G[1]
// D[7] = S[7]*G[1] + S[8]*G[0] + S[9]*G[1].
//
// In the above equations, notice that S[7] is used in all three. In particular, two values are
// used: S[7]*G[0] and S[7]*G[1]. So, S[7] is only multiplied twice, but used in D[5], D[6] and
// D[7].
//
// From the point of view of a source value we end up with the following three equations.
//
// Given S[7]:
// D[5] += S[7]*G[1]
// D[6] += S[7]*G[0]
// D[7] += S[7]*G[1]
//
// In General:
// D[n] += S[n]*G[1]
// D[n+1] += S[n]*G[0]
// D[n+2] += S[n]*G[1]
//
// Now these equations can be ganged using SIMD to form:
// D[n..n+7] += S[n..n+7]*G[1]
// D[n+1..n+8] += S[n..n+7]*G[0]
// D[n+2..n+9] += S[n..n+7]*G[1]
// The next set of values becomes.
// D[n+8..n+15] += S[n+8..n+15]*G[1]
// D[n+9..n+16] += S[n+8..n+15]*G[0]
// D[n+10..n+17] += S[n+8..n+15]*G[1]
// You can see that the D[n+8] and D[n+9] values overlap the two sets, using parts of both
// S[n..n+7] and S[n+8..n+15].
//
// Just one more transformation allows the code to maintain all working values in
// registers. I introduce the notation {0, S[n..n+7] * G[k]} to mean that the value where 0 is
// prepended to the array of values to form {0, S[n] * G[k], ..., S[n+7]*G[k]}.
//
// D[n..n+7] += S[n..n+7] * G[1]
// D[n..n+8] += {0, S[n..n+7] * G[0]}
// D[n..n+9] += {0, 0, S[n..n+7] * G[1]}
//
// Now we can encode D[n..n+7] in a single Sk8h register called d0, and D[n+8..n+15] in a
// register d8. In addition, S[n..n+7] becomes s0.
//
// The translation of the {0, S[n..n+7] * G[k]} is translated in the following way below.
//
// Sk8h v0 = s0*G[0]
// Sk8h v1 = s0*G[1]
// /* D[n..n+7] += S[n..n+7] * G[1] */
// d0 += v1;
// /* D[n..n+8] += {0, S[n..n+7] * G[0]} */
// d0 += {_____, v0[0], v0[1], v0[2], v0[3], v0[4], v0[5], v0[6]}
// d8 += {v0[7], _____, _____, _____, _____, _____, _____, _____}
// /* D[n..n+9] += {0, 0, S[n..n+7] * G[1]} */
// d0 += {_____, _____, v1[0], v1[1], v1[2], v1[3], v1[4], v1[5]}
// d8 += {v1[6], v1[7], _____, _____, _____, _____, _____, _____}
// Where we rely on the compiler to generate efficient code for the {____, n, ....} notation.
// Radius-1 (3 tap) horizontal kernel step. Scatters the eight source values in s0, weighted by
// g0 (center tap) and g1 (outer tap), into the accumulators: *d0 covers destination lanes
// 0..7 and *d8 receives whatever is shifted past lane 7. The last three gaussian parameters
// are unused.
static SK_ALWAYS_INLINE void blur_x_radius_1(
        const Sk8h& s0,
        const Sk8h& g0, const Sk8h& g1, const Sk8h&, const Sk8h&, const Sk8h&,
        Sk8h* d0, Sk8h* d8) {
    auto p0 = s0.mulHi(g0);
    auto p1 = s0.mulHi(g1);

    // D[n..n+7]  += S[n..n+7] * G[1]
    *d0 += p1;

    // D[n..n+8]  += {0, S[n..n+7] * G[0]}
    *d0 += Sk8h{_____, p0[0], p0[1], p0[2], p0[3], p0[4], p0[5], p0[6]};
    *d8 += Sk8h{p0[7], _____, _____, _____, _____, _____, _____, _____};

    // D[n..n+9]  += {0, 0, S[n..n+7] * G[1]}
    *d0 += Sk8h{_____, _____, p1[0], p1[1], p1[2], p1[3], p1[4], p1[5]};
    *d8 += Sk8h{p1[6], p1[7], _____, _____, _____, _____, _____, _____};
}
// Radius-2 (5 tap) horizontal kernel step. Scatters the eight source values in s0, weighted by
// g0 (center) through g2 (outermost), into *d0 (destination lanes 0..7) and *d8 (the lanes
// shifted past 7). The last two gaussian parameters are unused.
static SK_ALWAYS_INLINE void blur_x_radius_2(
        const Sk8h& s0,
        const Sk8h& g0, const Sk8h& g1, const Sk8h& g2, const Sk8h&, const Sk8h&,
        Sk8h* d0, Sk8h* d8) {
    auto p0 = s0.mulHi(g0);
    auto p1 = s0.mulHi(g1);
    auto p2 = s0.mulHi(g2);

    // D[n..n+7]  += S[n..n+7] * G[2]
    *d0 += p2;

    // D[n..n+8]  += {0, S[n..n+7] * G[1]}
    *d0 += Sk8h{_____, p1[0], p1[1], p1[2], p1[3], p1[4], p1[5], p1[6]};
    *d8 += Sk8h{p1[7], _____, _____, _____, _____, _____, _____, _____};

    // D[n..n+9]  += {0, 0, S[n..n+7] * G[0]}
    *d0 += Sk8h{_____, _____, p0[0], p0[1], p0[2], p0[3], p0[4], p0[5]};
    *d8 += Sk8h{p0[6], p0[7], _____, _____, _____, _____, _____, _____};

    // D[n..n+10] += {0, 0, 0, S[n..n+7] * G[1]}
    *d0 += Sk8h{_____, _____, _____, p1[0], p1[1], p1[2], p1[3], p1[4]};
    *d8 += Sk8h{p1[5], p1[6], p1[7], _____, _____, _____, _____, _____};

    // D[n..n+11] += {0, 0, 0, 0, S[n..n+7] * G[2]}
    *d0 += Sk8h{_____, _____, _____, _____, p2[0], p2[1], p2[2], p2[3]};
    *d8 += Sk8h{p2[4], p2[5], p2[6], p2[7], _____, _____, _____, _____};
}
// Radius-3 (7 tap) horizontal kernel step. Scatters the eight source values in s0, weighted by
// gauss0 (center) through gauss3 (outermost), into *d0 (destination lanes 0..7) and *d8 (the
// lanes shifted past 7). The last gaussian parameter is unused.
static SK_ALWAYS_INLINE void blur_x_radius_3(
        const Sk8h& s0,
        const Sk8h& gauss0, const Sk8h& gauss1, const Sk8h& gauss2, const Sk8h& gauss3, const Sk8h&,
        Sk8h* d0, Sk8h* d8) {
    auto p0 = s0.mulHi(gauss0);
    auto p1 = s0.mulHi(gauss1);
    auto p2 = s0.mulHi(gauss2);
    auto p3 = s0.mulHi(gauss3);

    // D[n..n+7]  += S[n..n+7] * G[3]
    *d0 += p3;

    // D[n..n+8]  += {0, S[n..n+7] * G[2]}
    *d0 += Sk8h{_____, p2[0], p2[1], p2[2], p2[3], p2[4], p2[5], p2[6]};
    *d8 += Sk8h{p2[7], _____, _____, _____, _____, _____, _____, _____};

    // D[n..n+9]  += {0, 0, S[n..n+7] * G[1]}
    *d0 += Sk8h{_____, _____, p1[0], p1[1], p1[2], p1[3], p1[4], p1[5]};
    *d8 += Sk8h{p1[6], p1[7], _____, _____, _____, _____, _____, _____};

    // D[n..n+10] += {0, 0, 0, S[n..n+7] * G[0]}
    *d0 += Sk8h{_____, _____, _____, p0[0], p0[1], p0[2], p0[3], p0[4]};
    *d8 += Sk8h{p0[5], p0[6], p0[7], _____, _____, _____, _____, _____};

    // D[n..n+11] += {0, 0, 0, 0, S[n..n+7] * G[1]}
    *d0 += Sk8h{_____, _____, _____, _____, p1[0], p1[1], p1[2], p1[3]};
    *d8 += Sk8h{p1[4], p1[5], p1[6], p1[7], _____, _____, _____, _____};

    // D[n..n+12] += {0, 0, 0, 0, 0, S[n..n+7] * G[2]}
    *d0 += Sk8h{_____, _____, _____, _____, _____, p2[0], p2[1], p2[2]};
    *d8 += Sk8h{p2[3], p2[4], p2[5], p2[6], p2[7], _____, _____, _____};

    // D[n..n+13] += {0, 0, 0, 0, 0, 0, S[n..n+7] * G[3]}
    *d0 += Sk8h{_____, _____, _____, _____, _____, _____, p3[0], p3[1]};
    *d8 += Sk8h{p3[2], p3[3], p3[4], p3[5], p3[6], p3[7], _____, _____};
}
// Radius-4 (9 tap) horizontal kernel step. Scatters the eight source values in s0, weighted by
// gauss0 (center) through gauss4 (outermost), into *d0 (destination lanes 0..7) and *d8 (the
// lanes shifted past 7). With nine taps the last contribution lands entirely in *d8.
static SK_ALWAYS_INLINE void blur_x_radius_4(
        const Sk8h& s0,
        const Sk8h& gauss0,
        const Sk8h& gauss1,
        const Sk8h& gauss2,
        const Sk8h& gauss3,
        const Sk8h& gauss4,
        Sk8h* d0, Sk8h* d8) {
    auto p0 = s0.mulHi(gauss0);
    auto p1 = s0.mulHi(gauss1);
    auto p2 = s0.mulHi(gauss2);
    auto p3 = s0.mulHi(gauss3);
    auto p4 = s0.mulHi(gauss4);

    // D[n..n+7]  += S[n..n+7] * G[4]
    *d0 += p4;

    // D[n..n+8]  += {0, S[n..n+7] * G[3]}
    *d0 += Sk8h{_____, p3[0], p3[1], p3[2], p3[3], p3[4], p3[5], p3[6]};
    *d8 += Sk8h{p3[7], _____, _____, _____, _____, _____, _____, _____};

    // D[n..n+9]  += {0, 0, S[n..n+7] * G[2]}
    *d0 += Sk8h{_____, _____, p2[0], p2[1], p2[2], p2[3], p2[4], p2[5]};
    *d8 += Sk8h{p2[6], p2[7], _____, _____, _____, _____, _____, _____};

    // D[n..n+10] += {0, 0, 0, S[n..n+7] * G[1]}
    *d0 += Sk8h{_____, _____, _____, p1[0], p1[1], p1[2], p1[3], p1[4]};
    *d8 += Sk8h{p1[5], p1[6], p1[7], _____, _____, _____, _____, _____};

    // D[n..n+11] += {0, 0, 0, 0, S[n..n+7] * G[0]}
    *d0 += Sk8h{_____, _____, _____, _____, p0[0], p0[1], p0[2], p0[3]};
    *d8 += Sk8h{p0[4], p0[5], p0[6], p0[7], _____, _____, _____, _____};

    // D[n..n+12] += {0, 0, 0, 0, 0, S[n..n+7] * G[1]}
    *d0 += Sk8h{_____, _____, _____, _____, _____, p1[0], p1[1], p1[2]};
    *d8 += Sk8h{p1[3], p1[4], p1[5], p1[6], p1[7], _____, _____, _____};

    // D[n..n+13] += {0, 0, 0, 0, 0, 0, S[n..n+7] * G[2]}
    *d0 += Sk8h{_____, _____, _____, _____, _____, _____, p2[0], p2[1]};
    *d8 += Sk8h{p2[2], p2[3], p2[4], p2[5], p2[6], p2[7], _____, _____};

    // D[n..n+14] += {0, 0, 0, 0, 0, 0, 0, S[n..n+7] * G[3]}
    *d0 += Sk8h{_____, _____, _____, _____, _____, _____, _____, p3[0]};
    *d8 += Sk8h{p3[1], p3[2], p3[3], p3[4], p3[5], p3[6], p3[7], _____};

    // D[n..n+15] += {0, 0, 0, 0, 0, 0, 0, 0, S[n..n+7] * G[4]}
    *d8 += p4;
}
// Function type shared by blur_x_radius_1 through blur_x_radius_4 (their signatures are
// identical); blur_row and blur_x_rect take the kernel to apply as a parameter of this type.
using BlurX = decltype(blur_x_radius_1);
// Blurs one row of srcW source values into dstW destination values using the horizontal kernel
// step `blur`, which will only be one of the functions blur_x_radius_(1|2|3|4). g0..g4 are the
// broadcast gaussian factors handed through to `blur`.
static SK_ALWAYS_INLINE void blur_row(
        BlurX blur,
        const Sk8h& g0, const Sk8h& g1, const Sk8h& g2, const Sk8h& g3, const Sk8h& g4,
        const uint8_t* src, int srcW,
        uint8_t* dst, int dstW) {
    // Accumulators for the current eight destination values (d0) and the spill-over into the
    // next eight (d8). Both start at kHalf so the final truncation rounds; this also handles
    // summing wider than the source.
    Sk8h d0{kHalf}, d8{kHalf};

    // Go by multiples of 8 in src.
    int x = 0;
    while (x <= srcW - 8) {
        blur(load(src, 8), g0, g1, g2, g3, g4, &d0, &d8);

        store(dst, d0, 8);

        // The spill-over becomes the current accumulator; start a fresh spill-over.
        d0 = d8;
        d8 = Sk8h{kHalf};

        src += 8;
        dst += 8;
        x += 8;
    }

    // There are src values left, but the remainder of src values is not a multiple of 8.
    const int srcRemaining = srcW - x;
    if (srcRemaining > 0) {
        blur(load(src, srcRemaining), g0, g1, g2, g3, g4, &d0, &d8);

        const int dstChunk = std::min(8, dstW - x);
        store(dst, d0, dstChunk);

        d0 = d8;
        dst += dstChunk;
        x += dstChunk;
    }

    // There are dst mask values to complete.
    const int dstRemaining = dstW - x;
    if (dstRemaining > 0) {
        store(dst, d0, dstRemaining);
    }
}
// Horizontally blurs dstH rows with the kernel step `blur`, which will only be one of the
// functions blur_x_radius_(1|2|3|4). The first five entries of `gauss` are broadcast once and
// reused for every row; src and dst advance by their respective strides per row.
static SK_ALWAYS_INLINE void blur_x_rect(
        BlurX blur,
        uint16_t* gauss,
        const uint8_t* src, size_t srcStride, int srcW,
        uint8_t* dst, size_t dstStride, int dstW, int dstH) {
    Sk8h g0{gauss[0]};
    Sk8h g1{gauss[1]};
    Sk8h g2{gauss[2]};
    Sk8h g3{gauss[3]};
    Sk8h g4{gauss[4]};

    // Blur *ALL* the rows.
    const uint8_t* srcRow = src;
    uint8_t* dstRow = dst;
    for (int row = 0; row < dstH; row++) {
        blur_row(blur, g0, g1, g2, g3, g4, srcRow, srcW, dstRow, dstW);
        srcRow += srcStride;
        dstRow += dstStride;
    }
}
// Horizontal blur entry point: dispatches on the kernel radius (1 through 4) so that
// blur_x_rect is called with a fixed blur_x_radius_N function for each case. Any other radius
// trips the assert and leaves dst untouched.
SK_ATTRIBUTE(noinline) static void direct_blur_x(
        int radius, uint16_t* gauss,
        const uint8_t* src, size_t srcStride, int srcW,
        uint8_t* dst, size_t dstStride, int dstW, int dstH) {
    switch (radius) {
        case 1:
            blur_x_rect(blur_x_radius_1, gauss, src, srcStride, srcW, dst, dstStride, dstW, dstH);
            return;
        case 2:
            blur_x_rect(blur_x_radius_2, gauss, src, srcStride, srcW, dst, dstStride, dstW, dstH);
            return;
        case 3:
            blur_x_rect(blur_x_radius_3, gauss, src, srcStride, srcW, dst, dstStride, dstW, dstH);
            return;
        case 4:
            blur_x_rect(blur_x_radius_4, gauss, src, srcStride, srcW, dst, dstStride, dstW, dstH);
            return;
        default:
            break;
    }
    SkASSERTF(false, "The radius %d is not handled\n", radius);
}
// The operations of the blur_y_radius_N functions work on a theme similar to the blur_x_radius_N
// functions, but end up being simpler because there is no complicated shift of registers. We
// start with the non-traditional form of the gaussian filter. In the following, r is the value
// that, when added, generates the next value in the column.
//
// D[n+0r] = S[n+0r]*G[1]
// + S[n+1r]*G[0]
// + S[n+2r]*G[1]
//
// Expanding out in a way similar to blur_x_radius_N for specific values of n.
//
// D[n+0r] = S[n-2r]*G[1] + S[n-1r]*G[0] + S[n+0r]*G[1]
// D[n+1r] = S[n-1r]*G[1] + S[n+0r]*G[0] + S[n+1r]*G[1]
// D[n+2r] = S[n+0r]*G[1] + S[n+1r]*G[0] + S[n+2r]*G[1]
//
// We can see that S[n+0r] is in all three D[] equations, but is only multiplied twice. Now we
// can look at the calculation from the point of view of a source value.
//
// Given S[n+0r]:
// D[n+0r] += S[n+0r]*G[1];
// /* D[n+0r] is done and can be stored now. */
// D[n+1r] += S[n+0r]*G[0];
// D[n+2r] = S[n+0r]*G[1];
//
// Remember, by induction, that D[n+0r] == S[n-2r]*G[1] + S[n-1r]*G[0] before adding in
// S[n+0r]*G[1]. So, after the addition D[n+0r] has finished calculation and can be stored. Also,
// notice that D[n+2r] is receiving its first value from S[n+0r]*G[1] and is not added in. Notice
// how values flow in the following two iterations in source.
//
// D[n+0r] += S[n+0r]*G[1]
// D[n+1r] += S[n+0r]*G[0]
// D[n+2r] = S[n+0r]*G[1]
// /* ------- */
// D[n+1r] += S[n+1r]*G[1]
// D[n+2r] += S[n+1r]*G[0]
// D[n+3r] = S[n+1r]*G[1]
//
// Instead of using memory we can introduce temporaries d01 and d12. The update step changes
// to the following.
//
// answer = d01 + S[n+0r]*G[1]
// d01 = d12 + S[n+0r]*G[0]
// d12 = S[n+0r]*G[1]
// return answer
//
// Finally, this can be ganged into SIMD style.
// answer[0..7] = d01[0..7] + S[n+0r..n+0r+7]*G[1]
// d01[0..7] = d12[0..7] + S[n+0r..n+0r+7]*G[0]
// d12[0..7] = S[n+0r..n+0r+7]*G[1]
// return answer[0..7]
// Radius-1 (3 tap) vertical kernel step for eight columns at once. Adds the contribution of
// source row s0 to the partial sums, returns the row that is now complete, and shifts the
// remaining partial sums up by one (*d12 -> *d01). The newest partial sum is seeded with
// kHalf. Unused gaussian and accumulator parameters are unnamed.
static SK_ALWAYS_INLINE Sk8h blur_y_radius_1(
        const Sk8h& s0,
        const Sk8h& g0, const Sk8h& g1, const Sk8h&, const Sk8h&, const Sk8h&,
        Sk8h* d01, Sk8h* d12, Sk8h*, Sk8h*, Sk8h*, Sk8h*, Sk8h*, Sk8h*) {
    auto p0 = s0.mulHi(g0);
    auto p1 = s0.mulHi(g1);

    Sk8h finished = *d01 + p1;
    *d01 = *d12 + p0;
    *d12 =        p1 + kHalf;

    return finished;
}
// Radius-2 (5 tap) vertical kernel step for eight columns at once. Adds the contribution of
// source row s0 to the partial sums, returns the row that is now complete, and shifts the
// remaining partial sums up by one. The newest partial sum (*d34) is seeded with kHalf.
// Unused gaussian and accumulator parameters are unnamed.
static SK_ALWAYS_INLINE Sk8h blur_y_radius_2(
        const Sk8h& s0,
        const Sk8h& g0, const Sk8h& g1, const Sk8h& g2, const Sk8h&, const Sk8h&,
        Sk8h* d01, Sk8h* d12, Sk8h* d23, Sk8h* d34, Sk8h*, Sk8h*, Sk8h*, Sk8h*) {
    auto p0 = s0.mulHi(g0);
    auto p1 = s0.mulHi(g1);
    auto p2 = s0.mulHi(g2);

    Sk8h finished = *d01 + p2;
    *d01 = *d12 + p1;
    *d12 = *d23 + p0;
    *d23 = *d34 + p1;
    *d34 =        p2 + kHalf;

    return finished;
}
// Radius-3 (7 tap) vertical kernel step for eight columns at once. Adds the contribution of
// source row s0 to the partial sums, returns the row that is now complete, and shifts the
// remaining partial sums up by one. The newest partial sum (*d56) is seeded with kHalf.
// Unused gaussian and accumulator parameters are unnamed.
static SK_ALWAYS_INLINE Sk8h blur_y_radius_3(
        const Sk8h& s0,
        const Sk8h& g0, const Sk8h& g1, const Sk8h& g2, const Sk8h& g3, const Sk8h&,
        Sk8h* d01, Sk8h* d12, Sk8h* d23, Sk8h* d34, Sk8h* d45, Sk8h* d56, Sk8h*, Sk8h*) {
    auto p0 = s0.mulHi(g0);
    auto p1 = s0.mulHi(g1);
    auto p2 = s0.mulHi(g2);
    auto p3 = s0.mulHi(g3);

    Sk8h finished = *d01 + p3;
    *d01 = *d12 + p2;
    *d12 = *d23 + p1;
    *d23 = *d34 + p0;
    *d34 = *d45 + p1;
    *d45 = *d56 + p2;
    *d56 =        p3 + kHalf;

    return finished;
}
// Radius-4 (9 tap) vertical kernel step for eight columns at once. Adds the contribution of
// source row s0 to the partial sums, returns the row that is now complete, and shifts the
// remaining partial sums up by one. The newest partial sum (*d78) is seeded with kHalf.
static SK_ALWAYS_INLINE Sk8h blur_y_radius_4(
        const Sk8h& s0,
        const Sk8h& g0, const Sk8h& g1, const Sk8h& g2, const Sk8h& g3, const Sk8h& g4,
        Sk8h* d01, Sk8h* d12, Sk8h* d23, Sk8h* d34, Sk8h* d45, Sk8h* d56, Sk8h* d67, Sk8h* d78) {
    auto p0 = s0.mulHi(g0);
    auto p1 = s0.mulHi(g1);
    auto p2 = s0.mulHi(g2);
    auto p3 = s0.mulHi(g3);
    auto p4 = s0.mulHi(g4);

    Sk8h finished = *d01 + p4;
    *d01 = *d12 + p3;
    *d12 = *d23 + p2;
    *d23 = *d34 + p1;
    *d34 = *d45 + p0;
    *d45 = *d56 + p1;
    *d56 = *d67 + p2;
    *d67 = *d78 + p3;
    *d78 =        p4 + kHalf;

    return finished;
}
// Function type shared by blur_y_radius_1 through blur_y_radius_4 (their signatures are
// identical); blur_column and blur_y_rect take the kernel to apply as a parameter of this type.
using BlurY = decltype(blur_y_radius_1);
// Vertically blurs a strip of `width` (at most 8) adjacent columns over srcH source rows with
// the kernel step `blur`, which will be one of blur_y_radius_(1|2|3|4). One destination row is
// written per source row, followed by 2 * radius more rows taken from the partial sums that
// are still pending once the source is exhausted.
static SK_ALWAYS_INLINE void blur_column(
        BlurY blur, int radius, int width,
        const Sk8h& g0, const Sk8h& g1, const Sk8h& g2, const Sk8h& g3, const Sk8h& g4,
        const uint8_t* src, size_t srcStride, int srcH,
        uint8_t* dst, size_t dstStride) {
    // Partial sums for the rows that are not finished yet, all seeded with kHalf.
    Sk8h d01{kHalf}, d12{kHalf}, d23{kHalf}, d34{kHalf},
         d45{kHalf}, d56{kHalf}, d67{kHalf}, d78{kHalf};

    // Writes two consecutive destination rows and returns the address of the row after them.
    auto storeTwoRows = [&](uint8_t* to, const Sk8h& first, const Sk8h& second) {
        store(to, first, width);
        to += dstStride;
        store(to, second, width);
        return to + dstStride;
    };

    for (int row = 0; row < srcH; row++) {
        auto source = load(src, width);
        auto finished = blur(source,
                             g0, g1, g2, g3, g4,
                             &d01, &d12, &d23, &d34, &d45, &d56, &d67, &d78);
        store(dst, finished, width);
        src += srcStride;
        dst += dstStride;
    }

    // Drain the pending partial sums, two rows per unit of radius.
    if (radius < 1) {
        return;
    }
    dst = storeTwoRows(dst, d01, d12);
    if (radius < 2) {
        return;
    }
    dst = storeTwoRows(dst, d23, d34);
    if (radius < 3) {
        return;
    }
    dst = storeTwoRows(dst, d45, d56);
    if (radius < 4) {
        return;
    }
    storeTwoRows(dst, d67, d78);
}
// Vertically blurs a srcW x srcH rectangle with the kernel step `blur`, which will be one of
// blur_y_radius_(1|2|3|4). The rectangle is processed in strips of eight columns, with a
// narrower final strip when srcW is not a multiple of eight. The first five entries of `gauss`
// are broadcast once and reused for every strip.
static SK_ALWAYS_INLINE void blur_y_rect(
        BlurY blur, int radius, uint16_t *gauss,
        const uint8_t *src, size_t srcStride, int srcW, int srcH,
        uint8_t *dst, size_t dstStride) {
    Sk8h g0{gauss[0]};
    Sk8h g1{gauss[1]};
    Sk8h g2{gauss[2]};
    Sk8h g3{gauss[3]};
    Sk8h g4{gauss[4]};

    // Full strips of eight columns.
    int x = 0;
    while (x <= srcW - 8) {
        blur_column(blur, radius, 8,
                    g0, g1, g2, g3, g4,
                    src, srcStride, srcH,
                    dst, dstStride);
        src += 8;
        dst += 8;
        x += 8;
    }

    // Leftover columns, if any.
    const int columnsLeft = srcW - x;
    if (columnsLeft > 0) {
        blur_column(blur, radius, columnsLeft,
                    g0, g1, g2, g3, g4,
                    src, srcStride, srcH,
                    dst, dstStride);
    }
}
// Dispatches a vertical blur to the kernel specialised for `radius` (1..4).
// Each case passes the radius and kernel as constants to blur_y_rect. Any
// other radius triggers an assert and otherwise leaves dst untouched.
SK_ATTRIBUTE(noinline) static void direct_blur_y(
        int radius, uint16_t* gauss,
        const uint8_t* src, size_t srcStride, int srcW, int srcH,
        uint8_t* dst, size_t dstStride) {
    switch (radius) {
        case 1:
            blur_y_rect(blur_y_radius_1, 1, gauss, src, srcStride, srcW, srcH, dst, dstStride);
            return;

        case 2:
            blur_y_rect(blur_y_radius_2, 2, gauss, src, srcStride, srcW, srcH, dst, dstStride);
            return;

        case 3:
            blur_y_rect(blur_y_radius_3, 3, gauss, src, srcStride, srcW, srcH, dst, dstStride);
            return;

        case 4:
            blur_y_rect(blur_y_radius_4, 4, gauss, src, srcStride, srcW, srcH, dst, dstStride);
            return;

        default:
            SkASSERTF(false, "The radius %d is not handled\n", radius);
            return;
    }
}
// Blurs `src` into `*dst` for small sigmas (both asserted to be in [0, 2)) using
// the direct fixed-radius kernels instead of the planned scan blur.
//
// *dst is set up by prepare_destination with a border of (radiusX, radiusY)
// around the source. Returns that border, or {0, 0} with empty dst bounds when
// the destination has no image. When src has no image, only the destination
// description is produced and the border is returned without blurring.
static SkIPoint small_blur(double sigmaX, double sigmaY, const SkMask& src, SkMask* dst) {
    SkASSERT(0 <= sigmaX && sigmaX < 2);
    SkASSERT(0 <= sigmaY && sigmaY < 2);

    SkGaussFilter filterX{sigmaX, SkGaussFilter::Type::Bessel},
                  filterY{sigmaY, SkGaussFilter::Type::Bessel};

    int radiusX = filterX.radius(),
        radiusY = filterY.radius();

    // The direct kernels only exist for radii up to 4.
    SkASSERT(radiusX <= 4 && radiusY <= 4);

    // Converts the filter's double weights to 0.16 fixed point.
    auto prepareGauss = [](const SkGaussFilter& filter, uint16_t* factors) {
        int i = 0;
        for (double d : filter) {
            // A weight that rounds up to 1.0 would scale to 65536, which does
            // not fit in uint16_t (the conversion would be undefined and in
            // practice wraps to 0). Clamp to the largest representable factor.
            const double scaled = round(d * (1 << 16));
            factors[i++] = static_cast<uint16_t>(scaled > 65535.0 ? 65535.0 : scaled);
        }
    };

    // Zero-initialized: blur_y_rect loads gauss[0..4] whatever the radius, while
    // prepareGauss only writes as many entries as the filter yields.
    uint16_t gaussFactorsX[SkGaussFilter::kGaussArrayMax] = {},
             gaussFactorsY[SkGaussFilter::kGaussArrayMax] = {};

    prepareGauss(filterX, gaussFactorsX);
    prepareGauss(filterY, gaussFactorsY);

    *dst = prepare_destination(radiusX, radiusY, src);
    if (src.fImage == nullptr) {
        return {SkTo<int32_t>(radiusX), SkTo<int32_t>(radiusY)};
    }
    if (dst->fImage == nullptr) {
        dst->fBounds.setEmpty();
        return {0, 0};
    }

    int srcW = src.fBounds.width(),
        srcH = src.fBounds.height();

    int dstW = dst->fBounds.width(),
        dstH = dst->fBounds.height();

    size_t srcStride = src.fRowBytes,
           dstStride = dst->fRowBytes;

    // TODO: handle blurring in only one direction.

    // Blur vertically and copy to destination. The result is written radiusX
    // bytes into each destination row; the horizontal pass below reads it from
    // that same offset.
    direct_blur_y(radiusY, gaussFactorsY,
                  src.fImage, srcStride, srcW, srcH,
                  dst->fImage + radiusX, dstStride);

    // Blur horizontally in place.
    direct_blur_x(radiusX, gaussFactorsX,
                  dst->fImage + radiusX, dstStride, srcW,
                  dst->fImage, dstStride, dstW, dstH);

    return {radiusX, radiusY};
}
#endif // SK_USE_LEGACY_INTERP_BLUR
// Blurs `src` into `*dst` using fSigmaW horizontally and fSigmaH vertically.
//
// *dst is set up by prepare_destination with a border of (borderW, borderH)
// taken from the per-axis plans. Returns that border, or {0, 0} with empty dst
// bounds when the destination has no image. When src has no image the border
// is returned without any pixels being blurred.
SkIPoint SkMaskBlurFilter::blur(const SkMask& src, SkMask* dst) const {
#if !defined(SK_USE_LEGACY_INTERP_BLUR)
// Small sigmas in both directions are handled by the direct kernels.
if (fSigmaW < 2.0 && fSigmaH < 2.0) {
return small_blur(fSigmaW, fSigmaH, src, dst);
}
#endif
// 1024 is a placeholder guess until more analysis can be done.
SkSTArenaAlloc<1024> alloc;
// One plan per axis; both plans and everything they create live in `alloc`.
PlanningInterface* planW = make_plan(&alloc, fSigmaW);
PlanningInterface* planH = make_plan(&alloc, fSigmaH);
int borderW = planW->border(),
borderH = planH->border();
SkASSERT(borderH >= 0 && borderW >= 0);
*dst = prepare_destination(borderW, borderH, src);
// No source pixels: report the border only.
if (src.fImage == nullptr) {
return {SkTo<int32_t>(borderW), SkTo<int32_t>(borderH)};
}
// No destination pixels (presumably prepare_destination could not allocate
// them -- confirm against its definition): report an empty result.
if (dst->fImage == nullptr) {
dst->fBounds.setEmpty();
return {0, 0};
}
int srcW = src.fBounds.width(),
srcH = src.fBounds.height(),
dstW = dst->fBounds.width(),
dstH = dst->fBounds.height();
SkASSERT(srcW >= 0 && srcH >= 0 && dstW >= 0 && dstH >= 0);
// A single scratch buffer is shared by both scans, sized for the larger need.
auto bufferSize = std::max(planW->bufferSize(), planH->bufferSize());
auto buffer = alloc.makeArrayDefault<uint32_t>(bufferSize);
if (planW->needsBlur() && planH->needsBlur()) {
// Blur both directions.
// tmp holds the horizontally blurred image, transposed: each of its tmpH
// (= dstW) rows is tmpW (= srcH) bytes long.
int tmpW = srcH,
tmpH = dstW;
auto tmp = alloc.makeArrayDefault<uint8_t>(tmpW * tmpH);
// Blur horizontally, and transpose.
auto scanW = planW->makeBlurScan(&alloc, srcW, buffer);
int y = 0;
// Process four source rows per call when the scan supports it.
if (scanW->canBlur4() && srcH > 4) {
for (;y + 4 <= srcH; y += 4) {
auto srcStart = &src.fImage[y * src.fRowBytes];
auto tmpStart = &tmp[y];
scanW->blur4Transpose(srcStart, src.fRowBytes, srcStart + srcW,
tmpStart, tmpW, tmpStart + tmpW * tmpH);
}
}
// Remaining rows (or all rows when the four-row path was not taken), one at a
// time. Source row y is written down column y of tmp (destination stride tmpW).
for (;y < srcH; y++) {
auto srcStart = &src.fImage[y * src.fRowBytes];
auto tmpStart = &tmp[y];
scanW->blur(srcStart, 1, srcStart + srcW,
tmpStart, tmpW, tmpStart + tmpW * tmpH);
}
// Blur vertically (scan in memory order because of the transposition),
// and transpose back to the original orientation.
auto scanH = planH->makeBlurScan(&alloc, tmpW, buffer);
y = 0;
// Same four-rows-at-a-time fast path, now over the rows of tmp.
if (scanH->canBlur4() && tmpH > 4) {
for (;y + 4 <= tmpH; y += 4) {
auto tmpStart = &tmp[y * tmpW];
auto dstStart = &dst->fImage[y];
scanH->blur4Transpose(
tmpStart, tmpW, tmpStart + tmpW,
dstStart, dst->fRowBytes, dstStart + dst->fRowBytes * dstH);
}
}
// Remaining rows of tmp; row y is written down column y of dst.
for (;y < tmpH; y++) {
auto tmpStart = &tmp[y * tmpW];
auto dstStart = &dst->fImage[y];
scanH->blur(tmpStart, 1, tmpStart + tmpW,
dstStart, dst->fRowBytes, dstStart + dst->fRowBytes * dstH);
}
} else if (planW->needsBlur()) {
// Blur only horizontally.
// Each source row is blurred straight into the matching destination row.
auto scanW = planW->makeBlurScan(&alloc, srcW, buffer);
for (int y = 0; y < srcH; y++) {
auto srcStart = &src.fImage[y * src.fRowBytes];
auto dstStart = &dst->fImage[y * dst->fRowBytes];
scanW->blur(srcStart, 1, srcStart + srcW,
dstStart, 1, dstStart + dstW);
}
} else if (planH->needsBlur()) {
// Blur only vertically.
// Each column is walked with the row stride; srcEnd/dstEnd are the ends of
// the whole images and are passed unchanged for every column.
auto srcEnd = &src.fImage[src.fRowBytes * srcH];
auto dstEnd = &dst->fImage[dst->fRowBytes * dstH];
auto scanH = planH->makeBlurScan(&alloc, srcH, buffer);
for (int x = 0; x < srcW; x++) {
auto srcStart = &src.fImage[x];
auto dstStart = &dst->fImage[x];
scanH->blur(srcStart, src.fRowBytes, srcEnd,
dstStart, dst->fRowBytes, dstEnd);
}
} else {
// Copy to dst. No Blur.
SkASSERT(false); // should not get here
// If asserts are compiled out, fall back to a row-by-row copy.
for (int y = 0; y < srcH; y++) {
std::memcpy(&dst->fImage[y * dst->fRowBytes], &src.fImage[y * src.fRowBytes], dstW);
}
}
return {SkTo<int32_t>(borderW), SkTo<int32_t>(borderH)};
}