| /* |
| * Copyright 2011 The Android Open Source Project |
| * |
| * Use of this source code is governed by a BSD-style license that can be |
| * found in the LICENSE file. |
| */ |
| |
| #include "include/core/SkBitmap.h" |
| #include "include/core/SkColorType.h" |
| #include "include/core/SkFlattenable.h" |
| #include "include/core/SkImageFilter.h" |
| #include "include/core/SkImageInfo.h" |
| #include "include/core/SkMatrix.h" |
| #include "include/core/SkPoint.h" |
| #include "include/core/SkRect.h" |
| #include "include/core/SkRefCnt.h" |
| #include "include/core/SkScalar.h" |
| #include "include/core/SkSize.h" |
| #include "include/core/SkTileMode.h" |
| #include "include/core/SkTypes.h" |
| #include "include/effects/SkImageFilters.h" |
| #include "include/private/base/SkFloatingPoint.h" |
| #include "include/private/base/SkMalloc.h" |
| #include "src/base/SkArenaAlloc.h" |
| #include "src/base/SkVx.h" |
| #include "src/core/SkImageFilter_Base.h" |
| #include "src/core/SkReadBuffer.h" |
| #include "src/core/SkSpecialImage.h" |
| #include "src/core/SkWriteBuffer.h" |
| |
| #include <algorithm> |
| #include <cmath> |
| #include <cstdint> |
| #include <cstring> |
| #include <memory> |
| #include <utility> |
| |
| #if defined(SK_GANESH) |
| #include "include/private/gpu/ganesh/GrTypesPriv.h" |
| #include "src/core/SkGpuBlurUtils.h" |
| #include "src/gpu/ganesh/GrSurfaceProxyView.h" |
| #include "src/gpu/ganesh/SurfaceDrawContext.h" |
| #endif // defined(SK_GANESH) |
| |
| #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1 |
| #include <xmmintrin.h> |
| #define SK_PREFETCH(ptr) _mm_prefetch(reinterpret_cast<const char*>(ptr), _MM_HINT_T0) |
| #elif defined(__GNUC__) |
| #define SK_PREFETCH(ptr) __builtin_prefetch(ptr) |
| #else |
| #define SK_PREFETCH(ptr) |
| #endif |
| |
| namespace { |
| |
| class SkBlurImageFilter final : public SkImageFilter_Base { |
| public: |
| SkBlurImageFilter(SkScalar sigmaX, SkScalar sigmaY, SkTileMode tileMode, |
| sk_sp<SkImageFilter> input, const SkRect* cropRect) |
| : INHERITED(&input, 1, cropRect) |
| , fSigma{sigmaX, sigmaY} |
| , fTileMode(tileMode) {} |
| |
| SkRect computeFastBounds(const SkRect&) const override; |
| |
| protected: |
| void flatten(SkWriteBuffer&) const override; |
| sk_sp<SkSpecialImage> onFilterImage(const Context&, SkIPoint* offset) const override; |
| SkIRect onFilterNodeBounds(const SkIRect& src, const SkMatrix& ctm, |
| MapDirection, const SkIRect* inputRect) const override; |
| |
| private: |
| friend void ::SkRegisterBlurImageFilterFlattenable(); |
| SK_FLATTENABLE_HOOKS(SkBlurImageFilter) |
| |
| #if defined(SK_GANESH) |
| sk_sp<SkSpecialImage> gpuFilter( |
| const Context& ctx, SkVector sigma, |
| const sk_sp<SkSpecialImage> &input, |
| SkIRect inputBounds, SkIRect dstBounds, SkIPoint inputOffset, SkIPoint* offset) const; |
| #endif |
| |
| SkSize fSigma; |
| SkTileMode fTileMode; |
| |
| using INHERITED = SkImageFilter_Base; |
| }; |
| |
| } // end namespace |
| |
| sk_sp<SkImageFilter> SkImageFilters::Blur( |
| SkScalar sigmaX, SkScalar sigmaY, SkTileMode tileMode, sk_sp<SkImageFilter> input, |
| const CropRect& cropRect) { |
| if (sigmaX < SK_ScalarNearlyZero && sigmaY < SK_ScalarNearlyZero && !cropRect) { |
| return input; |
| } |
| return sk_sp<SkImageFilter>( |
| new SkBlurImageFilter(sigmaX, sigmaY, tileMode, input, cropRect)); |
| } |
| |
| void SkRegisterBlurImageFilterFlattenable() { |
| SK_REGISTER_FLATTENABLE(SkBlurImageFilter); |
| SkFlattenable::Register("SkBlurImageFilterImpl", SkBlurImageFilter::CreateProc); |
| } |
| |
| sk_sp<SkFlattenable> SkBlurImageFilter::CreateProc(SkReadBuffer& buffer) { |
| SK_IMAGEFILTER_UNFLATTEN_COMMON(common, 1); |
| SkScalar sigmaX = buffer.readScalar(); |
| SkScalar sigmaY = buffer.readScalar(); |
| SkTileMode tileMode = buffer.read32LE(SkTileMode::kLastTileMode); |
| return SkImageFilters::Blur( |
| sigmaX, sigmaY, tileMode, common.getInput(0), common.cropRect()); |
| } |
| |
| void SkBlurImageFilter::flatten(SkWriteBuffer& buffer) const { |
| this->INHERITED::flatten(buffer); |
| buffer.writeScalar(fSigma.fWidth); |
| buffer.writeScalar(fSigma.fHeight); |
| |
| SkASSERT(fTileMode <= SkTileMode::kLastTileMode); |
| buffer.writeInt(static_cast<int>(fTileMode)); |
| } |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| |
| namespace { |
| // This is defined by the SVG spec: |
| // https://drafts.fxtf.org/filter-effects/#feGaussianBlurElement |
| int calculate_window(double sigma) { |
| auto possibleWindow = static_cast<int>(floor(sigma * 3 * sqrt(2 * SK_DoublePI) / 4 + 0.5)); |
| return std::max(1, possibleWindow); |
| } |
| |
| // This rather arbitrary-looking value results in a maximum box blur kernel size |
| // of 1000 pixels on the raster path, which matches the WebKit and Firefox |
| // implementations. Since the GPU path does not compute a box blur, putting |
| // the limit on sigma ensures consistent behaviour between the GPU and |
| // raster paths. |
| static constexpr SkScalar kMaxSigma = 532.f; |
| |
| static SkVector map_sigma(const SkSize& localSigma, const SkMatrix& ctm) { |
| SkVector sigma = SkVector::Make(localSigma.width(), localSigma.height()); |
| ctm.mapVectors(&sigma, 1); |
| sigma.fX = std::min(SkScalarAbs(sigma.fX), kMaxSigma); |
| sigma.fY = std::min(SkScalarAbs(sigma.fY), kMaxSigma); |
| // Disable blurring on axes that were never finite, or became non-finite after mapping by ctm. |
| if (!SkScalarIsFinite(sigma.fX)) { |
| sigma.fX = 0.f; |
| } |
| if (!SkScalarIsFinite(sigma.fY)) { |
| sigma.fY = 0.f; |
| } |
| return sigma; |
| } |
| |
| |
| class Pass { |
| public: |
| explicit Pass(int border) : fBorder(border) {} |
| virtual ~Pass() = default; |
| |
| void blur(int srcLeft, int srcRight, int dstRight, |
| const uint32_t* src, int srcStride, |
| uint32_t* dst, int dstStride) { |
| this->startBlur(); |
| |
| auto srcStart = srcLeft - fBorder, |
| srcEnd = srcRight - fBorder, |
| dstEnd = dstRight, |
| srcIdx = srcStart, |
| dstIdx = 0; |
| |
| const uint32_t* srcCursor = src; |
| uint32_t* dstCursor = dst; |
| |
| if (dstIdx < srcIdx) { |
| // The destination pixels are not effected by the src pixels, |
| // change to zero as per the spec. |
| // https://drafts.fxtf.org/filter-effects/#FilterPrimitivesOverviewIntro |
| while (dstIdx < srcIdx) { |
| *dstCursor = 0; |
| dstCursor += dstStride; |
| SK_PREFETCH(dstCursor); |
| dstIdx++; |
| } |
| } else if (srcIdx < dstIdx) { |
| // The edge of the source is before the edge of the destination. Calculate the sums for |
| // the pixels before the start of the destination. |
| if (int commonEnd = std::min(dstIdx, srcEnd); srcIdx < commonEnd) { |
| // Preload the blur with values from src before dst is entered. |
| int n = commonEnd - srcIdx; |
| this->blurSegment(n, srcCursor, srcStride, nullptr, 0); |
| srcIdx += n; |
| srcCursor += n * srcStride; |
| } |
| if (srcIdx < dstIdx) { |
| // The weird case where src is out of pixels before dst is even started. |
| int n = dstIdx - srcIdx; |
| this->blurSegment(n, nullptr, 0, nullptr, 0); |
| srcIdx += n; |
| } |
| } |
| |
| // Both srcIdx and dstIdx are in sync now, and can run in a 1:1 fashion. This is the |
| // normal mode of operation. |
| SkASSERT(srcIdx == dstIdx); |
| if (int commonEnd = std::min(dstEnd, srcEnd); dstIdx < commonEnd) { |
| int n = commonEnd - dstIdx; |
| this->blurSegment(n, srcCursor, srcStride, dstCursor, dstStride); |
| srcCursor += n * srcStride; |
| dstCursor += n * dstStride; |
| dstIdx += n; |
| srcIdx += n; |
| } |
| |
| // Drain the remaining blur values into dst assuming 0's for the leading edge. |
| if (dstIdx < dstEnd) { |
| int n = dstEnd - dstIdx; |
| this->blurSegment(n, nullptr, 0, dstCursor, dstStride); |
| } |
| } |
| |
| protected: |
| virtual void startBlur() = 0; |
| virtual void blurSegment( |
| int n, const uint32_t* src, int srcStride, uint32_t* dst, int dstStride) = 0; |
| |
| private: |
| const int fBorder; |
| }; |
| |
| class PassMaker { |
| public: |
| explicit PassMaker(int window) : fWindow{window} {} |
| virtual ~PassMaker() = default; |
| virtual Pass* makePass(void* buffer, SkArenaAlloc* alloc) const = 0; |
| virtual size_t bufferSizeBytes() const = 0; |
| int window() const {return fWindow;} |
| |
| private: |
| const int fWindow; |
| }; |
| |
| // Implement a scanline processor that uses a three-box filter to approximate a Gaussian blur. |
| // The GaussPass is limit to processing sigmas < 135. |
| class GaussPass final : public Pass { |
| public: |
| // NB 136 is the largest sigma that will not cause a buffer full of 255 mask values to overflow |
| // using the Gauss filter. It also limits the size of buffers used hold intermediate values. |
| // Explanation of maximums: |
| // sum0 = window * 255 |
| // sum1 = window * sum0 -> window * window * 255 |
| // sum2 = window * sum1 -> window * window * window * 255 -> window^3 * 255 |
| // |
| // The value window^3 * 255 must fit in a uint32_t. So, |
| // window^3 < 2^32. window = 255. |
| // |
| // window = floor(sigma * 3 * sqrt(2 * kPi) / 4 + 0.5) |
| // For window <= 255, the largest value for sigma is 136. |
| static PassMaker* MakeMaker(double sigma, SkArenaAlloc* alloc) { |
| SkASSERT(0 <= sigma); |
| int window = calculate_window(sigma); |
| if (255 <= window) { |
| return nullptr; |
| } |
| |
| class Maker : public PassMaker { |
| public: |
| explicit Maker(int window) : PassMaker{window} {} |
| Pass* makePass(void* buffer, SkArenaAlloc* alloc) const override { |
| return GaussPass::Make(this->window(), buffer, alloc); |
| } |
| |
| size_t bufferSizeBytes() const override { |
| int window = this->window(); |
| size_t onePassSize = window - 1; |
| // If the window is odd, then there is an obvious middle element. For even sizes |
| // 2 passes are shifted, and the last pass has an extra element. Like this: |
| // S |
| // aaaAaa |
| // bbBbbb |
| // cccCccc |
| // D |
| size_t bufferCount = (window & 1) == 1 ? 3 * onePassSize : 3 * onePassSize + 1; |
| return bufferCount * sizeof(skvx::Vec<4, uint32_t>); |
| } |
| }; |
| |
| return alloc->make<Maker>(window); |
| } |
| |
| static GaussPass* Make(int window, void* buffers, SkArenaAlloc* alloc) { |
| // We don't need to store the trailing edge pixel in the buffer; |
| int passSize = window - 1; |
| skvx::Vec<4, uint32_t>* buffer0 = static_cast<skvx::Vec<4, uint32_t>*>(buffers); |
| skvx::Vec<4, uint32_t>* buffer1 = buffer0 + passSize; |
| skvx::Vec<4, uint32_t>* buffer2 = buffer1 + passSize; |
| // If the window is odd just one buffer is needed, but if it's even, then there is one |
| // more element on that pass. |
| skvx::Vec<4, uint32_t>* buffersEnd = buffer2 + ((window & 1) ? passSize : passSize + 1); |
| |
| // Calculating the border is tricky. The border is the distance in pixels between the first |
| // dst pixel and the first src pixel (or the last src pixel and the last dst pixel). |
| // I will go through the odd case which is simpler, and then through the even case. Given a |
| // stack of filters seven wide for the odd case of three passes. |
| // |
| // S |
| // aaaAaaa |
| // bbbBbbb |
| // cccCccc |
| // D |
| // |
| // The furthest changed pixel is when the filters are in the following configuration. |
| // |
| // S |
| // aaaAaaa |
| // bbbBbbb |
| // cccCccc |
| // D |
| // |
| // The A pixel is calculated using the value S, the B uses A, and the C uses B, and |
| // finally D is C. So, with a window size of seven the border is nine. In the odd case, the |
| // border is 3*((window - 1)/2). |
| // |
| // For even cases the filter stack is more complicated. The spec specifies two passes |
| // of even filters and a final pass of odd filters. A stack for a width of six looks like |
| // this. |
| // |
| // S |
| // aaaAaa |
| // bbBbbb |
| // cccCccc |
| // D |
| // |
| // The furthest pixel looks like this. |
| // |
| // S |
| // aaaAaa |
| // bbBbbb |
| // cccCccc |
| // D |
| // |
| // For a window of six, the border value is eight. In the even case the border is 3 * |
| // (window/2) - 1. |
| int border = (window & 1) == 1 ? 3 * ((window - 1) / 2) : 3 * (window / 2) - 1; |
| |
| // If the window is odd then the divisor is just window ^ 3 otherwise, |
| // it is window * window * (window + 1) = window ^ 3 + window ^ 2; |
| int window2 = window * window; |
| int window3 = window2 * window; |
| int divisor = (window & 1) == 1 ? window3 : window3 + window2; |
| return alloc->make<GaussPass>(buffer0, buffer1, buffer2, buffersEnd, border, divisor); |
| } |
| |
| GaussPass(skvx::Vec<4, uint32_t>* buffer0, |
| skvx::Vec<4, uint32_t>* buffer1, |
| skvx::Vec<4, uint32_t>* buffer2, |
| skvx::Vec<4, uint32_t>* buffersEnd, |
| int border, |
| int divisor) |
| : Pass{border} |
| , fBuffer0{buffer0} |
| , fBuffer1{buffer1} |
| , fBuffer2{buffer2} |
| , fBuffersEnd{buffersEnd} |
| , fDivider(divisor) {} |
| |
| private: |
| void startBlur() override { |
| skvx::Vec<4, uint32_t> zero = {0u, 0u, 0u, 0u}; |
| zero.store(fSum0); |
| zero.store(fSum1); |
| auto half = fDivider.half(); |
| skvx::Vec<4, uint32_t>{half, half, half, half}.store(fSum2); |
| sk_bzero(fBuffer0, (fBuffersEnd - fBuffer0) * sizeof(skvx::Vec<4, uint32_t>)); |
| |
| fBuffer0Cursor = fBuffer0; |
| fBuffer1Cursor = fBuffer1; |
| fBuffer2Cursor = fBuffer2; |
| } |
| |
| // GaussPass implements the common three pass box filter approximation of Gaussian blur, |
| // but combines all three passes into a single pass. This approach is facilitated by three |
| // circular buffers the width of the window which track values for trailing edges of each of |
| // the three passes. This allows the algorithm to use more precision in the calculation |
| // because the values are not rounded each pass. And this implementation also avoids a trap |
| // that's easy to fall into resulting in blending in too many zeroes near the edge. |
| // |
| // In general, a window sum has the form: |
| // sum_n+1 = sum_n + leading_edge - trailing_edge. |
| // If instead we do the subtraction at the end of the previous iteration, we can just |
| // calculate the sums instead of having to do the subtractions too. |
| // |
| // In previous iteration: |
| // sum_n+1 = sum_n - trailing_edge. |
| // |
| // In this iteration: |
| // sum_n+1 = sum_n + leading_edge. |
| // |
| // Now we can stack all three sums and do them at once. Sum0 gets its leading edge from the |
| // actual data. Sum1's leading edge is just Sum0, and Sum2's leading edge is Sum1. So, doing the |
| // three passes at the same time has the form: |
| // |
| // sum0_n+1 = sum0_n + leading edge |
| // sum1_n+1 = sum1_n + sum0_n+1 |
| // sum2_n+1 = sum2_n + sum1_n+1 |
| // |
| // sum2_n+1 / window^3 is the new value of the destination pixel. |
| // |
| // Reduce the sums by the trailing edges which were stored in the circular buffers for the |
| // next go around. This is the case for odd sized windows, even windows the the third |
| // circular buffer is one larger then the first two circular buffers. |
| // |
| // sum2_n+2 = sum2_n+1 - buffer2[i]; |
| // buffer2[i] = sum1; |
| // sum1_n+2 = sum1_n+1 - buffer1[i]; |
| // buffer1[i] = sum0; |
| // sum0_n+2 = sum0_n+1 - buffer0[i]; |
| // buffer0[i] = leading edge |
| void blurSegment( |
| int n, const uint32_t* src, int srcStride, uint32_t* dst, int dstStride) override { |
| skvx::Vec<4, uint32_t>* buffer0Cursor = fBuffer0Cursor; |
| skvx::Vec<4, uint32_t>* buffer1Cursor = fBuffer1Cursor; |
| skvx::Vec<4, uint32_t>* buffer2Cursor = fBuffer2Cursor; |
| skvx::Vec<4, uint32_t> sum0 = skvx::Vec<4, uint32_t>::Load(fSum0); |
| skvx::Vec<4, uint32_t> sum1 = skvx::Vec<4, uint32_t>::Load(fSum1); |
| skvx::Vec<4, uint32_t> sum2 = skvx::Vec<4, uint32_t>::Load(fSum2); |
| |
| // Given an expanded input pixel, move the window ahead using the leadingEdge value. |
| auto processValue = [&](const skvx::Vec<4, uint32_t>& leadingEdge) { |
| sum0 += leadingEdge; |
| sum1 += sum0; |
| sum2 += sum1; |
| |
| skvx::Vec<4, uint32_t> blurred = fDivider.divide(sum2); |
| |
| sum2 -= *buffer2Cursor; |
| *buffer2Cursor = sum1; |
| buffer2Cursor = (buffer2Cursor + 1) < fBuffersEnd ? buffer2Cursor + 1 : fBuffer2; |
| sum1 -= *buffer1Cursor; |
| *buffer1Cursor = sum0; |
| buffer1Cursor = (buffer1Cursor + 1) < fBuffer2 ? buffer1Cursor + 1 : fBuffer1; |
| sum0 -= *buffer0Cursor; |
| *buffer0Cursor = leadingEdge; |
| buffer0Cursor = (buffer0Cursor + 1) < fBuffer1 ? buffer0Cursor + 1 : fBuffer0; |
| |
| return skvx::cast<uint8_t>(blurred); |
| }; |
| |
| auto loadEdge = [&](const uint32_t* srcCursor) { |
| return skvx::cast<uint32_t>(skvx::Vec<4, uint8_t>::Load(srcCursor)); |
| }; |
| |
| if (!src && !dst) { |
| while (n --> 0) { |
| (void)processValue(0); |
| } |
| } else if (src && !dst) { |
| while (n --> 0) { |
| (void)processValue(loadEdge(src)); |
| src += srcStride; |
| } |
| } else if (!src && dst) { |
| while (n --> 0) { |
| processValue(0u).store(dst); |
| dst += dstStride; |
| } |
| } else if (src && dst) { |
| while (n --> 0) { |
| processValue(loadEdge(src)).store(dst); |
| src += srcStride; |
| dst += dstStride; |
| } |
| } |
| |
| // Store the state |
| fBuffer0Cursor = buffer0Cursor; |
| fBuffer1Cursor = buffer1Cursor; |
| fBuffer2Cursor = buffer2Cursor; |
| |
| sum0.store(fSum0); |
| sum1.store(fSum1); |
| sum2.store(fSum2); |
| } |
| |
| skvx::Vec<4, uint32_t>* const fBuffer0; |
| skvx::Vec<4, uint32_t>* const fBuffer1; |
| skvx::Vec<4, uint32_t>* const fBuffer2; |
| skvx::Vec<4, uint32_t>* const fBuffersEnd; |
| const skvx::ScaledDividerU32 fDivider; |
| |
| // blur state |
| char fSum0[sizeof(skvx::Vec<4, uint32_t>)]; |
| char fSum1[sizeof(skvx::Vec<4, uint32_t>)]; |
| char fSum2[sizeof(skvx::Vec<4, uint32_t>)]; |
| skvx::Vec<4, uint32_t>* fBuffer0Cursor; |
| skvx::Vec<4, uint32_t>* fBuffer1Cursor; |
| skvx::Vec<4, uint32_t>* fBuffer2Cursor; |
| }; |
| |
| // Implement a scanline processor that uses a two-box filter to approximate a Tent filter. |
| // The TentPass is limit to processing sigmas < 2183. |
| class TentPass final : public Pass { |
| public: |
| // NB 2183 is the largest sigma that will not cause a buffer full of 255 mask values to overflow |
| // using the Tent filter. It also limits the size of buffers used hold intermediate values. |
| // Explanation of maximums: |
| // sum0 = window * 255 |
| // sum1 = window * sum0 -> window * window * 255 |
| // |
| // The value window^2 * 255 must fit in a uint32_t. So, |
| // window^2 < 2^32. window = 4104. |
| // |
| // window = floor(sigma * 3 * sqrt(2 * kPi) / 4 + 0.5) |
| // For window <= 4104, the largest value for sigma is 2183. |
| static PassMaker* MakeMaker(double sigma, SkArenaAlloc* alloc) { |
| SkASSERT(0 <= sigma); |
| int gaussianWindow = calculate_window(sigma); |
| // This is a naive method of using the window size for the Gaussian blur to calculate the |
| // window size for the Tent blur. This seems to work well in practice. |
| // |
| // We can use a single pixel to generate the effective blur area given a window size. For |
| // the Gaussian blur this is 3 * window size. For the Tent filter this is 2 * window size. |
| int tentWindow = 3 * gaussianWindow / 2; |
| if (tentWindow >= 4104) { |
| return nullptr; |
| } |
| |
| class Maker : public PassMaker { |
| public: |
| explicit Maker(int window) : PassMaker{window} {} |
| Pass* makePass(void* buffer, SkArenaAlloc* alloc) const override { |
| return TentPass::Make(this->window(), buffer, alloc); |
| } |
| |
| size_t bufferSizeBytes() const override { |
| size_t onePassSize = this->window() - 1; |
| // If the window is odd, then there is an obvious middle element. For even sizes 2 |
| // passes are shifted, and the last pass has an extra element. Like this: |
| // S |
| // aaaAaa |
| // bbBbbb |
| // D |
| size_t bufferCount = 2 * onePassSize; |
| return bufferCount * sizeof(skvx::Vec<4, uint32_t>); |
| } |
| }; |
| |
| return alloc->make<Maker>(tentWindow); |
| } |
| |
| static TentPass* Make(int window, void* buffers, SkArenaAlloc* alloc) { |
| if (window > 4104) { |
| return nullptr; |
| } |
| |
| // We don't need to store the trailing edge pixel in the buffer; |
| int passSize = window - 1; |
| skvx::Vec<4, uint32_t>* buffer0 = static_cast<skvx::Vec<4, uint32_t>*>(buffers); |
| skvx::Vec<4, uint32_t>* buffer1 = buffer0 + passSize; |
| skvx::Vec<4, uint32_t>* buffersEnd = buffer1 + passSize; |
| |
| // Calculating the border is tricky. The border is the distance in pixels between the first |
| // dst pixel and the first src pixel (or the last src pixel and the last dst pixel). |
| // I will go through the odd case which is simpler, and then through the even case. Given a |
| // stack of filters seven wide for the odd case of three passes. |
| // |
| // S |
| // aaaAaaa |
| // bbbBbbb |
| // D |
| // |
| // The furthest changed pixel is when the filters are in the following configuration. |
| // |
| // S |
| // aaaAaaa |
| // bbbBbbb |
| // D |
| // |
| // The A pixel is calculated using the value S, the B uses A, and the D uses B. |
| // So, with a window size of seven the border is nine. In the odd case, the border is |
| // window - 1. |
| // |
| // For even cases the filter stack is more complicated. It uses two passes |
| // of even filters offset from each other. A stack for a width of six looks like |
| // this. |
| // |
| // S |
| // aaaAaa |
| // bbBbbb |
| // D |
| // |
| // The furthest pixel looks like this. |
| // |
| // S |
| // aaaAaa |
| // bbBbbb |
| // D |
| // |
| // For a window of six, the border value is 5. In the even case the border is |
| // window - 1. |
| int border = window - 1; |
| |
| int divisor = window * window; |
| return alloc->make<TentPass>(buffer0, buffer1, buffersEnd, border, divisor); |
| } |
| |
| TentPass(skvx::Vec<4, uint32_t>* buffer0, |
| skvx::Vec<4, uint32_t>* buffer1, |
| skvx::Vec<4, uint32_t>* buffersEnd, |
| int border, |
| int divisor) |
| : Pass{border} |
| , fBuffer0{buffer0} |
| , fBuffer1{buffer1} |
| , fBuffersEnd{buffersEnd} |
| , fDivider(divisor) {} |
| |
| private: |
| void startBlur() override { |
| skvx::Vec<4, uint32_t>{0u, 0u, 0u, 0u}.store(fSum0); |
| auto half = fDivider.half(); |
| skvx::Vec<4, uint32_t>{half, half, half, half}.store(fSum1); |
| sk_bzero(fBuffer0, (fBuffersEnd - fBuffer0) * sizeof(skvx::Vec<4, uint32_t>)); |
| |
| fBuffer0Cursor = fBuffer0; |
| fBuffer1Cursor = fBuffer1; |
| } |
| |
| // TentPass implements the common two pass box filter approximation of Tent filter, |
| // but combines all both passes into a single pass. This approach is facilitated by two |
| // circular buffers the width of the window which track values for trailing edges of each of |
| // both passes. This allows the algorithm to use more precision in the calculation |
| // because the values are not rounded each pass. And this implementation also avoids a trap |
| // that's easy to fall into resulting in blending in too many zeroes near the edge. |
| // |
| // In general, a window sum has the form: |
| // sum_n+1 = sum_n + leading_edge - trailing_edge. |
| // If instead we do the subtraction at the end of the previous iteration, we can just |
| // calculate the sums instead of having to do the subtractions too. |
| // |
| // In previous iteration: |
| // sum_n+1 = sum_n - trailing_edge. |
| // |
| // In this iteration: |
| // sum_n+1 = sum_n + leading_edge. |
| // |
| // Now we can stack all three sums and do them at once. Sum0 gets its leading edge from the |
| // actual data. Sum1's leading edge is just Sum0, and Sum2's leading edge is Sum1. So, doing the |
| // three passes at the same time has the form: |
| // |
| // sum0_n+1 = sum0_n + leading edge |
| // sum1_n+1 = sum1_n + sum0_n+1 |
| // |
| // sum1_n+1 / window^2 is the new value of the destination pixel. |
| // |
| // Reduce the sums by the trailing edges which were stored in the circular buffers for the |
| // next go around. |
| // |
| // sum1_n+2 = sum1_n+1 - buffer1[i]; |
| // buffer1[i] = sum0; |
| // sum0_n+2 = sum0_n+1 - buffer0[i]; |
| // buffer0[i] = leading edge |
| void blurSegment( |
| int n, const uint32_t* src, int srcStride, uint32_t* dst, int dstStride) override { |
| skvx::Vec<4, uint32_t>* buffer0Cursor = fBuffer0Cursor; |
| skvx::Vec<4, uint32_t>* buffer1Cursor = fBuffer1Cursor; |
| skvx::Vec<4, uint32_t> sum0 = skvx::Vec<4, uint32_t>::Load(fSum0); |
| skvx::Vec<4, uint32_t> sum1 = skvx::Vec<4, uint32_t>::Load(fSum1); |
| |
| // Given an expanded input pixel, move the window ahead using the leadingEdge value. |
| auto processValue = [&](const skvx::Vec<4, uint32_t>& leadingEdge) { |
| sum0 += leadingEdge; |
| sum1 += sum0; |
| |
| skvx::Vec<4, uint32_t> blurred = fDivider.divide(sum1); |
| |
| sum1 -= *buffer1Cursor; |
| *buffer1Cursor = sum0; |
| buffer1Cursor = (buffer1Cursor + 1) < fBuffersEnd ? buffer1Cursor + 1 : fBuffer1; |
| sum0 -= *buffer0Cursor; |
| *buffer0Cursor = leadingEdge; |
| buffer0Cursor = (buffer0Cursor + 1) < fBuffer1 ? buffer0Cursor + 1 : fBuffer0; |
| |
| return skvx::cast<uint8_t>(blurred); |
| }; |
| |
| auto loadEdge = [&](const uint32_t* srcCursor) { |
| return skvx::cast<uint32_t>(skvx::Vec<4, uint8_t>::Load(srcCursor)); |
| }; |
| |
| if (!src && !dst) { |
| while (n --> 0) { |
| (void)processValue(0); |
| } |
| } else if (src && !dst) { |
| while (n --> 0) { |
| (void)processValue(loadEdge(src)); |
| src += srcStride; |
| } |
| } else if (!src && dst) { |
| while (n --> 0) { |
| processValue(0u).store(dst); |
| dst += dstStride; |
| } |
| } else if (src && dst) { |
| while (n --> 0) { |
| processValue(loadEdge(src)).store(dst); |
| src += srcStride; |
| dst += dstStride; |
| } |
| } |
| |
| // Store the state |
| fBuffer0Cursor = buffer0Cursor; |
| fBuffer1Cursor = buffer1Cursor; |
| sum0.store(fSum0); |
| sum1.store(fSum1); |
| } |
| |
| skvx::Vec<4, uint32_t>* const fBuffer0; |
| skvx::Vec<4, uint32_t>* const fBuffer1; |
| skvx::Vec<4, uint32_t>* const fBuffersEnd; |
| const skvx::ScaledDividerU32 fDivider; |
| |
| // blur state |
| char fSum0[sizeof(skvx::Vec<4, uint32_t>)]; |
| char fSum1[sizeof(skvx::Vec<4, uint32_t>)]; |
| skvx::Vec<4, uint32_t>* fBuffer0Cursor; |
| skvx::Vec<4, uint32_t>* fBuffer1Cursor; |
| }; |
| |
| sk_sp<SkSpecialImage> copy_image_with_bounds( |
| const SkImageFilter_Base::Context& ctx, const sk_sp<SkSpecialImage> &input, |
| SkIRect srcBounds, SkIRect dstBounds) { |
| SkBitmap inputBM; |
| if (!input->getROPixels(&inputBM)) { |
| return nullptr; |
| } |
| |
| if (inputBM.colorType() != kN32_SkColorType) { |
| return nullptr; |
| } |
| |
| SkBitmap src; |
| inputBM.extractSubset(&src, srcBounds); |
| |
| // Make everything relative to the destination bounds. |
| srcBounds.offset(-dstBounds.x(), -dstBounds.y()); |
| dstBounds.offset(-dstBounds.x(), -dstBounds.y()); |
| |
| auto srcW = srcBounds.width(), |
| dstW = dstBounds.width(), |
| dstH = dstBounds.height(); |
| |
| SkImageInfo dstInfo = SkImageInfo::Make(dstW, dstH, inputBM.colorType(), inputBM.alphaType()); |
| |
| SkBitmap dst; |
| if (!dst.tryAllocPixels(dstInfo)) { |
| return nullptr; |
| } |
| |
| // There is no blurring to do, but we still need to copy the source while accounting for the |
| // dstBounds. Remember that the src was intersected with the dst. |
| int y = 0; |
| size_t dstWBytes = dstW * sizeof(uint32_t); |
| for (;y < srcBounds.top(); y++) { |
| sk_bzero(dst.getAddr32(0, y), dstWBytes); |
| } |
| |
| for (;y < srcBounds.bottom(); y++) { |
| int x = 0; |
| uint32_t* dstPtr = dst.getAddr32(0, y); |
| for (;x < srcBounds.left(); x++) { |
| *dstPtr++ = 0; |
| } |
| |
| memcpy(dstPtr, src.getAddr32(x - srcBounds.left(), y - srcBounds.top()), |
| srcW * sizeof(uint32_t)); |
| |
| dstPtr += srcW; |
| x += srcW; |
| |
| for (;x < dstBounds.right(); x++) { |
| *dstPtr++ = 0; |
| } |
| } |
| |
| for (;y < dstBounds.bottom(); y++) { |
| sk_bzero(dst.getAddr32(0, y), dstWBytes); |
| } |
| |
| return SkSpecialImage::MakeFromRaster(SkIRect::MakeWH(dstBounds.width(), |
| dstBounds.height()), |
| dst, ctx.surfaceProps()); |
| } |
| |
| // TODO: Implement CPU backend for different fTileMode. |
| sk_sp<SkSpecialImage> cpu_blur( |
| const SkImageFilter_Base::Context& ctx, |
| SkVector sigma, const sk_sp<SkSpecialImage> &input, |
| SkIRect srcBounds, SkIRect dstBounds) { |
| // map_sigma limits sigma to 532 to match 1000px box filter limit of WebKit and Firefox. |
| // Since this does not exceed the limits of the TentPass (2183), there won't be overflow when |
| // computing a kernel over a pixel window filled with 255. |
| static_assert(kMaxSigma <= 2183.0f); |
| |
| SkSTArenaAlloc<1024> alloc; |
| auto makeMaker = [&](double sigma) -> PassMaker* { |
| SkASSERT(0 <= sigma && sigma <= 2183); // should be guaranteed after map_sigma |
| if (PassMaker* maker = GaussPass::MakeMaker(sigma, &alloc)) { |
| return maker; |
| } |
| if (PassMaker* maker = TentPass::MakeMaker(sigma, &alloc)) { |
| return maker; |
| } |
| SK_ABORT("Sigma is out of range."); |
| }; |
| |
| PassMaker* makerX = makeMaker(sigma.x()); |
| PassMaker* makerY = makeMaker(sigma.y()); |
| |
| if (makerX->window() <= 1 && makerY->window() <= 1) { |
| return copy_image_with_bounds(ctx, input, srcBounds, dstBounds); |
| } |
| |
| SkBitmap inputBM; |
| |
| if (!input->getROPixels(&inputBM)) { |
| return nullptr; |
| } |
| |
| if (inputBM.colorType() != kN32_SkColorType) { |
| return nullptr; |
| } |
| |
| SkBitmap src; |
| inputBM.extractSubset(&src, srcBounds); |
| |
| // Make everything relative to the destination bounds. |
| srcBounds.offset(-dstBounds.x(), -dstBounds.y()); |
| dstBounds.offset(-dstBounds.x(), -dstBounds.y()); |
| |
| auto srcW = srcBounds.width(), |
| srcH = srcBounds.height(), |
| dstW = dstBounds.width(), |
| dstH = dstBounds.height(); |
| |
| SkImageInfo dstInfo = inputBM.info().makeWH(dstW, dstH); |
| |
| SkBitmap dst; |
| if (!dst.tryAllocPixels(dstInfo)) { |
| return nullptr; |
| } |
| |
| size_t bufferSizeBytes = std::max(makerX->bufferSizeBytes(), makerY->bufferSizeBytes()); |
| auto buffer = alloc.makeBytesAlignedTo(bufferSizeBytes, alignof(skvx::Vec<4, uint32_t>)); |
| |
| // Basic Plan: The three cases to handle |
| // * Horizontal and Vertical - blur horizontally while copying values from the source to |
| // the destination. Then, do an in-place vertical blur. |
| // * Horizontal only - blur horizontally copying values from the source to the destination. |
| // * Vertical only - blur vertically copying values from the source to the destination. |
| |
| // Default to vertical only blur case. If a horizontal blur is needed, then these values |
| // will be adjusted while doing the horizontal blur. |
| auto intermediateSrc = static_cast<uint32_t *>(src.getPixels()); |
| auto intermediateRowBytesAsPixels = src.rowBytesAsPixels(); |
| auto intermediateWidth = srcW; |
| |
| // Because the border is calculated before the fork of the GPU/CPU path. The border is |
| // the maximum of the two rendering methods. In the case where sigma is zero, then the |
| // src and dst left values are the same. If sigma is small resulting in a window size of |
| // 1, then border calculations add some pixels which will always be zero. Inset the |
| // destination by those zero pixels. This case is very rare. |
| auto intermediateDst = dst.getAddr32(srcBounds.left(), 0); |
| |
| // The following code is executed very rarely, I have never seen it in a real web |
| // page. If sigma is small but not zero then shared GPU/CPU border calculation |
| // code adds extra pixels for the border. Just clear everything to clear those pixels. |
| // This solution is overkill, but very simple. |
| if (makerX->window() == 1 || makerY->window() == 1) { |
| dst.eraseColor(0); |
| } |
| |
| if (makerX->window() > 1) { |
| Pass* pass = makerX->makePass(buffer, &alloc); |
| // Make int64 to avoid overflow in multiplication below. |
| int64_t shift = srcBounds.top() - dstBounds.top(); |
| |
| // For the horizontal blur, starts part way down in anticipation of the vertical blur. |
| // For a vertical sigma of zero shift should be zero. But, for small sigma, |
| // shift may be > 0 but the vertical window could be 1. |
| intermediateSrc = static_cast<uint32_t *>(dst.getPixels()) |
| + (shift > 0 ? shift * dst.rowBytesAsPixels() : 0); |
| intermediateRowBytesAsPixels = dst.rowBytesAsPixels(); |
| intermediateWidth = dstW; |
| intermediateDst = static_cast<uint32_t *>(dst.getPixels()); |
| |
| const uint32_t* srcCursor = static_cast<uint32_t*>(src.getPixels()); |
| uint32_t* dstCursor = intermediateSrc; |
| for (auto y = 0; y < srcH; y++) { |
| pass->blur(srcBounds.left(), srcBounds.right(), dstBounds.right(), |
| srcCursor, 1, dstCursor, 1); |
| srcCursor += src.rowBytesAsPixels(); |
| dstCursor += intermediateRowBytesAsPixels; |
| } |
| } |
| |
| if (makerY->window() > 1) { |
| Pass* pass = makerY->makePass(buffer, &alloc); |
| const uint32_t* srcCursor = intermediateSrc; |
| uint32_t* dstCursor = intermediateDst; |
| for (auto x = 0; x < intermediateWidth; x++) { |
| pass->blur(srcBounds.top(), srcBounds.bottom(), dstBounds.bottom(), |
| srcCursor, intermediateRowBytesAsPixels, |
| dstCursor, dst.rowBytesAsPixels()); |
| srcCursor += 1; |
| dstCursor += 1; |
| } |
| } |
| |
| return SkSpecialImage::MakeFromRaster(SkIRect::MakeWH(dstBounds.width(), |
| dstBounds.height()), |
| dst, ctx.surfaceProps()); |
| } |
| } // namespace |
| |
| // Blurs the image produced by input 0. Returns nullptr when that input yields no image, when the |
| // crop rect cannot be applied, when the input does not intersect the cropped destination bounds, |
| // or when the blur itself returns nullptr. *offset is written only for a non-null result. |
| sk_sp<SkSpecialImage> SkBlurImageFilter::onFilterImage(const Context& ctx, |
|                                                        SkIPoint* offset) const { |
|     SkIPoint inputOrigin = SkIPoint::Make(0, 0); |
|     sk_sp<SkSpecialImage> input(this->filterInput(0, ctx, &inputOrigin)); |
|     if (!input) { |
|         return nullptr; |
|     } |
| |
|     // Bounds of the input image, positioned at the origin that was handed to filterInput(). |
|     SkIRect srcRect = SkIRect::MakeXYWH(inputOrigin.fX, inputOrigin.fY, |
|                                         input->width(), input->height()); |
| |
|     // Compute the destination bounds from the crop rect, then trim the source to them. Bail |
|     // out if either step fails. |
|     SkIRect dstRect; |
|     if (!this->applyCropRect(this->mapContext(ctx), srcRect, &dstRect) || |
|         !srcRect.intersect(dstRect)) { |
|         return nullptr; |
|     } |
| |
|     // Remember the destination's top-left before both rectangles are made relative to |
|     // inputOrigin. |
|     SkIPoint resultOrigin = dstRect.topLeft(); |
|     srcRect.offset(-inputOrigin); |
|     dstRect.offset(-inputOrigin); |
| |
|     SkVector sigma = map_sigma(fSigma, ctx.ctm()); |
|     SkASSERT(SkScalarIsFinite(sigma.x()) && sigma.x() >= 0.f && sigma.x() <= kMaxSigma && |
|              SkScalarIsFinite(sigma.y()) && sigma.y() >= 0.f && sigma.y() <= kMaxSigma); |
| |
|     // Passes the blurred image through, publishing resultOrigin only if the blur succeeded. |
|     const auto finish = [&](sk_sp<SkSpecialImage> blurred) { |
|         if (blurred) { |
|             *offset = resultOrigin; |
|         } |
|         return blurred; |
|     }; |
| |
| #if defined(SK_GANESH) |
|     if (ctx.gpuBacked()) { |
|         // Ensure the input is in the destination's gamut so the filter itself does not have |
|         // to do the xform. gpuFilter() is given &resultOrigin and may overwrite it. |
|         input = ImageToColorSpace(input.get(), ctx.colorType(), ctx.colorSpace(), |
|                                   ctx.surfaceProps()); |
|         return finish(this->gpuFilter(ctx, sigma, input, srcRect, dstRect, inputOrigin, |
|                                       &resultOrigin)); |
|     } |
| #endif |
| |
|     return finish(cpu_blur(ctx, sigma, input, srcRect, dstRect)); |
| } |
| |
| #if defined(SK_GANESH) |
| // GPU blur path. When both sigmas are effectively zero, returns the inputBounds subset of input |
| // and writes inputBounds' top-left plus inputOffset to *offset. Otherwise runs |
| // SkGpuBlurUtils::GaussianBlur and wraps the resulting surface in a special image sized to |
| // dstBounds; *offset is not modified on that path. Returns nullptr if the input has no proxy or |
| // the blur produces no surface draw context. |
| sk_sp<SkSpecialImage> SkBlurImageFilter::gpuFilter( |
|         const Context& ctx, SkVector sigma, const sk_sp<SkSpecialImage> &input, SkIRect inputBounds, |
|         SkIRect dstBounds, SkIPoint inputOffset, SkIPoint* offset) const { |
|     // The y check is only evaluated when the x check passes. |
|     const bool nothingToBlur = SkGpuBlurUtils::IsEffectivelyZeroSigma(sigma.x()) && |
|                                SkGpuBlurUtils::IsEffectivelyZeroSigma(sigma.y()); |
|     if (nothingToBlur) { |
|         *offset = SkIPoint::Make(inputBounds.x() + inputOffset.fX, |
|                                  inputBounds.y() + inputOffset.fY); |
|         return input->makeSubset(inputBounds); |
|     } |
| |
|     auto rContext = ctx.getContext(); |
| |
|     GrSurfaceProxyView inputView = input->view(rContext); |
|     if (!inputView.proxy()) { |
|         return nullptr; |
|     } |
|     SkASSERT(inputView.asTextureProxy()); |
| |
|     // Shift both rectangles by the top-left of the input's subset before handing them to the |
|     // blur. |
|     const SkIPoint subsetOrigin = input->subset().topLeft(); |
|     dstBounds.offset(subsetOrigin); |
|     inputBounds.offset(subsetOrigin); |
| |
|     auto blurSdc = SkGpuBlurUtils::GaussianBlur(rContext, |
|                                                 std::move(inputView), |
|                                                 SkColorTypeToGrColorType(input->colorType()), |
|                                                 input->alphaType(), |
|                                                 ctx.refColorSpace(), |
|                                                 dstBounds, |
|                                                 inputBounds, |
|                                                 sigma.x(), |
|                                                 sigma.y(), |
|                                                 fTileMode); |
|     if (!blurSdc) { |
|         return nullptr; |
|     } |
| |
|     // The returned image covers a (0, 0)-based rectangle with dstBounds' width and height. |
|     const SkIRect resultSubset = SkIRect::MakeWH(dstBounds.width(), dstBounds.height()); |
|     return SkSpecialImage::MakeDeferredFromGpu(rContext, |
|                                                resultSubset, |
|                                                kNeedNewImageUniqueID_SpecialImage, |
|                                                blurSdc->readSurfaceView(), |
|                                                blurSdc->colorInfo(), |
|                                                ctx.surfaceProps()); |
| } |
| #endif |
| |
| // Returns the fast bounds of input 0 (or src itself when there is no input 0), outset by three |
| // times fSigma in each dimension. |
| SkRect SkBlurImageFilter::computeFastBounds(const SkRect& src) const { |
|     SkRect blurred = src; |
|     if (const SkImageFilter* upstream = this->getInput(0)) { |
|         blurred = upstream->computeFastBounds(src); |
|     } |
|     blurred.outset(3 * fSigma.width(), 3 * fSigma.height()); |
|     return blurred; |
| } |
| |
| // Returns src outset per axis by ceil(3 * sigma), where sigma is fSigma mapped through ctm by |
| // map_sigma(). The MapDirection argument and inputRect are not consulted. |
| SkIRect SkBlurImageFilter::onFilterNodeBounds(const SkIRect& src, const SkMatrix& ctm, |
|                                               MapDirection, const SkIRect* inputRect) const { |
|     const SkVector mapped = map_sigma(fSigma, ctm); |
|     const int padX = SkScalarCeilToInt(3 * mapped.x()); |
|     const int padY = SkScalarCeilToInt(3 * mapped.y()); |
|     return src.makeOutset(padX, padY); |
| } |