Make small epsilons more rigorous for gpu gaussian blurs

This also has several other bug fixes and refactorings within it that
I realized were possible while updating everywhere that had checked
sigma > 0 to be sigma > kEffectivelyZeroSigma.

The big things are that SkBlurPriv.h goes away and its functions are
just moved into SkGpuBlurUtils since they were only used by the GPU.
The implementations of those functions are also collected into
SkGpuBlurUtils.cpp.  I removed the GrMatrixConvolution::MakeGaussian,
in favor of SkGpuBlurUtils filling in the kernel itself and then calling
the regular Make. This let me consolidate two different 1D kernel
computing functions, and remove the 1D fallback code from the 2D kernel
calculation because GaussianBlur() can detect that earlier.

The new GM, BlurSmallSigma, originally drew incorrectly on the GPU
backend because its small but non-zero sigma would trick the sigma > 0
checks in various places so we'd do a full 2 pass X/Y blur. However,
when the sigma was too small, the kernel was just filled with 0s so the
Y pass would effectively clear everything. While I could have just fixed
that to be a [0, 1, 0] kernel, updating the blur pipeline to compare
against integer radii seems more robust.

Change-Id: I3c41e0235a27615a9056b25e627ffedd995264bd
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/328797
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
diff --git a/gm/blurs.cpp b/gm/blurs.cpp
index f79188f..8ca356f 100644
--- a/gm/blurs.cpp
+++ b/gm/blurs.cpp
@@ -18,7 +18,7 @@
 #include "include/core/SkScalar.h"
 #include "include/core/SkTypeface.h"
 #include "include/core/SkTypes.h"
-#include "include/effects/SkBlurImageFilter.h"
+#include "include/effects/SkImageFilters.h"
 #include "src/core/SkBlurMask.h"
 #include "tools/Resources.h"
 #include "tools/ToolUtils.h"
@@ -134,7 +134,16 @@
 DEF_SIMPLE_GM(BlurBigSigma, canvas, 1024, 1024) {
     SkPaint layerPaint, p;
 
-    p.setImageFilter(SkBlurImageFilter::Make(500, 500, nullptr));
+    p.setImageFilter(SkImageFilters::Blur(500, 500, nullptr));
 
     canvas->drawRect(SkRect::MakeWH(700, 800), p);
 }
+
+DEF_SIMPLE_GM(BlurSmallSigma, canvas, 256, 256) {
+    // Normal sigma on x-axis, a small but non-zero sigma on y-axis that should
+    // be treated as identity.
+
+    SkPaint paint;
+    paint.setImageFilter(SkImageFilters::Blur(16.f, 1e-5f, nullptr));
+    canvas->drawRect(SkRect::MakeLTRB(64, 64, 192, 192), paint);
+}
diff --git a/src/core/SkBlurMF.cpp b/src/core/SkBlurMF.cpp
index 48e1601..d197ca7 100644
--- a/src/core/SkBlurMF.cpp
+++ b/src/core/SkBlurMF.cpp
@@ -11,7 +11,6 @@
 #include "include/core/SkStrokeRec.h"
 #include "include/core/SkVertices.h"
 #include "src/core/SkBlurMask.h"
-#include "src/core/SkBlurPriv.h"
 #include "src/core/SkGpuBlurUtils.h"
 #include "src/core/SkMaskFilterBase.h"
 #include "src/core/SkMathPriv.h"
@@ -117,117 +116,6 @@
 
 const SkScalar SkBlurMaskFilterImpl::kMAX_BLUR_SIGMA = SkIntToScalar(128);
 
-bool SkComputeBlurredRRectParams(const SkRRect& srcRRect, const SkRRect& devRRect,
-                                 SkScalar sigma, SkScalar xformedSigma,
-                                 SkRRect* rrectToDraw,
-                                 SkISize* widthHeight,
-                                 SkScalar rectXs[kSkBlurRRectMaxDivisions],
-                                 SkScalar rectYs[kSkBlurRRectMaxDivisions],
-                                 SkScalar texXs[kSkBlurRRectMaxDivisions],
-                                 SkScalar texYs[kSkBlurRRectMaxDivisions]) {
-    unsigned int devBlurRadius = 3*SkScalarCeilToInt(xformedSigma-1/6.0f);
-    SkScalar srcBlurRadius = 3.0f * sigma;
-
-    const SkRect& devOrig = devRRect.getBounds();
-    const SkVector& devRadiiUL = devRRect.radii(SkRRect::kUpperLeft_Corner);
-    const SkVector& devRadiiUR = devRRect.radii(SkRRect::kUpperRight_Corner);
-    const SkVector& devRadiiLR = devRRect.radii(SkRRect::kLowerRight_Corner);
-    const SkVector& devRadiiLL = devRRect.radii(SkRRect::kLowerLeft_Corner);
-
-    const int devLeft  = SkScalarCeilToInt(std::max<SkScalar>(devRadiiUL.fX, devRadiiLL.fX));
-    const int devTop   = SkScalarCeilToInt(std::max<SkScalar>(devRadiiUL.fY, devRadiiUR.fY));
-    const int devRight = SkScalarCeilToInt(std::max<SkScalar>(devRadiiUR.fX, devRadiiLR.fX));
-    const int devBot   = SkScalarCeilToInt(std::max<SkScalar>(devRadiiLL.fY, devRadiiLR.fY));
-
-    // This is a conservative check for nine-patchability
-    if (devOrig.fLeft + devLeft + devBlurRadius >= devOrig.fRight  - devRight - devBlurRadius ||
-        devOrig.fTop  + devTop  + devBlurRadius >= devOrig.fBottom - devBot   - devBlurRadius) {
-        return false;
-    }
-
-    const SkVector& srcRadiiUL = srcRRect.radii(SkRRect::kUpperLeft_Corner);
-    const SkVector& srcRadiiUR = srcRRect.radii(SkRRect::kUpperRight_Corner);
-    const SkVector& srcRadiiLR = srcRRect.radii(SkRRect::kLowerRight_Corner);
-    const SkVector& srcRadiiLL = srcRRect.radii(SkRRect::kLowerLeft_Corner);
-
-    const SkScalar srcLeft  = std::max<SkScalar>(srcRadiiUL.fX, srcRadiiLL.fX);
-    const SkScalar srcTop   = std::max<SkScalar>(srcRadiiUL.fY, srcRadiiUR.fY);
-    const SkScalar srcRight = std::max<SkScalar>(srcRadiiUR.fX, srcRadiiLR.fX);
-    const SkScalar srcBot   = std::max<SkScalar>(srcRadiiLL.fY, srcRadiiLR.fY);
-
-    int newRRWidth = 2*devBlurRadius + devLeft + devRight + 1;
-    int newRRHeight = 2*devBlurRadius + devTop + devBot + 1;
-    widthHeight->fWidth = newRRWidth + 2 * devBlurRadius;
-    widthHeight->fHeight = newRRHeight + 2 * devBlurRadius;
-
-    const SkRect srcProxyRect = srcRRect.getBounds().makeOutset(srcBlurRadius, srcBlurRadius);
-
-    rectXs[0] = srcProxyRect.fLeft;
-    rectXs[1] = srcProxyRect.fLeft + 2*srcBlurRadius + srcLeft;
-    rectXs[2] = srcProxyRect.fRight - 2*srcBlurRadius - srcRight;
-    rectXs[3] = srcProxyRect.fRight;
-
-    rectYs[0] = srcProxyRect.fTop;
-    rectYs[1] = srcProxyRect.fTop + 2*srcBlurRadius + srcTop;
-    rectYs[2] = srcProxyRect.fBottom - 2*srcBlurRadius - srcBot;
-    rectYs[3] = srcProxyRect.fBottom;
-
-    texXs[0] = 0.0f;
-    texXs[1] = 2.0f*devBlurRadius + devLeft;
-    texXs[2] = 2.0f*devBlurRadius + devLeft + 1;
-    texXs[3] = SkIntToScalar(widthHeight->fWidth);
-
-    texYs[0] = 0.0f;
-    texYs[1] = 2.0f*devBlurRadius + devTop;
-    texYs[2] = 2.0f*devBlurRadius + devTop + 1;
-    texYs[3] = SkIntToScalar(widthHeight->fHeight);
-
-    const SkRect newRect = SkRect::MakeXYWH(SkIntToScalar(devBlurRadius),
-                                            SkIntToScalar(devBlurRadius),
-                                            SkIntToScalar(newRRWidth),
-                                            SkIntToScalar(newRRHeight));
-    SkVector newRadii[4];
-    newRadii[0] = { SkScalarCeilToScalar(devRadiiUL.fX), SkScalarCeilToScalar(devRadiiUL.fY) };
-    newRadii[1] = { SkScalarCeilToScalar(devRadiiUR.fX), SkScalarCeilToScalar(devRadiiUR.fY) };
-    newRadii[2] = { SkScalarCeilToScalar(devRadiiLR.fX), SkScalarCeilToScalar(devRadiiLR.fY) };
-    newRadii[3] = { SkScalarCeilToScalar(devRadiiLL.fX), SkScalarCeilToScalar(devRadiiLL.fY) };
-
-    rrectToDraw->setRectRadii(newRect, newRadii);
-    return true;
-}
-
-// TODO: it seems like there should be some synergy with SkBlurMask::ComputeBlurProfile
-// TODO: maybe cache this on the cpu side?
-int SkCreateIntegralTable(float sixSigma, SkBitmap* table) {
-    // The texture we're producing represents the integral of a normal distribution over a
-    // six-sigma range centered at zero. We want enough resolution so that the linear
-    // interpolation done in texture lookup doesn't introduce noticeable artifacts. We
-    // conservatively choose to have 2 texels for each dst pixel.
-    int minWidth = 2 * sk_float_ceil2int(sixSigma);
-    // Bin by powers of 2 with a minimum so we get good profile reuse.
-    int width = std::max(SkNextPow2(minWidth), 32);
-
-    if (!table) {
-        return width;
-    }
-
-    if (!table->tryAllocPixels(SkImageInfo::MakeA8(width, 1))) {
-        return 0;
-    }
-    *table->getAddr8(0, 0) = 255;
-    const float invWidth = 1.f / width;
-    for (int i = 1; i < width - 1; ++i) {
-        float x = (i + 0.5f) * invWidth;
-        x = (-6 * x + 3) * SK_ScalarRoot2Over2;
-        float integral = 0.5f * (std::erf(x) + 1.f);
-        *table->getAddr8(i, 0) = SkToU8(sk_float_round2int(255.f * integral));
-    }
-
-    *table->getAddr8(width - 1, 0) = 0;
-    table->setImmutable();
-    return table->width();
-}
-
 ///////////////////////////////////////////////////////////////////////////////
 
 SkBlurMaskFilterImpl::SkBlurMaskFilterImpl(SkScalar sigma, SkBlurStyle style, bool respectCTM)
@@ -701,8 +589,9 @@
     }
 
     SkScalar xformedSigma = this->computeXformedSigma(viewMatrix);
-    if (xformedSigma <= 0) {
-        return false;
+    if (SkGpuBlurUtils::IsEffectivelyZeroSigma(xformedSigma)) {
+        renderTargetContext->drawShape(clip, std::move(paint), GrAA::kYes, viewMatrix, shape);
+        return true;
     }
 
     SkRRect srcRRect;
@@ -821,9 +710,9 @@
                                             const SkMatrix& ctm,
                                             SkIRect* maskRect) const {
     SkScalar xformedSigma = this->computeXformedSigma(ctm);
-    if (xformedSigma <= 0) {
-        maskRect->setEmpty();
-        return false;
+    if (SkGpuBlurUtils::IsEffectivelyZeroSigma(xformedSigma)) {
+        *maskRect = devSpaceShapeBounds;
+        return maskRect->intersect(clipBounds);
     }
 
     if (maskRect) {
@@ -862,7 +751,6 @@
     const SkIRect clipRect = SkIRect::MakeWH(maskRect.width(), maskRect.height());
 
     SkScalar xformedSigma = this->computeXformedSigma(ctm);
-    SkASSERT(xformedSigma > 0);
 
     // If we're doing a normal blur, we can clobber the pathTexture in the
     // gaussianBlur.  Otherwise, we need to save it for later compositing.
diff --git a/src/core/SkBlurPriv.h b/src/core/SkBlurPriv.h
deleted file mode 100644
index 07b6ebc..0000000
--- a/src/core/SkBlurPriv.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright 2018 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#ifndef SkBlurPriv_DEFINED
-#define SkBlurPriv_DEFINED
-
-#include "include/core/SkRRect.h"
-#include "include/core/SkSize.h"
-
-static const int kSkBlurRRectMaxDivisions = 6;
-
-// This method computes all the parameters for drawing a partially occluded nine-patched
-// blurred rrect mask:
-//   rrectToDraw - the integerized rrect to draw in the mask
-//   widthHeight - how large to make the mask (rrectToDraw will be centered in this coord sys)
-//   rectXs, rectYs - the x & y coordinates of the covering geometry lattice
-//   texXs, texYs - the texture coordinate at each point in rectXs & rectYs
-// It returns true if 'devRRect' is nine-patchable
-bool SkComputeBlurredRRectParams(const SkRRect& srcRRect, const SkRRect& devRRect,
-                                 SkScalar sigma, SkScalar xformedSigma,
-                                 SkRRect* rrectToDraw,
-                                 SkISize* widthHeight,
-                                 SkScalar rectXs[kSkBlurRRectMaxDivisions],
-                                 SkScalar rectYs[kSkBlurRRectMaxDivisions],
-                                 SkScalar texXs[kSkBlurRRectMaxDivisions],
-                                 SkScalar texYs[kSkBlurRRectMaxDivisions]);
-
-int SkCreateIntegralTable(float sixSigma, SkBitmap* table);
-
-void SkFillIn1DGaussianKernel(float* kernel, float gaussianSigma, int radius);
-
-extern void sk_register_blur_maskfilter_createproc();
-
-#endif
diff --git a/src/core/SkGpuBlurUtils.cpp b/src/core/SkGpuBlurUtils.cpp
index c17134b70..08f7bab 100644
--- a/src/core/SkGpuBlurUtils.cpp
+++ b/src/core/SkGpuBlurUtils.cpp
@@ -13,7 +13,6 @@
 #include "include/gpu/GrRecordingContext.h"
 #include "src/gpu/GrCaps.h"
 #include "src/gpu/GrRecordingContextPriv.h"
-#include "src/gpu/GrRenderTargetContext.h"
 #include "src/gpu/GrRenderTargetContextPriv.h"
 #include "src/gpu/effects/GrGaussianConvolutionFragmentProcessor.h"
 #include "src/gpu/effects/GrMatrixConvolutionEffect.h"
@@ -24,9 +23,40 @@
 
 using Direction = GrGaussianConvolutionFragmentProcessor::Direction;
 
-static int sigma_radius(float sigma) {
-    SkASSERT(sigma >= 0);
-    return static_cast<int>(ceilf(sigma * 3.0f));
+static void fill_in_2D_gaussian_kernel(float* kernel, int width, int height,
+                                       SkScalar sigmaX, SkScalar sigmaY) {
+    const float twoSigmaSqrdX = 2.0f * SkScalarToFloat(SkScalarSquare(sigmaX));
+    const float twoSigmaSqrdY = 2.0f * SkScalarToFloat(SkScalarSquare(sigmaY));
+
+    // SkGpuBlurUtils::GaussianBlur() should have detected the cases where a 2D blur
+    // degenerates to a 1D on X or Y, or to the identity.
+    SkASSERT(!SkGpuBlurUtils::IsEffectivelyZeroSigma(sigmaX) &&
+             !SkGpuBlurUtils::IsEffectivelyZeroSigma(sigmaY));
+    SkASSERT(!SkScalarNearlyZero(twoSigmaSqrdX) && !SkScalarNearlyZero(twoSigmaSqrdY));
+
+    const float sigmaXDenom = 1.0f / twoSigmaSqrdX;
+    const float sigmaYDenom = 1.0f / twoSigmaSqrdY;
+    const int xRadius = width / 2;
+    const int yRadius = height / 2;
+
+    float sum = 0.0f;
+    for (int x = 0; x < width; x++) {
+        float xTerm = static_cast<float>(x - xRadius);
+        xTerm = xTerm * xTerm * sigmaXDenom;
+        for (int y = 0; y < height; y++) {
+            float yTerm = static_cast<float>(y - yRadius);
+            float xyTerm = sk_float_exp(-(xTerm + yTerm * yTerm * sigmaYDenom));
+            // Note that the constant term (1/(sqrt(2*pi*sigma^2)) of the Gaussian
+            // is dropped here, since we renormalize the kernel below.
+            kernel[y * width + x] = xyTerm;
+            sum += xyTerm;
+        }
+    }
+    // Normalize the kernel
+    float scale = 1.0f / sum;
+    for (int i = 0; i < width * height; ++i) {
+        kernel[i] *= scale;
+    }
 }
 
 /**
@@ -43,9 +73,15 @@
                                  int radius,
                                  float sigma,
                                  SkTileMode mode) {
+    SkASSERT(radius && !SkGpuBlurUtils::IsEffectivelyZeroSigma(sigma));
     GrPaint paint;
     auto wm = SkTileModeToWrapMode(mode);
     auto srcRect = rtcRect.makeOffset(rtcToSrcOffset);
+
+    // NOTE: This could just be GrMatrixConvolutionEffect with one of the dimensions set to 1
+    // and the appropriate kernel already computed, but there's value in keeping the shader simpler.
+    // TODO(michaelludwig): Is this true? If not, is the shader key simplicity worth it to have
+    // two convolution effects?
     std::unique_ptr<GrFragmentProcessor> conv(GrGaussianConvolutionFragmentProcessor::Make(
             std::move(srcView), srcAlphaType, direction, radius, sigma, wm, srcSubset, &srcRect,
             *renderTargetContext->caps()));
@@ -67,6 +103,9 @@
                                                                    SkTileMode mode,
                                                                    sk_sp<SkColorSpace> finalCS,
                                                                    SkBackingFit dstFit) {
+    SkASSERT(radiusX && radiusY);
+    SkASSERT(!SkGpuBlurUtils::IsEffectivelyZeroSigma(sigmaX) &&
+             !SkGpuBlurUtils::IsEffectivelyZeroSigma(sigmaY));
     auto renderTargetContext = GrRenderTargetContext::Make(
             context, srcColorType, std::move(finalCS), dstFit, dstBounds.size(), 1,
             GrMipmapped::kNo, srcView.proxy()->isProtected(), srcView.origin());
@@ -74,14 +113,21 @@
         return nullptr;
     }
 
-    SkISize size = SkISize::Make(2 * radiusX + 1,  2 * radiusY + 1);
+    SkISize size = SkISize::Make(SkGpuBlurUtils::KernelWidth(radiusX),
+                                 SkGpuBlurUtils::KernelWidth(radiusY));
     SkIPoint kernelOffset = SkIPoint::Make(radiusX, radiusY);
     GrPaint paint;
     auto wm = SkTileModeToWrapMode(mode);
-    auto conv = GrMatrixConvolutionEffect::MakeGaussian(context, std::move(srcView), srcBounds,
-                                                        size, 1.0, 0.0, kernelOffset, wm, true,
-                                                        sigmaX, sigmaY,
-                                                        *renderTargetContext->caps());
+
+    // GaussianBlur() should have downsampled the request until we can handle the 2D blur with
+    // just a uniform array.
+    SkASSERT(size.area() <= GrMatrixConvolutionEffect::kMaxUniformSize);
+    float kernel[GrMatrixConvolutionEffect::kMaxUniformSize];
+    fill_in_2D_gaussian_kernel(kernel, size.width(), size.height(), sigmaX, sigmaY);
+    auto conv = GrMatrixConvolutionEffect::Make(context, std::move(srcView), srcBounds,
+                                                size, kernel, 1.0f, 0.0f, kernelOffset, wm, true,
+                                                *renderTargetContext->caps());
+
     paint.setColorFragmentProcessor(std::move(conv));
     paint.setPorterDuffXPFactory(SkBlendMode::kSrc);
 
@@ -106,6 +152,8 @@
                                                                 SkTileMode mode,
                                                                 sk_sp<SkColorSpace> finalCS,
                                                                 SkBackingFit fit) {
+    using namespace SkGpuBlurUtils;
+    SkASSERT(radius > 0 && !SkGpuBlurUtils::IsEffectivelyZeroSigma(sigma));
     // Logically we're creating an infinite blur of 'srcBounds' of 'srcView' with 'mode' tiling
     // and then capturing the 'dstBounds' portion in a new RTC where the top left of 'dstBounds' is
     // at {0, 0} in the new RTC.
@@ -290,16 +338,16 @@
                                                                 int radiusY,
                                                                 SkTileMode mode,
                                                                 SkBackingFit fit) {
-    SkASSERT(sigmaX || sigmaY);
+    SkASSERT(radiusX || radiusY);
     std::unique_ptr<GrRenderTargetContext> dstRenderTargetContext;
-    if (sigmaX > 0.0f) {
-        SkBackingFit xFit = sigmaY > 0 ? SkBackingFit::kApprox : fit;
+    if (radiusX > 0) {
+        SkBackingFit xFit = radiusY > 0 ? SkBackingFit::kApprox : fit;
         // Expand the dstBounds vertically to produce necessary content for the y-pass. Then we will
         // clip these in a tile-mode dependent way to ensure the tile-mode gets implemented
         // correctly. However, if we're not going to do a y-pass then we must use the original
         // dstBounds without clipping to produce the correct output size.
         SkIRect xPassDstBounds = dstBounds;
-        if (sigmaY) {
+        if (radiusY) {
             xPassDstBounds.outset(0, radiusY);
             if (mode == SkTileMode::kRepeat || mode == SkTileMode::kMirror) {
                 int srcH = srcBounds.height();
@@ -376,7 +424,7 @@
         srcBounds = SkIRect::MakeSize(xPassDstBounds.size());
     }
 
-    if (sigmaY == 0.0f) {
+    if (!radiusY) {
         return dstRenderTargetContext;
     }
 
@@ -409,12 +457,12 @@
         return nullptr;
     }
 
+    int radiusX = SigmaRadius(sigmaX);
+    int radiusY = SigmaRadius(sigmaY);
     // Attempt to reduce the srcBounds in order to detect that we can set the sigmas to zero or
     // to reduce the amount of work to rescale the source if sigmas are large. TODO: Could consider
     // how to minimize the required source bounds for repeat/mirror modes.
     if (mode == SkTileMode::kClamp || mode == SkTileMode::kDecal) {
-        int radiusX = sigma_radius(sigmaX);
-        int radiusY = sigma_radius(sigmaY);
         SkIRect reach = dstBounds.makeOutset(radiusX, radiusY);
         SkIRect intersection;
         if (!intersection.intersect(reach, srcBounds)) {
@@ -443,17 +491,20 @@
         // column/row yields that same color. So no blurring is necessary.
         if (srcBounds.width() == 1) {
             sigmaX = 0.f;
+            radiusX = 0;
         }
         if (srcBounds.height() == 1) {
             sigmaY = 0.f;
+            radiusY = 0;
         }
     }
 
     // If we determined that there is no blurring necessary in either direction then just do a
     // a draw that applies the tile mode.
-    if (!sigmaX && !sigmaY) {
+    if (!radiusX && !radiusY) {
         auto result = GrRenderTargetContext::Make(context, srcColorType, std::move(colorSpace), fit,
-                                                  dstBounds.size());
+                                                  dstBounds.size(), 1, GrMipmapped::kNo,
+                                                  srcView.proxy()->isProtected(), srcView.origin());
         GrSamplerState sampler(SkTileModeToWrapMode(mode), GrSamplerState::Filter::kNearest);
         auto fp = GrTextureEffect::MakeSubset(std::move(srcView), srcAlphaType, SkMatrix::I(),
                                               sampler, SkRect::Make(srcBounds),
@@ -466,19 +517,20 @@
     }
 
     if (sigmaX <= MAX_BLUR_SIGMA && sigmaY <= MAX_BLUR_SIGMA) {
-        int radiusX = sigma_radius(sigmaX);
-        int radiusY = sigma_radius(sigmaY);
         SkASSERT(radiusX <= GrGaussianConvolutionFragmentProcessor::kMaxKernelRadius);
         SkASSERT(radiusY <= GrGaussianConvolutionFragmentProcessor::kMaxKernelRadius);
         // For really small blurs (certainly no wider than 5x5 on desktop GPUs) it is faster to just
         // launch a single non separable kernel vs two launches.
         const int kernelSize = (2 * radiusX + 1) * (2 * radiusY + 1);
-        if (sigmaX > 0 && sigmaY > 0 && kernelSize <= GrMatrixConvolutionEffect::kMaxUniformSize) {
+        if (radiusX > 0 && radiusY > 0 &&
+            kernelSize <= GrMatrixConvolutionEffect::kMaxUniformSize) {
             // Apply the proxy offset to src bounds and offset directly
             return convolve_gaussian_2d(context, std::move(srcView), srcColorType, srcBounds,
                                         dstBounds, radiusX, radiusY, sigmaX, sigmaY, mode,
                                         std::move(colorSpace), fit);
         }
+        // This will automatically degenerate into a single pass of X or Y if only one of the
+        // radii is non-zero.
         return two_pass_gaussian(context, std::move(srcView), srcColorType, srcAlphaType,
                                  std::move(colorSpace), srcBounds, dstBounds, sigmaX, sigmaY,
                                  radiusX, radiusY, mode, fit);
@@ -535,6 +587,149 @@
     return reexpand(context, std::move(rtc), scaledDstBounds, dstBounds.size(),
                     std::move(colorSpace), fit);
 }
+
+bool ComputeBlurredRRectParams(const SkRRect& srcRRect, const SkRRect& devRRect,
+                               SkScalar sigma, SkScalar xformedSigma,
+                               SkRRect* rrectToDraw,
+                               SkISize* widthHeight,
+                               SkScalar rectXs[kBlurRRectMaxDivisions],
+                               SkScalar rectYs[kBlurRRectMaxDivisions],
+                               SkScalar texXs[kBlurRRectMaxDivisions],
+                               SkScalar texYs[kBlurRRectMaxDivisions]) {
+    unsigned int devBlurRadius = 3*SkScalarCeilToInt(xformedSigma-1/6.0f);
+    SkScalar srcBlurRadius = 3.0f * sigma;
+
+    const SkRect& devOrig = devRRect.getBounds();
+    const SkVector& devRadiiUL = devRRect.radii(SkRRect::kUpperLeft_Corner);
+    const SkVector& devRadiiUR = devRRect.radii(SkRRect::kUpperRight_Corner);
+    const SkVector& devRadiiLR = devRRect.radii(SkRRect::kLowerRight_Corner);
+    const SkVector& devRadiiLL = devRRect.radii(SkRRect::kLowerLeft_Corner);
+
+    const int devLeft  = SkScalarCeilToInt(std::max<SkScalar>(devRadiiUL.fX, devRadiiLL.fX));
+    const int devTop   = SkScalarCeilToInt(std::max<SkScalar>(devRadiiUL.fY, devRadiiUR.fY));
+    const int devRight = SkScalarCeilToInt(std::max<SkScalar>(devRadiiUR.fX, devRadiiLR.fX));
+    const int devBot   = SkScalarCeilToInt(std::max<SkScalar>(devRadiiLL.fY, devRadiiLR.fY));
+
+    // This is a conservative check for nine-patchability
+    if (devOrig.fLeft + devLeft + devBlurRadius >= devOrig.fRight  - devRight - devBlurRadius ||
+        devOrig.fTop  + devTop  + devBlurRadius >= devOrig.fBottom - devBot   - devBlurRadius) {
+        return false;
+    }
+
+    const SkVector& srcRadiiUL = srcRRect.radii(SkRRect::kUpperLeft_Corner);
+    const SkVector& srcRadiiUR = srcRRect.radii(SkRRect::kUpperRight_Corner);
+    const SkVector& srcRadiiLR = srcRRect.radii(SkRRect::kLowerRight_Corner);
+    const SkVector& srcRadiiLL = srcRRect.radii(SkRRect::kLowerLeft_Corner);
+
+    const SkScalar srcLeft  = std::max<SkScalar>(srcRadiiUL.fX, srcRadiiLL.fX);
+    const SkScalar srcTop   = std::max<SkScalar>(srcRadiiUL.fY, srcRadiiUR.fY);
+    const SkScalar srcRight = std::max<SkScalar>(srcRadiiUR.fX, srcRadiiLR.fX);
+    const SkScalar srcBot   = std::max<SkScalar>(srcRadiiLL.fY, srcRadiiLR.fY);
+
+    int newRRWidth = 2*devBlurRadius + devLeft + devRight + 1;
+    int newRRHeight = 2*devBlurRadius + devTop + devBot + 1;
+    widthHeight->fWidth = newRRWidth + 2 * devBlurRadius;
+    widthHeight->fHeight = newRRHeight + 2 * devBlurRadius;
+
+    const SkRect srcProxyRect = srcRRect.getBounds().makeOutset(srcBlurRadius, srcBlurRadius);
+
+    rectXs[0] = srcProxyRect.fLeft;
+    rectXs[1] = srcProxyRect.fLeft + 2*srcBlurRadius + srcLeft;
+    rectXs[2] = srcProxyRect.fRight - 2*srcBlurRadius - srcRight;
+    rectXs[3] = srcProxyRect.fRight;
+
+    rectYs[0] = srcProxyRect.fTop;
+    rectYs[1] = srcProxyRect.fTop + 2*srcBlurRadius + srcTop;
+    rectYs[2] = srcProxyRect.fBottom - 2*srcBlurRadius - srcBot;
+    rectYs[3] = srcProxyRect.fBottom;
+
+    texXs[0] = 0.0f;
+    texXs[1] = 2.0f*devBlurRadius + devLeft;
+    texXs[2] = 2.0f*devBlurRadius + devLeft + 1;
+    texXs[3] = SkIntToScalar(widthHeight->fWidth);
+
+    texYs[0] = 0.0f;
+    texYs[1] = 2.0f*devBlurRadius + devTop;
+    texYs[2] = 2.0f*devBlurRadius + devTop + 1;
+    texYs[3] = SkIntToScalar(widthHeight->fHeight);
+
+    const SkRect newRect = SkRect::MakeXYWH(SkIntToScalar(devBlurRadius),
+                                            SkIntToScalar(devBlurRadius),
+                                            SkIntToScalar(newRRWidth),
+                                            SkIntToScalar(newRRHeight));
+    SkVector newRadii[4];
+    newRadii[0] = { SkScalarCeilToScalar(devRadiiUL.fX), SkScalarCeilToScalar(devRadiiUL.fY) };
+    newRadii[1] = { SkScalarCeilToScalar(devRadiiUR.fX), SkScalarCeilToScalar(devRadiiUR.fY) };
+    newRadii[2] = { SkScalarCeilToScalar(devRadiiLR.fX), SkScalarCeilToScalar(devRadiiLR.fY) };
+    newRadii[3] = { SkScalarCeilToScalar(devRadiiLL.fX), SkScalarCeilToScalar(devRadiiLL.fY) };
+
+    rrectToDraw->setRectRadii(newRect, newRadii);
+    return true;
+}
+
+// TODO: it seems like there should be some synergy with SkBlurMask::ComputeBlurProfile
+// TODO: maybe cache this on the cpu side?
+int CreateIntegralTable(float sixSigma, SkBitmap* table) {
+    // The texture we're producing represents the integral of a normal distribution over a
+    // six-sigma range centered at zero. We want enough resolution so that the linear
+    // interpolation done in texture lookup doesn't introduce noticeable artifacts. We
+    // conservatively choose to have 2 texels for each dst pixel.
+    int minWidth = 2 * sk_float_ceil2int(sixSigma);
+    // Bin by powers of 2 with a minimum so we get good profile reuse.
+    int width = std::max(SkNextPow2(minWidth), 32);
+
+    if (!table) {
+        return width;
+    }
+
+    if (!table->tryAllocPixels(SkImageInfo::MakeA8(width, 1))) {
+        return 0;
+    }
+    *table->getAddr8(0, 0) = 255;
+    const float invWidth = 1.f / width;
+    for (int i = 1; i < width - 1; ++i) {
+        float x = (i + 0.5f) * invWidth;
+        x = (-6 * x + 3) * SK_ScalarRoot2Over2;
+        float integral = 0.5f * (std::erf(x) + 1.f);
+        *table->getAddr8(i, 0) = SkToU8(sk_float_round2int(255.f * integral));
+    }
+
+    *table->getAddr8(width - 1, 0) = 0;
+    table->setImmutable();
+    return table->width();
+}
+
+
+void Compute1DGaussianKernel(float* kernel, float sigma, int radius) {
+    SkASSERT(radius == SigmaRadius(sigma));
+    if (SkGpuBlurUtils::IsEffectivelyZeroSigma(sigma)) {
+        // SigmaRadius() gives radius 0 (kernel width 1); naive ceil(3*sigma) could give width 3
+        SkASSERT(KernelWidth(radius) == 1);
+        std::fill_n(kernel, 1, 0.f);
+        kernel[0] = 1.f;
+        return;
+    }
+
+    // If this fails, kEffectivelyZeroSigma isn't big enough to prevent precision issues
+    SkASSERT(!SkScalarNearlyZero(2.f * sigma * sigma));
+
+    const float sigmaDenom = 1.0f / (2.f * sigma * sigma);
+    int size = KernelWidth(radius);
+    float sum = 0.0f;
+    for (int i = 0; i < size; ++i) {
+        float term = static_cast<float>(i - radius);
+        // Note that the constant term (1/(sqrt(2*pi*sigma^2)) of the Gaussian
+        // is dropped here, since we renormalize the kernel below.
+        kernel[i] = sk_float_exp(-term * term * sigmaDenom);
+        sum += kernel[i];
+    }
+    // Normalize the kernel
+    float scale = 1.0f / sum;
+    for (int i = 0; i < size; ++i) {
+        kernel[i] *= scale;
+    }
+}
+
 }  // namespace SkGpuBlurUtils
 
 #endif
diff --git a/src/core/SkGpuBlurUtils.h b/src/core/SkGpuBlurUtils.h
index 1f22c57..a58e07a 100644
--- a/src/core/SkGpuBlurUtils.h
+++ b/src/core/SkGpuBlurUtils.h
@@ -8,6 +8,8 @@
 #ifndef SkGpuBlurUtils_DEFINED
 #define SkGpuBlurUtils_DEFINED
 
+#include "include/core/SkTypes.h"
+
 #if SK_SUPPORT_GPU
 #include "src/gpu/GrRenderTargetContext.h"
 
@@ -17,6 +19,7 @@
 struct SkRect;
 
 namespace SkGpuBlurUtils {
+
 /**
  * Applies a 2D Gaussian blur to a given texture. The blurred result is returned
  * as a renderTargetContext in case the caller wishes to draw into the result.
@@ -49,6 +52,40 @@
                                                     float sigmaY,
                                                     SkTileMode mode,
                                                     SkBackingFit fit = SkBackingFit::kApprox);
+
+static const int kBlurRRectMaxDivisions = 6;
+
+// This method computes all the parameters for drawing a partially occluded nine-patched
+// blurred rrect mask:
+//   rrectToDraw - the integerized rrect to draw in the mask
+//   widthHeight - how large to make the mask (rrectToDraw will be centered in this coord sys)
+//   rectXs, rectYs - the x & y coordinates of the covering geometry lattice
+//   texXs, texYs - the texture coordinate at each point in rectXs & rectYs
+// It returns true if 'devRRect' is nine-patchable
+bool ComputeBlurredRRectParams(const SkRRect& srcRRect, const SkRRect& devRRect,
+                                SkScalar sigma, SkScalar xformedSigma,
+                                SkRRect* rrectToDraw,
+                                SkISize* widthHeight,
+                                SkScalar rectXs[kBlurRRectMaxDivisions],
+                                SkScalar rectYs[kBlurRRectMaxDivisions],
+                                SkScalar texXs[kBlurRRectMaxDivisions],
+                                SkScalar texYs[kBlurRRectMaxDivisions]);
+
+int CreateIntegralTable(float sixSigma, SkBitmap* table);
+
+void Compute1DGaussianKernel(float* kernel, float sigma, int radius);
+
+// Any sigmas smaller than this are effectively an identity blur so can skip convolution at a higher
+// level. The value was chosen because it corresponds roughly to a radius of 1/10px, and is slightly
+// greater than sqrt(1/2*sigma^2) for SK_ScalarNearlyZero.
+inline bool IsEffectivelyZeroSigma(float sigma) { return sigma <= 0.03f; }
+
+inline int SigmaRadius(float sigma) {
+    return IsEffectivelyZeroSigma(sigma) ? 0 : static_cast<int>(ceilf(sigma * 3.0f));
+}
+
+inline int KernelWidth(int radius) { return 2 * radius + 1; }
+
 }  // namespace SkGpuBlurUtils
 
 #endif
diff --git a/src/core/SkMaskFilter.cpp b/src/core/SkMaskFilter.cpp
index fe3f969..5cfb842 100644
--- a/src/core/SkMaskFilter.cpp
+++ b/src/core/SkMaskFilter.cpp
@@ -11,7 +11,6 @@
 #include "include/core/SkRRect.h"
 #include "src/core/SkAutoMalloc.h"
 #include "src/core/SkBlitter.h"
-#include "src/core/SkBlurPriv.h"
 #include "src/core/SkCachedData.h"
 #include "src/core/SkDraw.h"
 #include "src/core/SkPathPriv.h"
diff --git a/src/core/SkMaskFilterBase.h b/src/core/SkMaskFilterBase.h
index 6780d58..42b4829 100644
--- a/src/core/SkMaskFilterBase.h
+++ b/src/core/SkMaskFilterBase.h
@@ -243,4 +243,7 @@
     return static_cast<SkMaskFilterBase*>(mf.get());
 }
 
+// Declared here so that RegisterFlattenables can reach the blur mask filter implementation.
+extern void sk_register_blur_maskfilter_createproc();
+
 #endif
diff --git a/src/gpu/effects/GrCircleBlurFragmentProcessor.fp b/src/gpu/effects/GrCircleBlurFragmentProcessor.fp
index 11076ca..385c92b 100644
--- a/src/gpu/effects/GrCircleBlurFragmentProcessor.fp
+++ b/src/gpu/effects/GrCircleBlurFragmentProcessor.fp
@@ -39,6 +39,7 @@
 
 @cpp {
     #include "include/gpu/GrRecordingContext.h"
+    #include "src/core/SkGpuBlurUtils.h"
     #include "src/gpu/GrBitmapTextureMaker.h"
     #include "src/gpu/GrProxyProvider.h"
     #include "src/gpu/GrRecordingContextPriv.h"
@@ -283,6 +284,10 @@
     std::unique_ptr<GrFragmentProcessor> GrCircleBlurFragmentProcessor::Make(
             std::unique_ptr<GrFragmentProcessor> inputFP, GrRecordingContext* context,
             const SkRect& circle, float sigma) {
+        if (SkGpuBlurUtils::IsEffectivelyZeroSigma(sigma)) {
+            return inputFP;
+        }
+
         float solidRadius;
         float textureRadius;
         std::unique_ptr<GrFragmentProcessor> profile = create_profile_effect(context, circle, sigma,
diff --git a/src/gpu/effects/GrGaussianConvolutionFragmentProcessor.cpp b/src/gpu/effects/GrGaussianConvolutionFragmentProcessor.cpp
index fea477b..8f15733 100644
--- a/src/gpu/effects/GrGaussianConvolutionFragmentProcessor.cpp
+++ b/src/gpu/effects/GrGaussianConvolutionFragmentProcessor.cpp
@@ -7,6 +7,7 @@
 
 #include "src/gpu/effects/GrGaussianConvolutionFragmentProcessor.h"
 
+#include "src/core/SkGpuBlurUtils.h"
 #include "src/gpu/GrTexture.h"
 #include "src/gpu/GrTextureProxy.h"
 #include "src/gpu/effects/GrTextureEffect.h"
@@ -19,8 +20,6 @@
 using UniformHandle = GrGLSLProgramDataManager::UniformHandle;
 using Direction = GrGaussianConvolutionFragmentProcessor::Direction;
 
-static constexpr int radius_to_width(int r) { return 2*r + 1; }
-
 class GrGaussianConvolutionFragmentProcessor::Impl : public GrGLSLFragmentProcessor {
 public:
     void emitCode(EmitArgs&) override;
@@ -47,7 +46,7 @@
     fIncrementUni = uniformHandler->addUniform(&ce, kFragment_GrShaderFlag, kHalf2_GrSLType,
                                                "Increment", &inc);
 
-    int width = radius_to_width(ce.fRadius);
+    int width = SkGpuBlurUtils::KernelWidth(ce.fRadius);
 
     int arrayCount = (width + 3) / 4;
     SkASSERT(4 * arrayCount >= width);
@@ -87,7 +86,7 @@
     increment[static_cast<int>(conv.fDirection)] = 1;
     pdman.set2fv(fIncrementUni, 1, increment);
 
-    int width = radius_to_width(conv.fRadius);
+    int width = SkGpuBlurUtils::KernelWidth(conv.fRadius);
     int arrayCount = (width + 3)/4;
     SkDEBUGCODE(size_t arraySize = 4*arrayCount;)
     SkASSERT(arraySize >= static_cast<size_t>(width));
@@ -104,33 +103,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////
 
-void SkFillIn1DGaussianKernel(float* kernel, float gaussianSigma, int radius) {
-    const float twoSigmaSqrd = 2.0f * gaussianSigma * gaussianSigma;
-    int width = radius_to_width(radius);
-    if (SkScalarNearlyZero(twoSigmaSqrd, SK_ScalarNearlyZero)) {
-        for (int i = 0; i < width; ++i) {
-            kernel[i] = 0.0f;
-        }
-        return;
-    }
-
-    const float denom = 1.0f / twoSigmaSqrd;
-
-    float sum = 0.0f;
-    for (int i = 0; i < width; ++i) {
-        float x = static_cast<float>(i - radius);
-        // Note that the constant term (1/(sqrt(2*pi*sigma^2)) of the Gaussian
-        // is dropped here, since we renormalize the kernel below.
-        kernel[i] = sk_float_exp(-x * x * denom);
-        sum += kernel[i];
-    }
-    // Normalize the kernel
-    float scale = 1.0f / sum;
-    for (int i = 0; i < width; ++i) {
-        kernel[i] *= scale;
-    }
-}
-
 std::unique_ptr<GrFragmentProcessor> GrGaussianConvolutionFragmentProcessor::Make(
         GrSurfaceProxyView view,
         SkAlphaType alphaType,
@@ -143,6 +115,9 @@
         const GrCaps& caps) {
     std::unique_ptr<GrFragmentProcessor> child;
     GrSamplerState sampler(wm, GrSamplerState::Filter::kNearest);
+    if (SkGpuBlurUtils::IsEffectivelyZeroSigma(gaussianSigma)) {
+        halfWidth = 0;
+    }
     if (pixelDomain) {
         // Inset because we expect to be invoked at pixel centers.
         SkRect domain = SkRect::Make(*pixelDomain).makeInset(0.5, 0.5f);
@@ -156,6 +131,10 @@
         child = GrTextureEffect::MakeSubset(std::move(view), alphaType, SkMatrix::I(), sampler,
                                             SkRect::Make(subset), caps);
     }
+
+    if (SkGpuBlurUtils::IsEffectivelyZeroSigma(gaussianSigma)) {
+        return child;
+    }
     return std::unique_ptr<GrFragmentProcessor>(new GrGaussianConvolutionFragmentProcessor(
             std::move(child), dir, halfWidth, gaussianSigma));
 }
@@ -171,7 +150,7 @@
         , fDirection(direction) {
     this->registerChild(std::move(child), SkSL::SampleUsage::Explicit());
     SkASSERT(radius <= kMaxKernelRadius);
-    SkFillIn1DGaussianKernel(fKernel, gaussianSigma, fRadius);
+    SkGpuBlurUtils::Compute1DGaussianKernel(fKernel, gaussianSigma, fRadius);
     this->setUsesSampleCoordsDirectly();
 }
 
@@ -181,7 +160,7 @@
         , fRadius(that.fRadius)
         , fDirection(that.fDirection) {
     this->cloneAndRegisterAllChildProcessors(that);
-    memcpy(fKernel, that.fKernel, radius_to_width(fRadius) * sizeof(float));
+    memcpy(fKernel, that.fKernel, SkGpuBlurUtils::KernelWidth(fRadius) * sizeof(float));
     this->setUsesSampleCoordsDirectly();
 }
 
@@ -197,7 +176,7 @@
 bool GrGaussianConvolutionFragmentProcessor::onIsEqual(const GrFragmentProcessor& sBase) const {
     const auto& that = sBase.cast<GrGaussianConvolutionFragmentProcessor>();
     return fRadius == that.fRadius && fDirection == that.fDirection &&
-           std::equal(fKernel, fKernel + radius_to_width(fRadius), that.fKernel);
+           std::equal(fKernel, fKernel + SkGpuBlurUtils::KernelWidth(fRadius), that.fKernel);
 }
 
 ///////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/effects/GrMatrixConvolutionEffect.cpp b/src/gpu/effects/GrMatrixConvolutionEffect.cpp
index ce2cc0f..71ac6d6 100644
--- a/src/gpu/effects/GrMatrixConvolutionEffect.cpp
+++ b/src/gpu/effects/GrMatrixConvolutionEffect.cpp
@@ -338,85 +338,6 @@
            fConvolveAlpha == s.convolveAlpha();
 }
 
-static void fill_in_1D_gaussian_kernel_with_stride(float* kernel, int size, int stride,
-                                                   float twoSigmaSqrd) {
-    SkASSERT(!SkScalarNearlyZero(twoSigmaSqrd, SK_ScalarNearlyZero));
-
-    const float sigmaDenom = 1.0f / twoSigmaSqrd;
-    const int radius = size / 2;
-
-    float sum = 0.0f;
-    for (int i = 0; i < size; ++i) {
-        float term = static_cast<float>(i - radius);
-        // Note that the constant term (1/(sqrt(2*pi*sigma^2)) of the Gaussian
-        // is dropped here, since we renormalize the kernel below.
-        kernel[i * stride] = sk_float_exp(-term * term * sigmaDenom);
-        sum += kernel[i * stride];
-    }
-    // Normalize the kernel
-    float scale = 1.0f / sum;
-    for (int i = 0; i < size; ++i) {
-        kernel[i * stride] *= scale;
-    }
-}
-
-static void fill_in_2D_gaussian_kernel(float* kernel, int width, int height,
-                                       SkScalar sigmaX, SkScalar sigmaY) {
-    const float twoSigmaSqrdX = 2.0f * SkScalarToFloat(SkScalarSquare(sigmaX));
-    const float twoSigmaSqrdY = 2.0f * SkScalarToFloat(SkScalarSquare(sigmaY));
-
-    // TODO: in all of these degenerate cases we're uploading (and using) a whole lot of zeros.
-    if (SkScalarNearlyZero(twoSigmaSqrdX, SK_ScalarNearlyZero) ||
-        SkScalarNearlyZero(twoSigmaSqrdY, SK_ScalarNearlyZero)) {
-        // In this case the 2D Gaussian degenerates to a 1D Gaussian (in X or Y) or a point
-        SkASSERT(3 == width || 3 == height);
-        std::fill_n(kernel, width*height, 0);
-
-        if (SkScalarNearlyZero(twoSigmaSqrdX, SK_ScalarNearlyZero) &&
-            SkScalarNearlyZero(twoSigmaSqrdY, SK_ScalarNearlyZero)) {
-            // A point
-            SkASSERT(3 == width && 3 == height);
-            kernel[4] = 1.0f;
-        } else if (SkScalarNearlyZero(twoSigmaSqrdX, SK_ScalarNearlyZero)) {
-            // A 1D Gaussian in Y
-            SkASSERT(3 == width);
-            // Down the middle column of the kernel with a stride of width
-            fill_in_1D_gaussian_kernel_with_stride(&kernel[1], height, width, twoSigmaSqrdY);
-        } else {
-            // A 1D Gaussian in X
-            SkASSERT(SkScalarNearlyZero(twoSigmaSqrdY, SK_ScalarNearlyZero));
-            SkASSERT(3 == height);
-            // Down the middle row of the kernel with a stride of 1
-            fill_in_1D_gaussian_kernel_with_stride(&kernel[width], width, 1, twoSigmaSqrdX);
-        }
-        return;
-    }
-
-    const float sigmaXDenom = 1.0f / twoSigmaSqrdX;
-    const float sigmaYDenom = 1.0f / twoSigmaSqrdY;
-    const int xRadius = width / 2;
-    const int yRadius = height / 2;
-
-    float sum = 0.0f;
-    for (int x = 0; x < width; x++) {
-        float xTerm = static_cast<float>(x - xRadius);
-        xTerm = xTerm * xTerm * sigmaXDenom;
-        for (int y = 0; y < height; y++) {
-            float yTerm = static_cast<float>(y - yRadius);
-            float xyTerm = sk_float_exp(-(xTerm + yTerm * yTerm * sigmaYDenom));
-            // Note that the constant term (1/(sqrt(2*pi*sigma^2)) of the Gaussian
-            // is dropped here, since we renormalize the kernel below.
-            kernel[y * width + x] = xyTerm;
-            sum += xyTerm;
-        }
-    }
-    // Normalize the kernel
-    float scale = 1.0f / sum;
-    for (int i = 0; i < width * height; ++i) {
-        kernel[i] *= scale;
-    }
-}
-
 std::unique_ptr<GrFragmentProcessor> GrMatrixConvolutionEffect::Make(GrRecordingContext* context,
                                                                      GrSurfaceProxyView srcView,
                                                                      const SkIRect& srcBounds,
@@ -440,26 +361,6 @@
                                           gain, bias, kernelOffset, convolveAlpha));
 }
 
-std::unique_ptr<GrFragmentProcessor> GrMatrixConvolutionEffect::MakeGaussian(
-        GrRecordingContext* context,
-        GrSurfaceProxyView srcView,
-        const SkIRect& srcBounds,
-        const SkISize& kernelSize,
-        SkScalar gain,
-        SkScalar bias,
-        const SkIPoint& kernelOffset,
-        GrSamplerState::WrapMode wm,
-        bool convolveAlpha,
-        SkScalar sigmaX,
-        SkScalar sigmaY,
-        const GrCaps& caps) {
-    SkAutoSTMalloc<32, float> kernel(kernelSize.area());
-    fill_in_2D_gaussian_kernel(kernel.get(), kernelSize.width(), kernelSize.height(),
-                               sigmaX, sigmaY);
-    return Make(context, std::move(srcView), srcBounds, kernelSize, kernel.get(),
-                gain, bias, kernelOffset, wm, convolveAlpha, caps);
-}
-
 GR_DEFINE_FRAGMENT_PROCESSOR_TEST(GrMatrixConvolutionEffect);
 
 #if GR_TEST_UTILS
diff --git a/src/gpu/effects/GrMatrixConvolutionEffect.h b/src/gpu/effects/GrMatrixConvolutionEffect.h
index f296875..3a1b733 100644
--- a/src/gpu/effects/GrMatrixConvolutionEffect.h
+++ b/src/gpu/effects/GrMatrixConvolutionEffect.h
@@ -31,19 +31,6 @@
                                                      bool convolveAlpha,
                                                      const GrCaps&);
 
-    static std::unique_ptr<GrFragmentProcessor> MakeGaussian(GrRecordingContext*,
-                                                             GrSurfaceProxyView srcView,
-                                                             const SkIRect& srcBounds,
-                                                             const SkISize& kernelSize,
-                                                             SkScalar gain,
-                                                             SkScalar bias,
-                                                             const SkIPoint& kernelOffset,
-                                                             GrSamplerState::WrapMode,
-                                                             bool convolveAlpha,
-                                                             SkScalar sigmaX,
-                                                             SkScalar sigmaY,
-                                                             const GrCaps&);
-
     const SkIRect& bounds() const { return fBounds; }
     SkISize kernelSize() const { return fKernel.size(); }
     SkVector kernelOffset() const { return fKernelOffset; }
diff --git a/src/gpu/effects/GrRRectBlurEffect.fp b/src/gpu/effects/GrRRectBlurEffect.fp
index df41c09..dee923d 100644
--- a/src/gpu/effects/GrRRectBlurEffect.fp
+++ b/src/gpu/effects/GrRRectBlurEffect.fp
@@ -37,7 +37,6 @@
     #include "include/gpu/GrDirectContext.h"
     #include "include/gpu/GrRecordingContext.h"
     #include "src/core/SkAutoMalloc.h"
-    #include "src/core/SkBlurPriv.h"
     #include "src/core/SkGpuBlurUtils.h"
     #include "src/core/SkRRectPriv.h"
     #include "src/gpu/GrBitmapTextureMaker.h"
@@ -56,6 +55,7 @@
     static void make_blurred_rrect_key(GrUniqueKey* key,
                                        const SkRRect& rrectToDraw,
                                        float xformedSigma) {
+        SkASSERT(!SkGpuBlurUtils::IsEffectivelyZeroSigma(xformedSigma));
         static const GrUniqueKey::Domain kDomain = GrUniqueKey::GenerateDomain();
 
         GrUniqueKey::Builder builder(key, kDomain, 9, "RoundRect Blur Mask");
@@ -80,6 +80,7 @@
                             const SkRRect& rrectToDraw,
                             const SkISize& dimensions,
                             float xformedSigma) {
+        SkASSERT(!SkGpuBlurUtils::IsEffectivelyZeroSigma(xformedSigma));
         std::unique_ptr<GrRenderTargetContext> rtc = GrRenderTargetContext::MakeWithFallback(
                 dContext, GrColorType::kAlpha_8, nullptr, SkBackingFit::kExact, dimensions, 1,
                 GrMipmapped::kNo, GrProtected::kNo, kBlurredRRectMaskOrigin);
@@ -121,12 +122,6 @@
         return true;
     }
 
-    // TODO: merge w/ copy in SkGpuBlurUtils.cpp
-    static int sigma_radius(float sigma) {
-        SkASSERT(sigma >= 0);
-        return static_cast<int>(ceilf(sigma * 3.0f));
-    }
-
     // Evaluate the vertical blur at the specified 'y' value given the location of the top of the
     // rrect.
     static uint8_t eval_V(float top, int y,
@@ -177,7 +172,8 @@
                                                  const SkRRect& rrectToDraw,
                                                  const SkISize& dimensions,
                                                  float xformedSigma) {
-        int radius = sigma_radius(xformedSigma);
+        SkASSERT(!SkGpuBlurUtils::IsEffectivelyZeroSigma(xformedSigma));
+        int radius = SkGpuBlurUtils::SigmaRadius(xformedSigma);
         int kernelSize = 2*radius + 1;
 
         SkASSERT(kernelSize %2);
@@ -192,10 +188,10 @@
 
         std::unique_ptr<float[]> kernel(new float[kernelSize]);
 
-        SkFillIn1DGaussianKernel(kernel.get(), xformedSigma, radius);
+        SkGpuBlurUtils::Compute1DGaussianKernel(kernel.get(), xformedSigma, radius);
 
         SkBitmap integral;
-        if (!SkCreateIntegralTable(6*xformedSigma, &integral)) {
+        if (!SkGpuBlurUtils::CreateIntegralTable(6*xformedSigma, &integral)) {
             return {};
         }
 
@@ -251,6 +247,7 @@
             const SkRRect& rrectToDraw,
             const SkISize& dimensions,
             float xformedSigma) {
+        SkASSERT(!SkGpuBlurUtils::IsEffectivelyZeroSigma(xformedSigma));
         GrUniqueKey key;
         make_blurred_rrect_key(&key, rrectToDraw, xformedSigma);
 
@@ -332,18 +329,22 @@
             return nullptr;
         }
 
+        if (SkGpuBlurUtils::IsEffectivelyZeroSigma(xformedSigma)) {
+            return inputFP;
+        }
+
         // Make sure we can successfully ninepatch this rrect -- the blur sigma has to be
         // sufficiently small relative to both the size of the corner radius and the
         // width (and height) of the rrect.
         SkRRect rrectToDraw;
         SkISize dimensions;
-        SkScalar ignored[kSkBlurRRectMaxDivisions];
+        SkScalar ignored[SkGpuBlurUtils::kBlurRRectMaxDivisions];
 
-        bool ninePatchable = SkComputeBlurredRRectParams(srcRRect, devRRect,
-                                                         sigma, xformedSigma,
-                                                         &rrectToDraw, &dimensions,
-                                                         ignored, ignored,
-                                                         ignored, ignored);
+        bool ninePatchable = SkGpuBlurUtils::ComputeBlurredRRectParams(srcRRect, devRRect,
+                                                                       sigma, xformedSigma,
+                                                                       &rrectToDraw, &dimensions,
+                                                                       ignored, ignored,
+                                                                       ignored, ignored);
         if (!ninePatchable) {
             return nullptr;
         }
diff --git a/src/gpu/effects/GrRectBlurEffect.fp b/src/gpu/effects/GrRectBlurEffect.fp
index 3b0c912..1356d98 100644
--- a/src/gpu/effects/GrRectBlurEffect.fp
+++ b/src/gpu/effects/GrRectBlurEffect.fp
@@ -11,7 +11,7 @@
 #include "include/core/SkScalar.h"
 #include "include/gpu/GrRecordingContext.h"
 #include "src/core/SkBlurMask.h"
-#include "src/core/SkBlurPriv.h"
+#include "src/core/SkGpuBlurUtils.h"
 #include "src/core/SkMathPriv.h"
 #include "src/gpu/GrBitmapTextureMaker.h"
 #include "src/gpu/GrProxyProvider.h"
@@ -54,9 +54,10 @@
 @class {
 static std::unique_ptr<GrFragmentProcessor> MakeIntegralFP(GrRecordingContext* rContext,
                                                            float sixSigma) {
+    SkASSERT(!SkGpuBlurUtils::IsEffectivelyZeroSigma(sixSigma / 6.f));
     auto threadSafeCache = rContext->priv().threadSafeCache();
 
-    int width = SkCreateIntegralTable(sixSigma, nullptr);
+    int width = SkGpuBlurUtils::CreateIntegralTable(sixSigma, nullptr);
 
     static const GrUniqueKey::Domain kDomain = GrUniqueKey::GenerateDomain();
     GrUniqueKey key;
@@ -75,7 +76,7 @@
     }
 
     SkBitmap bitmap;
-    if (!SkCreateIntegralTable(sixSigma, &bitmap)) {
+    if (!SkGpuBlurUtils::CreateIntegralTable(sixSigma, &bitmap)) {
         return {};
     }
 
@@ -103,6 +104,11 @@
          SkASSERT(viewMatrix.preservesRightAngles());
          SkASSERT(srcRect.isSorted());
 
+         if (SkGpuBlurUtils::IsEffectivelyZeroSigma(transformedSigma)) {
+             // No need to blur the rect
+             return inputFP;
+         }
+
          SkMatrix invM;
          SkRect rect;
          if (viewMatrix.rectStaysRect()) {
diff --git a/src/gpu/effects/generated/GrCircleBlurFragmentProcessor.cpp b/src/gpu/effects/generated/GrCircleBlurFragmentProcessor.cpp
index 1a0d3e6..b7bd43a 100644
--- a/src/gpu/effects/generated/GrCircleBlurFragmentProcessor.cpp
+++ b/src/gpu/effects/generated/GrCircleBlurFragmentProcessor.cpp
@@ -11,6 +11,7 @@
 #include "GrCircleBlurFragmentProcessor.h"
 
 #include "include/gpu/GrRecordingContext.h"
+#include "src/core/SkGpuBlurUtils.h"
 #include "src/gpu/GrBitmapTextureMaker.h"
 #include "src/gpu/GrProxyProvider.h"
 #include "src/gpu/GrRecordingContextPriv.h"
@@ -267,6 +268,10 @@
         GrRecordingContext* context,
         const SkRect& circle,
         float sigma) {
+    if (SkGpuBlurUtils::IsEffectivelyZeroSigma(sigma)) {
+        return inputFP;
+    }
+
     float solidRadius;
     float textureRadius;
     std::unique_ptr<GrFragmentProcessor> profile =
@@ -307,18 +312,18 @@
                 args.fUniformHandler->getUniformCStr(circleDataVar),
                 args.fUniformHandler->getUniformCStr(circleDataVar),
                 args.fUniformHandler->getUniformCStr(circleDataVar));
-        SkString _sample13764 = this->invokeChild(0, args);
+        SkString _sample13905 = this->invokeChild(0, args);
         fragBuilder->codeAppendf(
                 R"SkSL(
 half4 inputColor = %s;)SkSL",
-                _sample13764.c_str());
-        SkString _coords13805("float2(half2(dist, 0.5))");
-        SkString _sample13805 = this->invokeChild(1, args, _coords13805.c_str());
+                _sample13905.c_str());
+        SkString _coords13946("float2(half2(dist, 0.5))");
+        SkString _sample13946 = this->invokeChild(1, args, _coords13946.c_str());
         fragBuilder->codeAppendf(
                 R"SkSL(
 return inputColor * %s.w;
 )SkSL",
-                _sample13805.c_str());
+                _sample13946.c_str());
     }
 
 private:
diff --git a/src/gpu/effects/generated/GrRRectBlurEffect.cpp b/src/gpu/effects/generated/GrRRectBlurEffect.cpp
index e93e6f2..0d811f6 100644
--- a/src/gpu/effects/generated/GrRRectBlurEffect.cpp
+++ b/src/gpu/effects/generated/GrRRectBlurEffect.cpp
@@ -13,7 +13,6 @@
 #include "include/gpu/GrDirectContext.h"
 #include "include/gpu/GrRecordingContext.h"
 #include "src/core/SkAutoMalloc.h"
-#include "src/core/SkBlurPriv.h"
 #include "src/core/SkGpuBlurUtils.h"
 #include "src/core/SkRRectPriv.h"
 #include "src/gpu/GrBitmapTextureMaker.h"
@@ -32,6 +31,7 @@
 static void make_blurred_rrect_key(GrUniqueKey* key,
                                    const SkRRect& rrectToDraw,
                                    float xformedSigma) {
+    SkASSERT(!SkGpuBlurUtils::IsEffectivelyZeroSigma(xformedSigma));
     static const GrUniqueKey::Domain kDomain = GrUniqueKey::GenerateDomain();
 
     GrUniqueKey::Builder builder(key, kDomain, 9, "RoundRect Blur Mask");
@@ -54,6 +54,7 @@
                                const SkRRect& rrectToDraw,
                                const SkISize& dimensions,
                                float xformedSigma) {
+    SkASSERT(!SkGpuBlurUtils::IsEffectivelyZeroSigma(xformedSigma));
     std::unique_ptr<GrRenderTargetContext> rtc = GrRenderTargetContext::MakeWithFallback(
             dContext, GrColorType::kAlpha_8, nullptr, SkBackingFit::kExact, dimensions, 1,
             GrMipmapped::kNo, GrProtected::kNo, kBlurredRRectMaskOrigin);
@@ -95,12 +96,6 @@
     return true;
 }
 
-// TODO: merge w/ copy in SkGpuBlurUtils.cpp
-static int sigma_radius(float sigma) {
-    SkASSERT(sigma >= 0);
-    return static_cast<int>(ceilf(sigma * 3.0f));
-}
-
 // Evaluate the vertical blur at the specified 'y' value given the location of the top of the
 // rrect.
 static uint8_t eval_V(float top, int y, const uint8_t* integral, int integralSize, float sixSigma) {
@@ -155,7 +150,8 @@
                                              const SkRRect& rrectToDraw,
                                              const SkISize& dimensions,
                                              float xformedSigma) {
-    int radius = sigma_radius(xformedSigma);
+    SkASSERT(!SkGpuBlurUtils::IsEffectivelyZeroSigma(xformedSigma));
+    int radius = SkGpuBlurUtils::SigmaRadius(xformedSigma);
     int kernelSize = 2 * radius + 1;
 
     SkASSERT(kernelSize % 2);
@@ -170,10 +166,10 @@
 
     std::unique_ptr<float[]> kernel(new float[kernelSize]);
 
-    SkFillIn1DGaussianKernel(kernel.get(), xformedSigma, radius);
+    SkGpuBlurUtils::Compute1DGaussianKernel(kernel.get(), xformedSigma, radius);
 
     SkBitmap integral;
-    if (!SkCreateIntegralTable(6 * xformedSigma, &integral)) {
+    if (!SkGpuBlurUtils::CreateIntegralTable(6 * xformedSigma, &integral)) {
         return {};
     }
 
@@ -228,6 +224,7 @@
         const SkRRect& rrectToDraw,
         const SkISize& dimensions,
         float xformedSigma) {
+    SkASSERT(!SkGpuBlurUtils::IsEffectivelyZeroSigma(xformedSigma));
     GrUniqueKey key;
     make_blurred_rrect_key(&key, rrectToDraw, xformedSigma);
 
@@ -308,16 +305,20 @@
         return nullptr;
     }
 
+    if (SkGpuBlurUtils::IsEffectivelyZeroSigma(xformedSigma)) {
+        return inputFP;
+    }
+
     // Make sure we can successfully ninepatch this rrect -- the blur sigma has to be
     // sufficiently small relative to both the size of the corner radius and the
     // width (and height) of the rrect.
     SkRRect rrectToDraw;
     SkISize dimensions;
-    SkScalar ignored[kSkBlurRRectMaxDivisions];
+    SkScalar ignored[SkGpuBlurUtils::kBlurRRectMaxDivisions];
 
-    bool ninePatchable =
-            SkComputeBlurredRRectParams(srcRRect, devRRect, sigma, xformedSigma, &rrectToDraw,
-                                        &dimensions, ignored, ignored, ignored, ignored);
+    bool ninePatchable = SkGpuBlurUtils::ComputeBlurredRRectParams(
+            srcRRect, devRRect, sigma, xformedSigma, &rrectToDraw, &dimensions, ignored, ignored,
+            ignored, ignored);
     if (!ninePatchable) {
         return nullptr;
     }
@@ -376,18 +377,18 @@
                 args.fUniformHandler->getUniformCStr(proxyRectVar),
                 args.fUniformHandler->getUniformCStr(blurRadiusVar),
                 args.fUniformHandler->getUniformCStr(cornerRadiusVar));
-        SkString _sample17184 = this->invokeChild(0, args);
+        SkString _sample17491 = this->invokeChild(0, args);
         fragBuilder->codeAppendf(
                 R"SkSL(
 half4 inputColor = %s;)SkSL",
-                _sample17184.c_str());
-        SkString _coords17232("float2(texCoord)");
-        SkString _sample17232 = this->invokeChild(1, args, _coords17232.c_str());
+                _sample17491.c_str());
+        SkString _coords17539("float2(texCoord)");
+        SkString _sample17539 = this->invokeChild(1, args, _coords17539.c_str());
         fragBuilder->codeAppendf(
                 R"SkSL(
 %s = inputColor * %s;
 )SkSL",
-                args.fOutputColor, _sample17232.c_str());
+                args.fOutputColor, _sample17539.c_str());
     }
 
 private:
diff --git a/src/gpu/effects/generated/GrRectBlurEffect.cpp b/src/gpu/effects/generated/GrRectBlurEffect.cpp
index e0ed896..e841334 100644
--- a/src/gpu/effects/generated/GrRectBlurEffect.cpp
+++ b/src/gpu/effects/generated/GrRectBlurEffect.cpp
@@ -70,14 +70,14 @@
                 rectFVar.isValid() ? args.fUniformHandler->getUniformCStr(rectFVar) : "float4(0)",
                 rectHVar.isValid() ? args.fUniformHandler->getUniformCStr(rectHVar) : "half4(0)",
                 rectHVar.isValid() ? args.fUniformHandler->getUniformCStr(rectHVar) : "half4(0)");
-        SkString _coords8137("float2(half2(xy.x, 0.5))");
-        SkString _sample8137 = this->invokeChild(1, args, _coords8137.c_str());
+        SkString _coords8395("float2(half2(xy.x, 0.5))");
+        SkString _sample8395 = this->invokeChild(1, args, _coords8395.c_str());
         fragBuilder->codeAppendf(
                 R"SkSL(
     xCoverage = %s.w;)SkSL",
-                _sample8137.c_str());
-        SkString _coords8195("float2(half2(xy.y, 0.5))");
-        SkString _sample8195 = this->invokeChild(1, args, _coords8195.c_str());
+                _sample8395.c_str());
+        SkString _coords8453("float2(half2(xy.y, 0.5))");
+        SkString _sample8453 = this->invokeChild(1, args, _coords8453.c_str());
         fragBuilder->codeAppendf(
                 R"SkSL(
     yCoverage = %s.w;
@@ -90,35 +90,35 @@
         rect.xy = half2(float2(%s.xy) - pos);
         rect.zw = half2(pos - float2(%s.zw));
     })SkSL",
-                _sample8195.c_str(),
+                _sample8453.c_str(),
                 rectFVar.isValid() ? args.fUniformHandler->getUniformCStr(rectFVar) : "float4(0)",
                 rectFVar.isValid() ? args.fUniformHandler->getUniformCStr(rectFVar) : "float4(0)",
                 rectHVar.isValid() ? args.fUniformHandler->getUniformCStr(rectHVar) : "half4(0)",
                 rectHVar.isValid() ? args.fUniformHandler->getUniformCStr(rectHVar) : "half4(0)");
-        SkString _coords9514("float2(half2(rect.x, 0.5))");
-        SkString _sample9514 = this->invokeChild(1, args, _coords9514.c_str());
-        SkString _coords9577("float2(half2(rect.z, 0.5))");
-        SkString _sample9577 = this->invokeChild(1, args, _coords9577.c_str());
+        SkString _coords9772("float2(half2(rect.x, 0.5))");
+        SkString _sample9772 = this->invokeChild(1, args, _coords9772.c_str());
+        SkString _coords9835("float2(half2(rect.z, 0.5))");
+        SkString _sample9835 = this->invokeChild(1, args, _coords9835.c_str());
         fragBuilder->codeAppendf(
                 R"SkSL(
     xCoverage = (1.0 - %s.w) - %s.w;)SkSL",
-                _sample9514.c_str(), _sample9577.c_str());
-        SkString _coords9641("float2(half2(rect.y, 0.5))");
-        SkString _sample9641 = this->invokeChild(1, args, _coords9641.c_str());
-        SkString _coords9704("float2(half2(rect.w, 0.5))");
-        SkString _sample9704 = this->invokeChild(1, args, _coords9704.c_str());
+                _sample9772.c_str(), _sample9835.c_str());
+        SkString _coords9899("float2(half2(rect.y, 0.5))");
+        SkString _sample9899 = this->invokeChild(1, args, _coords9899.c_str());
+        SkString _coords9962("float2(half2(rect.w, 0.5))");
+        SkString _sample9962 = this->invokeChild(1, args, _coords9962.c_str());
         fragBuilder->codeAppendf(
                 R"SkSL(
     yCoverage = (1.0 - %s.w) - %s.w;
 })SkSL",
-                _sample9641.c_str(), _sample9704.c_str());
-        SkString _sample9773 = this->invokeChild(0, args);
+                _sample9899.c_str(), _sample9962.c_str());
+        SkString _sample10031 = this->invokeChild(0, args);
         fragBuilder->codeAppendf(
                 R"SkSL(
 half4 inputColor = %s;
 %s = (inputColor * xCoverage) * yCoverage;
 )SkSL",
-                _sample9773.c_str(), args.fOutputColor);
+                _sample10031.c_str(), args.fOutputColor);
     }
 
 private:
diff --git a/src/gpu/effects/generated/GrRectBlurEffect.h b/src/gpu/effects/generated/GrRectBlurEffect.h
index 9dce2cc..48f997c 100644
--- a/src/gpu/effects/generated/GrRectBlurEffect.h
+++ b/src/gpu/effects/generated/GrRectBlurEffect.h
@@ -19,7 +19,7 @@
 #include "include/core/SkScalar.h"
 #include "include/gpu/GrRecordingContext.h"
 #include "src/core/SkBlurMask.h"
-#include "src/core/SkBlurPriv.h"
+#include "src/core/SkGpuBlurUtils.h"
 #include "src/core/SkMathPriv.h"
 #include "src/gpu/GrBitmapTextureMaker.h"
 #include "src/gpu/GrProxyProvider.h"
@@ -34,9 +34,10 @@
 public:
     static std::unique_ptr<GrFragmentProcessor> MakeIntegralFP(GrRecordingContext* rContext,
                                                                float sixSigma) {
+        SkASSERT(!SkGpuBlurUtils::IsEffectivelyZeroSigma(sixSigma / 6.f));
         auto threadSafeCache = rContext->priv().threadSafeCache();
 
-        int width = SkCreateIntegralTable(sixSigma, nullptr);
+        int width = SkGpuBlurUtils::CreateIntegralTable(sixSigma, nullptr);
 
         static const GrUniqueKey::Domain kDomain = GrUniqueKey::GenerateDomain();
         GrUniqueKey key;
@@ -55,7 +56,7 @@
         }
 
         SkBitmap bitmap;
-        if (!SkCreateIntegralTable(sixSigma, &bitmap)) {
+        if (!SkGpuBlurUtils::CreateIntegralTable(sixSigma, &bitmap)) {
             return {};
         }
 
@@ -81,6 +82,11 @@
         SkASSERT(viewMatrix.preservesRightAngles());
         SkASSERT(srcRect.isSorted());
 
+        if (SkGpuBlurUtils::IsEffectivelyZeroSigma(transformedSigma)) {
+            // No need to blur the rect
+            return inputFP;
+        }
+
         SkMatrix invM;
         SkRect rect;
         if (viewMatrix.rectStaysRect()) {
diff --git a/tests/BlurTest.cpp b/tests/BlurTest.cpp
index a69a43b..8a0e5a4 100644
--- a/tests/BlurTest.cpp
+++ b/tests/BlurTest.cpp
@@ -34,7 +34,7 @@
 #include "include/private/SkFloatBits.h"
 #include "include/private/SkTPin.h"
 #include "src/core/SkBlurMask.h"
-#include "src/core/SkBlurPriv.h"
+#include "src/core/SkGpuBlurUtils.h"
 #include "src/core/SkMask.h"
 #include "src/core/SkMaskFilterBase.h"
 #include "src/core/SkMathPriv.h"
@@ -523,10 +523,10 @@
     bool ninePatchable;
     SkRRect rrectToDraw;
     SkISize size;
-    SkScalar rectXs[kSkBlurRRectMaxDivisions],
-             rectYs[kSkBlurRRectMaxDivisions];
-    SkScalar texXs[kSkBlurRRectMaxDivisions],
-             texYs[kSkBlurRRectMaxDivisions];
+    SkScalar rectXs[SkGpuBlurUtils::kBlurRRectMaxDivisions],
+             rectYs[SkGpuBlurUtils::kBlurRRectMaxDivisions];
+    SkScalar texXs[SkGpuBlurUtils::kBlurRRectMaxDivisions],
+             texYs[SkGpuBlurUtils::kBlurRRectMaxDivisions];
 
     // not nine-patchable
     {
@@ -535,9 +535,9 @@
         SkRRect rr;
         rr.setRectRadii(r, radii);
 
-        ninePatchable = SkComputeBlurredRRectParams(rr, rr, kBlurRad, kBlurRad,
-                                                    &rrectToDraw, &size,
-                                                    rectXs, rectYs, texXs, texYs);
+        ninePatchable = SkGpuBlurUtils::ComputeBlurredRRectParams(rr, rr, kBlurRad, kBlurRad,
+                                                                  &rrectToDraw, &size,
+                                                                  rectXs, rectYs, texXs, texYs);
         REPORTER_ASSERT(reporter, !ninePatchable);
     }
 
@@ -547,9 +547,9 @@
         SkRRect rr;
         rr.setRectXY(r, kCornerRad, kCornerRad);
 
-        ninePatchable = SkComputeBlurredRRectParams(rr, rr, kBlurRad, kBlurRad,
-                                                    &rrectToDraw, &size,
-                                                    rectXs, rectYs, texXs, texYs);
+        ninePatchable = SkGpuBlurUtils::ComputeBlurredRRectParams(rr, rr, kBlurRad, kBlurRad,
+                                                                  &rrectToDraw, &size,
+                                                                  rectXs, rectYs, texXs, texYs);
 
         static const SkScalar kAns = 12.0f * kBlurRad + 2.0f * kCornerRad + 1.0f;
         REPORTER_ASSERT(reporter, ninePatchable);
@@ -564,9 +564,9 @@
         SkRRect rr;
         rr.setRectXY(r, kXCornerRad, kYCornerRad);
 
-        ninePatchable = SkComputeBlurredRRectParams(rr, rr, kBlurRad, kBlurRad,
-                                                    &rrectToDraw, &size,
-                                                    rectXs, rectYs, texXs, texYs);
+        ninePatchable = SkGpuBlurUtils::ComputeBlurredRRectParams(rr, rr, kBlurRad, kBlurRad,
+                                                                  &rrectToDraw, &size,
+                                                                  rectXs, rectYs, texXs, texYs);
 
         static const SkScalar kXAns = 12.0f * kBlurRad + 2.0f * kXCornerRad + 1.0f;
         static const SkScalar kYAns = 12.0f * kBlurRad + 2.0f * kYCornerRad + 1.0f;
@@ -663,4 +663,3 @@
     SkIPoint offset;
     bitmap.extractAlpha(&alpha, &paint, nullptr, &offset);
 }
-