Reland "Allow rect and circle blur fast cases to be used with rotation matrices."

This is a reland of 2bded27a961b4d301f2d8d88829b164fa35714ef

Original change's description:
> Allow rect and circle blur fast cases to be used with rotation matrices.
>
> For circles this is trivial. The existing shader works as is.
>
> For rects this requires back projecting from device space.
>
> Adds a GM for rotated rect blurs and modifies a circle blur GM to add
> rotation.
>
> Bug: chromium:1087705
>
> Change-Id: I6b969552fbcc9f9997cfa061b3a312a5a71e8841
> Reviewed-on: https://skia-review.googlesource.com/c/skia/+/318757
> Reviewed-by: Robert Phillips <robertphillips@google.com>
> Commit-Queue: Brian Salomon <bsalomon@google.com>

Bug: chromium:1087705
Change-Id: Ie3ef96c40461665f6207225a15e19b4cf5bcba94
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/319338
Reviewed-by: Brian Salomon <bsalomon@google.com>
Commit-Queue: Brian Salomon <bsalomon@google.com>
diff --git a/gm/blurcircles.cpp b/gm/blurcircles.cpp
index 18c2461..449fc49f 100644
--- a/gm/blurcircles.cpp
+++ b/gm/blurcircles.cpp
@@ -34,12 +34,12 @@
     }
 
     void onOnceBeforeDraw() override {
-        const float blurRadii[kNumBlurs] = { 1,5,10,20 };
+        const float blurRadii[kNumBlurs] = {1.f, 5.f, 10.f, 20.f};
 
         for (int i = 0; i < kNumBlurs; ++i) {
             fBlurFilters[i] = SkMaskFilter::MakeBlur(
                                     kNormal_SkBlurStyle,
-                                    SkBlurMask::ConvertRadiusToSigma(SkIntToScalar(blurRadii[i])));
+                                    SkBlurMask::ConvertRadiusToSigma(blurRadii[i]));
         }
     }
 
@@ -47,18 +47,23 @@
         canvas->scale(1.5f, 1.5f);
         canvas->translate(50,50);
 
-        const int circleRadii[] = { 5,10,25,50 };
+        const float circleRadii[] = {5.f, 10.f, 25.f, 50.f};
 
         for (size_t i = 0; i < kNumBlurs; ++i) {
             SkAutoCanvasRestore autoRestore(canvas, true);
-            canvas->translate(0, SkIntToScalar(150*i));
+            canvas->translate(0, 150.f*i);
             for (size_t j = 0; j < SK_ARRAY_COUNT(circleRadii); ++j) {
                 SkPaint paint;
                 paint.setColor(SK_ColorBLACK);
                 paint.setMaskFilter(fBlurFilters[i]);
 
-                canvas->drawCircle(SkIntToScalar(50),SkIntToScalar(50),SkIntToScalar(circleRadii[j]),paint);
-                canvas->translate(SkIntToScalar(150), 0);
+                static constexpr SkPoint kCenter = {50.f, 50.f};
+                // Add a rotation to make sure the GPU fast path handles it correctly.
+                canvas->save();
+                canvas->rotate(j*22.f, kCenter.fX, kCenter.fY);
+                canvas->drawCircle(kCenter, circleRadii[j], paint);
+                canvas->restore();
+                canvas->translate(150.f, 0.f);
             }
         }
     }
diff --git a/gm/blurrect.cpp b/gm/blurrect.cpp
index 35ef0f6..4416961 100644
--- a/gm/blurrect.cpp
+++ b/gm/blurrect.cpp
@@ -35,6 +35,8 @@
 #include "src/gpu/GrRecordingContextPriv.h"
 #include "tools/timer/TimeUtils.h"
 
+#include <vector>
+
 #define STROKE_WIDTH    SkIntToScalar(10)
 
 typedef void (*Proc)(SkCanvas*, const SkRect&, const SkPaint&);
@@ -440,13 +442,13 @@
                 for (size_t widthIdx = 0; widthIdx < kNumSizes; ++widthIdx) {
                     const auto& r =  fReferenceMasks[sigmaIdx][heightIdx][widthIdx];
                     const auto& a =     fActualMasks[sigmaIdx][heightIdx][widthIdx];
-                          auto& d = fMaskDifferences[sigmaIdx][heightIdx][widthIdx];
+                    auto& d       = fMaskDifferences[sigmaIdx][heightIdx][widthIdx];
                     // The actual image might not be present if we're on an abandoned GrContext.
                     if (!a) {
                         d.reset();
                         continue;
                     }
-                    SkASSERT(r->width()  == a->width());
+                    SkASSERT(r->width() == a->width());
                     SkASSERT(r->height() == a->height());
                     auto ii = SkImageInfo::Make(r->width(), r->height(),
                                                 kRGBA_8888_SkColorType, kPremul_SkAlphaType);
@@ -495,15 +497,64 @@
     bool fRecalcMasksForAnimation = false;
 };
 
-// Delete these when C++17.
-constexpr int BlurRectCompareGM::kSizes[];
-constexpr float BlurRectCompareGM::kSigmas[];
-constexpr size_t BlurRectCompareGM::kNumSizes;
-constexpr size_t BlurRectCompareGM::kNumSigmas;
-
 }  // namespace skiagm
 
 //////////////////////////////////////////////////////////////////////////////
 
 DEF_GM(return new BlurRectGM("blurrects", 0xFF);)
 DEF_GM(return new skiagm::BlurRectCompareGM();)
+
+//////////////////////////////////////////////////////////////////////////////
+
+DEF_SIMPLE_GM(blur_matrix_rect, canvas, 650, 685) {
+    static constexpr auto kRect = SkRect::MakeWH(14, 60);
+    static constexpr float kSigmas[] = {0.5f, 1.2f, 2.3f, 3.9f, 7.4f};
+    static constexpr size_t kNumSigmas = SK_ARRAY_COUNT(kSigmas);
+
+    const SkPoint c = {kRect.centerX(), kRect.centerY()};
+
+    std::vector<SkMatrix> matrices;
+
+    matrices.push_back(SkMatrix::RotateDeg(4.f, c));
+
+    matrices.push_back(SkMatrix::RotateDeg(63.f, c));
+
+    matrices.push_back(SkMatrix::RotateDeg(30.f, c));
+    matrices.back().preScale(1.1f, .5f);
+
+    matrices.push_back(SkMatrix::RotateDeg(147.f, c));
+    matrices.back().preScale(3.f, .1f);
+
+    SkMatrix mirror;
+    mirror.setAll(0, 1, 0,
+                  1, 0, 0,
+                  0, 0, 1);
+    matrices.push_back(SkMatrix::Concat(mirror, matrices.back()));
+
+    matrices.push_back(SkMatrix::RotateDeg(197.f, c));
+    matrices.back().preSkew(.3f, -.5f);
+
+    auto bounds = SkRect::MakeEmpty();
+    for (const auto& m : matrices) {
+        SkRect mapped;
+        m.mapRect(&mapped, kRect);
+        bounds.joinNonEmptyArg(mapped.makeSorted());
+    }
+    float blurPad = 2.f*kSigmas[kNumSigmas - 1];
+    bounds.outset(blurPad, blurPad);
+    canvas->translate(-bounds.left(), -bounds.top());
+    for (auto sigma : kSigmas) {
+        SkPaint paint;
+        paint.setMaskFilter(SkMaskFilter::MakeBlur(kNormal_SkBlurStyle, sigma));
+        canvas->save();
+        for (const auto& m : matrices) {
+            canvas->save();
+            canvas->concat(m);
+            canvas->drawRect(kRect, paint);
+            canvas->restore();
+            canvas->translate(0, bounds.height());
+        }
+        canvas->restore();
+        canvas->translate(bounds.width(), 0);
+    }
+}
diff --git a/include/core/SkMatrix.h b/include/core/SkMatrix.h
index 07d4a67..20791be 100644
--- a/include/core/SkMatrix.h
+++ b/include/core/SkMatrix.h
@@ -97,6 +97,11 @@
         m.setRotate(deg);
         return m;
     }
+    static SkMatrix SK_WARN_UNUSED_RESULT RotateDeg(SkScalar deg, SkPoint pt) {
+        SkMatrix m;
+        m.setRotate(deg, pt.x(), pt.y());
+        return m;
+    }
     static SkMatrix SK_WARN_UNUSED_RESULT RotateRad(SkScalar rad) {
         return RotateDeg(SkRadiansToDegrees(rad));
     }
diff --git a/src/core/SkBlurMF.cpp b/src/core/SkBlurMF.cpp
index 177f5b2..55581ff 100644
--- a/src/core/SkBlurMF.cpp
+++ b/src/core/SkBlurMF.cpp
@@ -695,10 +695,6 @@
         return false;
     }
 
-    if (!viewMatrix.isScaleTranslate()) {
-        return false;
-    }
-
     // TODO: we could handle blurred stroked circles
     if (!shape.style().isSimpleFill()) {
         return false;
@@ -715,24 +711,24 @@
         return false;
     }
 
-    SkRRect devRRect;
-    if (!srcRRect.transform(viewMatrix, &devRRect)) {
-        return false;
-    }
-
-    if (!SkRRectPriv::AllCornersCircular(devRRect)) {
-        return false;
-    }
-
     std::unique_ptr<GrFragmentProcessor> fp;
 
-    if (devRRect.isRect() || SkRRectPriv::IsCircle(devRRect)) {
-        if (devRRect.isRect()) {
+    bool canBeRect = srcRRect.isRect() && viewMatrix.preservesRightAngles();
+    bool canBeCircle = SkRRectPriv::IsCircle(srcRRect) && viewMatrix.isSimilarity();
+    if (canBeRect || canBeCircle) {
+        if (canBeRect) {
             fp = GrRectBlurEffect::Make(
                     /*inputFP=*/nullptr, context, *context->priv().caps()->shaderCaps(),
-                    devRRect.rect(), xformedSigma);
+                    srcRRect.rect(), viewMatrix, xformedSigma);
         } else {
-            fp = GrCircleBlurFragmentProcessor::Make(/*inputFP=*/nullptr, context, devRRect.rect(),
+            SkPoint center = {srcRRect.getBounds().centerX(), srcRRect.getBounds().centerY()};
+            viewMatrix.mapPoints(&center, 1);
+            SkScalar radius = viewMatrix.mapVector(0, srcRRect.width()/2.f).length();
+            SkRect devBounds = {center.x() - radius,
+                                center.y() - radius,
+                                center.x() + radius,
+                                center.y() + radius};
+            fp = GrCircleBlurFragmentProcessor::Make(/*inputFP=*/nullptr, context, devBounds,
                                                      xformedSigma);
         }
 
@@ -742,20 +738,32 @@
         paint.setCoverageFragmentProcessor(std::move(fp));
 
         SkRect srcProxyRect = srcRRect.rect();
-        SkScalar outsetX = 3.0f*fSigma;
-        SkScalar outsetY = 3.0f*fSigma;
-        if (this->ignoreXform()) {
-            // When we're ignoring the CTM the padding added to the source rect also needs to ignore
-            // the CTM. The matrix passed in here is guaranteed to be just scale and translate so we
-            // can just grab the X and Y scales off the matrix and pre-undo the scale.
+        // Determine how much to outset the src rect to ensure we hit pixels within three sigma.
+        SkScalar outsetX = 3.0f*xformedSigma;
+        SkScalar outsetY = 3.0f*xformedSigma;
+        if (viewMatrix.isScaleTranslate()) {
             outsetX /= SkScalarAbs(viewMatrix.getScaleX());
             outsetY /= SkScalarAbs(viewMatrix.getScaleY());
+        } else {
+            SkSize scale;
+            if (!viewMatrix.decomposeScale(&scale, nullptr)) {
+                return false;
+            }
+            outsetX /= scale.width();
+            outsetY /= scale.height();
         }
         srcProxyRect.outset(outsetX, outsetY);
 
         renderTargetContext->drawRect(clip, std::move(paint), GrAA::kNo, viewMatrix, srcProxyRect);
         return true;
     }
+    if (!viewMatrix.isScaleTranslate()) {
+        return false;
+    }
+    SkRRect devRRect;
+    if (!srcRRect.transform(viewMatrix, &devRRect) || !SkRRectPriv::AllCornersCircular(devRRect)) {
+        return false;
+    }
 
     fp = GrRRectBlurEffect::Make(/*inputFP=*/nullptr, context, fSigma, xformedSigma,
                                  srcRRect, devRRect);
diff --git a/src/gpu/effects/GrRectBlurEffect.fp b/src/gpu/effects/GrRectBlurEffect.fp
index f02de9c..9c8b27e 100644
--- a/src/gpu/effects/GrRectBlurEffect.fp
+++ b/src/gpu/effects/GrRectBlurEffect.fp
@@ -29,6 +29,9 @@
 layout(when= highp) uniform float4 rectF;
 layout(when=!highp) uniform half4  rectH;
 
+layout(key) in bool applyInvVM;
+layout(when=applyInvVM) in uniform float3x3 invVM;
+
 // Effect that is a LUT for integral of normal distribution. The value at x:[0,6*sigma] is the
 // integral from -inf to (3*sigma - x). I.e. x is mapped from [0, 6*sigma] to [3*sigma to -3*sigma].
 // The flip saves a reversal in the shader.
@@ -43,10 +46,6 @@
             kCompatibleWithCoverageAsAlpha_OptimizationFlag
 }
 
-@constructorParams {
-    GrSamplerState samplerParams
-}
-
 @samplerParams(integral) {
     samplerParams
 }
@@ -94,19 +93,52 @@
      static std::unique_ptr<GrFragmentProcessor> Make(std::unique_ptr<GrFragmentProcessor> inputFP,
                                                       GrRecordingContext* context,
                                                       const GrShaderCaps& caps,
-                                                      const SkRect& rect, float sigma) {
-         SkASSERT(rect.isSorted());
+                                                      const SkRect& srcRect,
+                                                      const SkMatrix& viewMatrix,
+                                                      float transformedSigma) {
+         SkASSERT(viewMatrix.preservesRightAngles());
+         SkASSERT(srcRect.isSorted());
+
+         SkMatrix invM;
+         SkRect rect;
+         if (viewMatrix.isScaleTranslate()) {
+             invM = SkMatrix::I();
+             // We can do everything in device space when there is no rotation.
+             SkAssertResult(viewMatrix.mapRect(&rect, srcRect));
+         } else {
+             // The view matrix may scale, perhaps anisotropically. But we want to apply our device
+             // space "transformedSigma" to the delta of frag coord from the rect edges. Factor out
+             // the scaling to define a space that is purely rotation/translation from device space
+             // (and scale from src space) We'll meet in the middle: pre-scale the src rect to be in
+             // this space and then apply the inverse of the rotation/translation portion to the
+             // frag coord.
+             SkMatrix m;
+             SkSize scale;
+             if (!viewMatrix.decomposeScale(&scale, &m)) {
+                 return nullptr;
+             }
+             if (!m.invert(&invM)) {
+                 return nullptr;
+             }
+             rect = {srcRect.left()   * scale.width(),
+                     srcRect.top()    * scale.height(),
+                     srcRect.right()  * scale.width(),
+                     srcRect.bottom() * scale.height()};
+         }
+
          if (!caps.floatIs32Bits()) {
              // We promote the math that gets us into the Gaussian space to full float when the rect
              // coords are large. If we don't have full float then fail. We could probably clip the
              // rect to an outset device bounds instead.
-             if (SkScalarAbs(rect.fLeft)  > 16000.f || SkScalarAbs(rect.fTop)    > 16000.f ||
-                 SkScalarAbs(rect.fRight) > 16000.f || SkScalarAbs(rect.fBottom) > 16000.f) {
+             if (SkScalarAbs(rect.fLeft)   > 16000.f ||
+                 SkScalarAbs(rect.fTop)    > 16000.f ||
+                 SkScalarAbs(rect.fRight)  > 16000.f ||
+                 SkScalarAbs(rect.fBottom) > 16000.f) {
                     return nullptr;
              }
          }
 
-         const float sixSigma = 6 * sigma;
+         const float sixSigma = 6 * transformedSigma;
          std::unique_ptr<GrFragmentProcessor> integral = MakeIntegralFP(context, sixSigma);
          if (!integral) {
              return nullptr;
@@ -117,24 +149,32 @@
          // inset the rect so that the edge of the inset rect corresponds to t = 0 in the texture.
          // It actually simplifies things a bit in the !isFast case, too.
          float threeSigma = sixSigma / 2;
-         SkRect insetRect = {rect.fLeft   + threeSigma,
-                             rect.fTop    + threeSigma,
-                             rect.fRight  - threeSigma,
-                             rect.fBottom - threeSigma};
+         SkRect insetRect = {rect.left()   + threeSigma,
+                             rect.top()    + threeSigma,
+                             rect.right()  - threeSigma,
+                             rect.bottom() - threeSigma};
 
          // In our fast variant we find the nearest horizontal and vertical edges and for each
          // do a lookup in the integral texture for each and multiply them. When the rect is
          // less than 6 sigma wide then things aren't so simple and we have to consider both the
          // left and right edge of the rectangle (and similar in y).
          bool isFast = insetRect.isSorted();
-         return std::unique_ptr<GrFragmentProcessor>(new GrRectBlurEffect(
-                    std::move(inputFP), insetRect, std::move(integral),
-                    isFast, GrSamplerState::Filter::kLinear));
+         return std::unique_ptr<GrFragmentProcessor>(new GrRectBlurEffect(std::move(inputFP),
+                                                                          insetRect,
+                                                                          !invM.isIdentity(),
+                                                                          invM,
+                                                                          std::move(integral),
+                                                                          isFast));
      }
 }
 
 void main() {
     half xCoverage, yCoverage;
+    float2 pos = sk_FragCoord.xy;
+    @if (applyInvVM) {
+        // It'd be great if we could lift this to the VS.
+        pos = (invVM*float3(pos,1)).xy;
+    }
     @if (isFast) {
         // Get the smaller of the signed distance from the frag coord to the left and right
         // edges and similar for y.
@@ -143,11 +183,9 @@
         // extending outward 6 * sigma from the inset rect.
         half2 xy;
         @if (highp) {
-            xy = max(half2(rectF.LT - sk_FragCoord.xy),
-                     half2(sk_FragCoord.xy - rectF.RB));
+            xy = max(half2(rectF.LT - pos), half2(pos - rectF.RB));
        } else {
-            xy = max(half2(rectH.LT - sk_FragCoord.xy),
-                     half2(sk_FragCoord.xy - rectH.RB));
+            xy = max(half2(rectH.LT - pos), half2(pos - rectH.RB));
         }
         xCoverage = sample(integral, half2(xy.x, 0.5)).a;
         yCoverage = sample(integral, half2(xy.y, 0.5)).a;
@@ -169,11 +207,11 @@
         // also factored in.
         half4 rect;
         @if (highp) {
-            rect.LT = half2(rectF.LT - sk_FragCoord.xy);
-            rect.RB = half2(sk_FragCoord.xy - rectF.RB);
+            rect.LT = half2(rectF.LT - pos);
+            rect.RB = half2(pos - rectF.RB);
         } else {
-            rect.LT = half2(rectH.LT - sk_FragCoord.xy);
-            rect.RB = half2(sk_FragCoord.xy - rectH.RB);
+            rect.LT = half2(rectH.LT - pos);
+            rect.RB = half2(pos - rectH.RB);
         }
         xCoverage = 1 - sample(integral, half2(rect.L, 0.5)).a
                       - sample(integral, half2(rect.R, 0.5)).a;
@@ -190,9 +228,13 @@
 }
 
 @test(data) {
-    float sigma = data->fRandom->nextRangeF(3,8);
-    float width = data->fRandom->nextRangeF(200,300);
-    float height = data->fRandom->nextRangeF(200,300);
+    float sigma = data->fRandom->nextRangeF(3, 8);
+    int x = data->fRandom->nextRangeF(1, 200);
+    int y = data->fRandom->nextRangeF(1, 200);
+    float width = data->fRandom->nextRangeF(200, 300);
+    float height = data->fRandom->nextRangeF(200, 300);
+    SkMatrix vm = GrTest::TestMatrixPreservesRightAngles(data->fRandom);
+    auto rect = SkRect::MakeXYWH(x, y, width, height);
     return GrRectBlurEffect::Make(data->inputFP(), data->context(), *data->caps()->shaderCaps(),
-                                  SkRect::MakeWH(width, height), sigma);
+                                  rect, vm, sigma);
 }
diff --git a/src/gpu/effects/generated/GrRectBlurEffect.cpp b/src/gpu/effects/generated/GrRectBlurEffect.cpp
index 681fe79..887f483 100644
--- a/src/gpu/effects/generated/GrRectBlurEffect.cpp
+++ b/src/gpu/effects/generated/GrRectBlurEffect.cpp
@@ -26,6 +26,10 @@
         (void)_outer;
         auto rect = _outer.rect;
         (void)rect;
+        auto applyInvVM = _outer.applyInvVM;
+        (void)applyInvVM;
+        auto invVM = _outer.invVM;
+        (void)invVM;
         auto isFast = _outer.isFast;
         (void)isFast;
         highp = ((abs(rect.left()) > 16000.0 || abs(rect.top()) > 16000.0) ||
@@ -39,82 +43,102 @@
             rectHVar = args.fUniformHandler->addUniform(&_outer, kFragment_GrShaderFlag,
                                                         kHalf4_GrSLType, "rectH");
         }
+        if (applyInvVM) {
+            invVMVar = args.fUniformHandler->addUniform(&_outer, kFragment_GrShaderFlag,
+                                                        kFloat3x3_GrSLType, "invVM");
+        }
         fragBuilder->codeAppendf(
                 R"SkSL(/* key */ bool highp = %s;
 half xCoverage, yCoverage;
+float2 pos = sk_FragCoord.xy;
+@if (%s) {
+    pos = (%s * float3(pos, 1.0)).xy;
+}
 @if (%s) {
     half2 xy;
     @if (highp) {
-        xy = max(half2(%s.xy - sk_FragCoord.xy), half2(sk_FragCoord.xy - %s.zw));
+        xy = max(half2(%s.xy - pos), half2(pos - %s.zw));
     } else {
-        xy = max(half2(float2(%s.xy) - sk_FragCoord.xy), half2(sk_FragCoord.xy - float2(%s.zw)));
+        xy = max(half2(float2(%s.xy) - pos), half2(pos - float2(%s.zw)));
     })SkSL",
-                (highp ? "true" : "false"), (_outer.isFast ? "true" : "false"),
+                (highp ? "true" : "false"), (_outer.applyInvVM ? "true" : "false"),
+                invVMVar.isValid() ? args.fUniformHandler->getUniformCStr(invVMVar) : "float3x3(1)",
+                (_outer.isFast ? "true" : "false"),
                 rectFVar.isValid() ? args.fUniformHandler->getUniformCStr(rectFVar) : "float4(0)",
                 rectFVar.isValid() ? args.fUniformHandler->getUniformCStr(rectFVar) : "float4(0)",
                 rectHVar.isValid() ? args.fUniformHandler->getUniformCStr(rectHVar) : "half4(0)",
                 rectHVar.isValid() ? args.fUniformHandler->getUniformCStr(rectHVar) : "half4(0)");
-        SkString _coords6340("float2(half2(xy.x, 0.5))");
-        SkString _sample6340 = this->invokeChild(1, args, _coords6340.c_str());
+        SkString _coords8314("float2(half2(xy.x, 0.5))");
+        SkString _sample8314 = this->invokeChild(1, args, _coords8314.c_str());
         fragBuilder->codeAppendf(
                 R"SkSL(
     xCoverage = %s.w;)SkSL",
-                _sample6340.c_str());
-        SkString _coords6398("float2(half2(xy.y, 0.5))");
-        SkString _sample6398 = this->invokeChild(1, args, _coords6398.c_str());
+                _sample8314.c_str());
+        SkString _coords8372("float2(half2(xy.y, 0.5))");
+        SkString _sample8372 = this->invokeChild(1, args, _coords8372.c_str());
         fragBuilder->codeAppendf(
                 R"SkSL(
     yCoverage = %s.w;
 } else {
     half4 rect;
     @if (highp) {
-        rect.xy = half2(%s.xy - sk_FragCoord.xy);
-        rect.zw = half2(sk_FragCoord.xy - %s.zw);
+        rect.xy = half2(%s.xy - pos);
+        rect.zw = half2(pos - %s.zw);
     } else {
-        rect.xy = half2(float2(%s.xy) - sk_FragCoord.xy);
-        rect.zw = half2(sk_FragCoord.xy - float2(%s.zw));
+        rect.xy = half2(float2(%s.xy) - pos);
+        rect.zw = half2(pos - float2(%s.zw));
     })SkSL",
-                _sample6398.c_str(),
+                _sample8372.c_str(),
                 rectFVar.isValid() ? args.fUniformHandler->getUniformCStr(rectFVar) : "float4(0)",
                 rectFVar.isValid() ? args.fUniformHandler->getUniformCStr(rectFVar) : "float4(0)",
                 rectHVar.isValid() ? args.fUniformHandler->getUniformCStr(rectHVar) : "half4(0)",
                 rectHVar.isValid() ? args.fUniformHandler->getUniformCStr(rectHVar) : "half4(0)");
-        SkString _coords7765("float2(half2(rect.x, 0.5))");
-        SkString _sample7765 = this->invokeChild(1, args, _coords7765.c_str());
-        SkString _coords7828("float2(half2(rect.z, 0.5))");
-        SkString _sample7828 = this->invokeChild(1, args, _coords7828.c_str());
+        SkString _coords9691("float2(half2(rect.x, 0.5))");
+        SkString _sample9691 = this->invokeChild(1, args, _coords9691.c_str());
+        SkString _coords9754("float2(half2(rect.z, 0.5))");
+        SkString _sample9754 = this->invokeChild(1, args, _coords9754.c_str());
         fragBuilder->codeAppendf(
                 R"SkSL(
     xCoverage = (1.0 - %s.w) - %s.w;)SkSL",
-                _sample7765.c_str(), _sample7828.c_str());
-        SkString _coords7892("float2(half2(rect.y, 0.5))");
-        SkString _sample7892 = this->invokeChild(1, args, _coords7892.c_str());
-        SkString _coords7955("float2(half2(rect.w, 0.5))");
-        SkString _sample7955 = this->invokeChild(1, args, _coords7955.c_str());
+                _sample9691.c_str(), _sample9754.c_str());
+        SkString _coords9818("float2(half2(rect.y, 0.5))");
+        SkString _sample9818 = this->invokeChild(1, args, _coords9818.c_str());
+        SkString _coords9881("float2(half2(rect.w, 0.5))");
+        SkString _sample9881 = this->invokeChild(1, args, _coords9881.c_str());
         fragBuilder->codeAppendf(
                 R"SkSL(
     yCoverage = (1.0 - %s.w) - %s.w;
 })SkSL",
-                _sample7892.c_str(), _sample7955.c_str());
-        SkString _sample8024 = this->invokeChild(0, args);
+                _sample9818.c_str(), _sample9881.c_str());
+        SkString _sample9950 = this->invokeChild(0, args);
         fragBuilder->codeAppendf(
                 R"SkSL(
 half4 inputColor = %s;
 %s = (inputColor * xCoverage) * yCoverage;
 )SkSL",
-                _sample8024.c_str(), args.fOutputColor);
+                _sample9950.c_str(), args.fOutputColor);
     }
 
 private:
     void onSetData(const GrGLSLProgramDataManager& pdman,
                    const GrFragmentProcessor& _proc) override {
         const GrRectBlurEffect& _outer = _proc.cast<GrRectBlurEffect>();
+        {
+            if (invVMVar.isValid()) {
+                static_assert(1 == 1);
+                pdman.setSkMatrix(invVMVar, (_outer.invVM));
+            }
+        }
         auto rect = _outer.rect;
         (void)rect;
         UniformHandle& rectF = rectFVar;
         (void)rectF;
         UniformHandle& rectH = rectHVar;
         (void)rectH;
+        auto applyInvVM = _outer.applyInvVM;
+        (void)applyInvVM;
+        UniformHandle& invVM = invVMVar;
+        (void)invVM;
         auto isFast = _outer.isFast;
         (void)isFast;
 
@@ -124,6 +148,7 @@
     bool highp = false;
     UniformHandle rectFVar;
     UniformHandle rectHVar;
+    UniformHandle invVMVar;
 };
 GrGLSLFragmentProcessor* GrRectBlurEffect::onCreateGLSLInstance() const {
     return new GrGLSLRectBlurEffect();
@@ -134,12 +159,15 @@
                   abs(rect.right()) > 16000.0) ||
                  abs(rect.bottom()) > 16000.0;
     b->add32((uint32_t)highp);
+    b->add32((uint32_t)applyInvVM);
     b->add32((uint32_t)isFast);
 }
 bool GrRectBlurEffect::onIsEqual(const GrFragmentProcessor& other) const {
     const GrRectBlurEffect& that = other.cast<GrRectBlurEffect>();
     (void)that;
     if (rect != that.rect) return false;
+    if (applyInvVM != that.applyInvVM) return false;
+    if (invVM != that.invVM) return false;
     if (isFast != that.isFast) return false;
     return true;
 }
@@ -147,6 +175,8 @@
 GrRectBlurEffect::GrRectBlurEffect(const GrRectBlurEffect& src)
         : INHERITED(kGrRectBlurEffect_ClassID, src.optimizationFlags())
         , rect(src.rect)
+        , applyInvVM(src.applyInvVM)
+        , invVM(src.invVM)
         , isFast(src.isFast) {
     this->cloneAndRegisterAllChildProcessors(src);
 }
@@ -155,17 +185,26 @@
 }
 #if GR_TEST_UTILS
 SkString GrRectBlurEffect::onDumpInfo() const {
-    return SkStringPrintf("(rect=float4(%f, %f, %f, %f), isFast=%s)", rect.left(), rect.top(),
-                          rect.right(), rect.bottom(), (isFast ? "true" : "false"));
+    return SkStringPrintf(
+            "(rect=float4(%f, %f, %f, %f), applyInvVM=%s, invVM=float3x3(%f, %f, %f, %f, %f, %f, "
+            "%f, %f, %f), isFast=%s)",
+            rect.left(), rect.top(), rect.right(), rect.bottom(), (applyInvVM ? "true" : "false"),
+            invVM.rc(0, 0), invVM.rc(1, 0), invVM.rc(2, 0), invVM.rc(0, 1), invVM.rc(1, 1),
+            invVM.rc(2, 1), invVM.rc(0, 2), invVM.rc(1, 2), invVM.rc(2, 2),
+            (isFast ? "true" : "false"));
 }
 #endif
 GR_DEFINE_FRAGMENT_PROCESSOR_TEST(GrRectBlurEffect);
 #if GR_TEST_UTILS
 std::unique_ptr<GrFragmentProcessor> GrRectBlurEffect::TestCreate(GrProcessorTestData* data) {
     float sigma = data->fRandom->nextRangeF(3, 8);
+    int x = data->fRandom->nextRangeF(1, 200);
+    int y = data->fRandom->nextRangeF(1, 200);
     float width = data->fRandom->nextRangeF(200, 300);
     float height = data->fRandom->nextRangeF(200, 300);
+    SkMatrix vm = GrTest::TestMatrixPreservesRightAngles(data->fRandom);
+    auto rect = SkRect::MakeXYWH(x, y, width, height);
     return GrRectBlurEffect::Make(data->inputFP(), data->context(), *data->caps()->shaderCaps(),
-                                  SkRect::MakeWH(width, height), sigma);
+                                  rect, vm, sigma);
 }
 #endif
diff --git a/src/gpu/effects/generated/GrRectBlurEffect.h b/src/gpu/effects/generated/GrRectBlurEffect.h
index b02527a..3c480f8 100644
--- a/src/gpu/effects/generated/GrRectBlurEffect.h
+++ b/src/gpu/effects/generated/GrRectBlurEffect.h
@@ -71,9 +71,37 @@
     static std::unique_ptr<GrFragmentProcessor> Make(std::unique_ptr<GrFragmentProcessor> inputFP,
                                                      GrRecordingContext* context,
                                                      const GrShaderCaps& caps,
-                                                     const SkRect& rect,
-                                                     float sigma) {
-        SkASSERT(rect.isSorted());
+                                                     const SkRect& srcRect,
+                                                     const SkMatrix& viewMatrix,
+                                                     float transformedSigma) {
+        SkASSERT(viewMatrix.preservesRightAngles());
+        SkASSERT(srcRect.isSorted());
+
+        SkMatrix invM;
+        SkRect rect;
+        if (viewMatrix.isScaleTranslate()) {
+            invM = SkMatrix::I();
+            // We can do everything in device space when there is no rotation.
+            SkAssertResult(viewMatrix.mapRect(&rect, srcRect));
+        } else {
+            // The view matrix may scale, perhaps anisotropically. But we want to apply our device
+            // space "transformedSigma" to the delta of frag coord from the rect edges. Factor out
+            // the scaling to define a space that is purely rotation/translation from device space
+            // (and scale from src space) We'll meet in the middle: pre-scale the src rect to be in
+            // this space and then apply the inverse of the rotation/translation portion to the
+            // frag coord.
+            SkMatrix m;
+            SkSize scale;
+            if (!viewMatrix.decomposeScale(&scale, &m)) {
+                return nullptr;
+            }
+            if (!m.invert(&invM)) {
+                return nullptr;
+            }
+            rect = {srcRect.left() * scale.width(), srcRect.top() * scale.height(),
+                    srcRect.right() * scale.width(), srcRect.bottom() * scale.height()};
+        }
+
         if (!caps.floatIs32Bits()) {
             // We promote the math that gets us into the Gaussian space to full float when the rect
             // coords are large. If we don't have full float then fail. We could probably clip the
@@ -84,7 +112,7 @@
             }
         }
 
-        const float sixSigma = 6 * sigma;
+        const float sixSigma = 6 * transformedSigma;
         std::unique_ptr<GrFragmentProcessor> integral = MakeIntegralFP(context, sixSigma);
         if (!integral) {
             return nullptr;
@@ -95,36 +123,44 @@
         // inset the rect so that the edge of the inset rect corresponds to t = 0 in the texture.
         // It actually simplifies things a bit in the !isFast case, too.
         float threeSigma = sixSigma / 2;
-        SkRect insetRect = {rect.fLeft + threeSigma, rect.fTop + threeSigma,
-                            rect.fRight - threeSigma, rect.fBottom - threeSigma};
+        SkRect insetRect = {rect.left() + threeSigma, rect.top() + threeSigma,
+                            rect.right() - threeSigma, rect.bottom() - threeSigma};
 
         // In our fast variant we find the nearest horizontal and vertical edges and for each
         // do a lookup in the integral texture for each and multiply them. When the rect is
         // less than 6 sigma wide then things aren't so simple and we have to consider both the
         // left and right edge of the rectangle (and similar in y).
         bool isFast = insetRect.isSorted();
-        return std::unique_ptr<GrFragmentProcessor>(
-                new GrRectBlurEffect(std::move(inputFP), insetRect, std::move(integral), isFast,
-                                     GrSamplerState::Filter::kLinear));
+        return std::unique_ptr<GrFragmentProcessor>(new GrRectBlurEffect(std::move(inputFP),
+                                                                         insetRect,
+                                                                         !invM.isIdentity(),
+                                                                         invM,
+                                                                         std::move(integral),
+                                                                         isFast));
     }
     GrRectBlurEffect(const GrRectBlurEffect& src);
     std::unique_ptr<GrFragmentProcessor> clone() const override;
     const char* name() const override { return "RectBlurEffect"; }
     bool usesExplicitReturn() const override;
     SkRect rect;
+    bool applyInvVM;
+    SkMatrix invVM;
     bool isFast;
 
 private:
     GrRectBlurEffect(std::unique_ptr<GrFragmentProcessor> inputFP,
                      SkRect rect,
+                     bool applyInvVM,
+                     SkMatrix invVM,
                      std::unique_ptr<GrFragmentProcessor> integral,
-                     bool isFast,
-                     GrSamplerState samplerParams)
+                     bool isFast)
             : INHERITED(kGrRectBlurEffect_ClassID,
                         (OptimizationFlags)(inputFP ? ProcessorOptimizationFlags(inputFP.get())
                                                     : kAll_OptimizationFlags) &
                                 kCompatibleWithCoverageAsAlpha_OptimizationFlag)
             , rect(rect)
+            , applyInvVM(applyInvVM)
+            , invVM(invVM)
             , isFast(isFast) {
         this->registerChild(std::move(inputFP), SkSL::SampleUsage::PassThrough());
         SkASSERT(integral);