New GrClipStack supporting only intersect/difference

Overview doc: https://docs.google.com/document/d/1ddIk74A1rL5Kj5kGcnInOYKVAXs3J2IsSgU5BLit0Ng/edit?usp=sharing

This is the new clip stack that will replace GrClipStackClip. The doc
link in the CL description has a much more detailed overview of what the
strategy of the new clip stack is, but at a very high level:

1. Add a temporary #define that lets SkGpuDevice switch between the old
stack and the new stack. With the new GrClipStack, SkGpuDevice extends
SkBaseDevice directly and must implement all device clipping virtuals.
   - If you look at patchset 5 and earlier, the define defaults to on
     so I can test it on the bots, etc. but the plan will be for it to
     default to off when this lands so it's only running on unit tests.
     Then in a follow up, I'll turn it on for our bots but keep it off in
     chrome and android. If everything looks good, chrome can then be
     turned on. There is a more extensive migration plan for android
     because of the expanding clip ops, but that is covered at the end of
     the overview doc.

2. GrClipStack manages save/restore logic of the stack and extends GrClip,
so the cpp file also includes code to apply a GrAppliedClip. At the moment
the apply strategy is as close to that in GrReducedClip and
GrClipStackClip as I could make it. Down the road, I think we can explore
other analytic coverage options and a clip atlas that replaces the unified
SW mask.
   - Once GrClipStack is enabled everywhere, it means GrReducedClip and
     GrClipStackClip can be deleted, so I'm not too worried about sharing
     code between the two. A lot is already shared through the use of
     GrSWMaskHelper and GrStencilMaskHelper.
   - SkClipStack and SkClipStackDevice are still used by the PDF and SVG
     backends, so they aren't necessarily deletable.

3. The GrClipStack only handles intersect and difference ops. It
represents all geometric clip operations as an element. The stack itself
is controlled by the "save record", which tracks aggregate bounds, valid
elements, and the non-geometric clip shader.
   - When a new save record is pushed on the stack, older elements are
     inactive. This means they cannot be modified, since they may need to
     be activated again when the current save is popped off the stack.
     However, they can still affect the clip during application.
   - When a new element is pushed on the stack, older elements may be
     invalidated. This means they don't need to be considered any more
     because they are redundant with the new clip shape (e.g. nested round
     rect clips only have to keep the innermost valid).


Bug: skia:10205
Change-Id: I68ccfd414033aa9014b102efaee3ad50a806f793
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/308283
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
Reviewed-by: Robert Phillips <robertphillips@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
diff --git a/gn/gpu.gni b/gn/gpu.gni
index 88fa5e5..46d22e5 100644
--- a/gn/gpu.gni
+++ b/gn/gpu.gni
@@ -58,6 +58,8 @@
   "$_src/gpu/GrClientMappedBufferManager.cpp",
   "$_src/gpu/GrClientMappedBufferManager.h",
   "$_src/gpu/GrClip.h",
+  "$_src/gpu/GrClipStack.cpp",
+  "$_src/gpu/GrClipStack.h",
   "$_src/gpu/GrClipStackClip.cpp",
   "$_src/gpu/GrClipStackClip.h",
   "$_src/gpu/GrColor.h",
diff --git a/gn/tests.gni b/gn/tests.gni
index 5ac9e8f..b12e794 100644
--- a/gn/tests.gni
+++ b/gn/tests.gni
@@ -100,6 +100,7 @@
   "$_tests/GrAHardwareBufferTest.cpp",
   "$_tests/GrBlockAllocatorTest.cpp",
   "$_tests/GrCCPRTest.cpp",
+  "$_tests/GrClipStackTest.cpp",
   "$_tests/GrContextAbandonTest.cpp",
   "$_tests/GrContextFactoryTest.cpp",
   "$_tests/GrContextOOM.cpp",
diff --git a/src/core/SkRRect.cpp b/src/core/SkRRect.cpp
index 8a068bc..9121e93 100644
--- a/src/core/SkRRect.cpp
+++ b/src/core/SkRRect.cpp
@@ -757,7 +757,6 @@
     }
 
     SkASSERT(innerBounds.isSorted() && !innerBounds.isEmpty());
-    SkASSERT(rr.contains(innerBounds));
     return innerBounds;
 }
 
diff --git a/src/gpu/GrClipStack.cpp b/src/gpu/GrClipStack.cpp
new file mode 100644
index 0000000..22644b4
--- /dev/null
+++ b/src/gpu/GrClipStack.cpp
@@ -0,0 +1,1633 @@
+/*
+ * Copyright 2020 Google LLC
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "src/gpu/GrClipStack.h"
+
+#include "include/core/SkMatrix.h"
+#include "src/core/SkRRectPriv.h"
+#include "src/core/SkRectPriv.h"
+#include "src/core/SkTaskGroup.h"
+#include "src/gpu/GrClip.h"
+#include "src/gpu/GrContextPriv.h"
+#include "src/gpu/GrDeferredProxyUploader.h"
+#include "src/gpu/GrProxyProvider.h"
+#include "src/gpu/GrRecordingContextPriv.h"
+#include "src/gpu/GrRenderTargetContextPriv.h"
+#include "src/gpu/GrSWMaskHelper.h"
+#include "src/gpu/GrStencilMaskHelper.h"
+#include "src/gpu/ccpr/GrCoverageCountingPathRenderer.h"
+#include "src/gpu/effects/GrBlendFragmentProcessor.h"
+#include "src/gpu/effects/GrConvexPolyEffect.h"
+#include "src/gpu/effects/GrRRectEffect.h"
+#include "src/gpu/effects/GrTextureEffect.h"
+#include "src/gpu/effects/generated/GrAARectEffect.h"
+#include "src/gpu/effects/generated/GrDeviceSpaceEffect.h"
+#include "src/gpu/geometry/GrQuadUtils.h"
+
+namespace {
+
+// This captures which of the two elements in (A op B) would be required when they are combined,
+// where op is intersect or difference.
+enum class ClipGeometry {
+    kEmpty,  // the combination has no coverage, so neither element is needed
+    kAOnly,  // the combination is equivalent to A alone
+    kBOnly,  // the combination is equivalent to B alone
+    kBoth    // the combination cannot be simplified, so both elements are required
+};
+
+// A and B can be Element, SaveRecord, or Draw. Supported combinations are, order not mattering,
+// (Element, Element), (Element, SaveRecord), (Element, Draw), and (SaveRecord, Draw).
+template<typename A, typename B>  // A and B each expose op(), outerBounds(), and contains(<other>)
+static ClipGeometry get_clip_geometry(const A& a, const B& b) {
+    // NOTE: SkIRect::Intersects() returns false when two rectangles touch at an edge (so the result
+    // is empty). This behavior is desired for the following clip effect policies.
+    if (a.op() == SkClipOp::kIntersect) {
+        if (b.op() == SkClipOp::kIntersect) {
+            // Intersect (A) + Intersect (B)
+            if (!SkIRect::Intersects(a.outerBounds(), b.outerBounds())) {
+                // Regions with non-zero coverage are disjoint, so intersection = empty
+                return ClipGeometry::kEmpty;
+            } else if (b.contains(a)) {
+                // B's full coverage region contains entirety of A, so intersection = A
+                return ClipGeometry::kAOnly;
+            } else if (a.contains(b)) {
+                // A's full coverage region contains entirety of B, so intersection = B
+                return ClipGeometry::kBOnly;
+            } else {
+                // The shapes intersect in some non-trivial manner
+                return ClipGeometry::kBoth;
+            }
+        } else {
+            SkASSERT(b.op() == SkClipOp::kDifference);
+            // Intersect (A) + Difference (B)
+            if (!SkIRect::Intersects(a.outerBounds(), b.outerBounds())) {
+                // A only intersects B's full coverage region, so intersection = A
+                return ClipGeometry::kAOnly;
+            } else if (b.contains(a)) {
+                // B's zero coverage region completely contains A, so intersection = empty
+                return ClipGeometry::kEmpty;
+            } else {
+                // Intersection cannot be simplified. Note that the combination of an intersect
+                // and difference op in this order cannot produce kBOnly
+                return ClipGeometry::kBoth;
+            }
+        }
+    } else {
+        SkASSERT(a.op() == SkClipOp::kDifference);
+        if (b.op() == SkClipOp::kIntersect) {
+            // Difference (A) + Intersect (B) - the mirror of Intersect(A) + Difference(B),
+            // but combining is commutative so this is equivalent barring naming.
+            if (!SkIRect::Intersects(b.outerBounds(), a.outerBounds())) {
+                // B only intersects A's full coverage region, so intersection = B
+                return ClipGeometry::kBOnly;
+            } else if (a.contains(b)) {
+                // A's zero coverage region completely contains B, so intersection = empty
+                return ClipGeometry::kEmpty;
+            } else {
+                // Cannot be simplified
+                return ClipGeometry::kBoth;
+            }
+        } else {
+            SkASSERT(b.op() == SkClipOp::kDifference);
+            // Difference (A) + Difference (B)
+            if (a.contains(b)) {
+                // A's zero coverage region contains B, so B doesn't remove any extra
+                // coverage from their intersection.
+                return ClipGeometry::kAOnly;
+            } else if (b.contains(a)) {
+                // Mirror of the above case, intersection = B instead
+                return ClipGeometry::kBOnly;
+            } else {
+                // Intersection of the two differences cannot be simplified. Note that for
+                // this op combination it is not possible to produce kEmpty.
+                return ClipGeometry::kBoth;
+            }
+        }
+    }
+}
+
+// Returns whether a contains b, where 'a' is given in the local space mapped by 'aToDevice'
+// ('deviceToA' is its inverse) and 'b' is a rect in the local space mapped by 'bToDevice'.
+// Only convex shapes can report containment. With matching AA policies the geometry is compared
+// as-is, on the assumption that coverage AA or the GPU's non-AA rasterization applies to A and B
+// equivalently; in mixed AA mode B is first outset by half a pixel in device space.
+static bool shape_contains_rect(
+        const GrShape& a, const SkMatrix& aToDevice, const SkMatrix& deviceToA,
+        const SkRect& b, const SkMatrix& bToDevice, bool mixedAAMode) {
+    if (!a.convex()) {
+        return false;
+    }
+
+    if (!mixedAAMode && aToDevice == bToDevice) {
+        // Shared coordinate space and AA policy: no mapping is necessary
+        return a.conservativeContains(b);
+    }
+
+    GrQuad bInDevice = GrQuad::MakeFromRect(b, bToDevice);
+    if (any(bInDevice.w4f() < SkPathPriv::kW0PlaneDistance)) {
+        // Part of B projects behind the W = 0 plane and would be clipped to infinity, so it is
+        // extremely unlikely that A can contain it.
+        return false;
+    }
+    if (mixedAAMode) {
+        // A 1/2px outset provides a buffer against a non-AA clip or draw snapping outside of an
+        // AA element.
+        GrQuadUtils::Outset({0.5f, 0.5f, 0.5f, 0.5f}, &bInDevice);
+    }
+
+    // Since a is convex, it contains all of b if it contains each of the 4 corners of b's
+    // device-space quad, so map every corner into a's space and stop at the first miss.
+    bool allCornersInside = true;
+    for (int corner = 0; corner < 4 && allCornersInside; ++corner) {
+        SkPoint cornerInA = bInDevice.point(corner);
+        deviceToA.mapPoints(&cornerInA, 1);
+        allCornersInside = a.conservativeContains(cornerInA);
+    }
+
+    return allCornersInside;
+}
+
+
+static SkIRect subtract(const SkIRect& a, const SkIRect& b, bool exact) {
+    // SkRectPriv::Subtract() writes a rectangle of A that lies outside of B and reports whether
+    // that rectangle is exactly A-B.
+    SkIRect remainder;
+    const bool isExact = SkRectPriv::Subtract(a, b, &remainder);
+    if (exact && !isExact) {
+        // A-B cannot be represented exactly, and for our purposes that means keeping all of A
+        return a;
+    }
+    return remainder;
+}
+
+static GrClipEdgeType get_clip_edge_type(SkClipOp op, GrAA aa) {
+    const bool antiAliased = aa == GrAA::kYes;
+    if (op != SkClipOp::kIntersect) {  // non-intersect ops keep the outside, i.e. inverse fill
+        return antiAliased ? GrClipEdgeType::kInverseFillAA : GrClipEdgeType::kInverseFillBW;
+    }
+    return antiAliased ? GrClipEdgeType::kFillAA : GrClipEdgeType::kFillBW;
+}
+
+static constexpr uint32_t kInvalidGenID  = 0;
+static constexpr uint32_t kEmptyGenID    = 1;
+static constexpr uint32_t kWideOpenGenID = 2;
+
+static uint32_t next_gen_id() {
+    // 0-2 are reserved for invalid, empty & wide-open; the loop also skips them after wrap-around
+    static constexpr uint32_t kFirstUnreservedGenID = 3;
+    static std::atomic<uint32_t> nextID{kFirstUnreservedGenID};
+
+    uint32_t id;
+    do {
+        id = nextID++;
+    } while (id < kFirstUnreservedGenID);
+    return id;
+}
+
+// Functions for rendering / applying clip shapes in various ways
+// The general strategy is:
+//  - Represent the clip element as an analytic FP that tests sk_FragCoord vs. its device shape
+//  - Render the clip element to the stencil, if stencil is allowed and supports the AA, and the
+//    size of the element indicates stenciling will be worth it, vs. making a mask.
+//  - Try to put the individual element into a clip atlas, which is then sampled during the draw
+//  - Render the element into a SW mask and upload it. If possible, the SW rasterization happens
+//    in parallel. A mask rendered that way is returned as a view with kMaskOrigin.
+static constexpr GrSurfaceOrigin kMaskOrigin = kTopLeft_GrSurfaceOrigin;
+
+static GrFPResult analytic_clip_fp(const GrClipStack::Element& e,
+                                   const GrShaderCaps& caps,
+                                   std::unique_ptr<GrFragmentProcessor> fp) {
+    // Analytic clip FPs work in device space, so rect/rrect effects require an identity matrix
+    const GrClipEdgeType edgeType = get_clip_edge_type(e.fOp, e.fAA);
+    const bool inDeviceSpace = e.fLocalToDevice.isIdentity();
+    if (inDeviceSpace && e.fShape.isRect()) {
+        return GrFPSuccess(GrAARectEffect::Make(std::move(fp), edgeType, e.fShape.rect()));
+    }
+    if (inDeviceSpace && e.fShape.isRRect()) {
+        return GrRRectEffect::Make(std::move(fp), edgeType, e.fShape.rrect(), caps);
+    }
+
+    // Convex shapes made only of line segments (which covers rects with a non-identity
+    // transform) can be mapped into device space and handled as a convex polygon.
+    if (e.fShape.segmentMask() == SkPath::kLine_SegmentMask && e.fShape.convex()) {
+        SkPath polygon;
+        e.fShape.asPath(&polygon);
+        polygon.transform(e.fLocalToDevice);
+        return GrConvexPolyEffect::Make(std::move(fp), edgeType, polygon);
+    }
+
+    return GrFPFailure(std::move(fp));
+}
+
+// TODO: Currently this only works with CCPR because CCPR owns and manages the clip atlas. The
+// high-level concept should be generalized to support any path renderer going into a shared atlas.
+static std::unique_ptr<GrFragmentProcessor> clip_atlas_fp(GrCoverageCountingPathRenderer* ccpr,
+                                                          uint32_t opsTaskID,
+                                                          const SkIRect& bounds,
+                                                          const GrClipStack::Element& e,
+                                                          SkPath* devicePath,
+                                                          const GrCaps& caps,
+                                                          std::unique_ptr<GrFragmentProcessor> fp) {
+    // TODO: Currently the atlas manages device-space paths, so we have to transform by the ctm.
+    // In the future, the atlas manager should see the local path and the ctm so that it can
+    // cache across integer-only translations (internally, it already does this, just not exposed).
+    if (devicePath->isEmpty()) {  // only computed if not already filled in
+        e.fShape.asPath(devicePath);
+        devicePath->transform(e.fLocalToDevice);
+        SkASSERT(!devicePath->isEmpty());
+    }
+
+    SkASSERT(!devicePath->isInverseFillType());
+    if (e.fOp == SkClipOp::kIntersect) {
+        return ccpr->makeClipProcessor(std::move(fp), opsTaskID, *devicePath, bounds, caps);
+    } else {
+        // Use kDstOut to convert the non-inverted mask alpha into (1-alpha), so the atlas only
+        // ever renders non-inverse filled paths.
+        //  - When the input FP is null, this turns into "(1-sample(ccpr, 1).a) * input"
+        //  - When not null, it works out to
+        //       (1-sample(ccpr, input.rgb1).a) * sample(fp, input.rgb1) * input.a
+        //  - Since clips only care about the alpha channel, these are both equivalent to the
+        //    desired product of (1-ccpr) * fp * input.a.
+        return GrBlendFragmentProcessor::Make(
+                ccpr->makeClipProcessor(nullptr, opsTaskID, *devicePath, bounds, caps), // src
+                std::move(fp),                                                          // dst
+                SkBlendMode::kDstOut);
+    }
+}
+
+static void draw_to_sw_mask(GrSWMaskHelper* helper, const GrClipStack::Element& e, bool clearMask) {
+    // If the first element to draw is an intersect, we clear to 0 and will draw it directly with
+    // coverage 1 (subsequent intersect elements will be inverse-filled and draw 0 outside).
+    // If the first element to draw is a difference, we clear to 1, and in all cases we draw the
+    // difference element directly with coverage 0.
+    if (clearMask) {
+        helper->clear(e.fOp == SkClipOp::kIntersect ? 0x00 : 0xFF);
+    }
+
+    uint8_t alpha;  // coverage value written where the shape is drawn
+    bool invert;    // whether the inverse fill of the shape is drawn instead of the shape
+    if (e.fOp == SkClipOp::kIntersect) {
+        // Intersect modifies pixels outside of its geometry. If this isn't the first op, we
+        // draw the inverse-filled shape with 0 coverage to erase everything outside the element.
+        // But if we are the first element, we can draw directly with coverage 1 since we
+        // cleared to 0.
+        if (clearMask) {
+            alpha = 0xFF;
+            invert = false;
+        } else {
+            alpha = 0x00;
+            invert = true;
+        }
+    } else {
+        // For difference ops, can always just subtract the shape directly by drawing 0 coverage
+        SkASSERT(e.fOp == SkClipOp::kDifference);
+        alpha = 0x00;
+        invert = false;
+    }
+
+    // Draw the shape; based on how we've initialized the buffer and chosen alpha+invert,
+    // every element is drawn with the kReplace_Op
+    if (invert) {
+        // Must invert the path
+        SkASSERT(!e.fShape.inverted());
+        // TODO: this is an extra copy effectively, just so we can toggle inversion; would be
+        // better perhaps to just call a drawPath() since we know it'll use path rendering w/
+        // the inverse fill type.
+        GrShape inverted(e.fShape);
+        inverted.setInverted(true);
+        helper->drawShape(inverted, e.fLocalToDevice, SkRegion::kReplace_Op, e.fAA, alpha);
+    } else {
+        helper->drawShape(e.fShape, e.fLocalToDevice, SkRegion::kReplace_Op, e.fAA, alpha);
+    }
+}
+
+// Renders 'elements' into an A8 software mask covering 'bounds' and returns a view of it, or an
+// invalid view on failure. Rasterization is deferred to a task group when one is available.
+static GrSurfaceProxyView render_sw_mask(GrRecordingContext* context, const SkIRect& bounds,
+                                         const GrClipStack::Element** elements, int count) {
+    SkASSERT(count > 0);
+    SkTaskGroup* taskGroup = nullptr;
+    if (auto direct = context->asDirectContext()) {
+        taskGroup = direct->priv().getTaskGroup();
+    }
+
+    if (taskGroup) {
+        const GrCaps* caps = context->priv().caps();
+        GrProxyProvider* proxyProvider = context->priv().proxyProvider();
+        // Create our texture proxy
+        GrBackendFormat format = caps->getDefaultBackendFormat(GrColorType::kAlpha_8,
+                                                               GrRenderable::kNo);
+        GrSwizzle swizzle = caps->getReadSwizzle(format, GrColorType::kAlpha_8);
+        auto proxy = proxyProvider->createProxy(format, bounds.size(), GrRenderable::kNo, 1,
+                                                GrMipMapped::kNo, SkBackingFit::kApprox,
+                                                SkBudgeted::kYes, GrProtected::kNo);
+        if (!proxy) {
+            return {};  // creation failed; bail before queuing work that needs the proxy
+        }
+
+        // Copy the elements: the mask renders on another thread while the stack may be modified
+        using Uploader = GrTDeferredProxyUploader<SkTArray<GrClipStack::Element>>;
+        std::unique_ptr<Uploader> uploader = std::make_unique<Uploader>(count);
+        for (int i = 0; i < count; ++i) {
+            uploader->data().push_back(*(elements[i]));
+        }
+
+        Uploader* uploaderRaw = uploader.get();
+        auto drawAndUploadMask = [uploaderRaw, bounds] {
+            TRACE_EVENT0("skia.gpu", "Threaded SW Clip Mask Render");
+            GrSWMaskHelper helper(uploaderRaw->getPixels());
+            if (helper.init(bounds)) {
+                for (int i = 0; i < uploaderRaw->data().count(); ++i) {
+                    draw_to_sw_mask(&helper, uploaderRaw->data()[i], i == 0);
+                }
+            } else {
+                SkDEBUGFAIL("Unable to allocate SW clip mask.");
+            }
+            uploaderRaw->signalAndFreeData();
+        };
+
+        taskGroup->add(std::move(drawAndUploadMask));
+        proxy->texPriv().setDeferredUploader(std::move(uploader));
+        return {std::move(proxy), kMaskOrigin, swizzle};
+    } else {
+        GrSWMaskHelper helper;
+        if (!helper.init(bounds)) {
+            return {};
+        }
+
+        for (int i = 0; i < count; ++i) {
+            draw_to_sw_mask(&helper,*(elements[i]), i == 0);
+        }
+
+        return helper.toTextureView(context, SkBackingFit::kApprox);
+    }
+}
+
+static void render_stencil_mask(GrRecordingContext* context, GrRenderTargetContext* rtc,
+                                uint32_t genID, const SkIRect& bounds,
+                                const GrClipStack::Element** elements, int count,
+                                GrAppliedClip* out) {
+    GrStencilMaskHelper helper(context, rtc);
+    if (helper.init(bounds, genID, out->windowRectsState().windows(), 0)) {
+        // This follows the same logic as in draw_to_sw_mask
+        bool startInside = elements[0]->fOp == SkClipOp::kDifference;
+        helper.clear(startInside);
+        for (int i = 0; i < count; ++i) {
+            const GrClipStack::Element& e = *(elements[i]);
+            SkRegion::Op op;
+            if (e.fOp == SkClipOp::kIntersect) {
+                op = (i == 0) ? SkRegion::kReplace_Op : SkRegion::kIntersect_Op;
+            } else {
+                op = SkRegion::kDifference_Op;
+            }
+            helper.drawShape(e.fShape, e.fLocalToDevice, op, e.fAA);
+        }
+        helper.finish();
+    }
+    out->hardClip().addStencilClip(genID);  // NOTE(review): runs even if init() is false; confirm
+}
+
+} // anonymous namespace
+
+class GrClipStack::Draw {
+public:
+    // The bounds used for containment tests are inset by GrClip::kBoundsTolerance to be slightly
+    // more forgiving, unless the inset would leave them empty.
+    Draw(const SkRect& drawBounds, GrAA aa)
+            : fOriginalBounds(drawBounds.makeInset(GrClip::kBoundsTolerance,
+                                                   GrClip::kBoundsTolerance))
+            , fBounds(GrClip::GetPixelIBounds(drawBounds, aa, BoundsType::kExterior))
+            , fAA(aa) {
+        if (fOriginalBounds.isEmpty()) {
+            fOriginalBounds = drawBounds;
+        }
+    }
+
+    // Interface shared with the other clip types; a draw always reports the intersect op.
+    SkClipOp op() const { return SkClipOp::kIntersect; }
+    const SkIRect& outerBounds() const { return fBounds; }
+
+    // Draw does not have inner bounds so cannot contain anything.
+    bool contains(const RawElement& e) const { return false; }
+    bool contains(const SaveRecord& s) const { return false; }
+
+    bool applyDeviceBounds(const SkIRect& deviceBounds) { return fBounds.intersect(deviceBounds); }
+
+    const SkRect& bounds() const { return fOriginalBounds; }
+    GrAA aa() const { return fAA; }
+
+private:
+    SkRect  fOriginalBounds;
+    SkIRect fBounds;
+    GrAA    fAA;
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// GrClipStack::Element
+
+GrClipStack::RawElement::RawElement(const SkMatrix& localToDevice, const GrShape& shape,
+                                    GrAA aa, SkClipOp op)
+        : Element{shape, localToDevice, op, aa}
+        , fInnerBounds(SkIRect::MakeEmpty())
+        , fOuterBounds(SkIRect::MakeEmpty())
+        , fInvalidatedByIndex(-1) {  // -1 is also the value restoreValid() resets to
+    if (!localToDevice.invert(&fDeviceToLocal)) {
+        // If the transform can't be inverted, it means that two dimensions are collapsed to 0 or
+        // 1 dimension, making the device-space geometry effectively empty.
+        fShape.reset();
+    }
+}
+
+void GrClipStack::RawElement::markInvalid(const SaveRecord& current) {
+    SkASSERT(!this->isInvalid());
+    fInvalidatedByIndex = current.firstActiveElementIndex();  // compared in restoreValid()
+}
+
+void GrClipStack::RawElement::restoreValid(const SaveRecord& current) {
+    if (fInvalidatedByIndex > current.firstActiveElementIndex()) {
+        fInvalidatedByIndex = -1;  // 'current' starts before the index stored by markInvalid()
+    }
+}
+
+bool GrClipStack::RawElement::contains(const Draw& d) const {
+    if (fInnerBounds.contains(d.outerBounds())) {
+        return true;
+    }
+    // Non-AA draws are tested with their already computed outer bounds, which avoids needing
+    // the device-space outsetting inside shape_contains_rect.
+    const bool aaDraw = d.aa() == GrAA::kYes;
+    SkRect queryBounds = aaDraw ? d.bounds() : SkRect::Make(d.outerBounds());
+    return shape_contains_rect(fShape, fLocalToDevice, fDeviceToLocal,
+                               queryBounds, SkMatrix::I(), /* mixed-aa */ false);
+}
+
+bool GrClipStack::RawElement::contains(const SaveRecord& s) const {
+    const SkIRect& recordBounds = s.outerBounds();
+    if (fInnerBounds.contains(recordBounds)) {
+        return true;
+    }
+    // Like contains(Draw), except that a save record only has outer bounds to test with.
+    return shape_contains_rect(fShape, fLocalToDevice, fDeviceToLocal,
+                               SkRect::Make(recordBounds), SkMatrix::I(),
+                               /* mixed-aa */ false);
+}
+
+bool GrClipStack::RawElement::contains(const RawElement& e) const {
+    // This is similar to how RawElement checks containment for a Draw, except that both the tester
+    // and testee have a transform that needs to be considered.
+    if (fInnerBounds.contains(e.fOuterBounds)) {
+        return true;
+    }
+
+    bool mixedAA = fAA != e.fAA;
+    if (!mixedAA && fLocalToDevice == e.fLocalToDevice) {
+        // Test the shapes directly against each other, with a special check for rrect+rrect
+        // containment (this intersect e == e implies this contains e) and paths (same gen ID, or
+        // identical small paths, means they contain each other).
+        static constexpr int kMaxPathComparePoints = 16;
+        if (fShape.isRRect() && e.fShape.isRRect()) {
+            return SkRRectPriv::ConservativeIntersect(fShape.rrect(), e.fShape.rrect())
+                    == e.fShape.rrect();
+        } else if (fShape.isPath() && e.fShape.isPath()) {
+            return fShape.path().getGenerationID() == e.fShape.path().getGenerationID() ||
+                   (fShape.path().getPoints(nullptr, 0) <= kMaxPathComparePoints &&
+                    fShape.path() == e.fShape.path());
+        } // else fall through to shape_contains_rect
+    }
+
+    return shape_contains_rect(fShape, fLocalToDevice, fDeviceToLocal,
+                               e.fShape.bounds(), e.fLocalToDevice, mixedAA);
+
+}
+
+void GrClipStack::RawElement::simplify(const SkIRect& deviceBounds, bool forceAA) {
+    // Make sure the shape is not inverted. An inverted shape is equivalent to a non-inverted shape
+    // with the clip op toggled.
+    if (fShape.inverted()) {
+        fOp = fOp == SkClipOp::kIntersect ? SkClipOp::kDifference : SkClipOp::kIntersect;
+        fShape.setInverted(false);
+    }
+
+    // Then simplify the base shape, if it becomes empty, no need to update the bounds
+    fShape.simplify();
+    SkASSERT(!fShape.inverted());
+    if (fShape.isEmpty()) {
+        return;
+    }
+
+    // Lines and points should have been turned into empty since we assume everything is filled
+    SkASSERT(!fShape.isPoint() && !fShape.isLine());
+    // Validity check, we have no public API to create an arc at the moment
+    SkASSERT(!fShape.isArc());
+
+    SkRect outer = fLocalToDevice.mapRect(fShape.bounds());
+    if (!outer.intersect(SkRect::Make(deviceBounds))) {
+        // A non-empty shape is offscreen, so treat it as empty
+        fShape.reset();
+        return;
+    }
+
+    if (forceAA) {  // overrides whatever AA the element was created with
+        fAA = GrAA::kYes;
+    }
+
+    // Except for non-AA axis-aligned rects, the outer bounds is the rounded-out device-space
+    // mapped bounds of the shape.
+    fOuterBounds = GrClip::GetPixelIBounds(outer, fAA, BoundsType::kExterior);
+
+    if (fLocalToDevice.isScaleTranslate()) {  // rects and rrects are moved into device space
+        if (fShape.isRect()) {
+            // The actual geometry can be updated to the device-intersected bounds and we can
+            // know the inner bounds
+            fShape.rect() = outer;
+            fLocalToDevice.setIdentity();
+            fDeviceToLocal.setIdentity();
+
+            if (fAA == GrAA::kNo && outer.width() >= 1.f && outer.height() >= 1.f) {
+                // NOTE: Legacy behavior to avoid performance regressions. For non-aa axis-aligned
+                // clip rects we always just round so that they can be scissor-only (avoiding the
+                // uncertainty in how a GPU might actually round an edge on fractional coords).
+                fOuterBounds = outer.round();
+                fInnerBounds = fOuterBounds;
+            } else {
+                fInnerBounds = GrClip::GetPixelIBounds(outer, fAA, BoundsType::kInterior);
+                SkASSERT(fOuterBounds.contains(fInnerBounds) || fInnerBounds.isEmpty());
+            }
+        } else if (fShape.isRRect()) {
+            // Can't transform in place
+            SkRRect src = fShape.rrect();
+            SkAssertResult(src.transform(fLocalToDevice, &fShape.rrect()));
+            fLocalToDevice.setIdentity();
+            fDeviceToLocal.setIdentity();
+
+            SkRect inner = SkRRectPriv::InnerBounds(fShape.rrect());
+            fInnerBounds = GrClip::GetPixelIBounds(inner, fAA, BoundsType::kInterior);
+            if (!fInnerBounds.intersect(deviceBounds)) {
+                fInnerBounds = SkIRect::MakeEmpty();
+            }
+        }
+    }
+
+    if (fOuterBounds.isEmpty()) {
+        // This can happen if we have non-AA shapes smaller than a pixel that do not cover a pixel
+        // center. We could round out, but rasterization would still result in an empty clip.
+        fShape.reset();
+    }
+
+    // Post-conditions on inner and outer bounds
+    SkASSERT(fShape.isEmpty() || (!fOuterBounds.isEmpty() && deviceBounds.contains(fOuterBounds)));
+    SkASSERT(fShape.isEmpty() || fInnerBounds.isEmpty() || fOuterBounds.contains(fInnerBounds));
+}
+
+bool GrClipStack::RawElement::combine(const RawElement& other, const SaveRecord& current) {
+    // Tries to fold 'other' into this element; returns true if 'this' now represents both (which
+    // may be empty). Only intersect+intersect is considered: difference and mixed op cases could
+    // be analyzed too, but they are a rare occurrence and the math is much more complicated.
+    if (other.fOp != SkClipOp::kIntersect || fOp != SkClipOp::kIntersect) {
+        return false;
+    }
+
+    // At the moment, only rect+rect or rrect+rrect are supported (although rect+rrect is
+    // treated as a degenerate case of rrect+rrect).
+    bool shapeUpdated = false;
+    if (fShape.isRect() && other.fShape.isRect()) {
+        bool aaMatch = fAA == other.fAA;
+        if (fLocalToDevice.isIdentity() && other.fLocalToDevice.isIdentity() && !aaMatch) {
+            if (GrClip::IsPixelAligned(fShape.rect())) {
+                // Our AA type doesn't really matter, take other's since its edges may not be
+                // pixel aligned, so after intersection clip behavior should respect its AA type.
+                fAA = other.fAA;
+            } else if (!GrClip::IsPixelAligned(other.fShape.rect())) {
+                // Neither shape is pixel aligned and AA types don't match so can't combine
+                return false;
+            }
+            // Either we've updated this->fAA to actually match, or other.fAA doesn't matter so
+            // this can be set to true. We just can't modify other to set its AA to this->fAA.
+            // But since 'this' becomes the combo of the two, other will be deleted so that's fine.
+            aaMatch = true;
+        }
+
+        if (aaMatch && fLocalToDevice == other.fLocalToDevice) {
+            if (!fShape.rect().intersect(other.fShape.rect())) {
+                // In floating point, the two rects turn out not to intersect, so the result is empty
+                this->fShape.reset();
+                this->markInvalid(current);
+                return true;
+            }
+            shapeUpdated = true;
+        }
+    } else if ((fShape.isRect() || fShape.isRRect()) &&
+               (other.fShape.isRect() || other.fShape.isRRect())) {
+        // No such pixel-aligned disregard for AA for round rects
+        if (fAA == other.fAA && fLocalToDevice == other.fLocalToDevice) {
+            // Treat rrect+rect intersections as rrect+rrect
+            SkRRect a = fShape.isRect() ? SkRRect::MakeRect(fShape.rect()) : fShape.rrect();
+            SkRRect b = other.fShape.isRect() ? SkRRect::MakeRect(other.fShape.rect())
+                                              : other.fShape.rrect();
+
+            SkRRect joined = SkRRectPriv::ConservativeIntersect(a, b);
+            if (!joined.isEmpty()) {
+                // Can reduce to a single element
+                if (joined.isRect()) {
+                    // And with a simplified type
+                    fShape.setRect(joined.rect());
+                } else {
+                    fShape.setRRect(joined);
+                }
+                shapeUpdated = true;
+            } else if (!a.getBounds().intersects(b.getBounds())) {
+                // Like the rect+rect combination, the intersection is actually empty
+                fShape.reset();
+                this->markInvalid(current);
+                return true;
+            }
+        }
+    }
+
+    if (shapeUpdated) {
+        // This logic works under the assumption that both combined elements were intersect, so we
+        // don't do the full bounds computations like in simplify().
+        SkASSERT(fOp == SkClipOp::kIntersect && other.fOp == SkClipOp::kIntersect);
+        SkAssertResult(fOuterBounds.intersect(other.fOuterBounds));
+        if (!fInnerBounds.intersect(other.fInnerBounds)) {
+            fInnerBounds = SkIRect::MakeEmpty();
+        }
+        return true;
+    } else {
+        return false;
+    }
+}
+
+void GrClipStack::RawElement::updateForElement(RawElement* added, const SaveRecord& current) {
+    // An element that was already invalidated is skipped entirely; 'added' is left untouched.
+    if (this->isInvalid()) {
+        return;
+    }
+
+    // In the geometry query, 'A' is this element and 'B' is 'added'.
+    const ClipGeometry geometry = get_clip_geometry(*this, *added);
+    if (geometry == ClipGeometry::kEmpty) {
+        // Invalidating both elements is the signal that the clip is fully empty.
+        this->markInvalid(current);
+        added->markInvalid(current);
+    } else if (geometry == ClipGeometry::kAOnly) {
+        // This element already clips more than 'added', so 'added' is marked invalid to skip it.
+        added->markInvalid(current);
+    } else if (geometry == ClipGeometry::kBOnly) {
+        // 'added' clips more than this element, so this element is the one marked invalid.
+        this->markInvalid(current);
+    } else if (geometry == ClipGeometry::kBoth && added->combine(*this, current)) {
+        // The bounds checks wanted both elements kept, but the ops and shape kinds allowed
+        // 'added' to absorb this element, so this element is no longer needed.
+        this->markInvalid(current);
+    }
+}
+
+GrClipStack::ClipState GrClipStack::RawElement::clipType() const {
+    // Rects and rrects only map to their dedicated states when the element is an intersect op
+    // with an identity local-to-device matrix; otherwise they are reported as complex.
+    auto simpleOrComplex = [this](ClipState simple) {
+        const bool deviceSpaceIntersect =
+                fOp == SkClipOp::kIntersect && fLocalToDevice.isIdentity();
+        return deviceSpaceIntersect ? simple : ClipState::kComplex;
+    };
+
+    switch (fShape.type()) {
+        case GrShape::Type::kEmpty:
+            return ClipState::kEmpty;
+
+        case GrShape::Type::kRect:
+            return simpleOrComplex(ClipState::kDeviceRect);
+
+        case GrShape::Type::kRRect:
+            return simpleOrComplex(ClipState::kDeviceRRect);
+
+        case GrShape::Type::kPath:
+            return ClipState::kComplex;
+
+        case GrShape::Type::kArc:
+        case GrShape::Type::kLine:
+        case GrShape::Type::kPoint:
+            // These shape types are not expected on a RawElement; assert in debug builds and
+            // report them as complex otherwise.
+            SkASSERT(false);
+            return ClipState::kComplex;
+    }
+    SkUNREACHABLE;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// GrClipStack::Mask
+
+GrClipStack::Mask::Mask(const SaveRecord& current, const SkIRect& drawBounds)
+        : fBounds(drawBounds)
+        , fGenID(current.genID()) {
+    static const GrUniqueKey::Domain kDomain = GrUniqueKey::GenerateDomain();
+
+    // The gen ID should not be invalid, empty, or wide open, since those do not require masks
+    SkASSERT(fGenID != kInvalidGenID && fGenID != kEmptyGenID && fGenID != kWideOpenGenID);
+
+    // Packs two coordinates into one 32-bit key word: 'lo' in bits 0-15 and 'hi' in bits 16-31.
+    // SkToS16 because image filters outset layers to a size indicated by the filter, which can
+    // sometimes result in negative coordinates from device space. Each 16-bit value is converted
+    // to uint16_t before widening: OR-ing a negative int16_t directly would sign-extend it to a
+    // full int and overwrite the upper half of the word, making distinct bounds share a key.
+    auto packS16Pair = [](int32_t lo, int32_t hi) -> uint32_t {
+        const uint32_t loBits = static_cast<uint16_t>(SkToS16(lo));
+        const uint32_t hiBits = static_cast<uint16_t>(SkToS16(hi));
+        return loBits | (hiBits << 16);
+    };
+
+    // Key layout: [gen ID, left|right, top|bottom]
+    GrUniqueKey::Builder builder(&fKey, kDomain, 3, "clip_mask");
+    builder[0] = fGenID;
+    builder[1] = packS16Pair(drawBounds.fLeft, drawBounds.fRight);
+    builder[2] = packS16Pair(drawBounds.fTop, drawBounds.fBottom);
+    SkASSERT(fKey.isValid());
+
+    // Debug builds remember the owning save record so later queries can check it
+    SkDEBUGCODE(fOwner = &current;)
+}
+
+bool GrClipStack::Mask::appliesToDraw(const SaveRecord& current, const SkIRect& drawBounds) const {
+    // A matching gen ID is only expected to come from the save record that owns this mask.
+    SkASSERT(fGenID != current.genID() || &current == fOwner);
+    if (fGenID != current.genID()) {
+        return false;
+    }
+    // For the same save record, a larger mask has the same or more elements baked into it, so it
+    // can be reused for any draw whose bounds it contains.
+    return fBounds.contains(drawBounds);
+}
+
+void GrClipStack::Mask::invalidate(GrProxyProvider* proxyProvider) {
+    SkASSERT(proxyProvider);
+    // The key is reset below, so a still-valid key means this is the first invalidation.
+    SkASSERT(fKey.isValid());
+
+    constexpr auto kInvalidateGPU = GrProxyProvider::InvalidateGPUResource::kYes;
+    proxyProvider->processInvalidUniqueKey(fKey, nullptr, kInvalidateGPU);
+    fKey.reset();
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// GrClipStack::SaveRecord
+
+GrClipStack::SaveRecord::SaveRecord(const SkIRect& deviceBounds)
+        : fInnerBounds(deviceBounds)  // Inner and outer bounds both start as the device bounds
+        , fOuterBounds(deviceBounds)
+        , fShader(nullptr)
+        , fStartingMaskIndex(0)  // Mask, element, and oldest-valid indices all start at 0
+        , fStartingElementIndex(0)
+        , fOldestValidIndex(0)
+        , fDeferredSaveCount(0)
+        , fStackOp(SkClipOp::kIntersect)
+        , fState(ClipState::kWideOpen)  // This record starts out wide open
+        , fGenID(kInvalidGenID) {}
+
+GrClipStack::SaveRecord::SaveRecord(const SaveRecord& prior,
+                                    int startingMaskIndex,
+                                    int startingElementIndex)
+        : fInnerBounds(prior.fInnerBounds)  // Bounds, shader, op, and state come from 'prior'
+        , fOuterBounds(prior.fOuterBounds)
+        , fShader(prior.fShader)
+        , fStartingMaskIndex(startingMaskIndex)
+        , fStartingElementIndex(startingElementIndex)
+        , fOldestValidIndex(prior.fOldestValidIndex)
+        , fDeferredSaveCount(0)  // Not copied from 'prior'
+        , fStackOp(prior.fStackOp)
+        , fState(prior.fState)
+        , fGenID(kInvalidGenID) {  // Not copied from 'prior'; starts out invalid
+    // If the prior record never needed a mask, this one will insert into the same index
+    // (that's okay since we'll remove it when this record is popped off the stack).
+    SkASSERT(startingMaskIndex >= prior.fStartingMaskIndex);
+    // The same goes for elements (the prior could have been wide open).
+    SkASSERT(startingElementIndex >= prior.fStartingElementIndex);
+}
+
+uint32_t GrClipStack::SaveRecord::genID() const {
+    // Empty and wide-open states always report their reserved IDs, whatever fGenID holds.
+    switch (fState) {
+        case ClipState::kEmpty:
+            return kEmptyGenID;
+        case ClipState::kWideOpen:
+            return kWideOpenGenID;
+        default:
+            break;
+    }
+
+    // The stored ID must not collide with the reserved values returned above. It may still be
+    // kInvalidGenID if the record hasn't had any elements added to it yet.
+    SkASSERT(fGenID != kEmptyGenID && fGenID != kWideOpenGenID);
+    return fGenID;
+}
+
+GrClipStack::ClipState GrClipStack::SaveRecord::state() const {
+    // A clip shader makes any non-empty clip complex; an empty clip stays empty regardless.
+    const bool shaderApplies = fShader && fState != ClipState::kEmpty;
+    return shaderApplies ? ClipState::kComplex : fState;
+}
+
+bool GrClipStack::SaveRecord::contains(const GrClipStack::Draw& draw) const {
+    // True when the draw's outer bounds lie within this record's inner bounds.
+    const auto& drawOuter = draw.outerBounds();
+    return fInnerBounds.contains(drawOuter);
+}
+
+bool GrClipStack::SaveRecord::contains(const GrClipStack::RawElement& element) const {
+    // True when the element's outer bounds lie within this record's inner bounds.
+    const auto& elementOuter = element.outerBounds();
+    return fInnerBounds.contains(elementOuter);
+}
+
+void GrClipStack::SaveRecord::removeElements(RawElement::Stack* elements) {
+    // Pop every element at or above this record's starting index.
+    for (int extra = elements->count() - fStartingElementIndex; extra > 0; --extra) {
+        elements->pop_back();
+    }
+}
+
+void GrClipStack::SaveRecord::restoreElements(RawElement::Stack* elements) {
+    // This record is presumably the new top of the save stack, and the popped record's own
+    // elements are already gone. Walk the remaining elements from newest to oldest and let each
+    // one decide, relative to this record, whether it becomes valid again. Nothing older than
+    // fOldestValidIndex is visited.
+    int index = elements->count();
+    for (RawElement& element : elements->ritems()) {
+        if (--index < fOldestValidIndex) {
+            break;
+        }
+        element.restoreValid(*this);
+    }
+}
+
+void GrClipStack::SaveRecord::invalidateMasks(GrProxyProvider* proxyProvider,
+                                              Mask::Stack* masks) {
+    // Every mask at or above this record's starting index is dropped. The key has to be
+    // explicitly invalidated before the mask object itself is removed from the stack.
+    for (int extra = masks->count() - fStartingMaskIndex; extra > 0; --extra) {
+        SkASSERT(masks->back().owner() == this && proxyProvider);
+        masks->back().invalidate(proxyProvider);
+        masks->pop_back();
+    }
+    // Whatever mask remains on top must not carry this record's gen ID
+    SkASSERT(masks->empty() || masks->back().genID() != fGenID);
+}
+
+void GrClipStack::SaveRecord::reset(const SkIRect& bounds) {
+    SkASSERT(this->canBeUpdated());
+
+    // Back to a wide-open intersect clip with no shader, spanning 'bounds'
+    fState = ClipState::kWideOpen;
+    fStackOp = SkClipOp::kIntersect;
+    fShader = nullptr;
+    fInnerBounds = bounds;
+    fOuterBounds = bounds;
+
+    // Elements older than this record's starting index are no longer treated as valid
+    fOldestValidIndex = fStartingElementIndex;
+}
+
+void GrClipStack::SaveRecord::addShader(sk_sp<SkShader> shader) {
+    SkASSERT(shader);
+    SkASSERT(this->canBeUpdated());
+
+    if (fShader) {
+        // Total coverage is the product of the coverage from every element (shape or shader),
+        // so an existing shader and the new one are folded into a single kSrcIn blend shader
+        // that represents 'shader' * 'fShader'.
+        fShader = SkShaders::Blend(SkBlendMode::kSrcIn, std::move(shader), fShader);
+        return;
+    }
+
+    // First shader on this record: just take ownership of it
+    fShader = std::move(shader);
+}
+
+bool GrClipStack::SaveRecord::addElement(RawElement&& toAdd, RawElement::Stack* elements) {
+    // Incoming element invariants: a non-empty shape must have non-empty outer bounds, and
+    // non-empty inner bounds must be contained in the outer bounds.
+    SkASSERT((toAdd.shape().isEmpty() || !toAdd.outerBounds().isEmpty()) &&
+             (toAdd.innerBounds().isEmpty() || toAdd.outerBounds().contains(toAdd.innerBounds())));
+    // Elements must not be added while this record has a deferred save
+    SkASSERT(this->canBeUpdated());
+
+    // The clip only ever shrinks, so an already-empty clip has no use for another element.
+    if (fState == ClipState::kEmpty) {
+        return false;
+    }
+    if (toAdd.shape().isEmpty()) {
+        // An empty difference op is a no-op and should have been detected before getting here
+        SkASSERT(toAdd.op() == SkClipOp::kIntersect);
+        fState = ClipState::kEmpty;
+        return true;
+    }
+
+    // For this query, 'A' is the existing stack's bounds and 'B' is the new element.
+    const ClipGeometry geometry = get_clip_geometry(*this, toAdd);
+    if (geometry == ClipGeometry::kEmpty) {
+        // Combining the two leaves nothing
+        fState = ClipState::kEmpty;
+        return true;
+    }
+    if (geometry == ClipGeometry::kAOnly) {
+        // The element would not change the existing clip
+        return false;
+    }
+    if (geometry == ClipGeometry::kBOnly || fState == ClipState::kWideOpen) {
+        // Either the element supersedes the entire existing stack, or the stack was wide open
+        // and the element simply becomes the clip. Both cases reduce to the element alone.
+        this->replaceWithElement(std::move(toAdd), elements);
+        return true;
+    }
+
+    // There are real clip elements to combine with, so fold the element's bounds into the
+    // stack's bounds according to the pair of ops involved.
+    const bool stackIntersects = fStackOp == SkClipOp::kIntersect;
+    const bool elementIntersects = toAdd.op() == SkClipOp::kIntersect;
+    if (stackIntersects && elementIntersects) {
+        // Intersect (stack) + Intersect (toAdd): pairwise intersection of outer and inner.
+        SkAssertResult(fOuterBounds.intersect(toAdd.outerBounds()));
+        if (!fInnerBounds.intersect(toAdd.innerBounds())) {
+            // Also covers either inner rect being empty to begin with
+            fInnerBounds = SkIRect::MakeEmpty();
+        }
+    } else if (stackIntersects) {
+        // Intersect (stack) + Difference (toAdd): the outer bounds shrink only if the element's
+        // inner bounds completely cut off an edge; the inner bounds shrink to completely exclude
+        // the element's outer bounds.
+        fOuterBounds = subtract(fOuterBounds, toAdd.innerBounds(), /* exact */ true);
+        fInnerBounds = subtract(fInnerBounds, toAdd.outerBounds(), /* exact */ false);
+    } else if (elementIntersects) {
+        // Difference (stack) + Intersect (toAdd): mirror of the previous case. The old outer
+        // bounds are saved first because the new inner bounds are computed from them.
+        SkIRect oldOuter = fOuterBounds;
+        fOuterBounds = subtract(toAdd.outerBounds(), fInnerBounds, /* exact */ true);
+        fInnerBounds = subtract(toAdd.innerBounds(), oldOuter,     /* exact */ false);
+    } else {
+        // Difference (stack) + Difference (toAdd): outer bounds become the union, and the inner
+        // bounds become whichever of the two candidates has the larger area.
+        fOuterBounds.join(toAdd.outerBounds());
+        const auto& addedInner = toAdd.innerBounds();
+        if (addedInner.width() * addedInner.height() >
+            fInnerBounds.width() * fInnerBounds.height()) {
+            fInnerBounds = addedInner;
+        }
+    }
+
+    // The element is being kept and the stack bounds are updated. Bounds that look like an empty
+    // or wide open clip should have been caught by the earlier checks.
+    SkASSERT(!fOuterBounds.isEmpty() &&
+             (fInnerBounds.isEmpty() || fOuterBounds.contains(fInnerBounds)));
+
+    return this->appendElement(std::move(toAdd), elements);
+}
+
+bool GrClipStack::SaveRecord::appendElement(RawElement&& toAdd, RawElement::Stack* elements) {
+    // Update past elements to account for 'toAdd'; returns false if 'toAdd' has no effect.
+    int i = elements->count() - 1;
+
+    // After the loop, elements between [max(youngestValid, startingIndex)+1, count-1] can be
+    // removed from the stack (these are the active elements that have been invalidated by the
+    // newest element; since it's the active part of the stack, no restore() can bring them back).
+    int youngestValid = fStartingElementIndex - 1;
+    // After the loop, elements between [0, oldestValid-1] are all invalid. The value of oldestValid
+    // is used to compute the save record's new fOldestValidIndex value.
+    int oldestValid = elements->count();
+    // After the loop, this is the earliest active element that was invalidated. It may be
+    // older in the stack than youngestValid, so cannot be popped off, but can be used to store
+    // the new element instead of allocating more.
+    RawElement* oldestActiveInvalid = nullptr;
+    int oldestActiveInvalidIndex = elements->count();
+
+    for (RawElement& existing : elements->ritems()) {
+        if (i < fOldestValidIndex) {
+            break;
+        }
+        // We don't need to pass the actual index that toAdd will be saved to; just the minimum
+        // index of this save record, since that will result in the same restoration behavior later.
+        existing.updateForElement(&toAdd, *this);
+
+        if (toAdd.isInvalid()) {
+            if (existing.isInvalid()) {
+                // Both new and old invalid implies the entire clip becomes empty
+                fState = ClipState::kEmpty;
+                return true;
+            } else {
+                // The new element doesn't change the clip beyond what the old element already does
+                return false;
+            }
+        } else if (existing.isInvalid()) {
+            // The new element cancels out the old element. The new element may have been modified
+            // to account for the old element's geometry.
+            if (i >= fStartingElementIndex) {
+                // Still active, so the invalidated index could be used to store the new element
+                oldestActiveInvalid = &existing;
+                oldestActiveInvalidIndex = i;
+            }
+        } else {
+            // Keep both new and old elements
+            oldestValid = i;
+            if (i > youngestValid) {
+                youngestValid = i;
+            }
+        }
+
+        --i;
+    }
+
+    // Post-iteration validity check
+    SkASSERT(oldestValid == elements->count() ||
+             (oldestValid >= fOldestValidIndex && oldestValid < elements->count()));
+    SkASSERT(youngestValid == fStartingElementIndex - 1 ||
+             (youngestValid >= fStartingElementIndex && youngestValid < elements->count()));
+    SkASSERT((oldestActiveInvalid && oldestActiveInvalidIndex >= fStartingElementIndex &&
+              oldestActiveInvalidIndex < elements->count()) || !oldestActiveInvalid);
+
+    // Update final state
+    SkASSERT(oldestValid >= fOldestValidIndex);
+    fOldestValidIndex = std::min(oldestValid, oldestActiveInvalidIndex);
+    fState = oldestValid == elements->count() ? toAdd.clipType() : ClipState::kComplex;
+    if (fStackOp == SkClipOp::kDifference && toAdd.op() == SkClipOp::kIntersect) {
+        // The stack remains in difference mode only as long as all elements are difference
+        fStackOp = SkClipOp::kIntersect;
+    }
+
+    int targetCount = youngestValid + 1;
+    if (!oldestActiveInvalid || oldestActiveInvalidIndex >= targetCount) {
+        // toAdd will be stored right after youngestValid
+        targetCount++;
+        oldestActiveInvalid = nullptr;
+    }
+    while (elements->count() > targetCount) {
+        SkASSERT(oldestActiveInvalid != &elements->back()); // shouldn't delete what we'll reuse
+        elements->pop_back();
+    }
+    if (oldestActiveInvalid) {
+        *oldestActiveInvalid = std::move(toAdd);
+    } else if (elements->count() < targetCount) {
+        elements->push_back(std::move(toAdd));
+    } else {
+        elements->back() = std::move(toAdd);
+    }
+
+    // Changing this will prompt GrClipStack to invalidate any masks associated with this record.
+    fGenID = next_gen_id();
+    return true;
+}
+
+void GrClipStack::SaveRecord::replaceWithElement(RawElement&& toAdd, RawElement::Stack* elements) {
+    // With a single element, the record's aggregate state is just a copy of that element's.
+    // These reads must happen before 'toAdd' is moved from below.
+    fState = toAdd.clipType();
+    fStackOp = toAdd.op();
+    fOuterBounds = toAdd.outerBounds();
+    fInnerBounds = toAdd.innerBounds();
+
+    // Drop every active element except the one at this record's starting index (if present), so
+    // that slot can be reused for the new element.
+    const int slot = fStartingElementIndex;
+    for (int extra = elements->count() - (slot + 1); extra > 0; --extra) {
+        elements->pop_back();
+    }
+    if (elements->count() > slot) {
+        elements->back() = std::move(toAdd);
+    } else {
+        elements->push_back(std::move(toAdd));
+    }
+
+    SkASSERT(elements->count() == fStartingElementIndex + 1);
+
+    // Older elements, owned by save records lower in the clip stack, are all invalidated by this.
+    fOldestValidIndex = fStartingElementIndex;
+    fGenID = next_gen_id();
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// GrClipStack
+
+// NOTE: Based on draw calls in all GMs, SKPs, and SVGs as of 08/20, 98% use a clip stack with
+// one Element and up to two SaveRecords, thus the inline size for RawElement::Stack and
+// SaveRecord::Stack (this conveniently keeps the size of GrClipStack manageable). The max
+// encountered element stack depth was 5 and the max save depth was 6. Using an increment of 8 for
+// these stacks means that clip management will incur a single allocation for the remaining 2%
+// of the draws, with extra headroom for more complex clips encountered in the wild.
+//
+// The mask stack increment size was chosen to be smaller since only 0.2% of the evaluated draw call
+// set ever used a mask (which includes stencil masks), or up to 0.3% when CCPR is disabled.
+static constexpr int kElementStackIncrement = 8;
+static constexpr int kSaveStackIncrement = 8;
+static constexpr int kMaskStackIncrement = 4;
+
+// And from this same draw call set, the most complex clip could only use 5 analytic coverage FPs.
+// Historically we limited it to 4 based on Blink's call pattern, so we keep the limit as-is since
+// it's so close to the empirically encountered max.
+static constexpr int kMaxAnalyticFPs = 4;
+// The number of stack-allocated mask pointers to store before extending the arrays.
+// The size was determined empirically: the maximum number of elements put in a SW mask was 4
+// across our set of GMs, SKPs, and SVGs used for testing.
+static constexpr int kNumStackMasks = 4;
+
+GrClipStack::GrClipStack(const SkIRect& deviceBounds, const SkMatrixProvider* matrixProvider,
+                         bool forceAA)
+        : fElements(kElementStackIncrement)
+        , fSaves(kSaveStackIncrement)
+        , fMasks(kMaskStackIncrement)
+        , fProxyProvider(nullptr)  // Filled in later by apply() from its recording context
+        , fDeviceBounds(deviceBounds)
+        , fMatrixProvider(matrixProvider)
+        , fForceAA(forceAA) {
+    // Start with a single save record that is wide open and spans the device bounds
+    fSaves.emplace_back(deviceBounds);
+}
+
+GrClipStack::~GrClipStack() {
+    // Without a proxy provider, no masks are expected to exist, so there is nothing to do.
+    SkASSERT(fProxyProvider || fMasks.empty());
+    if (!fProxyProvider) {
+        return;
+    }
+
+    // The whole stack is going away, so invalidate every remaining mask key directly instead of
+    // going through the save records.
+    for (Mask& mask : fMasks.ritems()) {
+        mask.invalidate(fProxyProvider);
+    }
+}
+
+void GrClipStack::save() {
+    SkASSERT(!fSaves.empty());
+    // The save is only noted on the current record here; no new record is pushed.
+    SaveRecord& top = fSaves.back();
+    top.pushSave();
+}
+
+void GrClipStack::restore() {
+    SkASSERT(!fSaves.empty());
+    SaveRecord& top = fSaves.back();
+
+    // If this only undoes a deferred save, the record itself stays on the stack.
+    const bool wasDeferredSave = top.popSave();
+    if (!wasDeferredSave) {
+        // Removing a save record deletes all elements >= its starting index, along with any
+        // masks that were rasterized for it.
+        top.removeElements(&fElements);
+        SkASSERT(fProxyProvider || fMasks.empty());
+        if (fProxyProvider) {
+            top.invalidateMasks(fProxyProvider, &fMasks);
+        }
+        fSaves.pop_back();
+
+        // The new top record re-validates any surviving elements that had only been invalidated
+        // by the record that was just removed.
+        fSaves.back().restoreElements(&fElements);
+    }
+}
+
+SkIRect GrClipStack::getConservativeBounds() const {
+    const SaveRecord& current = this->currentSaveRecord();
+    const ClipState state = current.state();
+
+    if (state == ClipState::kEmpty) {
+        return SkIRect::MakeEmpty();
+    }
+    if (state == ClipState::kWideOpen) {
+        return fDeviceBounds;
+    }
+
+    if (current.op() == SkClipOp::kDifference) {
+        // In difference mode the outer/inner bounds describe what is cut out, so the result is
+        // the device bounds minus any fully clipped content that spans a device edge.
+        return subtract(fDeviceBounds, current.innerBounds(), /* exact */ true);
+    }
+
+    SkASSERT(fDeviceBounds.contains(current.outerBounds()));
+    return current.outerBounds();
+}
+
+GrClip::PreClipResult GrClipStack::preApply(const SkRect& bounds, GrAA aa) const {
+    // A stack that forces AA overrides the AA requested for the draw
+    const GrAA drawAA = fForceAA ? GrAA::kYes : aa;
+    Draw draw(bounds, drawAA);
+    if (!draw.applyDeviceBounds(fDeviceBounds)) {
+        return GrClip::Effect::kClippedOut;
+    }
+
+    const SaveRecord& cs = this->currentSaveRecord();
+    const ClipState state = cs.state();
+
+    // Early out when the clip is known a priori to be full 0s or full 1s.
+    switch (state) {
+        case ClipState::kEmpty:
+            return GrClip::Effect::kClippedOut;
+        case ClipState::kWideOpen:
+            SkASSERT(!cs.shader());
+            return GrClip::Effect::kUnclipped;
+        default:
+            break;
+    }
+
+    // Given the argument order, 'A' == current clip and 'B' == draw
+    const ClipGeometry geometry = get_clip_geometry(cs, draw);
+    if (geometry == ClipGeometry::kEmpty) {
+        // The geometry already removed everything, so any shader can be ignored
+        return GrClip::Effect::kClippedOut;
+    }
+    if (geometry == ClipGeometry::kBOnly) {
+        // Geometrically the draw is unclipped, but a shader still has to be applied
+        return cs.shader() ? GrClip::Effect::kClipped : GrClip::Effect::kUnclipped;
+    }
+    if (geometry == ClipGeometry::kAOnly) {
+        // Not expected, since the inner bounds of a draw are unknown. If it did happen, it would
+        // technically mean the draw covered the clip, which the handling below accounts for.
+        SkASSERT(false);
+    }
+
+    // The draw and the clip interact. Simple device-space clips can be reported exactly using
+    // the newest element on the stack.
+    SkASSERT(fElements.count() > 0);
+    const RawElement& back = fElements.back();
+    if (state == ClipState::kDeviceRect) {
+        SkASSERT(back.clipType() == ClipState::kDeviceRect);
+        return {back.shape().rect(), back.aa()};
+    }
+    if (state == ClipState::kDeviceRRect) {
+        SkASSERT(back.clipType() == ClipState::kDeviceRRect);
+        return {back.shape().rrect(), back.aa()};
+    }
+
+    // Complex shapes, multiple elements, or a shader. Iterating per element as apply() would is
+    // possible, but preApply() is meant to stay conservative and efficient.
+    SkASSERT(state == ClipState::kComplex);
+    return GrClip::Effect::kClipped;
+}
+
+GrClip::Effect GrClipStack::apply(GrRecordingContext* context, GrRenderTargetContext* rtc,
+                                  GrAAType aa, bool hasUserStencilSettings,
+                                  GrAppliedClip* out, SkRect* bounds) const {
+    // TODO: Once we no longer store SW masks, we don't need to sneak the provider in like this
+    if (!fProxyProvider) {
+        fProxyProvider = context->priv().proxyProvider();
+    }
+    SkASSERT(fProxyProvider == context->priv().proxyProvider());
+    const GrCaps* caps = context->priv().caps();
+
+    // Convert the bounds to a Draw and apply device bounds clipping, making our query as tight
+    // as possible.
+    Draw draw(*bounds, GrAA(fForceAA || aa != GrAAType::kNone));
+    if (!draw.applyDeviceBounds(fDeviceBounds)) {
+        return Effect::kClippedOut;
+    }
+    SkAssertResult(bounds->intersect(SkRect::Make(fDeviceBounds)));
+
+    const SaveRecord& cs = this->currentSaveRecord();
+    // Early out if we know a priori that the clip is full 0s or full 1s.
+    if (cs.state() == ClipState::kEmpty) {
+        return Effect::kClippedOut;
+    } else if (cs.state() == ClipState::kWideOpen) {
+        SkASSERT(!cs.shader());
+        return Effect::kUnclipped;
+    }
+
+    // Convert any clip shader first, since it's not geometrically related to the draw bounds
+    std::unique_ptr<GrFragmentProcessor> clipFP = nullptr;
+    if (cs.shader()) {
+        static const GrColorInfo kCoverageColorInfo{GrColorType::kUnknown, kPremul_SkAlphaType,
+                                                    nullptr};
+        GrFPArgs args(context, *fMatrixProvider, kNone_SkFilterQuality, &kCoverageColorInfo);
+        clipFP = as_SB(cs.shader())->asFragmentProcessor(args);
+        if (clipFP) {
+            clipFP = GrFragmentProcessor::SwizzleOutput(std::move(clipFP), GrSwizzle::AAAA());
+        }
+    }
+
+    // A refers to the entire clip stack, B refers to the draw
+    switch (get_clip_geometry(cs, draw)) {
+        case ClipGeometry::kEmpty:
+            return Effect::kClippedOut;
+
+        case ClipGeometry::kBOnly:
+            // Geometrically unclipped, but may need to add the shader as a coverage FP
+            if (clipFP) {
+                out->addCoverageFP(std::move(clipFP));
+                return Effect::kClipped;
+            } else {
+                return Effect::kUnclipped;
+            }
+
+        case ClipGeometry::kAOnly:
+            // Shouldn't happen since draws don't report inner bounds
+            SkASSERT(false);
+            [[fallthrough]];
+
+        case ClipGeometry::kBoth:
+            // The draw is combined with the saved clip elements; the below logic tries to skip
+            // as many elements as possible.
+            SkASSERT(cs.state() == ClipState::kDeviceRect ||
+                     cs.state() == ClipState::kDeviceRRect ||
+                     cs.state() == ClipState::kComplex);
+            break;
+    }
+
+    // We can determine a scissor based on the draw and the overall stack bounds.
+    SkIRect scissorBounds;
+    if (cs.op() == SkClipOp::kIntersect) {
+        // Initially we keep this as large as possible; if the clip is applied solely with coverage
+        // FPs then using a loose scissor increases the chance we can batch the draws.
+        // We tighten it later if any form of mask or atlas element is needed.
+        scissorBounds = cs.outerBounds();
+    } else {
+        scissorBounds = subtract(draw.outerBounds(), cs.innerBounds(), /* exact */ true);
+    }
+
+    // We mark this true once we have a coverage FP (since complex clipping is occurring), or we
+    // have an element that wouldn't affect the scissored draw bounds, but does affect the regular
+    // draw bounds. In that case, the scissor is sufficient for clipping and we can skip the
+    // element but definitely cannot then drop the scissor.
+    bool scissorIsNeeded = SkToBool(cs.shader());
+
+    int remainingAnalyticFPs = kMaxAnalyticFPs;
+    if (rtc->numSamples() > 1 || aa == GrAAType::kMSAA || hasUserStencilSettings) {
+        // Disable analytic clips when we have MSAA. In MSAA we never conflate coverage and opacity.
+        remainingAnalyticFPs = 0;
+        // We disable MSAA when avoiding stencil so shouldn't get here.
+        SkASSERT(!context->priv().caps()->avoidStencilBuffers());
+    }
+
+    // If window rectangles are supported, we can use them to exclude inner bounds of difference ops
+    int maxWindowRectangles = rtc->priv().maxWindowRectangles();
+    GrWindowRectangles windowRects;
+
+    // Elements not represented as an analytic FP or skipped will be collected here and later
+    // applied by using the stencil buffer, CCPR clip atlas, or a cached SW mask.
+    SkSTArray<kNumStackMasks, const Element*> elementsForMask;
+    SkSTArray<kNumStackMasks, const RawElement*> elementsForAtlas;
+
+    bool maskRequiresAA = false;
+    auto* ccpr = context->priv().drawingManager()->getCoverageCountingPathRenderer();
+
+    int i = fElements.count();
+    for (const RawElement& e : fElements.ritems()) {
+        --i;
+        if (i < cs.oldestElementIndex()) {
+            // All earlier elements have been invalidated by elements already processed
+            break;
+        } else if (e.isInvalid()) {
+            continue;
+        }
+
+        switch (get_clip_geometry(e, draw)) {
+            case ClipGeometry::kEmpty:
+                // This can happen for difference op elements that have a larger fInnerBounds than
+                // can be preserved at the next level.
+                return Effect::kClippedOut;
+
+            case ClipGeometry::kBOnly:
+                // We don't need to produce a coverage FP or mask for the element
+                break;
+
+            case ClipGeometry::kAOnly:
+                // Shouldn't happen for draws, fall through to regular element processing
+                SkASSERT(false);
+                [[fallthrough]];
+
+            case ClipGeometry::kBoth: {
+                // The element must apply coverage to the draw, enable the scissor to limit overdraw
+                scissorIsNeeded = true;
+
+                // First apply using HW methods (scissor and window rects). When the inner and outer
+                // bounds match, nothing else needs to be done.
+                bool fullyApplied = false;
+                if (e.op() == SkClipOp::kIntersect) {
+                    // The second test allows clipped draws that are scissored by multiple elements
+                    // to remain scissor-only.
+                    fullyApplied = e.innerBounds() == e.outerBounds() ||
+                                   e.innerBounds().contains(scissorBounds);
+                } else {
+                    if (!e.innerBounds().isEmpty() &&
+                        out->windowRectsState().numWindows() < maxWindowRectangles) {
+                        // TODO: If we have more difference ops than available window rects, we
+                        // should prioritize those with the largest inner bounds.
+                        windowRects.addWindow(e.innerBounds());
+                        fullyApplied = e.innerBounds() == e.outerBounds();
+                    }
+                }
+
+                if (!fullyApplied && remainingAnalyticFPs > 0) {
+                    std::tie(fullyApplied, clipFP) = analytic_clip_fp(e.asElement(),
+                                                                      *caps->shaderCaps(),
+                                                                      std::move(clipFP));
+                    if (fullyApplied) {
+                        remainingAnalyticFPs--;
+                    } else if (ccpr && e.aa() == GrAA::kYes) {
+                        // While technically the element is turned into a mask, each atlas entry
+                        // counts towards the FP complexity of the clip.
+                        // TODO - CCPR needs a stable ops task ID so we can't create FPs until we
+                        // know any other mask generation is finished. It also only works with AA
+                        // shapes, future atlas systems can improve on this.
+                        elementsForAtlas.push_back(&e);
+                        remainingAnalyticFPs--;
+                        fullyApplied = true;
+                    }
+                }
+
+                if (!fullyApplied) {
+                    elementsForMask.push_back(&e.asElement());
+                    maskRequiresAA |= (e.aa() == GrAA::kYes);
+                }
+
+                break;
+            }
+        }
+    }
+
+    if (!scissorIsNeeded) {
+        // More detailed analysis of the element shapes determined no clip is needed
+        SkASSERT(elementsForMask.empty() && elementsForAtlas.empty() && !clipFP);
+        return Effect::kUnclipped;
+    }
+
+    // Fill out the GrAppliedClip with what we know so far, possibly with a tightened scissor
+    if (cs.op() == SkClipOp::kIntersect &&
+        (!elementsForMask.empty() || !elementsForAtlas.empty())) {
+        SkAssertResult(scissorBounds.intersect(draw.outerBounds()));
+    }
+    if (!GrClip::IsInsideClip(scissorBounds, *bounds)) {
+        out->hardClip().addScissor(scissorBounds, bounds);
+    }
+    if (!windowRects.empty()) {
+        out->hardClip().addWindowRectangles(windowRects, GrWindowRectsState::Mode::kExclusive);
+    }
+
+    // Now rasterize any remaining elements, either to the stencil or a SW mask. All elements are
+    // flattened into a single mask.
+    if (!elementsForMask.empty()) {
+        bool stencilUnavailable = context->priv().caps()->avoidStencilBuffers() ||
+                                  rtc->wrapsVkSecondaryCB();
+
+        bool hasSWMask = false;
+        if ((rtc->numSamples() <= 1 && maskRequiresAA) || stencilUnavailable) {
+            // Must use a texture mask to represent the combined clip elements since the stencil
+            // cannot be used, or cannot handle smooth clips.
+            std::tie(hasSWMask, clipFP) = GetSWMaskFP(
+                    context, &fMasks, cs, scissorBounds, elementsForMask.begin(),
+                    elementsForMask.count(), std::move(clipFP));
+        }
+
+        if (!hasSWMask) {
+            if (stencilUnavailable) {
+                SkDebugf("WARNING: Clip mask requires stencil, but stencil unavailable. "
+                            "Draw will be ignored.\n");
+                return Effect::kClippedOut;
+            } else {
+                // Rasterize the remaining elements to the stencil buffer
+                render_stencil_mask(context, rtc, cs.genID(), scissorBounds,
+                                    elementsForMask.begin(), elementsForMask.count(), out);
+            }
+        }
+    }
+
+    // Finish CCPR paths now that the render target's ops task is stable.
+    if (!elementsForAtlas.empty()) {
+        uint32_t opsTaskID = rtc->getOpsTask()->uniqueID();
+        for (int i = 0; i < elementsForAtlas.count(); ++i) {
+            SkASSERT(elementsForAtlas[i]->aa() == GrAA::kYes);
+            clipFP = clip_atlas_fp(ccpr, opsTaskID, scissorBounds, elementsForAtlas[i]->asElement(),
+                                   elementsForAtlas[i]->devicePath(), *caps, std::move(clipFP));
+        }
+    }
+
+    if (clipFP) {
+        // This will include all analytic FPs, all CCPR atlas FPs, and a SW mask FP.
+        out->addCoverageFP(std::move(clipFP));
+    }
+
+    SkASSERT(out->doesClip());
+    return Effect::kClipped;
+}
+
+GrClipStack::SaveRecord& GrClipStack::writableSaveRecord(bool* wasDeferred) {
+    // 'wasDeferred' reports whether a new record had to be pushed for a deferred save.
+    SaveRecord& top = fSaves.back();
+    *wasDeferred = !top.canBeUpdated();
+    if (!*wasDeferred) {
+        // No deferred saves are pending on the top record, so it is modified in place.
+        return top;
+    }
+    // Consume one deferred save; the assert checks the top record itself stays alive, since
+    // the new record is constructed from it and pushed above it.
+    SkAssertResult(top.popSave());
+    return fSaves.emplace_back(top, fMasks.count(), fElements.count());
+}
+
+void GrClipStack::clipShader(sk_sp<SkShader> shader) {
+    // A shader cannot add coverage, so it is dropped when the clip is already empty.
+    const bool clipIsEmpty = this->currentSaveRecord().state() == ClipState::kEmpty;
+    if (!clipIsEmpty) {
+        // Updating the shader invalidates neither masks nor geometry elements, so whether a
+        // deferred save had to be materialized is irrelevant here.
+        bool ignoredWasDeferred;
+        this->writableSaveRecord(&ignoredWasDeferred).addShader(std::move(shader));
+    }
+}
+
+void GrClipStack::replaceClip(const SkIRect& rect) {
+    bool wasDeferred;
+    SaveRecord& record = this->writableSaveRecord(&wasDeferred);
+    if (!wasDeferred) {
+        // An existing record is being reused: discard the elements and masks it holds.
+        record.removeElements(&fElements);
+        record.invalidateMasks(fProxyProvider, &fMasks);
+    }
+    // Start over from the device bounds; only a different 'rect' needs an extra intersect.
+    record.reset(fDeviceBounds);
+    if (fDeviceBounds != rect) {
+        this->clipRect(SkMatrix::I(), SkRect::Make(rect), GrAA::kNo, SkClipOp::kIntersect);
+    }
+}
+
+void GrClipStack::clip(RawElement&& element) {
+    // Once the clip is empty no element can change it, so new elements are ignored.
+    if (this->currentSaveRecord().state() == ClipState::kEmpty) {
+        return;
+    }
+
+    // Reduce the shape to its simplest form (this applies the transform when it is only a
+    // scale+translate) and clip its bounds to the device. The device bounds are used rather
+    // than the conservative clip bounds because the latter reflect the net effect of all
+    // elements, while device clipping is implicit; addElement() may still be able to
+    // invalidate older elements with this one.
+    element.simplify(fDeviceBounds, fForceAA);
+    SkASSERT(!element.shape().inverted());
+
+    // Subtracting an empty shape changes nothing, so return before any save record is
+    // allocated. An empty intersect is NOT skipped: it empties the clip, and that state change
+    // must be recorded in a writable save record by addElement() below.
+    if (element.shape().isEmpty() && element.op() == SkClipOp::kDifference) {
+        return;
+    }
+
+    bool wasDeferred;
+    SaveRecord& record = this->writableSaveRecord(&wasDeferred);
+    SkDEBUGCODE(const uint32_t priorGenID = record.genID();)
+    SkDEBUGCODE(const int priorElementCount = fElements.count();)
+
+    if (record.addElement(std::move(element), &fElements)) {
+        // The record was modified, so its gen ID must be both new and valid.
+        SkASSERT(priorGenID != record.genID() && record.genID() != kInvalidGenID);
+        if (fProxyProvider && !wasDeferred) {
+            // An already-active record changed, so the masks it held are stale.
+            record.invalidateMasks(fProxyProvider, &fMasks);
+        }
+        return;
+    }
+
+    if (wasDeferred) {
+        // A record was pushed just for this element; rather than keep an empty record, pop it
+        // and hand the deferred save back to the record underneath.
+        SkASSERT(priorElementCount == fElements.count());
+        fSaves.pop_back();
+        fSaves.back().pushSave();
+    } else {
+        // An unmodified record must keep its gen ID.
+        SkASSERT(priorGenID == record.genID());
+    }
+}
+
+GrFPResult GrClipStack::GetSWMaskFP(GrRecordingContext* context, Mask::Stack* masks,
+                                    const SaveRecord& current, const SkIRect& bounds,
+                                    const Element** elements, int count,
+                                    std::unique_ptr<GrFragmentProcessor> clipFP) {
+    GrProxyProvider* proxyProvider = context->priv().proxyProvider();
+    GrSurfaceProxyView maskProxy;
+    // Overall flow: reuse a cached mask if one applies, otherwise render and register a new one.
+    SkIRect maskBounds; // may not be 'bounds' if we reuse a large clip mask
+    // Walk the masks in reverse, stopping at the first one not made for the current gen ID
+    for (const Mask& m : masks->ritems()) {
+        if (m.genID() != current.genID()) {
+            break;
+        }
+        if (m.appliesToDraw(current, bounds)) {
+            maskProxy = proxyProvider->findCachedProxyWithColorTypeFallback(
+                    m.key(), kMaskOrigin, GrColorType::kAlpha_8, 1);
+            if (maskProxy) {
+                maskBounds = m.bounds();
+                break;
+            }
+        }
+    }
+
+    if (!maskProxy) {
+        // No existing mask was found, so need to render a new one
+        maskProxy = render_sw_mask(context, bounds, elements, count);
+        if (!maskProxy) {
+            // Rendering failed too: report failure and hand the incoming 'clipFP' back
+            return GrFPFailure(std::move(clipFP));
+        }
+
+        // Register the mask for later invalidation; its key lets later queries reuse the proxy
+        Mask& mask = masks->emplace_back(current, bounds);
+        proxyProvider->assignUniqueKeyToProxy(mask.key(), maskProxy.asTextureProxy());
+        maskBounds = bounds;
+    }
+
+    // Wrap the mask in an FP that samples it for coverage
+    SkASSERT(maskProxy && maskProxy.origin() == kMaskOrigin);
+
+    GrSamplerState samplerState(GrSamplerState::WrapMode::kClampToBorder,
+                                GrSamplerState::Filter::kNearest);
+    // Map the device coords passed to the texture effect to the top-left corner of the mask, and
+    // make sure that the draw bounds are pre-mapped into the mask's space as well.
+    auto m = SkMatrix::Translate(-maskBounds.fLeft, -maskBounds.fTop);
+    auto subset = SkRect::Make(bounds);
+    subset.offset(-maskBounds.fLeft, -maskBounds.fTop);
+    // We scissor to bounds. The mask's texel centers are aligned to device space
+    // pixel centers. Hence this domain of texture coordinates.
+    auto domain = subset.makeInset(0.5, 0.5);
+    auto fp = GrTextureEffect::MakeSubset(std::move(maskProxy), kPremul_SkAlphaType, m,
+                                          samplerState, subset, domain, *context->priv().caps());
+    fp = GrDeviceSpaceEffect::Make(std::move(fp));
+
+    // Modulate the incoming 'clipFP' coverage with the coverage sampled from the mask
+    fp = GrBlendFragmentProcessor::Make(std::move(clipFP), std::move(fp), SkBlendMode::kModulate);
+    return GrFPSuccess(std::move(fp));
+}
diff --git a/src/gpu/GrClipStack.h b/src/gpu/GrClipStack.h
new file mode 100644
index 0000000..52d2ffc
--- /dev/null
+++ b/src/gpu/GrClipStack.h
@@ -0,0 +1,363 @@
+/*
+ * Copyright 2020 Google LLC
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+#ifndef GrClipStack_DEFINED
+
+#define GrClipStack_DEFINED
+
+#include "include/core/SkClipOp.h"
+#include "include/core/SkMatrix.h"
+#include "include/core/SkShader.h"
+#include "include/private/GrResourceKey.h"
+#include "src/gpu/GrClip.h"
+#include "src/gpu/GrSurfaceProxyView.h"
+#include "src/gpu/GrTBlockList.h"
+#include "src/gpu/geometry/GrShape.h"
+
+class GrAppliedClip;
+class GrProxyProvider;
+class GrRecordingContext;
+class GrRenderTargetContext;
+class GrSWMaskHelper;
+
+class GrClipStack final : public GrClip {  // GrClip backed by a save/restore stack of elements
+public:
+    enum class ClipState : uint8_t {
+        kEmpty, kWideOpen, kDeviceRect, kDeviceRRect, kComplex
+    };
+
+    // All data describing a geometric modification to the clip
+    struct Element {
+        GrShape  fShape;
+        SkMatrix fLocalToDevice;
+        SkClipOp fOp;
+        GrAA     fAA;
+    };
+
+    // The SkMatrixProvider must outlive the GrClipStack.
+    GrClipStack(const SkIRect& deviceBounds, const SkMatrixProvider* matrixProvider, bool forceAA);
+
+    ~GrClipStack() override;
+
+    GrClipStack(const GrClipStack&) = delete;
+    GrClipStack& operator=(const GrClipStack&) = delete;
+
+    ClipState clipState() const { return this->currentSaveRecord().state(); }
+
+    class ElementIter;
+    // Provides for-range over active, valid clip elements from most recent to oldest.
+    // The iterator provides items as "const Element&".
+    inline ElementIter begin() const;
+    inline ElementIter end() const;
+
+    // Clip stack manipulation
+    void save();
+    void restore();
+
+    void clipRect(const SkMatrix& ctm, const SkRect& rect, GrAA aa, SkClipOp op) {
+        this->clip({ctm, GrShape(rect), aa, op});
+    }
+    void clipRRect(const SkMatrix& ctm, const SkRRect& rrect, GrAA aa, SkClipOp op) {
+        this->clip({ctm, GrShape(rrect), aa, op});
+    }
+    void clipPath(const SkMatrix& ctm, const SkPath& path, GrAA aa, SkClipOp op) {
+        this->clip({ctm, GrShape(path), aa, op});
+    }
+    void clipShader(sk_sp<SkShader> shader);
+    // Resets the clip to the device bounds, then intersects it with 'rect' (non-AA).
+    void replaceClip(const SkIRect& rect);
+
+    // GrClip implementation
+    GrClip::Effect apply(GrRecordingContext*, GrRenderTargetContext*, GrAAType aa,
+                         bool hasUserStencilSettings,
+                         GrAppliedClip*, SkRect* bounds) const override;
+    GrClip::PreClipResult preApply(const SkRect& drawBounds, GrAA aa) const override;
+    SkIRect getConservativeBounds() const override;
+
+#ifdef GR_TEST_UTILS
+    GrUniqueKey testingOnly_getLastSWMaskKey() const {
+        return fMasks.empty() ? GrUniqueKey() : fMasks.back().key();
+    }
+#endif
+
+private:
+    class SaveRecord;
+    class Mask;
+
+    // Internally, a lot of clip reasoning is based on an op, outer bounds, and whether a shape
+    // contains another (possibly just conservatively based on inner/outer device-space bounds).
+    //
+    // Element and SaveRecord store this information directly, but a draw fits the same definition
+    // with an implicit intersect op and empty inner bounds. The Draw type declared below provides
+    // the same interface as Element and SaveRecord for internal clip reasoning templates.
+    class Draw;
+
+    // Wraps the geometric Element data with logic for containment and bounds testing.
+    class RawElement : private Element {
+    public:
+        using Stack = GrTBlockList<RawElement, 1>;
+
+        RawElement(const SkMatrix& localToDevice, const GrShape& shape, GrAA aa, SkClipOp op);
+
+        // Common clip type interface
+        SkClipOp        op() const { return fOp; }
+        const SkIRect&  outerBounds() const { return fOuterBounds; }
+        bool            contains(const SaveRecord& s) const;
+        bool            contains(const Draw& d) const;
+        bool            contains(const RawElement& e) const;
+
+        // Additional element-specific data
+        const Element&  asElement() const { return *this; }
+
+        const GrShape&  shape() const { return fShape; }
+        const SkMatrix& localToDevice() const { return fLocalToDevice; }
+        const SkIRect&  innerBounds() const { return fInnerBounds; }
+        GrAA            aa() const { return fAA; }
+
+        SkPath*         devicePath() const { return &fDevicePath; }
+
+        ClipState       clipType() const;
+
+        // As new elements are pushed on to the stack, they may make older elements redundant.
+        // The old elements are marked invalid so they are skipped during clip application, but may
+        // become active again when a save record is restored.
+        bool isInvalid() const { return fInvalidatedByIndex >= 0; }
+        void markInvalid(const SaveRecord& current);
+        void restoreValid(const SaveRecord& current);
+
+        // 'added' represents a new op added to the element stack. Its combination with this element
+        // can result in a number of possibilities:
+        //  1. The entire clip is empty (signaled by both this and 'added' being invalidated).
+        //  2. The 'added' op supersedes this element (this element is invalidated).
+        //  3. This op supersedes the 'added' element (the added element is marked invalidated).
+        //  4. Their combination can be represented by a single new op (in which case this
+        //     element should be invalidated, and the combined shape stored in 'added').
+        //  5. Or both elements remain needed to describe the clip (both are valid and unchanged).
+        //
+        // The calling element will only modify its invalidation index since it could belong
+        // to part of the inactive stack (that might be restored later). All merged state/geometry
+        // is handled by modifying 'added'.
+        void updateForElement(RawElement* added, const SaveRecord& current);
+
+        void simplify(const SkIRect& deviceBounds, bool forceAA);
+
+    private:
+        bool combine(const RawElement& other, const SaveRecord& current);
+
+        SkMatrix fDeviceToLocal; // cached inverse of fLocalToDevice for contains() optimization
+        // TODO: This is only needed because CCPR tracks clip paths in device space; if we didn't
+        // cache this, every use of the path would be re-transformed and get its own atlas entry.
+        mutable SkPath fDevicePath;    // lazily initialized the first time it's needed
+
+        // Device space bounds, rounded in or out to pixel boundaries and accounting for any
+        // uncertainty around anti-aliasing and rasterization snapping.
+        SkIRect  fInnerBounds;
+        SkIRect  fOuterBounds;
+
+        // Elements are invalidated by SaveRecords as the record is updated with new elements that
+        // override old geometry. An invalidated element stores the index of the first element of
+        // the save record that invalidated it. This makes it easy to undo when the save record is
+        // popped from the stack, and is stable as the current save record is modified.
+        int fInvalidatedByIndex;
+    };
+
+    // Represents an alpha mask with the rasterized coverage from elements in a draw query that
+    // could not be converted to analytic coverage FPs.
+    // TODO: This is only required for SW masks. Stencil masks and atlas masks don't have resources
+    // owned by the GrClipStack. Once SW masks are no longer needed, this can go away.
+    class Mask {
+    public:
+        using Stack = GrTBlockList<Mask, 1>;
+
+        Mask(const SaveRecord& current, const SkIRect& bounds);
+
+        ~Mask() {
+            // The key should have been released by the clip stack beforehand
+            SkASSERT(!fKey.isValid());
+        }
+
+        const GrUniqueKey& key() const { return fKey; }
+        const SkIRect&     bounds() const { return fBounds; }
+        uint32_t           genID() const { return fGenID; }
+
+        bool appliesToDraw(const SaveRecord& current, const SkIRect& drawBounds) const;
+        void invalidate(GrProxyProvider* proxyProvider);
+
+        SkDEBUGCODE(const SaveRecord* owner() const { return fOwner; })
+    private:
+        GrUniqueKey fKey;
+        // The gen ID of the save record and the query bounds uniquely define the set of elements
+        // that would go into a mask. If the save record adds new elements, its gen ID would change.
+        // If the draw had different bounds it would select a different set of masked elements.
+        // Repeatedly querying an unmodified save record with the same bounds is idempotent.
+        SkIRect     fBounds;
+        uint32_t    fGenID;
+
+        SkDEBUGCODE(const SaveRecord* fOwner;)
+    };
+
+    // Represents a saved point in the clip stack, and manages the lifetime of elements added to
+    // stack within the record's lifetime. Also provides the logic for determining active elements
+    // given a draw query.
+    class SaveRecord {
+    public:
+        using Stack = GrTBlockList<SaveRecord, 2>;
+
+        explicit SaveRecord(const SkIRect& deviceBounds);
+
+        SaveRecord(const SaveRecord& prior, int startingMaskIndex, int startingElementIndex);
+
+        // The common clip type interface
+        SkClipOp        op() const { return fStackOp; }
+        const SkIRect&  outerBounds() const { return fOuterBounds; }
+        bool            contains(const Draw& d) const;
+        bool            contains(const RawElement& e) const;
+
+        // Additional save record-specific data/functionality
+        const SkShader* shader() const { return fShader.get(); }
+        const SkIRect&  innerBounds() const { return fInnerBounds; }
+        int             firstActiveElementIndex() const { return fStartingElementIndex; }
+        int             oldestElementIndex() const { return fOldestValidIndex; }
+        bool            canBeUpdated() const { return (fDeferredSaveCount == 0); }
+
+        ClipState       state() const;
+        uint32_t        genID() const;
+
+        // Deferred save manipulation
+        void pushSave() {
+            SkASSERT(fDeferredSaveCount >= 0);
+            fDeferredSaveCount++;
+        }
+        // Returns true if the record should stay alive. False means the GrClipStack must delete it
+        bool popSave() {
+            fDeferredSaveCount--;
+            SkASSERT(fDeferredSaveCount >= -1);
+            return fDeferredSaveCount >= 0;
+        }
+
+        // Return true if the element was added to 'elements', or otherwise affected the save record
+        // (e.g. turned it empty).
+        bool addElement(RawElement&& toAdd, RawElement::Stack* elements);
+
+        void addShader(sk_sp<SkShader> shader);
+        void reset(const SkIRect& bounds);
+
+        // Remove the elements owned by this save record, which must happen before the save record
+        // itself is removed from the clip stack.
+        void removeElements(RawElement::Stack* elements);
+
+        // Restore element validity now that this record is the new top of the stack.
+        void restoreElements(RawElement::Stack* elements);
+
+        void invalidateMasks(GrProxyProvider* proxyProvider, Mask::Stack* masks);
+
+    private:
+        // These functions modify 'elements' and element-dependent state of the record
+        // (such as valid index and fState).
+        bool appendElement(RawElement&& toAdd, RawElement::Stack* elements);
+        void replaceWithElement(RawElement&& toAdd, RawElement::Stack* elements);
+
+        // Inner bounds is always contained in outer bounds, or it is empty. All bounds will be
+        // contained in the device bounds.
+        SkIRect   fInnerBounds; // Inside is full coverage (stack op == intersect) or 0 cov (diff)
+        SkIRect   fOuterBounds; // Outside is 0 coverage (op == intersect) or full cov (diff)
+
+        // A save record can have up to one shader, multiple shaders are automatically blended
+        sk_sp<SkShader> fShader;
+
+        const int fStartingMaskIndex; // First mask owned by this save record
+        const int fStartingElementIndex;  // First element owned by this save record
+        int       fOldestValidIndex; // Index of oldest element that remains valid for this record
+
+        int       fDeferredSaveCount; // Number of save() calls without modifications (yet)
+
+        // Will be kIntersect unless every valid element is kDifference, which is significant
+        // because if kDifference then there is an implicit extra outer bounds at the device edges.
+        SkClipOp  fStackOp;
+        ClipState fState;
+        uint32_t  fGenID;
+    };
+
+    // Adds the element to the clip, handling allocating a new save record on the stack if
+    // there is a deferred save.
+    void clip(RawElement&& element);
+
+    const SaveRecord& currentSaveRecord() const {
+        SkASSERT(!fSaves.empty());
+        return fSaves.back();
+    }
+
+    // Returns a modifiable save record: the current one if it has no deferred saves, or else a
+    // new record pushed on top of it. 'wasDeferred' is set to true when a new record was pushed.
+    SaveRecord& writableSaveRecord(bool* wasDeferred);
+
+    // Generate or find a cached SW coverage mask and return an FP that samples it.
+    // 'elements' is an array of pointers to elements in the stack.
+    static GrFPResult GetSWMaskFP(GrRecordingContext* context, Mask::Stack* masks,
+                                  const SaveRecord& current, const SkIRect& bounds,
+                                  const Element** elements, int count,
+                                  std::unique_ptr<GrFragmentProcessor> clipFP);
+
+    RawElement::Stack        fElements;
+    SaveRecord::Stack        fSaves; // always has one wide open record at the top
+
+    // The masks are recorded during apply() calls so we can cache them; they are not modifications
+    // of the actual clip stack.
+    // NOTE: These fields can go away once a context has a dedicated clip atlas
+    mutable Mask::Stack      fMasks;
+    mutable GrProxyProvider* fProxyProvider;
+
+    const SkIRect            fDeviceBounds;
+    const SkMatrixProvider*  fMatrixProvider;
+
+    // When there's MSAA, clip elements are applied using the stencil buffer. If a backend cannot
+    // disable MSAA per draw, then all elements are effectively AA'ed. Tracking them as such
+    // keeps the entire stack as simple as possible.
+    bool                     fForceAA;
+};
+
+// Iterates clip elements (see GrClipStack::begin()/end()), skipping invalidated ones.
+class GrClipStack::ElementIter {
+public:
+    // Unequal only while BOTH position and remaining count differ (either match ends a walk).
+    bool operator!=(const ElementIter& o) const {
+        const bool positionDiffers = o.fItem != fItem;
+        return positionDiffers && o.fRemaining != fRemaining;
+    }
+
+    const Element& operator*() const { return (*fItem).asElement(); }
+    ElementIter& operator++() {
+        // Always step once, then keep stepping past invalidated elements while any remain.
+        for (;;) {
+            fRemaining--;
+            ++fItem;
+            if (fRemaining <= 0 || !(*fItem).isInvalid()) {
+                return *this;
+            }
+        }
+    }
+
+    ElementIter(RawElement::Stack::CRIter::Item item, int r) : fItem(item), fRemaining(r) {}
+    RawElement::Stack::CRIter::Item fItem;
+    int fRemaining;
+    friend class GrClipStack;
+};
+
+GrClipStack::ElementIter GrClipStack::begin() const {
+    const SaveRecord& top = this->currentSaveRecord();
+    // An empty or wide-open clip exposes no elements, so the range is [end, end).
+    if (top.state() == ClipState::kEmpty || top.state() == ClipState::kWideOpen) {
+        return this->end();
+    }
+    // Start at the newest element and allow stepping down to the oldest valid index.
+    return ElementIter(fElements.ritems().begin(), fElements.count() - top.oldestElementIndex());
+}
+
+GrClipStack::ElementIter GrClipStack::end() const {
+    return ElementIter(fElements.ritems().end(), 0);  // end position with no elements remaining
+}
+
+#endif
diff --git a/src/gpu/GrRenderTargetContext.h b/src/gpu/GrRenderTargetContext.h
index 0567ab4..55d3a04 100644
--- a/src/gpu/GrRenderTargetContext.h
+++ b/src/gpu/GrRenderTargetContext.h
@@ -600,6 +600,7 @@
     GrAAType chooseAAType(GrAA);
 
     friend class GrClipStackClip;               // for access to getOpsTask
+    friend class GrClipStack;                   // ""
     friend class GrOnFlushResourceProvider;     // for access to getOpsTask (http://skbug.com/9357)
 
     friend class GrRenderTargetContextPriv;
diff --git a/src/gpu/SkGpuDevice.cpp b/src/gpu/SkGpuDevice.cpp
index 5da51cc..ceb57c3 100644
--- a/src/gpu/SkGpuDevice.cpp
+++ b/src/gpu/SkGpuDevice.cpp
@@ -130,7 +130,15 @@
                     renderTargetContext->surfaceProps())
         , fContext(SkRef(context))
         , fRenderTargetContext(std::move(renderTargetContext))
+#if SK_USE_NEW_GR_CLIP_STACK
+        , fClip(SkIRect::MakeWH(fRenderTargetContext->width(),
+                                fRenderTargetContext->height()),
+                &this->asMatrixProvider(),
+                fRenderTargetContext->numSamples() > 1 &&
+                        !fRenderTargetContext->caps()->multisampleDisableSupport()) {
+#else
         , fClip(fRenderTargetContext->dimensions(), &this->cs(), &this->asMatrixProvider()) {
+#endif
     if (flags & kNeedClear_Flag) {
         this->clearAll();
     }
@@ -243,6 +251,64 @@
 
 ///////////////////////////////////////////////////////////////////////////////
 
+#if SK_USE_NEW_GR_CLIP_STACK
+
+void SkGpuDevice::onClipRegion(const SkRegion& globalRgn, SkClipOp op) {
+    SkASSERT(op == SkClipOp::kIntersect || op == SkClipOp::kDifference);
+    // Pick the simplest shape for the region; every branch clips without AA (GrAA::kNo).
+    if (globalRgn.isEmpty()) {
+        fClip.clipRect(SkMatrix::I(), SkRect::MakeEmpty(), GrAA::kNo, op);
+    } else if (globalRgn.isRect()) {
+        fClip.clipRect(this->globalToDevice(), SkRect::Make(globalRgn.getBounds()), GrAA::kNo, op);
+    } else {
+        SkPath path;
+        globalRgn.getBoundaryPath(&path);
+        fClip.clipPath(this->globalToDevice(), path, GrAA::kNo, op);
+    }
+}
+
+void SkGpuDevice::onAsRgnClip(SkRegion* region) const {
+    const SkRegion deviceBounds(fClip.getConservativeBounds());
+    *region = deviceBounds;  // seed with the bounds; each element's op below reduces it
+    for (const GrClipStack::Element& e : fClip) {
+        SkRegion tmp;
+        if (e.fShape.isRect() && e.fLocalToDevice.isIdentity()) {
+            tmp.setRect(e.fShape.rect().roundOut());
+        } else {
+            SkPath tmpPath;
+            e.fShape.asPath(&tmpPath);
+            tmpPath.transform(e.fLocalToDevice);
+            tmp.setPath(tmpPath, deviceBounds);
+        }
+        region->op(tmp, (SkRegion::Op) e.fOp);
+    }
+}
+
+bool SkGpuDevice::onClipIsAA() const {
+    for (const GrClipStack::Element& e : fClip) {
+        if (e.fAA == GrAA::kYes) {
+            return true;  // a single anti-aliased element makes the whole clip count as AA
+        }
+    }
+    return false;  // no elements to iterate, or every element is non-AA
+}
+
+SkBaseDevice::ClipType SkGpuDevice::onGetClipType() const {
+    switch (fClip.clipState()) {
+        case GrClipStack::ClipState::kEmpty:
+            return ClipType::kEmpty;
+        case GrClipStack::ClipState::kDeviceRect:
+        case GrClipStack::ClipState::kWideOpen:  // an unrestricted clip is reported as a rect too
+            return ClipType::kRect;
+        default:  // every other state
+            return ClipType::kComplex;
+    }
+}
+
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+
 void SkGpuDevice::drawPaint(const SkPaint& paint) {
     ASSERT_SINGLE_OWNER
     GR_CREATE_TRACE_MARKER_CONTEXT("SkGpuDevice", "drawPaint", fContext.get());
diff --git a/src/gpu/SkGpuDevice.h b/src/gpu/SkGpuDevice.h
index a135d39..b5fb274 100644
--- a/src/gpu/SkGpuDevice.h
+++ b/src/gpu/SkGpuDevice.h
@@ -13,8 +13,6 @@
 #include "include/core/SkRegion.h"
 #include "include/core/SkSurface.h"
 #include "include/gpu/GrTypes.h"
-#include "src/core/SkClipStackDevice.h"
-#include "src/gpu/GrClipStackClip.h"
 #include "src/gpu/GrRenderTargetContext.h"
 #include "src/gpu/SkGr.h"
 
@@ -27,11 +25,30 @@
 class SkSurface;
 class SkVertices;
 
+#ifndef SK_USE_NEW_GR_CLIP_STACK
+// NOTE: If this is non-zero, SkGpuDevice extends SkBaseDevice directly and manages its clip stack
+// using GrClipStack. When zero, SkGpuDevice continues to extend SkClipStackDevice and uses
+// SkClipStack and GrClipStackClip to manage the clip stack.
+#define SK_USE_NEW_GR_CLIP_STACK 0
+#endif
+
+#if SK_USE_NEW_GR_CLIP_STACK
+    #include "src/core/SkDevice.h"
+    #include "src/gpu/GrClipStack.h"
+    #define BASE_DEVICE   SkBaseDevice
+    #define GR_CLIP_STACK GrClipStack
+#else
+    #include "src/core/SkClipStackDevice.h"
+    #include "src/gpu/GrClipStackClip.h"
+    #define BASE_DEVICE   SkClipStackDevice
+    #define GR_CLIP_STACK GrClipStackClip
+#endif
+
 /**
  *  Subclass of SkBaseDevice, which directs all drawing to the GrGpu owned by the
  *  canvas.
  */
-class SkGpuDevice : public SkClipStackDevice {
+class SkGpuDevice : public BASE_DEVICE  {
 public:
     enum InitContents {
         kClear_InitContents,
@@ -126,11 +143,50 @@
     bool onReadPixels(const SkPixmap&, int, int) override;
     bool onWritePixels(const SkPixmap&, int, int) override;
 
+#if SK_USE_NEW_GR_CLIP_STACK
+    void onSave() override { fClip.save(); }
+    void onRestore() override { fClip.restore(); }
+    // Geometric clips forward the current local-to-device matrix; only intersect/difference ops.
+    void onClipRect(const SkRect& rect, SkClipOp op, bool aa) override {
+        SkASSERT(op == SkClipOp::kIntersect || op == SkClipOp::kDifference);
+        fClip.clipRect(this->localToDevice(), rect, GrAA(aa), op);
+    }
+    void onClipRRect(const SkRRect& rrect, SkClipOp op, bool aa) override {
+        SkASSERT(op == SkClipOp::kIntersect || op == SkClipOp::kDifference);
+        fClip.clipRRect(this->localToDevice(), rrect, GrAA(aa), op);
+    }
+    void onClipPath(const SkPath& path, SkClipOp op, bool aa) override {
+        SkASSERT(op == SkClipOp::kIntersect || op == SkClipOp::kDifference);
+        fClip.clipPath(this->localToDevice(), path, GrAA(aa), op);
+    }
+    void onClipShader(sk_sp<SkShader> shader) override {
+        fClip.clipShader(std::move(shader));
+    }
+    void onReplaceClip(const SkIRect& rect) override {
+        // Transform from "global/canvas" coordinates to relative to this device
+        SkIRect deviceRect = this->globalToDevice().mapRect(SkRect::Make(rect)).round();
+        fClip.replaceClip(deviceRect);
+    }
+    void onClipRegion(const SkRegion& globalRgn, SkClipOp op) override;
+    void onAsRgnClip(SkRegion*) const override;
+    ClipType onGetClipType() const override;
+    bool onClipIsAA() const override;
+    // Only asserts that the restriction is empty; nothing is recorded on fClip.
+    void onSetDeviceClipRestriction(SkIRect* mutableClipRestriction) override {
+        SkASSERT(mutableClipRestriction->isEmpty());
+    }
+    bool onClipIsWideOpen() const override {
+        return fClip.clipState() == GrClipStack::ClipState::kWideOpen;
+    }
+    SkIRect onDevClipBounds() const override { return fClip.getConservativeBounds(); }
+#endif
+
 private:
     // We want these unreffed in RenderTargetContext, GrContext order.
     sk_sp<GrRecordingContext> fContext;
     std::unique_ptr<GrRenderTargetContext> fRenderTargetContext;
-    GrClipStackClip  fClip;
+
+    GR_CLIP_STACK   fClip;
 
     enum Flags {
         kNeedClear_Flag = 1 << 0,  //!< Surface requires an initial clear
@@ -175,7 +231,10 @@
 
     friend class GrAtlasTextContext;
     friend class SkSurface_Gpu;      // for access to surfaceProps
-    using INHERITED = SkClipStackDevice;
+    using INHERITED = BASE_DEVICE;
 };
 
+#undef BASE_DEVICE
+#undef GR_CLIP_STACK
+
 #endif
diff --git a/src/gpu/geometry/GrQuadUtils.h b/src/gpu/geometry/GrQuadUtils.h
index dbbc462..1288ba5 100644
--- a/src/gpu/geometry/GrQuadUtils.h
+++ b/src/gpu/geometry/GrQuadUtils.h
@@ -46,6 +46,8 @@
      */
     bool CropToRect(const SkRect& cropRect, GrAA cropAA, DrawQuad* quad, bool computeLocal=true);
 
+    inline void Outset(const skvx::Vec<4, float>& edgeDistances, GrQuad* quad);
+
     class TessellationHelper {
     public:
         // Set the original device and (optional) local coordinates that are inset or outset
@@ -193,4 +195,10 @@
 
 }; // namespace GrQuadUtils
 
+void GrQuadUtils::Outset(const skvx::Vec<4, float>& edgeDistances, GrQuad* quad) {
+    TessellationHelper outsetter;
+    outsetter.reset(*quad, nullptr);                 // device quad only, no local coordinates
+    outsetter.outset(edgeDistances, quad, nullptr);  // result is written back into 'quad'
+}
+
 #endif
diff --git a/src/gpu/geometry/GrShape.cpp b/src/gpu/geometry/GrShape.cpp
index 3884c40..592e062 100644
--- a/src/gpu/geometry/GrShape.cpp
+++ b/src/gpu/geometry/GrShape.cpp
@@ -8,9 +8,10 @@
 #include "src/gpu/geometry/GrShape.h"
 
 #include "src/core/SkPathPriv.h"
+#include "src/core/SkRRectPriv.h"
 
 GrShape& GrShape::operator=(const GrShape& shape) {
-    switch(shape.type()) {
+    switch (shape.type()) {
         case Type::kEmpty:
             this->reset();
             break;
@@ -32,8 +33,6 @@
         case Type::kLine:
             this->setLine(shape.fLine);
             break;
-        default:
-            SkUNREACHABLE;
     }
 
     fStart = shape.fStart;
@@ -245,7 +244,7 @@
     // The type specific functions automatically fall through to the simpler shapes, so
     // we only need to start in the right place.
     bool wasClosed = false;
-    switch(fType) {
+    switch (fType) {
         case Type::kEmpty:
             // do nothing
             break;
@@ -282,8 +281,8 @@
     return wasClosed;
 }
 
-bool GrShape::contains(const SkRect& rect) const {
-    switch(this->type()) {
+bool GrShape::conservativeContains(const SkRect& rect) const {
+    switch (this->type()) {
         case Type::kEmpty:
         case Type::kPoint: // fall through since a point has 0 area
         case Type::kLine:  // fall through, "" (currently choosing not to test if 'rect' == line)
@@ -302,13 +301,29 @@
             } else {
                 return false;
             }
-        default:
-            SkUNREACHABLE;
     }
+    SkUNREACHABLE;
+}
+
+bool GrShape::conservativeContains(const SkPoint& point) const {
+    switch (this->type()) {
+        case Type::kEmpty:
+        case Type::kPoint: // fall through, currently choosing not to test if shape == point
+        case Type::kLine:  // fall through, ""
+        case Type::kArc:   // arcs are likewise reported as never containing the point
+            return false;
+        case Type::kRect:
+            return fRect.contains(point.fX, point.fY);
+        case Type::kRRect:
+            return SkRRectPriv::ContainsPoint(fRRect, point);
+        case Type::kPath:
+            return fPath.contains(point.fX, point.fY);  // SkPath::contains accounts for fill type
+    }
+    SkUNREACHABLE;  // every Type enumerator returns above
 }
 
 bool GrShape::closed() const {
-    switch(this->type()) {
+    switch (this->type()) {
         case Type::kEmpty: // fall through
         case Type::kRect:  // fall through
         case Type::kRRect:
@@ -321,13 +336,12 @@
         case Type::kPoint: // fall through
         case Type::kLine:
             return false;
-        default:
-            SkUNREACHABLE;
     }
+    SkUNREACHABLE;
 }
 
 bool GrShape::convex(bool simpleFill) const {
-    switch(this->type()) {
+    switch (this->type()) {
         case Type::kEmpty: // fall through
         case Type::kRect:  // fall through
         case Type::kRRect:
@@ -341,16 +355,15 @@
         case Type::kPoint: // fall through
         case Type::kLine:
             return false;
-        default:
-            SkUNREACHABLE;
     }
+    SkUNREACHABLE;
 }
 
 SkRect GrShape::bounds() const {
     // Bounds where left == bottom or top == right can indicate a line or point shape. We return
     // inverted bounds for a truly empty shape.
     static constexpr SkRect kInverted = SkRect::MakeLTRB(1, 1, -1, -1);
-    switch(this->type()) {
+    switch (this->type()) {
         case Type::kEmpty:
             return kInverted;
         case Type::kPoint:
@@ -368,15 +381,14 @@
                                         fLine.fP2.fX, fLine.fP2.fY);
             b.sort();
             return b; }
-        default:
-            SkUNREACHABLE;
     }
+    SkUNREACHABLE;
 }
 
 uint32_t GrShape::segmentMask() const {
     // In order to match what a path would report, this has to inspect the shapes slightly
     // to reflect what they might simplify to.
-    switch(this->type()) {
+    switch (this->type()) {
         case Type::kEmpty:
             return 0;
         case Type::kRRect:
@@ -399,9 +411,8 @@
         case Type::kLine:  // ""
         case Type::kRect:
             return SkPath::kLine_SegmentMask;
-        default:
-            SkUNREACHABLE;
     }
+    SkUNREACHABLE;
 }
 
 void GrShape::asPath(SkPath* out, bool simpleFill) const {
@@ -416,7 +427,7 @@
         }
     } // Else when we're already a path, that will assign the fill type directly to 'out'.
 
-    switch(this->type()) {
+    switch (this->type()) {
         case Type::kEmpty:
             return;
         case Type::kPoint:
@@ -447,7 +458,6 @@
             out->moveTo(fLine.fP1);
             out->lineTo(fLine.fP2);
             return;
-        default:
-            SkUNREACHABLE;
     }
+    SkUNREACHABLE;
 }
diff --git a/src/gpu/geometry/GrShape.h b/src/gpu/geometry/GrShape.h
index 76ee86a..9dd8976 100644
--- a/src/gpu/geometry/GrShape.h
+++ b/src/gpu/geometry/GrShape.h
@@ -50,9 +50,9 @@
 public:
     // The current set of types GrShape can represent directly
     enum class Type : uint8_t {
-        kEmpty, kPoint, kRect, kRRect, kPath, kArc, kLine, kLast = kLine
+        kEmpty, kPoint, kRect, kRRect, kPath, kArc, kLine
     };
-    static constexpr int kTypeCount = static_cast<int>(Type::kLast) + 1;
+    static constexpr int kTypeCount = static_cast<int>(Type::kLine) + 1;
 
     // The direction and start index used when a shape does not have a representable winding,
     // or when that information was discarded during simplification (kIgnoreWinding_Flag).
@@ -69,7 +69,7 @@
     explicit GrShape(const GrArc& arc) { this->setArc(arc); }
     explicit GrShape(const GrLineSegment& line){ this->setLine(line); }
 
-    explicit GrShape(const GrShape& shape) { *this = shape; }
+    GrShape(const GrShape& shape) { *this = shape; }
 
     ~GrShape() { this->reset(); }
 
@@ -207,8 +207,10 @@
     // path), even if the final simplification results in a point, line, or empty.
     bool simplify(unsigned flags = kAll_Flags);
 
-    // True if the given bounding box is completely inside the shape.
-    bool contains(const SkRect& rect) const;
+    // True if the given bounding box, or point, is completely inside the shape when the shape is
+    // conservatively treated as a filled, closed shape.
+    bool conservativeContains(const SkRect& rect) const;
+    bool conservativeContains(const SkPoint& point) const;
 
     // True if the underlying geometry represents a closed shape, without the need for an
     // implicit close (note that if simplified earlier with 'simpleFill' = true, a shape that was
diff --git a/tests/GrClipStackTest.cpp b/tests/GrClipStackTest.cpp
new file mode 100644
index 0000000..6b421fb
--- /dev/null
+++ b/tests/GrClipStackTest.cpp
@@ -0,0 +1,1980 @@
+
+/*
+ * Copyright 2020 Google LLC
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "src/gpu/GrClipStack.h"
+#include "tests/Test.h"
+
+#include "include/core/SkPath.h"
+#include "include/core/SkRRect.h"
+#include "include/core/SkRect.h"
+#include "include/core/SkRegion.h"
+#include "include/core/SkShader.h"
+#include "include/gpu/GrDirectContext.h"
+#include "src/core/SkMatrixProvider.h"
+#include "src/core/SkRRectPriv.h"
+#include "src/core/SkRectPriv.h"
+#include "src/gpu/GrContextPriv.h"
+#include "src/gpu/GrProxyProvider.h"
+#include "src/gpu/GrRenderTargetContext.h"
+
+namespace {
+
+class TestCaseBuilder;
+class ElementsBuilder;
+
+enum class SavePolicy {
+    kNever,          // never call save()
+    kAtStart,        // one save() before any clip op is applied
+    kAtEnd,          // one save() after all clip ops have been applied
+    kBetweenEveryOp  // a save() before each clip op
+};
+// TODO: We could add a RestorePolicy enum that tests different places to restore, but that would
+// make defining the test expectations and order independence more cumbersome.
+
+class TestCase {
+public:
+    // Provides fluent API to describe actual clip commands and expected clip elements:
+    // TestCase test = TestCase::Build("example", deviceBounds)
+    //                          .actual().rect(r, GrAA::kYes, SkClipOp::kIntersect)
+    //                                   .localToDevice(matrix)
+    //                                   .nonAA()
+    //                                   .difference()
+    //                                   .path(p1)
+    //                                   .path(p2)
+    //                                   .finishElements()
+    //                          .state(GrClipStack::ClipState::kDeviceRect)
+    //                          .expect().rect(r, GrAA::kYes, SkClipOp::kIntersect)
+    //                                   .finishElements()
+    //                          .finishTest();
+    // Expected bounds are not specified; run() derives them from the expected elements.
+    static TestCaseBuilder Build(const char* name, const SkIRect& deviceBounds);
+
+    void run(const std::vector<int>& order, SavePolicy policy, skiatest::Reporter* reporter) const;
+
+    const SkIRect& deviceBounds() const { return fDeviceBounds; }
+    GrClipStack::ClipState expectedState() const { return fExpectedState; }
+    const std::vector<GrClipStack::Element>& initialElements() const { return fElements; }
+    const std::vector<GrClipStack::Element>& expectedElements() const { return fExpectedElements; }
+
+private:
+    friend class TestCaseBuilder;
+
+    TestCase(SkString name,
+             const SkIRect& deviceBounds,
+             GrClipStack::ClipState expectedState,
+             std::vector<GrClipStack::Element> actual,
+             std::vector<GrClipStack::Element> expected)
+        : fName(name)
+        , fElements(std::move(actual))
+        , fDeviceBounds(deviceBounds)
+        , fExpectedElements(std::move(expected))
+        , fExpectedState(expectedState) {}
+
+    SkString getTestName(const std::vector<int>& order, SavePolicy policy) const;
+
+    // This may be tighter than GrClipStack::getConservativeBounds() because this always accounts
+    // for difference ops, whereas GrClipStack only sometimes can subtract the inner bounds for a
+    // difference op.
+    std::pair<SkIRect, bool> getOptimalBounds() const;
+
+    SkString fName;
+
+    // The input shapes+state to GrClipStack
+    std::vector<GrClipStack::Element> fElements;
+    SkIRect fDeviceBounds;
+
+    // The expected output of iterating over the GrClipStack after all fElements are added, although
+    // order is not important
+    std::vector<GrClipStack::Element> fExpectedElements;
+    GrClipStack::ClipState fExpectedState;
+};
+
+class ElementsBuilder {
+public:
+    // Update the default matrix, aa, and op state for elements that are added.
+    ElementsBuilder& localToDevice(const SkMatrix& m) {  fLocalToDevice = m; return *this; }
+    ElementsBuilder& aa() { fAA = GrAA::kYes; return *this; }
+    ElementsBuilder& nonAA() { fAA = GrAA::kNo; return *this; }
+    ElementsBuilder& intersect() { fOp = SkClipOp::kIntersect; return *this; }
+    ElementsBuilder& difference() { fOp = SkClipOp::kDifference; return *this; }
+
+    // Add rect, rrect, or paths to the list of elements, possibly overriding the last set
+    // matrix, aa, and op state.
+    ElementsBuilder& rect(const SkRect& rect) {
+        return this->rect(rect, fLocalToDevice, fAA, fOp);
+    }
+    ElementsBuilder& rect(const SkRect& rect, GrAA aa, SkClipOp op) {
+        return this->rect(rect, fLocalToDevice, aa, op);
+    }
+    ElementsBuilder& rect(const SkRect& rect, const SkMatrix& m, GrAA aa, SkClipOp op) {
+        fElements->push_back({GrShape(rect), m, op, aa});
+        return *this;
+    }
+
+    ElementsBuilder& rrect(const SkRRect& rrect) {
+        return this->rrect(rrect, fLocalToDevice, fAA, fOp);
+    }
+    ElementsBuilder& rrect(const SkRRect& rrect, GrAA aa, SkClipOp op) {
+        return this->rrect(rrect, fLocalToDevice, aa, op);
+    }
+    ElementsBuilder& rrect(const SkRRect& rrect, const SkMatrix& m, GrAA aa, SkClipOp op) {
+        fElements->push_back({GrShape(rrect), m, op, aa});
+        return *this;
+    }
+
+    ElementsBuilder& path(const SkPath& path) {
+        return this->path(path, fLocalToDevice, fAA, fOp);
+    }
+    ElementsBuilder& path(const SkPath& path, GrAA aa, SkClipOp op) {
+        return this->path(path, fLocalToDevice, aa, op);
+    }
+    ElementsBuilder& path(const SkPath& path, const SkMatrix& m, GrAA aa, SkClipOp op) {
+        fElements->push_back({GrShape(path), m, op, aa});
+        return *this;
+    }
+
+    // Finish this element list and return the owning TestCaseBuilder so the chain can continue
+    TestCaseBuilder& finishElements() {
+        return *fBuilder;
+    }
+
+private:
+    friend class TestCaseBuilder;
+
+    ElementsBuilder(TestCaseBuilder* builder, std::vector<GrClipStack::Element>* elements)
+            : fBuilder(builder)
+            , fElements(elements) {}
+    // Defaults applied by the rect/rrect/path overloads that do not take them explicitly.
+    SkMatrix fLocalToDevice = SkMatrix::I();
+    GrAA     fAA = GrAA::kNo;
+    SkClipOp fOp = SkClipOp::kIntersect;
+
+    TestCaseBuilder*                   fBuilder;   // returned by finishElements()
+    std::vector<GrClipStack::Element>* fElements;  // list that added elements are appended to
+};
+
+class TestCaseBuilder {
+public:
+    ElementsBuilder actual() { return ElementsBuilder(this, &fActualElements); }
+    ElementsBuilder expect() { return ElementsBuilder(this, &fExpectedElements); }
+    // Copies the elements added via actual() so far to use as the expected elements.
+    TestCaseBuilder& expectActual() {
+        fExpectedElements = fActualElements;
+        return *this;
+    }
+    // Sets the clip state the stack is expected to report (defaults to kWideOpen).
+    TestCaseBuilder& state(GrClipStack::ClipState state) {
+        fExpectedState = state;
+        return *this;
+    }
+
+    TestCase finishTest() {
+        TestCase test(fName, fDeviceBounds, fExpectedState,
+                      std::move(fActualElements), std::move(fExpectedElements));
+        // Both element lists were moved into 'test'; put the expected state back to its default.
+        fExpectedState = GrClipStack::ClipState::kWideOpen;
+        return test;
+    }
+
+private:
+    friend class TestCase;
+
+    explicit TestCaseBuilder(const char* name, const SkIRect& deviceBounds)
+            : fName(name)
+            , fDeviceBounds(deviceBounds)
+            , fExpectedState(GrClipStack::ClipState::kWideOpen) {}
+
+    SkString fName;
+    SkIRect  fDeviceBounds;
+    GrClipStack::ClipState fExpectedState;
+
+    std::vector<GrClipStack::Element> fActualElements;
+    std::vector<GrClipStack::Element> fExpectedElements;
+};
+
+TestCaseBuilder TestCase::Build(const char* name, const SkIRect& deviceBounds) {
+    return TestCaseBuilder(name, deviceBounds);  // private ctor, reachable as TestCase is a friend
+}
+
+SkString TestCase::getTestName(const std::vector<int>& order, SavePolicy policy) const {
+    // Produces "<name>(save <policy>, order [i0,i1,...])" so a failure identifies its exact run.
+    const char* policyName = "";
+    switch (policy) {
+        case SavePolicy::kNever:
+            policyName = "never";
+            break;
+        case SavePolicy::kAtStart:
+            policyName = "start";
+            break;
+        case SavePolicy::kAtEnd:
+            policyName = "end";
+            break;
+        case SavePolicy::kBetweenEveryOp:
+            policyName = "between";
+            break;
+    }
+
+    SkString label = fName;
+    label.appendf("(save %s, order [", policyName);
+    const char* separator = "";
+    for (int index : order) {
+        label.append(separator);
+        label.appendf("%d", index);
+        separator = ",";
+    }
+    label.append("])");
+    return label;
+}
+
+std::pair<SkIRect, bool> TestCase::getOptimalBounds() const {
+    if (fExpectedState == GrClipStack::ClipState::kEmpty) {
+        return {SkIRect::MakeEmpty(), true};
+    }
+    // Returns {bounds, exact}; run() requires equality when 'exact', containment otherwise.
+    bool expectOptimal = true;
+    SkRegion region(fDeviceBounds);
+    for (const GrClipStack::Element& e : fExpectedElements) {
+        bool intersect = (e.fOp == SkClipOp::kIntersect && !e.fShape.inverted()) ||
+                         (e.fOp == SkClipOp::kDifference && e.fShape.inverted());
+        // 'intersect' is the effective operation once inverse-filled shapes are accounted for.
+        SkIRect elementBounds;
+        SkRegion::Op op;
+        if (intersect) {
+            op = SkRegion::kIntersect_Op;
+            expectOptimal &= e.fLocalToDevice.isIdentity();
+            elementBounds = GrClip::GetPixelIBounds(e.fLocalToDevice.mapRect(e.fShape.bounds()),
+                                                    e.fAA, GrClip::BoundsType::kExterior);
+        } else {
+            op = SkRegion::kDifference_Op;
+            expectOptimal = false;
+            if (e.fShape.isRect() && e.fLocalToDevice.isIdentity()) {
+                elementBounds = GrClip::GetPixelIBounds(e.fShape.rect(), e.fAA,
+                                                        GrClip::BoundsType::kInterior);
+            } else if (e.fShape.isRRect() && e.fLocalToDevice.isIdentity()) {
+                elementBounds = GrClip::GetPixelIBounds(SkRRectPriv::InnerBounds(e.fShape.rrect()),
+                                                        e.fAA, GrClip::BoundsType::kInterior);
+            } else {
+                elementBounds = SkIRect::MakeEmpty();
+            }
+        }
+        // Empty difference bounds subtract nothing, keeping the result conservative.
+        region.op(SkRegion(elementBounds), op);
+    }
+    return {region.getBounds(), expectOptimal};
+}
+
+static bool compare_elements(const GrClipStack::Element& a, const GrClipStack::Element& b) {
+    // Elements can only match when AA, op, matrix, and shape type all agree.
+    if (a.fAA != b.fAA || a.fOp != b.fOp || a.fLocalToDevice != b.fLocalToDevice ||
+        a.fShape.type() != b.fShape.type()) {
+        return false;
+    }
+    const GrShape::Type type = a.fShape.type();
+    if (type == GrShape::Type::kRect) {
+        return a.fShape.rect() == b.fShape.rect();
+    } else if (type == GrShape::Type::kRRect) {
+        return a.fShape.rrect() == b.fShape.rrect();
+    } else if (type == GrShape::Type::kPath) {
+        // A path's points are never transformed, the only modification is fill type which does
+        // not change the generation ID. For convex polygons, we check == so that more complex
+        // test cases can be evaluated.
+        return a.fShape.path().getGenerationID() == b.fShape.path().getGenerationID() ||
+               (a.fShape.convex() &&
+                a.fShape.segmentMask() == SkPathSegmentMask::kLine_SkPathSegmentMask &&
+                a.fShape.path() == b.fShape.path());
+    }
+    SkDEBUGFAIL("Shape type not handled by test case yet.");
+    return false;
+}
+
+void TestCase::run(const std::vector<int>& order, SavePolicy policy,
+                   skiatest::Reporter* reporter) const {
+    SkASSERT(fElements.size() == order.size());
+    // Replay fElements in the requested order on a fresh stack, saving as the policy dictates.
+    SkSimpleMatrixProvider matrixProvider(SkMatrix::I());
+    GrClipStack cs(fDeviceBounds, &matrixProvider, false);
+
+    if (policy == SavePolicy::kAtStart) {
+        cs.save();
+    }
+
+    for (int i : order) {
+        if (policy == SavePolicy::kBetweenEveryOp) {
+            cs.save();
+        }
+        const GrClipStack::Element& e = fElements[i];
+        switch (e.fShape.type()) {
+            case GrShape::Type::kRect:
+                cs.clipRect(e.fLocalToDevice, e.fShape.rect(), e.fAA, e.fOp);
+                break;
+            case GrShape::Type::kRRect:
+                cs.clipRRect(e.fLocalToDevice, e.fShape.rrect(), e.fAA, e.fOp);
+                break;
+            case GrShape::Type::kPath:
+                cs.clipPath(e.fLocalToDevice, e.fShape.path(), e.fAA, e.fOp);
+                break;
+            default:
+                SkDEBUGFAIL("Shape type not handled by test case yet.");
+        }
+    }
+
+    if (policy == SavePolicy::kAtEnd) {
+        cs.save();
+    }
+
+    // Now validate
+    SkString name = this->getTestName(order, policy);
+    REPORTER_ASSERT(reporter, cs.clipState() == fExpectedState,
+                    "%s, clip state expected %d, actual %d",
+                    name.c_str(), (int) fExpectedState, (int) cs.clipState());
+    SkIRect actualBounds = cs.getConservativeBounds();
+    SkIRect optimalBounds;
+    bool expectOptimal;
+    std::tie(optimalBounds, expectOptimal) = this->getOptimalBounds();
+
+    if (expectOptimal) {
+        REPORTER_ASSERT(reporter, actualBounds == optimalBounds,
+                "%s, bounds expected [%d %d %d %d], actual [%d %d %d %d]",
+                name.c_str(), optimalBounds.fLeft, optimalBounds.fTop,
+                optimalBounds.fRight, optimalBounds.fBottom,
+                actualBounds.fLeft, actualBounds.fTop,
+                actualBounds.fRight, actualBounds.fBottom);
+    } else {
+        REPORTER_ASSERT(reporter, actualBounds.contains(optimalBounds),
+                "%s, bounds are not conservative, optimal [%d %d %d %d], actual [%d %d %d %d]",
+                name.c_str(), optimalBounds.fLeft, optimalBounds.fTop,
+                optimalBounds.fRight, optimalBounds.fBottom,
+                actualBounds.fLeft, actualBounds.fTop,
+                actualBounds.fRight, actualBounds.fBottom);
+    }
+    // Each element left in the stack must match an expected one, and the counts must agree.
+    size_t matchedElements = 0;
+    for (const GrClipStack::Element& a : cs) {
+        bool found = false;
+        for (const GrClipStack::Element& e : fExpectedElements) {
+            if (compare_elements(a, e)) {
+                // shouldn't match multiple expected elements or it's a bad test case
+                SkASSERT(!found);
+                found = true;
+            }
+        }
+        REPORTER_ASSERT(reporter, found,
+                        "%s, unexpected clip element in stack: shape %d, aa %d, op %d",
+                        name.c_str(), (int) a.fShape.type(), (int) a.fAA, (int) a.fOp);
+        matchedElements += found ? 1 : 0;
+    }
+    REPORTER_ASSERT(reporter, matchedElements == fExpectedElements.size(),
+                    "%s, did not match all expected elements: expected %d but matched only %d",
+                    name.c_str(), (int) fExpectedElements.size(), (int) matchedElements);
+
+    // Validate restoration behavior
+    if (policy == SavePolicy::kAtEnd) {
+        GrClipStack::ClipState oldState = cs.clipState();
+        cs.restore();
+        REPORTER_ASSERT(reporter, cs.clipState() == oldState,
+                        "%s, restoring an empty save record should not change clip state: "
+                        "expected %d but got %d",
+                        name.c_str(), (int) oldState, (int) cs.clipState());
+    } else if (policy != SavePolicy::kNever) {
+        int restoreCount = policy == SavePolicy::kAtStart ? 1 : (int) order.size();
+        for (int i = 0; i < restoreCount; ++i) {
+            cs.restore();
+        }
+        // Should be wide open if everything is restored to base state
+        REPORTER_ASSERT(reporter, cs.clipState() == GrClipStack::ClipState::kWideOpen,
+                        "%s, restore should make stack become wide-open, not %d",
+                        name.c_str(), (int) cs.clipState());
+    }
+}
+
+// All clip operations are commutative so applying actual elements in every possible order should
+// always produce the same set of expected elements.
+static void run_test_case(skiatest::Reporter* r, const TestCase& test) {
+    int n = (int) test.initialElements().size();
+    std::vector<int> order(n);
+    std::vector<int> stack(n);
+    // 'order' is the current permutation; 'stack' holds Heap's algorithm per-index counters.
+    // Initial order sequence and zeroed stack
+    for (int i = 0; i < n; ++i) {
+        order[i] = i;
+        stack[i] = 0;
+    }
+    // Runs the test once per save policy using whatever permutation 'order' currently holds.
+    auto runTest = [&]() {
+        static const SavePolicy kPolicies[] = { SavePolicy::kNever, SavePolicy::kAtStart,
+                                                SavePolicy::kAtEnd, SavePolicy::kBetweenEveryOp };
+        for (auto policy : kPolicies) {
+            test.run(order, policy, r);
+        }
+    };
+
+    // Heap's algorithm (non-recursive) to generate every permutation over the test case's elements
+    // https://en.wikipedia.org/wiki/Heap%27s_algorithm
+    runTest();
+
+    static constexpr int kMaxRuns = 720; // Don't run more than 6! configurations, even if n > 6
+    int testRuns = 1;  // counts permutations; the initial order was already run above
+
+    int i = 0;
+    while (i < n && testRuns < kMaxRuns) {
+        if (stack[i] < i) {
+            using std::swap;
+            if (i % 2 == 0) {
+                swap(order[0], order[i]);
+            } else {
+                swap(order[stack[i]], order[i]);
+            }
+
+            runTest();
+            stack[i]++;
+            i = 0;  // a new permutation was produced, so restart scanning from the lowest index
+            testRuns++;
+        } else {
+            stack[i] = 0;  // this index is exhausted; reset its counter and move to the next one
+            ++i;
+        }
+    }
+}
+
+static SkPath make_octagon(const SkRect& r, SkScalar lr, SkScalar tb) {
+    SkPath p;
+    p.moveTo(r.fLeft + lr, r.fTop);      // top edge, inset 'lr' from the left
+    p.lineTo(r.fRight - lr, r.fTop);     // top edge, inset 'lr' from the right
+    p.lineTo(r.fRight, r.fTop + tb);     // right edge, inset 'tb' from the top
+    p.lineTo(r.fRight, r.fBottom - tb);  // right edge, inset 'tb' from the bottom
+    p.lineTo(r.fRight - lr, r.fBottom);  // bottom edge, inset 'lr' from the right
+    p.lineTo(r.fLeft + lr, r.fBottom);   // bottom edge, inset 'lr' from the left
+    p.lineTo(r.fLeft, r.fBottom - tb);   // left edge, inset 'tb' from the bottom
+    p.lineTo(r.fLeft, r.fTop + tb);      // left edge, inset 'tb' from the top
+    p.close();
+    return p;
+}
+
+static SkPath make_octagon(const SkRect& r) {
+    // Cut each corner by 30% of the rect's width (horizontally) and height (vertically).
+    static constexpr SkScalar kCornerFraction = 0.3f;
+    return make_octagon(r, kCornerFraction * r.width(), kCornerFraction * r.height());
+}
+
+static constexpr SkIRect kDeviceBounds = {0, 0, 100, 100};
+
+} // anonymous namespace
+
+///////////////////////////////////////////////////////////////////////////////
+// These tests use the TestCase infrastructure to define clip stacks and
+// associated expectations.
+
+// Tests that the initialized state of the clip stack is wide-open
+DEF_TEST(GrClipStack_InitialState, r) {
+    run_test_case(r, TestCase::Build("initial-state", kDeviceBounds).finishTest());
+}
+
+// Tests that intersecting rects combine into a single element when they have the same AA type,
+// or when one of them is pixel-aligned.
+DEF_TEST(GrClipStack_RectRectAACombine, r) {
+    SkRect pixelAligned = {0, 0, 10, 10}; // integer coordinates
+    SkRect fracRect1 = pixelAligned.makeOffset(5.3f, 3.7f); // fractional coordinates
+    SkRect fracRect2 = {fracRect1.fLeft + 0.75f * fracRect1.width(),
+                        fracRect1.fTop + 0.75f * fracRect1.height(),
+                        fracRect1.fRight, fracRect1.fBottom}; // bottom-right part of fracRect1
+
+    SkRect fracIntersect;
+    SkAssertResult(fracIntersect.intersect(fracRect1, fracRect2));
+    SkRect alignedIntersect;
+    SkAssertResult(alignedIntersect.intersect(pixelAligned, fracRect1));
+
+    // Two AA rects combine into one AA rect element (their intersection)
+    run_test_case(r, TestCase::Build("aa", kDeviceBounds)
+                              .actual().aa().intersect()
+                                       .rect(fracRect1).rect(fracRect2)
+                                       .finishElements()
+                              .expect().aa().intersect().rect(fracIntersect).finishElements()
+                              .state(GrClipStack::ClipState::kDeviceRect)
+                              .finishTest());
+
+    // Two non-AA rects combine into one non-AA rect element (their intersection)
+    run_test_case(r, TestCase::Build("nonaa", kDeviceBounds)
+                              .actual().nonAA().intersect()
+                                       .rect(fracRect1).rect(fracRect2)
+                                       .finishElements()
+                              .expect().nonAA().intersect().rect(fracIntersect).finishElements()
+                              .state(GrClipStack::ClipState::kDeviceRect)
+                              .finishTest());
+
+    // A pixel-aligned AA rect and a non-AA rect combine; the expected element is non-AA
+    run_test_case(r, TestCase::Build("aligned-aa+nonaa", kDeviceBounds)
+                             .actual().intersect()
+                                      .aa().rect(pixelAligned).nonAA().rect(fracRect1)
+                                      .finishElements()
+                             .expect().nonAA().intersect().rect(alignedIntersect).finishElements()
+                             .state(GrClipStack::ClipState::kDeviceRect)
+                             .finishTest());
+
+    // An AA rect and a pixel-aligned non-AA rect combine; the expected element is AA
+    run_test_case(r, TestCase::Build("aa+aligned-nonaa", kDeviceBounds)
+                              .actual().intersect()
+                                       .aa().rect(fracRect1).nonAA().rect(pixelAligned)
+                                       .finishElements()
+                              .expect().aa().intersect().rect(alignedIntersect).finishElements()
+                              .state(GrClipStack::ClipState::kDeviceRect)
+                              .finishTest());
+
+    // Mixed AA modes where neither rect is pixel-aligned do not combine; state is kComplex
+    run_test_case(r, TestCase::Build("aa+nonaa", kDeviceBounds)
+                              .actual().intersect()
+                                       .aa().rect(fracRect1).nonAA().rect(fracRect2)
+                                       .finishElements()
+                              .expectActual()
+                              .state(GrClipStack::ClipState::kComplex)
+                              .finishTest());
+}
+
+// Tests that an intersect op and a difference op do not combine, even though the same two rects
+// would have combined if both had been intersect ops.
+DEF_TEST(GrClipStack_DifferenceNoCombine, r) {
+    SkRect r1 = {15.f, 14.f, 23.22f, 58.2f};
+    SkRect r2 = r1.makeOffset(5.f, 8.f); // shifted copy of r1 that still overlaps it
+    SkASSERT(r1.intersects(r2));
+
+    run_test_case(r, TestCase::Build("no-combine", kDeviceBounds)
+                              .actual().aa().intersect().rect(r1)
+                                       .difference().rect(r2)
+                                       .finishElements()
+                              .expectActual()
+                              .state(GrClipStack::ClipState::kComplex)
+                              .finishTest());
+}
+
+// Tests that intersected rects sharing a rotated local matrix still combine when their AA types
+// match, but do not combine with mixed AA types even if one rect has integer coordinates.
+DEF_TEST(GrClipStack_RectRectNonAxisAligned, r) {
+    SkRect pixelAligned = {0, 0, 10, 10}; // integer coordinates (in local space)
+    SkRect fracRect1 = pixelAligned.makeOffset(5.3f, 3.7f); // fractional coordinates
+    SkRect fracRect2 = {fracRect1.fLeft + 0.75f * fracRect1.width(),
+                        fracRect1.fTop + 0.75f * fracRect1.height(),
+                        fracRect1.fRight, fracRect1.fBottom}; // bottom-right part of fracRect1
+
+    SkRect fracIntersect;
+    SkAssertResult(fracIntersect.intersect(fracRect1, fracRect2));
+
+    SkMatrix lm = SkMatrix::RotateDeg(45.f); // local-to-device matrix shared by all cases below
+
+    // Both AA combine into one element that keeps the local matrix; state is kComplex
+    run_test_case(r, TestCase::Build("aa", kDeviceBounds)
+                              .actual().aa().intersect().localToDevice(lm)
+                                       .rect(fracRect1).rect(fracRect2)
+                                       .finishElements()
+                              .expect().aa().intersect().localToDevice(lm)
+                                       .rect(fracIntersect).finishElements()
+                              .state(GrClipStack::ClipState::kComplex)
+                              .finishTest());
+
+    // Both non-AA combine into one element that keeps the local matrix; state is kComplex
+    run_test_case(r, TestCase::Build("nonaa", kDeviceBounds)
+                              .actual().nonAA().intersect().localToDevice(lm)
+                                       .rect(fracRect1).rect(fracRect2)
+                                       .finishElements()
+                              .expect().nonAA().intersect().localToDevice(lm)
+                                       .rect(fracIntersect).finishElements()
+                              .state(GrClipStack::ClipState::kComplex)
+                              .finishTest());
+
+    // Integer-aligned coordinates under a local matrix with mixed AA don't combine, though
+    run_test_case(r, TestCase::Build("local-aa", kDeviceBounds)
+                              .actual().intersect().localToDevice(lm)
+                                       .aa().rect(pixelAligned).nonAA().rect(fracRect1)
+                                       .finishElements()
+                              .expectActual()
+                              .state(GrClipStack::ClipState::kComplex)
+                              .finishTest());
+}
+
+// Tests that the intersection of two round rects can simplify to a single round rect when they
+// have the same AA type.
+DEF_TEST(GrClipStack_RRectRRectAACombine, r) {
+    SkRRect r1 = SkRRect::MakeRectXY(SkRect::MakeWH(12, 12), 2.f, 2.f);
+    SkRRect r2 = r1.makeOffset(6.f, 6.f); // r1 shifted by (6, 6)
+
+    SkRRect intersect = SkRRectPriv::ConservativeIntersect(r1, r2); // expected combined element
+    SkASSERT(!intersect.isEmpty());
+
+    // Both AA combine into one AA rrect element
+    run_test_case(r, TestCase::Build("aa", kDeviceBounds)
+                              .actual().aa().intersect()
+                                       .rrect(r1).rrect(r2)
+                                       .finishElements()
+                              .expect().aa().intersect().rrect(intersect).finishElements()
+                              .state(GrClipStack::ClipState::kDeviceRRect)
+                              .finishTest());
+
+    // Both non-AA combine into one non-AA rrect element
+    run_test_case(r, TestCase::Build("nonaa", kDeviceBounds)
+                              .actual().nonAA().intersect()
+                                       .rrect(r1).rrect(r2)
+                                       .finishElements()
+                              .expect().nonAA().intersect().rrect(intersect).finishElements()
+                              .state(GrClipStack::ClipState::kDeviceRRect)
+                              .finishTest());
+
+    // Mixed AA types do not combine; state is kComplex
+    run_test_case(r, TestCase::Build("aa+nonaa", kDeviceBounds)
+                              .actual().intersect()
+                                       .aa().rrect(r1).nonAA().rrect(r2)
+                                       .finishElements()
+                              .expectActual()
+                              .state(GrClipStack::ClipState::kComplex)
+                              .finishTest());
+
+    // Matching AA types also combine under a shared rotated local matrix; state is kComplex
+    SkMatrix lm = SkMatrix::RotateDeg(45.f);
+    run_test_case(r, TestCase::Build("local-aa", kDeviceBounds)
+                              .actual().aa().intersect().localToDevice(lm)
+                                       .rrect(r1).rrect(r2)
+                                       .finishElements()
+                              .expect().aa().intersect().localToDevice(lm)
+                                       .rrect(intersect).finishElements()
+                              .state(GrClipStack::ClipState::kComplex)
+                              .finishTest());
+    run_test_case(r, TestCase::Build("local-nonaa", kDeviceBounds) // same, but non-AA
+                              .actual().nonAA().intersect().localToDevice(lm)
+                                       .rrect(r1).rrect(r2)
+                                       .finishElements()
+                              .expect().nonAA().intersect().localToDevice(lm)
+                                       .rrect(intersect).finishElements()
+                              .state(GrClipStack::ClipState::kComplex)
+                              .finishTest());
+}
+
+// Tests that intersection of a round rect and rect can simplify to a new round rect or even a rect.
+DEF_TEST(GrClipStack_RectRRectCombine, r) {
+    SkRRect rrect = SkRRect::MakeRectXY({0, 0, 10, 10}, 2.f, 2.f);
+    SkRect cutTop = {-10, -10, 10, 4}; // overlaps rrect's bounds for y in [0, 4]
+    SkRect cutMid = {-10, 3, 10, 7}; // overlaps rrect's bounds for y in [3, 7]
+
+    // Rect + RRect becomes a round rect with some square corners
+    SkVector cutCorners[4] = {{2.f, 2.f}, {2.f, 2.f}, {0, 0}, {0, 0}}; // last two are square
+    SkRRect cutRRect;
+    cutRRect.setRectRadii({0, 0, 10, 4}, cutCorners);
+    run_test_case(r, TestCase::Build("still-rrect", kDeviceBounds)
+                              .actual().intersect().aa().rrect(rrect).rect(cutTop).finishElements()
+                              .expect().intersect().aa().rrect(cutRRect).finishElements()
+                              .state(GrClipStack::ClipState::kDeviceRRect)
+                              .finishTest());
+
+    // Rect + RRect becomes a plain rect; state is kDeviceRect
+    SkRect cutRect = {0, 3, 10, 7}; // cutMid clipped to rrect's bounds
+    run_test_case(r, TestCase::Build("to-rect", kDeviceBounds)
+                               .actual().intersect().aa().rrect(rrect).rect(cutMid).finishElements()
+                               .expect().intersect().aa().rect(cutRect).finishElements()
+                               .state(GrClipStack::ClipState::kDeviceRect)
+                               .finishTest());
+
+    // But they can only combine when the intersecting shape is representable as a [r]rect.
+    cutRect = {0, 0, 1.5f, 5.f}; // width 1.5 is narrower than rrect's 2.f corner radius
+    run_test_case(r, TestCase::Build("no-combine", kDeviceBounds)
+                              .actual().intersect().aa().rrect(rrect).rect(cutRect).finishElements()
+                              .expectActual()
+                              .state(GrClipStack::ClipState::kComplex)
+                              .finishTest());
+}
+
+// Tests that a rect crossing the device edge is expected as a rect pre-clipped to device bounds
+DEF_TEST(GrClipStack_RectDeviceClip, r) {
+    SkRect crossesDeviceEdge = {20.f, kDeviceBounds.fTop - 13.2f,
+                                kDeviceBounds.fRight + 15.5f, 30.f}; // past top and right edges
+    SkRect insideDevice = {20.f, kDeviceBounds.fTop, kDeviceBounds.fRight, 30.f};
+
+    run_test_case(r, TestCase::Build("device-aa-rect", kDeviceBounds)
+                              .actual().intersect().aa().rect(crossesDeviceEdge).finishElements()
+                              .expect().intersect().aa().rect(insideDevice).finishElements()
+                              .state(GrClipStack::ClipState::kDeviceRect)
+                              .finishTest());
+
+    run_test_case(r, TestCase::Build("device-nonaa-rect", kDeviceBounds)
+                              .actual().intersect().nonAA().rect(crossesDeviceEdge).finishElements()
+                              .expect().intersect().nonAA().rect(insideDevice).finishElements()
+                              .state(GrClipStack::ClipState::kDeviceRect)
+                              .finishTest());
+}
+
+// Tests that other shapes' bounds are contained by the device bounds, even if their shape is not.
+DEF_TEST(GrClipStack_ShapeDeviceBoundsClip, r) {
+    SkRect crossesDeviceEdge = {20.f, kDeviceBounds.fTop - 13.2f,
+                                kDeviceBounds.fRight + 15.5f, 30.f}; // past top and right edges
+
+    // RRect crossing the device edge: expectation is expectActual(), state is kDeviceRRect
+    run_test_case(r, TestCase::Build("device-rrect", kDeviceBounds)
+                              .actual().intersect().aa()
+                                       .rrect(SkRRect::MakeRectXY(crossesDeviceEdge, 4.f, 4.f))
+                                       .finishElements()
+                              .expectActual()
+                              .state(GrClipStack::ClipState::kDeviceRRect)
+                              .finishTest());
+
+    // Path (octagon) crossing the device edge: expectation is expectActual(), state is kComplex
+    run_test_case(r, TestCase::Build("device-path", kDeviceBounds)
+                              .actual().intersect().aa()
+                                       .path(make_octagon(crossesDeviceEdge))
+                                       .finishElements()
+                              .expectActual()
+                              .state(GrClipStack::ClipState::kComplex)
+                              .finishTest());
+}
+
+// Tests that a simplifiable path turns into a simpler element type
+DEF_TEST(GrClipStack_PathSimplify, r) {
+    // Empty, point, and line paths -> kEmpty state with no expected elements
+    SkPath empty;
+    run_test_case(r, TestCase::Build("empty", kDeviceBounds)
+                              .actual().path(empty).finishElements()
+                              .state(GrClipStack::ClipState::kEmpty)
+                              .finishTest());
+    SkPath point;
+    point.moveTo({0.f, 0.f}); // a lone moveTo
+    run_test_case(r, TestCase::Build("point", kDeviceBounds)
+                              .actual().path(point).finishElements()
+                              .state(GrClipStack::ClipState::kEmpty)
+                              .finishTest());
+
+    SkPath line;
+    line.moveTo({0.f, 0.f});
+    line.lineTo({10.f, 5.f}); // a single segment
+    run_test_case(r, TestCase::Build("line", kDeviceBounds)
+                              .actual().path(line).finishElements()
+                              .state(GrClipStack::ClipState::kEmpty)
+                              .finishTest());
+
+    // Rect path -> rect element, kDeviceRect state
+    SkRect rect = {0.f, 2.f, 10.f, 15.4f};
+    SkPath rectPath;
+    rectPath.addRect(rect);
+    run_test_case(r, TestCase::Build("rect", kDeviceBounds)
+                              .actual().path(rectPath).finishElements()
+                              .expect().rect(rect).finishElements()
+                              .state(GrClipStack::ClipState::kDeviceRect)
+                              .finishTest());
+
+    // Oval path -> rrect element (SkRRect::MakeOval of the same rect), kDeviceRRect state
+    SkPath ovalPath;
+    ovalPath.addOval(rect);
+    run_test_case(r, TestCase::Build("oval", kDeviceBounds)
+                              .actual().path(ovalPath).finishElements()
+                              .expect().rrect(SkRRect::MakeOval(rect)).finishElements()
+                              .state(GrClipStack::ClipState::kDeviceRRect)
+                              .finishTest());
+
+    // RRect path -> rrect element, kDeviceRRect state
+    SkRRect rrect = SkRRect::MakeRectXY(rect, 2.f, 2.f);
+    SkPath rrectPath;
+    rrectPath.addRRect(rrect);
+    run_test_case(r, TestCase::Build("rrect", kDeviceBounds)
+                              .actual().path(rrectPath).finishElements()
+                              .expect().rrect(rrect).finishElements()
+                              .state(GrClipStack::ClipState::kDeviceRRect)
+                              .finishTest());
+}
+
+// Tests that repeating an identical clip operation leaves a single expected element
+DEF_TEST(GrClipStack_RepeatElement, r) {
+    // Same rect added three times -> one rect element
+    SkRect rect = {5.3f, 62.f, 20.f, 85.f};
+    run_test_case(r, TestCase::Build("same-rects", kDeviceBounds)
+                              .actual().rect(rect).rect(rect).rect(rect).finishElements()
+                              .expect().rect(rect).finishElements()
+                              .state(GrClipStack::ClipState::kDeviceRect)
+                              .finishTest());
+    SkMatrix lm;
+    lm.setRotate(30.f, rect.centerX(), rect.centerY()); // rotate 30 degrees about rect's center
+    run_test_case(r, TestCase::Build("same-local-rects", kDeviceBounds)
+                              .actual().localToDevice(lm).rect(rect).rect(rect).rect(rect)
+                                       .finishElements()
+                              .expect().localToDevice(lm).rect(rect).finishElements()
+                              .state(GrClipStack::ClipState::kComplex)
+                              .finishTest());
+
+    // Same rrect added three times -> one rrect element
+    SkRRect rrect = SkRRect::MakeRectXY(rect, 5.f, 2.5f);
+    run_test_case(r, TestCase::Build("same-rrects", kDeviceBounds)
+                              .actual().rrect(rrect).rrect(rrect).rrect(rrect).finishElements()
+                              .expect().rrect(rrect).finishElements()
+                              .state(GrClipStack::ClipState::kDeviceRRect)
+                              .finishTest());
+    run_test_case(r, TestCase::Build("same-local-rrects", kDeviceBounds)
+                              .actual().localToDevice(lm).rrect(rrect).rrect(rrect).rrect(rrect)
+                                       .finishElements()
+                              .expect().localToDevice(lm).rrect(rrect).finishElements()
+                              .state(GrClipStack::ClipState::kComplex)
+                              .finishTest());
+
+    // Same convex path, by == (each make_octagon(rect) call builds a separate SkPath)
+    run_test_case(r, TestCase::Build("same-convex", kDeviceBounds)
+                              .actual().path(make_octagon(rect)).path(make_octagon(rect))
+                                       .finishElements()
+                              .expect().path(make_octagon(rect)).finishElements()
+                              .state(GrClipStack::ClipState::kComplex)
+                              .finishTest());
+    run_test_case(r, TestCase::Build("same-local-convex", kDeviceBounds)
+                              .actual().localToDevice(lm)
+                                       .path(make_octagon(rect)).path(make_octagon(rect))
+                                       .finishElements()
+                              .expect().localToDevice(lm).path(make_octagon(rect))
+                                       .finishElements()
+                              .state(GrClipStack::ClipState::kComplex)
+                              .finishTest());
+
+    // Same complicated path by gen-id but not == (the same SkPath object is passed each time)
+    SkPath path; // an hour glass
+    path.moveTo({0.f, 0.f});
+    path.lineTo({20.f, 20.f});
+    path.lineTo({0.f, 20.f});
+    path.lineTo({20.f, 0.f});
+    path.close();
+
+    run_test_case(r, TestCase::Build("same-path", kDeviceBounds)
+                              .actual().path(path).path(path).path(path).finishElements()
+                              .expect().path(path).finishElements()
+                              .state(GrClipStack::ClipState::kComplex)
+                              .finishTest());
+    run_test_case(r, TestCase::Build("same-local-path", kDeviceBounds)
+                              .actual().localToDevice(lm)
+                                       .path(path).path(path).path(path).finishElements()
+                              .expect().localToDevice(lm).path(path)
+                                       .finishElements()
+                              .state(GrClipStack::ClipState::kComplex)
+                              .finishTest());
+}
+
+// Tests that inverse-filled paths are canonicalized to a regular fill and a swapped clip op
+DEF_TEST(GrClipStack_InverseFilledPath, r) {
+    SkRect rect = {0.f, 0.f, 16.f, 17.f};
+    SkPath rectPath;
+    rectPath.addRect(rect);
+
+    SkPath inverseRectPath = rectPath;
+    inverseRectPath.toggleInverseFillType(); // same geometry as rectPath, fill type toggled
+
+    SkPath complexPath = make_octagon(rect);
+    SkPath inverseComplexPath = complexPath;
+    inverseComplexPath.toggleInverseFillType(); // same geometry as complexPath, fill type toggled
+
+    // Inverse filled rect + intersect -> difference rect
+    run_test_case(r, TestCase::Build("inverse-rect-intersect", kDeviceBounds)
+                              .actual().aa().intersect().path(inverseRectPath).finishElements()
+                              .expect().aa().difference().rect(rect).finishElements()
+                              .state(GrClipStack::ClipState::kComplex)
+                              .finishTest());
+
+    // Inverse filled rect + difference -> intersect rect
+    run_test_case(r, TestCase::Build("inverse-rect-difference", kDeviceBounds)
+                              .actual().aa().difference().path(inverseRectPath).finishElements()
+                              .expect().aa().intersect().rect(rect).finishElements()
+                              .state(GrClipStack::ClipState::kDeviceRect)
+                              .finishTest());
+
+    // Inverse filled path + intersect -> difference path
+    run_test_case(r, TestCase::Build("inverse-path-intersect", kDeviceBounds)
+                              .actual().aa().intersect().path(inverseComplexPath).finishElements()
+                              .expect().aa().difference().path(complexPath).finishElements()
+                              .state(GrClipStack::ClipState::kComplex)
+                              .finishTest());
+
+    // Inverse filled path + difference -> intersect path
+    run_test_case(r, TestCase::Build("inverse-path-difference", kDeviceBounds)
+                              .actual().aa().difference().path(inverseComplexPath).finishElements()
+                              .expect().aa().intersect().path(complexPath).finishElements()
+                              .state(GrClipStack::ClipState::kComplex)
+                              .finishTest());
+}
+
+// Tests that clip operations that are offscreen either make the clip empty or stay wide open
+DEF_TEST(GrClipStack_Offscreen, r) {
+    SkRect offscreenRect = {kDeviceBounds.fRight + 10.f, kDeviceBounds.fTop + 20.f,
+                            kDeviceBounds.fRight + 40.f, kDeviceBounds.fTop + 60.f}; // right of device
+    SkASSERT(!offscreenRect.intersects(SkRect::Make(kDeviceBounds)));
+
+    SkRRect offscreenRRect = SkRRect::MakeRectXY(offscreenRect, 5.f, 5.f);
+    SkPath offscreenPath = make_octagon(offscreenRect);
+
+    // Intersect with offscreen shapes -> kEmpty, no expected elements
+    run_test_case(r, TestCase::Build("intersect-combo", kDeviceBounds)
+                              .actual().aa().intersect()
+                                       .rect(offscreenRect)
+                                       .rrect(offscreenRRect)
+                                       .path(offscreenPath)
+                                       .finishElements()
+                              .state(GrClipStack::ClipState::kEmpty)
+                              .finishTest());
+    run_test_case(r, TestCase::Build("intersect-rect", kDeviceBounds)
+                              .actual().aa().intersect()
+                                       .rect(offscreenRect)
+                                       .finishElements()
+                              .state(GrClipStack::ClipState::kEmpty)
+                              .finishTest());
+    run_test_case(r, TestCase::Build("intersect-rrect", kDeviceBounds)
+                              .actual().aa().intersect()
+                                       .rrect(offscreenRRect)
+                                       .finishElements()
+                              .state(GrClipStack::ClipState::kEmpty)
+                              .finishTest());
+    run_test_case(r, TestCase::Build("intersect-path", kDeviceBounds)
+                              .actual().aa().intersect()
+                                       .path(offscreenPath)
+                                       .finishElements()
+                              .state(GrClipStack::ClipState::kEmpty)
+                              .finishTest());
+
+    // Difference with offscreen shapes -> kWideOpen, no expected elements
+    run_test_case(r, TestCase::Build("difference-combo", kDeviceBounds)
+                              .actual().aa().difference()
+                                       .rect(offscreenRect)
+                                       .rrect(offscreenRRect)
+                                       .path(offscreenPath)
+                                       .finishElements()
+                              .state(GrClipStack::ClipState::kWideOpen)
+                              .finishTest());
+    run_test_case(r, TestCase::Build("difference-rect", kDeviceBounds)
+                              .actual().aa().difference()
+                                       .rect(offscreenRect)
+                                       .finishElements()
+                              .state(GrClipStack::ClipState::kWideOpen)
+                              .finishTest());
+    run_test_case(r, TestCase::Build("difference-rrect", kDeviceBounds)
+                              .actual().aa().difference()
+                                       .rrect(offscreenRRect)
+                                       .finishElements()
+                              .state(GrClipStack::ClipState::kWideOpen)
+                              .finishTest());
+    run_test_case(r, TestCase::Build("difference-path", kDeviceBounds)
+                              .actual().aa().difference()
+                                       .path(offscreenPath)
+                                       .finishElements()
+                              .state(GrClipStack::ClipState::kWideOpen)
+                              .finishTest());
+}
+
+// Tests that an empty shape updates the clip state directly without needing an element
+DEF_TEST(GrClipStack_EmptyShape, r) {
+    // Intersect with an empty rect -> kEmpty
+    run_test_case(r, TestCase::Build("empty-intersect", kDeviceBounds)
+                              .actual().intersect().rect(SkRect::MakeEmpty()).finishElements()
+                              .state(GrClipStack::ClipState::kEmpty)
+                              .finishTest());
+
+    // Difference of an empty rect -> no-op, state stays kWideOpen
+    run_test_case(r, TestCase::Build("empty-difference", kDeviceBounds)
+                              .actual().difference().rect(SkRect::MakeEmpty()).finishElements()
+                              .state(GrClipStack::ClipState::kWideOpen)
+                              .finishTest());
+
+    SkRRect rrect = SkRRect::MakeRectXY({4.f, 10.f, 16.f, 32.f}, 2.f, 2.f);
+    run_test_case(r, TestCase::Build("noop-difference", kDeviceBounds) // only rrect is expected
+                              .actual().difference().rrect(rrect).rect(SkRect::MakeEmpty())
+                                       .finishElements()
+                              .expect().difference().rrect(rrect).finishElements()
+                              .state(GrClipStack::ClipState::kComplex)
+                              .finishTest());
+}
+
+// Tests that sufficiently large difference operations can shrink the conservative bounds
+DEF_TEST(GrClipStack_DifferenceBounds, r) {
+    SkRect rightSide = {50.f, -10.f, 2.f * kDeviceBounds.fRight, kDeviceBounds.fBottom + 10.f};
+    SkRect clipped = rightSide; // becomes rightSide intersected with kDeviceBounds just below
+    SkAssertResult(clipped.intersect(SkRect::Make(kDeviceBounds)));
+
+    run_test_case(r, TestCase::Build("difference-cut", kDeviceBounds)
+                              .actual().nonAA().difference().rect(rightSide).finishElements()
+                              .expect().nonAA().difference().rect(clipped).finishElements()
+                              .state(GrClipStack::ClipState::kComplex)
+                              .finishTest());
+}
+
+// Tests that two intersect rects still combine when a difference op was added between them
+DEF_TEST(GrClipStack_NoDifferenceInterference, r) {
+    SkRect intR1 = {0.f, 0.f, 30.f, 30.f};
+    SkRect intR2 = {15.f, 15.f, 45.f, 45.f};
+    SkRect intCombo = {15.f, 15.f, 30.f, 30.f}; // intersection of intR1 and intR2
+    SkRect diff = {20.f, 6.f, 50.f, 50.f};
+
+    run_test_case(r, TestCase::Build("cross-diff-combine", kDeviceBounds)
+                              .actual().rect(intR1, GrAA::kYes, SkClipOp::kIntersect)
+                                       .rect(diff, GrAA::kYes, SkClipOp::kDifference)
+                                       .rect(intR2, GrAA::kYes, SkClipOp::kIntersect)
+                                       .finishElements()
+                              .expect().rect(intCombo, GrAA::kYes, SkClipOp::kIntersect)
+                                       .rect(diff, GrAA::kYes, SkClipOp::kDifference)
+                                       .finishElements()
+                              .state(GrClipStack::ClipState::kComplex)
+                              .finishTest());
+}
+
+// Tests that multiple path operations are all recorded, but not otherwise consolidated
+DEF_TEST(GrClipStack_MultiplePaths, r) {
+    // Chosen to be greater than the number of inline-allocated elements and save records of the
+    // GrClipStack so that we test heap allocation as well.
+    static constexpr int kNumOps = 16;
+
+    auto b = TestCase::Build("many-paths-difference", kDeviceBounds);
+    SkRect d = {0.f, 0.f, 12.f, 12.f};
+    for (int i = 0; i < kNumOps; ++i) {
+        b.actual().path(make_octagon(d), GrAA::kNo, SkClipOp::kDifference);
+
+        d.offset(15.f, 0.f); // step right; wrap to a new row once past the device's right edge
+        if (d.fRight > kDeviceBounds.fRight) {
+            d.fLeft = 0.f;
+            d.fRight = 12.f;
+            d.offset(0.f, 15.f);
+        }
+    }
+
+    run_test_case(r, b.expectActual()
+                      .state(GrClipStack::ClipState::kComplex)
+                      .finishTest());
+
+    b = TestCase::Build("many-paths-intersect", kDeviceBounds);
+    d = {0.f, 0.f, 12.f, 12.f};
+    for (int i = 0; i < kNumOps; ++i) {
+        b.actual().path(make_octagon(d), GrAA::kYes, SkClipOp::kIntersect);
+        d.offset(0.01f, 0.01f); // tiny shift so every octagon differs from the previous one
+    }
+
+    run_test_case(r, b.expectActual()
+                      .state(GrClipStack::ClipState::kComplex)
+                      .finishTest());
+}
+
+// Tests that a single rect gives kDeviceRect state only if it's axis-aligned and an intersect op.
+DEF_TEST(GrClipStack_DeviceRect, r) {
+    // Axis-aligned + intersect -> kDeviceRect
+    SkRect rect = {0, 0, 20, 20};
+    run_test_case(r, TestCase::Build("device-rect", kDeviceBounds)
+                              .actual().intersect().aa().rect(rect).finishElements()
+                              .expectActual()
+                              .state(GrClipStack::ClipState::kDeviceRect)
+                              .finishTest());
+
+    // Not axis-aligned (rotated local matrix) -> kComplex
+    SkMatrix lm = SkMatrix::RotateDeg(15.f);
+    run_test_case(r, TestCase::Build("unaligned-rect", kDeviceBounds)
+                              .actual().localToDevice(lm).intersect().aa().rect(rect)
+                                       .finishElements()
+                              .expectActual()
+                              .state(GrClipStack::ClipState::kComplex)
+                              .finishTest());
+
+    // Difference op instead of intersect -> kComplex
+    run_test_case(r, TestCase::Build("diff-rect", kDeviceBounds)
+                              .actual().difference().aa().rect(rect).finishElements()
+                              .expectActual()
+                              .state(GrClipStack::ClipState::kComplex)
+                              .finishTest());
+}
+
+// Tests that a single rrect gives kDeviceRRect state only if axis-aligned and an intersect op.
+DEF_TEST(GrClipStack_DeviceRRect, r) {
+    // Axis-aligned + intersect -> kDeviceRRect
+    SkRect rect = {0, 0, 20, 20};
+    SkRRect rrect = SkRRect::MakeRectXY(rect, 5.f, 5.f);
+    run_test_case(r, TestCase::Build("device-rrect", kDeviceBounds)
+                              .actual().intersect().aa().rrect(rrect).finishElements()
+                              .expectActual()
+                              .state(GrClipStack::ClipState::kDeviceRRect)
+                              .finishTest());
+
+    // Not axis-aligned (rotated local matrix) -> kComplex
+    SkMatrix lm = SkMatrix::RotateDeg(15.f);
+    run_test_case(r, TestCase::Build("unaligned-rrect", kDeviceBounds)
+                              .actual().localToDevice(lm).intersect().aa().rrect(rrect)
+                                       .finishElements()
+                              .expectActual()
+                              .state(GrClipStack::ClipState::kComplex)
+                              .finishTest());
+
+    // Difference op instead of intersect -> kComplex
+    run_test_case(r, TestCase::Build("diff-rrect", kDeviceBounds)
+                              .actual().difference().aa().rrect(rrect).finishElements()
+                              .expectActual()
+                              .state(GrClipStack::ClipState::kComplex)
+                              .finishTest());
+}
+
+// Checks that a scale+translate local-to-device matrix is folded into rect and rrect elements up
+// front (so the recorded element is the already-mapped shape and the stack can report a device
+// rect/rrect state), whereas a path element keeps its matrix and reports kComplex.
+DEF_TEST(GrClipStack_ScaleTranslate, r) {
+    using ClipState = GrClipStack::ClipState;
+
+    // Scale by (2, 4) and then translate by (15.5, 14.3).
+    SkMatrix lm = SkMatrix::Scale(2.f, 4.f);
+    lm.postTranslate(15.5f, 14.3f);
+
+    SkRect localRect = {0.f, 0.f, 10.f, 10.f};
+
+    // Rect: the expected element is the rect already mapped by the matrix.
+    SkRect deviceRect = lm.mapRect(localRect);
+    run_test_case(r, TestCase::Build("st+rect", kDeviceBounds)
+                             .actual()
+                                 .rect(localRect, lm, GrAA::kYes, SkClipOp::kIntersect)
+                                 .finishElements()
+                             .expect()
+                                 .rect(deviceRect, GrAA::kYes, SkClipOp::kIntersect)
+                                 .finishElements()
+                             .state(ClipState::kDeviceRect)
+                             .finishTest());
+
+    // RRect: likewise, the expected element is the rrect transformed by the matrix.
+    SkRRect localRRect = SkRRect::MakeRectXY(localRect, 2.f, 2.f);
+    SkRRect deviceRRect;
+    SkAssertResult(localRRect.transform(lm, &deviceRRect));
+    run_test_case(r, TestCase::Build("st+rrect", kDeviceBounds)
+                             .actual()
+                                 .rrect(localRRect, lm, GrAA::kYes, SkClipOp::kIntersect)
+                                 .finishElements()
+                             .expect()
+                                 .rrect(deviceRRect, GrAA::kYes, SkClipOp::kIntersect)
+                                 .finishElements()
+                             .state(ClipState::kDeviceRRect)
+                             .finishTest());
+
+    // Path: the matrix is not pre-applied, so the element is expected back unchanged.
+    run_test_case(r, TestCase::Build("st+path", kDeviceBounds)
+                             .actual()
+                                 .intersect().localToDevice(lm).path(make_octagon(localRect))
+                                 .finishElements()
+                             .expectActual()
+                             .state(ClipState::kComplex)
+                             .finishTest());
+}
+
+// Checks that the stack simplifies when a convex path element contains a rect or round rect
+// element. Each scenario is run once with the rect and once with the rrect as the inner shape.
+DEF_TEST(GrClipStack_ConvexPathContains, r) {
+    using ClipState = GrClipStack::ClipState;
+
+    SkRect inner = {15.f, 15.f, 30.f, 30.f};
+    SkRRect innerRR = SkRRect::MakeRectXY(inner, 5.f, 5.f);
+    // Octagon built from the inner rect outset by 10 on every side.
+    SkPath octagon = make_octagon(inner.makeOutset(10.f, 10.f), 5.f, 5.f);
+
+    // Both intersect: only the inner shape is expected to remain.
+    run_test_case(r, TestCase::Build("convex+rect-intersect", kDeviceBounds)
+                             .actual()
+                                 .aa().intersect().rect(inner).path(octagon)
+                                 .finishElements()
+                             .expect()
+                                 .aa().intersect().rect(inner)
+                                 .finishElements()
+                             .state(ClipState::kDeviceRect)
+                             .finishTest());
+    run_test_case(r, TestCase::Build("convex+rrect-intersect", kDeviceBounds)
+                             .actual()
+                                 .aa().intersect().rrect(innerRR).path(octagon)
+                                 .finishElements()
+                             .expect()
+                                 .aa().intersect().rrect(innerRR)
+                                 .finishElements()
+                             .state(ClipState::kDeviceRRect)
+                             .finishTest());
+
+    // Both difference: only the path is expected to remain.
+    run_test_case(r, TestCase::Build("convex+rect-difference", kDeviceBounds)
+                             .actual()
+                                 .aa().difference().rect(inner).path(octagon)
+                                 .finishElements()
+                             .expect()
+                                 .aa().difference().path(octagon)
+                                 .finishElements()
+                             .state(ClipState::kComplex)
+                             .finishTest());
+    run_test_case(r, TestCase::Build("convex+rrect-difference", kDeviceBounds)
+                             .actual()
+                                 .aa().difference().rrect(innerRR).path(octagon)
+                                 .finishElements()
+                             .expect()
+                                 .aa().difference().path(octagon)
+                                 .finishElements()
+                             .state(ClipState::kComplex)
+                             .finishTest());
+
+    // Inner shape intersected, then the path subtracted: expect an empty clip.
+    run_test_case(r, TestCase::Build("convex-diff+rect-int", kDeviceBounds)
+                             .actual()
+                                 .aa().intersect().rect(inner)
+                                 .difference().path(octagon)
+                                 .finishElements()
+                             .state(ClipState::kEmpty)
+                             .finishTest());
+    run_test_case(r, TestCase::Build("convex-diff+rrect-int", kDeviceBounds)
+                             .actual()
+                                 .aa().intersect().rrect(innerRR)
+                                 .difference().path(octagon)
+                                 .finishElements()
+                             .state(ClipState::kEmpty)
+                             .finishTest());
+
+    // Path intersected, then the inner shape subtracted: both elements are expected to be kept.
+    run_test_case(r, TestCase::Build("convex-int+rect-diff", kDeviceBounds)
+                             .actual()
+                                 .aa().intersect().path(octagon)
+                                 .difference().rect(inner)
+                                 .finishElements()
+                             .expectActual()
+                             .state(ClipState::kComplex)
+                             .finishTest());
+    run_test_case(r, TestCase::Build("convex-int+rrect-diff", kDeviceBounds)
+                             .actual()
+                                 .aa().intersect().path(octagon)
+                                 .difference().rrect(innerRR)
+                                 .finishElements()
+                             .expectActual()
+                             .state(ClipState::kComplex)
+                             .finishTest());
+}
+
+// Checks that rects/rrects recorded in different coordinate spaces are consolidated when one is
+// fully contained by the other. The outer shapes use a +45 degree rotation and the inner shapes
+// a -45 degree rotation; every op pairing is run for all four rect/rrect combinations.
+DEF_TEST(GrClipStack_NonAxisAlignedContains, r) {
+    using ClipState = GrClipStack::ClipState;
+
+    // Shorthand for the arguments repeated by every element below.
+    const GrAA aa = GrAA::kYes;
+    const SkClipOp isect = SkClipOp::kIntersect;
+    const SkClipOp diff = SkClipOp::kDifference;
+
+    SkMatrix outerM = SkMatrix::RotateDeg(45.f);
+    SkMatrix innerM = SkMatrix::RotateDeg(-45.f);
+
+    SkRect outerR = {-20.f, -20.f, 20.f, 20.f};
+    SkRRect outerRR = SkRRect::MakeRectXY(outerR, 1.f, 1.f);
+
+    SkRect innerR = {-10.f, -10.f, 10.f, 10.f};
+    SkRRect innerRR = SkRRect::MakeRectXY(innerR, 1.f, 1.f);
+
+    // Intersect + intersect: only the smaller, second shape is expected to remain.
+    run_test_case(r, TestCase::Build("rect-rect-ii", kDeviceBounds)
+                             .actual()
+                                 .rect(outerR, outerM, aa, isect)
+                                 .rect(innerR, innerM, aa, isect)
+                                 .finishElements()
+                             .expect()
+                                 .rect(innerR, innerM, aa, isect)
+                                 .finishElements()
+                             .state(ClipState::kComplex)
+                             .finishTest());
+    run_test_case(r, TestCase::Build("rrect-rrect-ii", kDeviceBounds)
+                             .actual()
+                                 .rrect(outerRR, outerM, aa, isect)
+                                 .rrect(innerRR, innerM, aa, isect)
+                                 .finishElements()
+                             .expect()
+                                 .rrect(innerRR, innerM, aa, isect)
+                                 .finishElements()
+                             .state(ClipState::kComplex)
+                             .finishTest());
+    run_test_case(r, TestCase::Build("rect-rrect-ii", kDeviceBounds)
+                             .actual()
+                                 .rect(outerR, outerM, aa, isect)
+                                 .rrect(innerRR, innerM, aa, isect)
+                                 .finishElements()
+                             .expect()
+                                 .rrect(innerRR, innerM, aa, isect)
+                                 .finishElements()
+                             .state(ClipState::kComplex)
+                             .finishTest());
+    run_test_case(r, TestCase::Build("rrect-rect-ii", kDeviceBounds)
+                             .actual()
+                                 .rrect(outerRR, outerM, aa, isect)
+                                 .rect(innerR, innerM, aa, isect)
+                                 .finishElements()
+                             .expect()
+                                 .rect(innerR, innerM, aa, isect)
+                                 .finishElements()
+                             .state(ClipState::kComplex)
+                             .finishTest());
+
+    // Difference + difference: only the larger, first shape is expected to remain.
+    run_test_case(r, TestCase::Build("rect-rect-dd", kDeviceBounds)
+                             .actual()
+                                 .rect(outerR, outerM, aa, diff)
+                                 .rect(innerR, innerM, aa, diff)
+                                 .finishElements()
+                             .expect()
+                                 .rect(outerR, outerM, aa, diff)
+                                 .finishElements()
+                             .state(ClipState::kComplex)
+                             .finishTest());
+    run_test_case(r, TestCase::Build("rrect-rrect-dd", kDeviceBounds)
+                             .actual()
+                                 .rrect(outerRR, outerM, aa, diff)
+                                 .rrect(innerRR, innerM, aa, diff)
+                                 .finishElements()
+                             .expect()
+                                 .rrect(outerRR, outerM, aa, diff)
+                                 .finishElements()
+                             .state(ClipState::kComplex)
+                             .finishTest());
+    run_test_case(r, TestCase::Build("rect-rrect-dd", kDeviceBounds)
+                             .actual()
+                                 .rect(outerR, outerM, aa, diff)
+                                 .rrect(innerRR, innerM, aa, diff)
+                                 .finishElements()
+                             .expect()
+                                 .rect(outerR, outerM, aa, diff)
+                                 .finishElements()
+                             .state(ClipState::kComplex)
+                             .finishTest());
+    run_test_case(r, TestCase::Build("rrect-rect-dd", kDeviceBounds)
+                             .actual()
+                                 .rrect(outerRR, outerM, aa, diff)
+                                 .rect(innerR, innerM, aa, diff)
+                                 .finishElements()
+                             .expect()
+                                 .rrect(outerRR, outerM, aa, diff)
+                                 .finishElements()
+                             .state(ClipState::kComplex)
+                             .finishTest());
+
+    // Difference of the larger shape, then intersect with the smaller one: expect an empty clip.
+    run_test_case(r, TestCase::Build("rectD-rectI", kDeviceBounds)
+                             .actual()
+                                 .rect(outerR, outerM, aa, diff)
+                                 .rect(innerR, innerM, aa, isect)
+                                 .finishElements()
+                             .state(ClipState::kEmpty)
+                             .finishTest());
+    run_test_case(r, TestCase::Build("rrectD-rrectI", kDeviceBounds)
+                             .actual()
+                                 .rrect(outerRR, outerM, aa, diff)
+                                 .rrect(innerRR, innerM, aa, isect)
+                                 .finishElements()
+                             .state(ClipState::kEmpty)
+                             .finishTest());
+    run_test_case(r, TestCase::Build("rectD-rrectI", kDeviceBounds)
+                             .actual()
+                                 .rect(outerR, outerM, aa, diff)
+                                 .rrect(innerRR, innerM, aa, isect)
+                                 .finishElements()
+                             .state(ClipState::kEmpty)
+                             .finishTest());
+    run_test_case(r, TestCase::Build("rrectD-rectI", kDeviceBounds)
+                             .actual()
+                                 .rrect(outerRR, outerM, aa, diff)
+                                 .rect(innerR, innerM, aa, isect)
+                                 .finishElements()
+                             .state(ClipState::kEmpty)
+                             .finishTest());
+
+    // Intersect with the larger shape, then difference of the smaller one: both are expected to
+    // be kept.
+    run_test_case(r, TestCase::Build("rectI+rectD", kDeviceBounds)
+                             .actual()
+                                 .rect(outerR, outerM, aa, isect)
+                                 .rect(innerR, innerM, aa, diff)
+                                 .finishElements()
+                             .expectActual()
+                             .state(ClipState::kComplex)
+                             .finishTest());
+    run_test_case(r, TestCase::Build("rrectI+rrectD", kDeviceBounds)
+                             .actual()
+                                 .rrect(outerRR, outerM, aa, isect)
+                                 .rrect(innerRR, innerM, aa, diff)
+                                 .finishElements()
+                             .expectActual()
+                             .state(ClipState::kComplex)
+                             .finishTest());
+    run_test_case(r, TestCase::Build("rrectI+rectD", kDeviceBounds)
+                             .actual()
+                                 .rrect(outerRR, outerM, aa, isect)
+                                 .rect(innerR, innerM, aa, diff)
+                                 .finishElements()
+                             .expectActual()
+                             .state(ClipState::kComplex)
+                             .finishTest());
+    run_test_case(r, TestCase::Build("rectI+rrectD", kDeviceBounds)
+                             .actual()
+                                 .rect(outerR, outerM, aa, isect)
+                                 .rrect(innerRR, innerM, aa, diff)
+                                 .finishElements()
+                             .expectActual()
+                             .state(ClipState::kComplex)
+                             .finishTest());
+}
+
+// Checks consolidation of nested shapes whose AA settings differ: the outer element is expected
+// to be dropped when the inner one is comfortably inside it, but both are expected to be kept
+// when the inner edges are too close to the outer edges for non-AA snapping to be predicted.
+DEF_TEST(GrClipStack_MixedAAContains, r) {
+    using ClipState = GrClipStack::ClipState;
+    const SkClipOp isect = SkClipOp::kIntersect;
+
+    SkMatrix outerM = SkMatrix::RotateDeg(45.f);
+    SkMatrix innerM = SkMatrix::RotateDeg(-45.f);
+
+    SkRect outer = {-20.f, -20.f, 20.f, 20.f};
+    SkRect innerSafe = {-10.f, -10.f, 10.f, 10.f};
+    // Only half a unit inside the outer rect on each side.
+    SkRect innerUnsafe = {-19.5f, -19.5f, 19.5f, 19.5f};
+
+    // AA outer + well-inset non-AA inner: only the inner element is expected.
+    run_test_case(r, TestCase::Build("mixed-outeraa-combine", kDeviceBounds)
+                             .actual()
+                                 .rect(outer, outerM, GrAA::kYes, isect)
+                                 .rect(innerSafe, innerM, GrAA::kNo, isect)
+                                 .finishElements()
+                             .expect()
+                                 .rect(innerSafe, innerM, GrAA::kNo, isect)
+                                 .finishElements()
+                             .state(ClipState::kComplex)
+                             .finishTest());
+    // Non-AA outer + well-inset AA inner: only the inner element is expected.
+    run_test_case(r, TestCase::Build("mixed-inneraa-combine", kDeviceBounds)
+                             .actual()
+                                 .rect(outer, outerM, GrAA::kNo, isect)
+                                 .rect(innerSafe, innerM, GrAA::kYes, isect)
+                                 .finishElements()
+                             .expect()
+                                 .rect(innerSafe, innerM, GrAA::kYes, isect)
+                                 .finishElements()
+                             .state(ClipState::kComplex)
+                             .finishTest());
+
+    // Inner rect too close to the outer edges: both elements are expected to be kept,
+    // whichever of the two is the AA one.
+    run_test_case(r, TestCase::Build("mixed-outeraa-nocombine", kDeviceBounds)
+                             .actual()
+                                 .rect(outer, outerM, GrAA::kYes, isect)
+                                 .rect(innerUnsafe, innerM, GrAA::kNo, isect)
+                                 .finishElements()
+                             .expectActual()
+                             .state(ClipState::kComplex)
+                             .finishTest());
+    run_test_case(r, TestCase::Build("mixed-inneraa-nocombine", kDeviceBounds)
+                             .actual()
+                                 .rect(outer, outerM, GrAA::kNo, isect)
+                                 .rect(innerUnsafe, innerM, GrAA::kYes, isect)
+                                 .finishElements()
+                             .expectActual()
+                             .state(ClipState::kComplex)
+                             .finishTest());
+}
+
+// Checks that a shape containing the device bounds changes the clip state directly: an
+// intersect is expected to leave the stack wide open, a difference to make it empty. No expected
+// elements are registered for any of these cases.
+DEF_TEST(GrClipStack_ShapeContainsDevice, r) {
+    using ClipState = GrClipStack::ClipState;
+
+    // All three shapes are built from the device bounds outset by 10 on every side.
+    SkRect oversized = SkRect::Make(kDeviceBounds).makeOutset(10.f, 10.f);
+    SkRRect oversizedRR = SkRRect::MakeRectXY(oversized, 10.f, 10.f);
+    SkPath oversizedPath = make_octagon(oversized, 10.f, 10.f);
+
+    // Intersect: expect kWideOpen.
+    run_test_case(r, TestCase::Build("rect-intersect", kDeviceBounds)
+                             .actual()
+                                 .intersect().rect(oversized)
+                                 .finishElements()
+                             .state(ClipState::kWideOpen)
+                             .finishTest());
+    run_test_case(r, TestCase::Build("rrect-intersect", kDeviceBounds)
+                             .actual()
+                                 .intersect().rrect(oversizedRR)
+                                 .finishElements()
+                             .state(ClipState::kWideOpen)
+                             .finishTest());
+    run_test_case(r, TestCase::Build("convex-intersect", kDeviceBounds)
+                             .actual()
+                                 .intersect().path(oversizedPath)
+                                 .finishElements()
+                             .state(ClipState::kWideOpen)
+                             .finishTest());
+
+    // Difference: expect kEmpty.
+    run_test_case(r, TestCase::Build("rect-difference", kDeviceBounds)
+                             .actual()
+                                 .difference().rect(oversized)
+                                 .finishElements()
+                             .state(ClipState::kEmpty)
+                             .finishTest());
+    run_test_case(r, TestCase::Build("rrect-difference", kDeviceBounds)
+                             .actual()
+                                 .difference().rrect(oversizedRR)
+                                 .finishElements()
+                             .state(ClipState::kEmpty)
+                             .finishTest());
+    run_test_case(r, TestCase::Build("convex-difference", kDeviceBounds)
+                             .actual()
+                                 .difference().path(oversizedPath)
+                                 .finishElements()
+                             .state(ClipState::kEmpty)
+                             .finishTest());
+}
+
+// Checks how non-overlapping shapes combine: all intersects are expected to give an empty clip,
+// all differences are expected to be kept, and for a mix only the single intersecting element is
+// expected to survive.
+DEF_TEST(GrClipStack_DisjointShapes, r) {
+    using ClipState = GrClipStack::ClipState;
+
+    SkRect box = {10.f, 10.f, 20.f, 20.f};
+    // Oval built from the box shifted by 20 in x.
+    SkRRect oval = SkRRect::MakeOval(box.makeOffset({20.f, 0.f}));
+    // Octagon built from the box shifted by 20 in y.
+    SkPath octagon = make_octagon(box.makeOffset({0.f, 20.f}));
+
+    // Every element intersects: expect kEmpty.
+    run_test_case(r, TestCase::Build("iii", kDeviceBounds)
+                             .actual()
+                                 .aa().intersect().rect(box).rrect(oval).path(octagon)
+                                 .finishElements()
+                             .state(ClipState::kEmpty)
+                             .finishTest());
+
+    // Every element is a difference: all three are expected back.
+    run_test_case(r, TestCase::Build("ddd", kDeviceBounds)
+                             .actual()
+                                 .nonAA().difference().rect(box).rrect(oval).path(octagon)
+                                 .finishElements()
+                             .expectActual()
+                             .state(ClipState::kComplex)
+                             .finishTest());
+
+    // Rect intersects, the others are differences: only the rect is expected.
+    run_test_case(r, TestCase::Build("idd", kDeviceBounds)
+                             .actual()
+                                 .aa().intersect().rect(box)
+                                 .nonAA().difference().rrect(oval).path(octagon)
+                                 .finishElements()
+                             .expect()
+                                 .aa().intersect().rect(box)
+                                 .finishElements()
+                             .state(ClipState::kDeviceRect)
+                             .finishTest());
+
+    // RRect intersects, the others are differences: only the rrect is expected.
+    run_test_case(r, TestCase::Build("did", kDeviceBounds)
+                             .actual()
+                                 .aa().intersect().rrect(oval)
+                                 .nonAA().difference().rect(box).path(octagon)
+                                 .finishElements()
+                             .expect()
+                                 .aa().intersect().rrect(oval)
+                                 .finishElements()
+                             .state(ClipState::kDeviceRRect)
+                             .finishTest());
+
+    // Path intersects, the others are differences: only the path is expected.
+    run_test_case(r, TestCase::Build("ddi", kDeviceBounds)
+                             .actual()
+                                 .aa().intersect().path(octagon)
+                                 .nonAA().difference().rect(box).rrect(oval)
+                                 .finishElements()
+                             .expect()
+                                 .aa().intersect().path(octagon)
+                                 .finishElements()
+                             .state(ClipState::kComplex)
+                             .finishTest());
+}
+
+// Builds stacks of four overlapping shapes (all rects, all rrects, or all convex paths) and runs
+// one test case per combination of intersect/difference ops. Bit i of opBits selects the op for
+// shape i (set = intersect, clear = difference); opBits runs from 6 through 15.
+//
+// Expectations registered by this test:
+//  - rects/rrects: every difference element individually, plus a single intersect element that
+//    is the accumulated intersection of all the intersect shapes. The state is kDeviceRect or
+//    kDeviceRRect when all four ops are intersect, otherwise kComplex.
+//  - convex paths: the actual elements unchanged, with state kComplex.
+DEF_TEST(GrClipStack_ComplexClip, r) {
+    static constexpr float kN = 10.f;
+    static constexpr float kR = kN / 3.f;
+
+    // 4 rectangles that overlap by kN x 2kN (horiz), 2kN x kN (vert), or kN x kN (diagonal)
+    static const SkRect kTL = {0.f, 0.f, 2.f * kN, 2.f * kN};
+    static const SkRect kTR = {kN,  0.f, 3.f * kN, 2.f * kN};
+    static const SkRect kBL = {0.f, kN,  2.f * kN, 3.f * kN};
+    static const SkRect kBR = {kN,  kN,  3.f * kN, 3.f * kN};
+
+    enum ShapeType { kRect, kRRect, kConvex };
+
+    SkRect rects[] = { kTL, kTR, kBL, kBR };
+    for (ShapeType type : { kRect, kRRect, kConvex }) {
+        for (int opBits = 6; opBits < 16; ++opBits) {
+            SkString name;
+            name.appendf("complex-%d-%d", (int) type, opBits);
+
+            // Running intersections start as the full device bounds and are narrowed by each
+            // intersect-op shape encountered below.
+            SkRect expectedRectIntersection = SkRect::Make(kDeviceBounds);
+            SkRRect expectedRRectIntersection = SkRRect::MakeRect(expectedRectIntersection);
+
+            auto b = TestCase::Build(name.c_str(), kDeviceBounds);
+            for (int i = 0; i < 4; ++i) {
+                SkClipOp op = (opBits & (1 << i)) ? SkClipOp::kIntersect : SkClipOp::kDifference;
+                switch(type) {
+                    case kRect: {
+                        // Named 'shape' (not 'r') so it does not shadow the test reporter.
+                        SkRect shape = rects[i];
+                        if (op == SkClipOp::kDifference) {
+                            // Shrink the rect for difference ops, otherwise in the rect testcase
+                            // any difference op would remove the intersection of the other ops
+                            // given how the rects are defined, and that's just not interesting.
+                            shape.inset(kR, kR);
+                        }
+                        b.actual().rect(shape, GrAA::kYes, op);
+                        if (op == SkClipOp::kIntersect) {
+                            SkAssertResult(expectedRectIntersection.intersect(shape));
+                        } else {
+                            b.expect().rect(shape, GrAA::kYes, SkClipOp::kDifference);
+                        }
+                        break; }
+                    case kRRect: {
+                        SkRRect rrect = SkRRect::MakeRectXY(rects[i], kR, kR);
+                        b.actual().rrect(rrect, GrAA::kYes, op);
+                        if (op == SkClipOp::kIntersect) {
+                            expectedRRectIntersection = SkRRectPriv::ConservativeIntersect(
+                                    expectedRRectIntersection, rrect);
+                            SkASSERT(!expectedRRectIntersection.isEmpty());
+                        } else {
+                            b.expect().rrect(rrect, GrAA::kYes, SkClipOp::kDifference);
+                        }
+                        break; }
+                    case kConvex:
+                        b.actual().path(make_octagon(rects[i], kR, kR), GrAA::kYes, op);
+                        // NOTE: We don't set any expectations here, since convex just calls
+                        // expectActual() at the end.
+                        break;
+                }
+            }
+
+            // The expectations differ depending on the shape type
+            GrClipStack::ClipState state = GrClipStack::ClipState::kComplex;
+            if (type == kConvex) {
+                // The simplest case is when the paths cannot be combined together, so we expect
+                // the actual elements to be unmodified (both intersect and difference).
+                b.expectActual();
+            } else if (opBits) {
+                // All intersection ops were pre-computed into expectedR[R]ectIntersection
+                // - difference ops already added in the for loop
+                if (type == kRect) {
+                    SkASSERT(expectedRectIntersection != SkRect::Make(kDeviceBounds) &&
+                             !expectedRectIntersection.isEmpty());
+                    b.expect().rect(expectedRectIntersection, GrAA::kYes, SkClipOp::kIntersect);
+                    // 0xf means all four ops were intersect, leaving just one rect element.
+                    if (opBits == 0xf) {
+                        state = GrClipStack::ClipState::kDeviceRect;
+                    }
+                } else {
+                    SkASSERT(expectedRRectIntersection !=
+                                    SkRRect::MakeRect(SkRect::Make(kDeviceBounds)) &&
+                             !expectedRRectIntersection.isEmpty());
+                    b.expect().rrect(expectedRRectIntersection, GrAA::kYes, SkClipOp::kIntersect);
+                    // 0xf means all four ops were intersect, leaving just one rrect element.
+                    if (opBits == 0xf) {
+                        state = GrClipStack::ClipState::kDeviceRRect;
+                    }
+                }
+            }
+
+            run_test_case(r, b.state(state).finishTest());
+        }
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// These tests do not use the TestCase infrastructure and manipulate a
+// GrClipStack directly.
+
+// Tests that replaceClip() works as expected across save/restores: after save() + replaceClip()
+// the stack should consist of a single non-AA intersect rect matching the replacement bounds, and
+// restore() should bring back the rrect element that was clipped before the save.
+DEF_TEST(GrClipStack_ReplaceClip, r) {
+    // NOTE(review): the trailing 'false' looks like the force-AA flag (GrClipStack_ForceAA passes
+    // 'true' in the same position) -- confirm against the GrClipStack constructor.
+    GrClipStack cs(kDeviceBounds, nullptr, false);
+
+    // Start with a single AA intersect rrect so there is state for restore() to bring back.
+    SkRRect rrect = SkRRect::MakeRectXY({15.f, 12.25f, 40.3f, 23.5f}, 4.f, 6.f);
+    cs.clipRRect(SkMatrix::I(), rrect, GrAA::kYes, SkClipOp::kIntersect);
+
+    SkIRect replace = {50, 25, 75, 40}; // Is disjoint from the rrect element
+    cs.save();
+    cs.replaceClip(replace);
+
+    // The replacement should be reported as a device rect whose bounds equal 'replace'.
+    REPORTER_ASSERT(r, cs.clipState() == GrClipStack::ClipState::kDeviceRect,
+                    "Clip did not become a device rect");
+    REPORTER_ASSERT(r, cs.getConservativeBounds() == replace, "Unexpected replaced clip bounds");
+    // The first element reported by the stack should be the replacement rect: non-AA, intersect,
+    // identity matrix.
+    const GrClipStack::Element& replaceElement = *cs.begin();
+    REPORTER_ASSERT(r, replaceElement.fShape.rect() == SkRect::Make(replace) &&
+                       replaceElement.fAA == GrAA::kNo &&
+                       replaceElement.fOp == SkClipOp::kIntersect &&
+                       replaceElement.fLocalToDevice == SkMatrix::I(),
+                    "Unexpected replace element state");
+
+    // Restore should undo the replaced clip and bring back the rrect
+    cs.restore();
+    REPORTER_ASSERT(r, cs.clipState() == GrClipStack::ClipState::kDeviceRRect,
+                    "Unexpected state after restore, not kDeviceRRect");
+    // Re-fetch the first element after the restore and compare it against the original rrect.
+    const GrClipStack::Element& rrectElem = *cs.begin();
+    REPORTER_ASSERT(r, rrectElem.fShape.rrect() == rrect &&
+                       rrectElem.fAA == GrAA::kYes &&
+                       rrectElem.fOp == SkClipOp::kIntersect &&
+                       rrectElem.fLocalToDevice == SkMatrix::I(),
+                    "RRect element state not restored properly after replace clip undone");
+}
+
+// Tests that when a stack is forced to always be AA, non-AA elements become AA. Three intersect
+// elements are added (AA rect, non-AA path, non-AA rect) and the test expects exactly two
+// elements afterwards: the path promoted to AA, and one AA rect that is the intersection of the
+// two rects.
+DEF_TEST(GrClipStack_ForceAA, r) {
+    // Third constructor argument is 'true' here, unlike the other direct-manipulation tests.
+    // NOTE(review): presumably this is the force-AA flag -- confirm against the constructor.
+    GrClipStack cs(kDeviceBounds, nullptr, true);
+
+    // AA will remain AA
+    SkRect aaRect = {0.25f, 12.43f, 25.2f, 23.f};
+    cs.clipRect(SkMatrix::I(), aaRect, GrAA::kYes, SkClipOp::kIntersect);
+
+    // Non-AA will become AA
+    SkPath nonAAPath = make_octagon({2.f, 10.f, 16.f, 20.f});
+    cs.clipPath(SkMatrix::I(), nonAAPath, GrAA::kNo, SkClipOp::kIntersect);
+
+    // Non-AA can combine with AA that wouldn't normally have combined
+    SkRect nonAARect = {4.5f, 5.f, 17.25f, 18.23f};
+    cs.clipRect(SkMatrix::I(), nonAARect, GrAA::kNo, SkClipOp::kIntersect);
+
+    // The stack reports elements newest first, but the non-AA rect op was combined in place with
+    // the first aa rect, so we should see nonAAPath as AA, and then the intersection of rects.
+    SkRect expectedRect = aaRect;
+    SkAssertResult(expectedRect.intersect(nonAARect));
+    auto elements = cs.begin();
+
+    // First reported element: the path, now flagged as AA.
+    const GrClipStack::Element& aaPath = *elements;
+    REPORTER_ASSERT(r, aaPath.fShape.path() == nonAAPath, "Expected path element");
+    REPORTER_ASSERT(r, aaPath.fAA == GrAA::kYes, "Path element not promoted to AA");
+
+    // Second reported element: the combined rect, also flagged as AA.
+    ++elements;
+    const GrClipStack::Element& rect = *elements;
+    REPORTER_ASSERT(r, rect.fShape.rect() == expectedRect, "Mixed AA rects did not combine");
+    REPORTER_ASSERT(r, rect.fAA == GrAA::kYes, "Rect elements not promoted to AA");
+
+    // Advancing once more must reach the end; the check is written as a negated '!='.
+    // NOTE(review): presumably the iterator type only provides operator!= -- confirm.
+    ++elements;
+    REPORTER_ASSERT(r, !(elements != cs.end()), "Expected only two clip elements");
+}
+
+// Exercises GrClipStack::preApply() against a wide-open stack, an empty clip, an rrect clip, a
+// complex path clip, and axis-aligned rect/rrect clips. Each case checks the reported effect
+// and, for the rect/rrect clips, the AA state and rrect carried in the result.
+DEF_TEST(GrClipStack_PreApply, r) {
+    GrClipStack cs(kDeviceBounds, nullptr, false);
+
+    // Geometry shared by the clip cases below; 'grown' is 'rect' outset by 5 on every side.
+    const SkRect rect = SkRect::MakeLTRB(10.f, 10.f, 40.f, 40.f);
+    const SkRect grown = rect.makeOutset(5.f, 5.f);
+
+    // Offscreen is kClippedOut
+    {
+        GrClip::PreClipResult result =
+                cs.preApply(SkRect::MakeLTRB(-10.f, -10.f, -1.f, -1.f), GrAA::kYes);
+        REPORTER_ASSERT(r, result.fEffect == GrClip::Effect::kClippedOut,
+                        "Offscreen draw is kClippedOut");
+    }
+
+    // Intersecting screen with wide-open clip is kUnclipped
+    {
+        GrClip::PreClipResult result =
+                cs.preApply(SkRect::MakeLTRB(-10.f, -10.f, 10.f, 10.f), GrAA::kYes);
+        REPORTER_ASSERT(r, result.fEffect == GrClip::Effect::kUnclipped,
+                        "Wide open screen intersection is still kUnclipped");
+    }
+
+    // Empty clip is clipped out
+    cs.save();
+    cs.clipRect(SkMatrix::I(), SkRect::MakeEmpty(), GrAA::kNo, SkClipOp::kIntersect);
+    {
+        GrClip::PreClipResult result =
+                cs.preApply(SkRect::MakeLTRB(0.f, 0.f, 20.f, 20.f), GrAA::kYes);
+        REPORTER_ASSERT(r, result.fEffect == GrClip::Effect::kClippedOut,
+                        "Empty clip stack preApplies as kClippedOut");
+    }
+    cs.restore();
+
+    // One rrect clip serves two checks: a draw contained inside it and a draw disjoint from it.
+    // (An rrect is used for the outer clip element since paths don't support an inner bounds
+    // and anything complex is otherwise skipped in preApply.)
+    cs.save();
+    cs.clipRRect(SkMatrix::I(), SkRRect::MakeRectXY(grown, 5.f, 5.f), GrAA::kYes,
+                 SkClipOp::kIntersect);
+    {
+        // Contained inside clip is kUnclipped
+        GrClip::PreClipResult result = cs.preApply(rect, GrAA::kYes);
+        REPORTER_ASSERT(r, result.fEffect == GrClip::Effect::kUnclipped,
+                        "Draw contained within clip is kUnclipped");
+    }
+    {
+        // Disjoint from clip (but still on screen) is kClippedOut
+        GrClip::PreClipResult result =
+                cs.preApply(SkRect::MakeLTRB(50.f, 50.f, 60.f, 60.f), GrAA::kYes);
+        REPORTER_ASSERT(r, result.fEffect == GrClip::Effect::kClippedOut,
+                        "Draw not intersecting clip is kClippedOut");
+    }
+    cs.restore();
+
+    // Intersecting clip is kClipped for complex shape, and no rrect is reported
+    cs.save();
+    {
+        const SkPath octagon = make_octagon(grown, 5.f, 5.f);
+        cs.clipPath(SkMatrix::I(), octagon, GrAA::kYes, SkClipOp::kIntersect);
+        GrClip::PreClipResult result = cs.preApply(octagon.getBounds(), GrAA::kNo);
+        REPORTER_ASSERT(r, result.fEffect == GrClip::Effect::kClipped && !result.fIsRRect,
+                        "Draw with complex clip is kClipped, but is not an rrect");
+    }
+    cs.restore();
+
+    // Axis-aligned rect clip: preApply reports kClipped along with the clip rect (as an rrect)
+    // and the clip's AA setting.
+    cs.save();
+    cs.clipRect(SkMatrix::I(), rect, GrAA::kYes, SkClipOp::kIntersect);
+    {
+        GrClip::PreClipResult result = cs.preApply(rect.makeOffset(2.f, 2.f), GrAA::kNo);
+        REPORTER_ASSERT(r, result.fEffect == GrClip::Effect::kClipped &&
+                           result.fAA == GrAA::kYes &&
+                           result.fIsRRect &&
+                           result.fRRect == SkRRect::MakeRect(rect),
+                        "kDeviceRect clip stack should be reported by preApply");
+    }
+    cs.restore();
+
+    // Axis-aligned rrect clip: preApply reports kClipped along with the clip rrect and the
+    // clip's AA setting.
+    cs.save();
+    {
+        const SkRRect clipRRect = SkRRect::MakeRectXY(rect, 5.f, 5.f);
+        cs.clipRRect(SkMatrix::I(), clipRRect, GrAA::kYes, SkClipOp::kIntersect);
+        GrClip::PreClipResult result = cs.preApply(rect.makeOffset(2.f, 2.f), GrAA::kNo);
+        REPORTER_ASSERT(r, result.fEffect == GrClip::Effect::kClipped &&
+                           result.fAA == GrAA::kYes &&
+                           result.fIsRRect &&
+                           result.fRRect == clipRRect,
+                        "kDeviceRRect clip stack should be reported by preApply");
+    }
+    cs.restore();
+}
+
+// Covers GrClipStack::clipShader(): the reported clip state, what apply() produces for
+// onscreen and offscreen draws, removal of the shader on restore(), and how a shader layered
+// on a device-rect clip affects preApply().
+DEF_TEST(GrClipStack_Shader, r) {
+    sk_sp<SkShader> shader = SkShaders::Color({0.f, 0.f, 0.f, 0.5f}, nullptr);
+
+    SkSimpleMatrixProvider matrixProvider(SkMatrix::I());
+    sk_sp<GrDirectContext> context = GrDirectContext::MakeMock(nullptr);
+    std::unique_ptr<GrRenderTargetContext> rtc = GrRenderTargetContext::Make(
+            context.get(), GrColorType::kRGBA_8888, SkColorSpace::MakeSRGB(),
+            SkBackingFit::kExact, kDeviceBounds.size());
+
+    GrClipStack cs(kDeviceBounds, &matrixProvider, false);
+
+    // Every apply() call in this test uses the same context, target, AA type, and stencil flag;
+    // only the output clip and the (in/out) draw bounds vary.
+    auto applyClip = [&](GrAppliedClip* applied, SkRect* bounds) -> GrClip::Effect {
+        return cs.apply(context.get(), rtc.get(), GrAAType::kCoverage, false, applied, bounds);
+    };
+
+    cs.save();
+    cs.clipShader(shader);
+
+    REPORTER_ASSERT(r, cs.clipState() == GrClipStack::ClipState::kComplex,
+                    "A clip shader should be reported as a complex clip");
+
+    // Onscreen draw: the shader must show up as a coverage fragment processor.
+    {
+        GrAppliedClip out(kDeviceBounds.size());
+        SkRect onscreen = SkRect::MakeLTRB(10.f, 11.f, 16.f, 32.f);
+        GrClip::Effect effect = applyClip(&out, &onscreen);
+
+        REPORTER_ASSERT(r, effect == GrClip::Effect::kClipped,
+                        "apply() should return kClipped for a clip shader");
+        REPORTER_ASSERT(r, out.hasCoverageFragmentProcessor(),
+                        "apply() should have converted clip shader to a coverage FP");
+    }
+
+    // Offscreen draw: discarded even though a clip shader is present.
+    {
+        GrAppliedClip discarded(kDeviceBounds.size());
+        SkRect offscreen = SkRect::MakeLTRB(-15.f, -10.f, -1.f, 10.f);
+        GrClip::Effect effect = applyClip(&discarded, &offscreen);
+        REPORTER_ASSERT(r, effect == GrClip::Effect::kClippedOut,
+                        "apply() should still discard offscreen draws with a clip shader");
+    }
+
+    cs.restore();
+    REPORTER_ASSERT(r, cs.clipState() == GrClipStack::ClipState::kWideOpen,
+                    "restore() should get rid of the clip shader");
+
+    // Adding a clip shader on top of a device rect clip should prevent preApply from reporting
+    // it as a device rect
+    cs.clipRect(SkMatrix::I(), SkRect::MakeLTRB(10.f, 15.f, 30.f, 30.f), GrAA::kNo,
+                SkClipOp::kIntersect);
+    SkASSERT(cs.clipState() == GrClipStack::ClipState::kDeviceRect); // test precondition
+    cs.clipShader(shader);
+
+    GrClip::PreClipResult result = cs.preApply(SkRect::Make(kDeviceBounds), GrAA::kYes);
+    REPORTER_ASSERT(r, result.fEffect == GrClip::Effect::kClipped && !result.fIsRRect,
+                    "A clip shader should not produce a device rect from preApply");
+}
+
+// Checks apply() for cases that need no rendered masks or atlases, so a mock context is enough
+// and the test can be a regular (non-GPU) test.
+// - Coverage here is deliberately not exhaustive; apply() is hard to unit test and is mostly
+//   validated by the GMs.
+DEF_TEST(GrClipStack_SimpleApply, r) {
+    SkSimpleMatrixProvider matrixProvider(SkMatrix::I());
+    sk_sp<GrDirectContext> context = GrDirectContext::MakeMock(nullptr);
+    std::unique_ptr<GrRenderTargetContext> rtc = GrRenderTargetContext::Make(
+            context.get(), GrColorType::kRGBA_8888, SkColorSpace::MakeSRGB(),
+            SkBackingFit::kExact, kDeviceBounds.size());
+
+    GrClipStack cs(kDeviceBounds, &matrixProvider, false);
+
+    // All apply() calls below share the same context, target, AA type, and stencil flag. The
+    // bounds are passed by pointer because apply() may update them.
+    auto applyClip = [&](GrAppliedClip* applied, SkRect* bounds) -> GrClip::Effect {
+        return cs.apply(context.get(), rtc.get(), GrAAType::kCoverage, false, applied, bounds);
+    };
+
+    // Offscreen draw is kClippedOut
+    {
+        SkRect offscreen = SkRect::MakeLTRB(-15.f, -15.f, -1.f, -1.f);
+        GrAppliedClip out(kDeviceBounds.size());
+        GrClip::Effect effect = applyClip(&out, &offscreen);
+        REPORTER_ASSERT(r, effect == GrClip::Effect::kClippedOut, "Offscreen draw is clipped out");
+    }
+
+    // Draw contained in clip is kUnclipped
+    {
+        SkRect inner = SkRect::MakeLTRB(15.4f, 16.3f, 26.f, 32.f);
+        const SkPath surrounding = make_octagon(inner.makeOutset(5.f, 5.f), 5.f, 5.f);
+
+        cs.save();
+        cs.clipPath(SkMatrix::I(), surrounding, GrAA::kYes, SkClipOp::kIntersect);
+
+        GrAppliedClip out(kDeviceBounds.size());
+        GrClip::Effect effect = applyClip(&out, &inner);
+        REPORTER_ASSERT(r, effect == GrClip::Effect::kUnclipped, "Draw inside clip is unclipped");
+        cs.restore();
+    }
+
+    // Draw bounds are cropped to device space before checking contains
+    {
+        // The clip hugs the right device edge; the draw is the clip shifted 10 to the right.
+        const SkRect edgeClip = SkRect::MakeLTRB(kDeviceBounds.fRight - 20.f, 10.f,
+                                                 kDeviceBounds.fRight, 20.f);
+        SkRect drawRect = edgeClip.makeOffset(10.f, 0.f);
+
+        cs.save();
+        cs.clipRect(SkMatrix::I(), edgeClip, GrAA::kNo, SkClipOp::kIntersect);
+
+        GrAppliedClip out(kDeviceBounds.size());
+        GrClip::Effect effect = applyClip(&out, &drawRect);
+        REPORTER_ASSERT(r, SkRect::Make(kDeviceBounds).contains(drawRect),
+                        "Draw rect should be clipped to device rect");
+        REPORTER_ASSERT(r, effect == GrClip::Effect::kUnclipped,
+                        "After device clipping, this should be detected as contained within clip");
+        cs.restore();
+    }
+
+    // Non-AA device rect intersect is just a scissor
+    {
+        const SkRect fractionalClip = SkRect::MakeLTRB(15.3f, 17.23f, 30.2f, 50.8f);
+        SkRect larger = fractionalClip.makeOutset(10.f, 10.f);
+        SkIRect expectedScissor = fractionalClip.round();
+
+        cs.save();
+        cs.clipRect(SkMatrix::I(), fractionalClip, GrAA::kNo, SkClipOp::kIntersect);
+
+        GrAppliedClip out(kDeviceBounds.size());
+        GrClip::Effect effect = applyClip(&out, &larger);
+        REPORTER_ASSERT(r, effect == GrClip::Effect::kClipped, "Draw should be clipped by rect");
+        REPORTER_ASSERT(r, !out.hasCoverageFragmentProcessor(), "Clip should not use coverage FPs");
+        REPORTER_ASSERT(r, !out.hardClip().hasStencilClip(), "Clip should not need stencil");
+        REPORTER_ASSERT(r, !out.hardClip().windowRectsState().enabled(),
+                        "Clip should not need window rects");
+        REPORTER_ASSERT(r, out.scissorState().enabled() &&
+                           out.scissorState().rect() == expectedScissor,
+                        "Clip has unexpected scissor rectangle");
+        cs.restore();
+    }
+
+    // Analytic coverage FPs: applies the current stack to 'bounds' and expects a clipped result
+    // that has both a scissor and a coverage fragment processor.
+    auto expectCoverageFP = [&](SkRect bounds) {
+        GrAppliedClip out(kDeviceBounds.size());
+        GrClip::Effect effect = applyClip(&out, &bounds);
+        REPORTER_ASSERT(r, effect == GrClip::Effect::kClipped, "Draw should be clipped");
+        REPORTER_ASSERT(r, out.scissorState().enabled(), "Coverage FPs should still set scissor");
+        REPORTER_ASSERT(r, out.hasCoverageFragmentProcessor(), "Clip should use coverage FP");
+    };
+
+    // Axis-aligned rect can be an analytic FP
+    {
+        cs.save();
+        cs.clipRect(SkMatrix::I(), SkRect::MakeLTRB(10.2f, 8.342f, 63.f, 23.3f), GrAA::kYes,
+                    SkClipOp::kDifference);
+        expectCoverageFP(SkRect::MakeLTRB(9.f, 10.f, 30.f, 18.f));
+        cs.restore();
+    }
+
+    // Axis-aligned round rect can be an analytic FP
+    {
+        const SkRect base = SkRect::MakeLTRB(4.f, 8.f, 20.f, 20.f);
+        cs.save();
+        cs.clipRRect(SkMatrix::I(), SkRRect::MakeRectXY(base, 3.f, 3.f), GrAA::kYes,
+                     SkClipOp::kIntersect);
+        expectCoverageFP(base.makeOffset(2.f, 2.f));
+        cs.restore();
+    }
+
+    // Transformed rect can be an analytic FP
+    {
+        const SkRect base = SkRect::MakeLTRB(14.f, 8.f, 30.f, 22.34f);
+        const SkMatrix rotation = SkMatrix::RotateDeg(34.f);
+        cs.save();
+        cs.clipRect(rotation, base, GrAA::kNo, SkClipOp::kIntersect);
+        expectCoverageFP(rotation.mapRect(base));
+        cs.restore();
+    }
+
+    // Convex polygons can be an analytic FP
+    {
+        const SkRect base = SkRect::MakeLTRB(15.f, 15.f, 45.f, 45.f);
+        cs.save();
+        cs.clipPath(SkMatrix::I(), make_octagon(base), GrAA::kYes, SkClipOp::kIntersect);
+        expectCoverageFP(base.makeOutset(2.f, 2.f));
+        cs.restore();
+    }
+}
+
+// Context-options hook passed to DEF_GPUTEST_FOR_CONTEXTS for GrClipStack_SWMask.
+// Must disable CCPR in order to trigger SW mask generation when the clip stack is applied; this
+// is done by setting fGpuPathRenderers to kNone.
+// NOTE(review): presumably kNone leaves only the default path renderer available (hence the
+// function name) -- confirm against the GpuPathRenderers enum.
+static void only_allow_default(GrContextOptions* options) {
+    options->fGpuPathRenderers = GpuPathRenderers::kNone;
+}
+
+// GPU test, run on contexts accepted by IsRenderingContext with options adjusted by
+// only_allow_default. It tracks the unique keys reported by testingOnly_getLastSWMaskKey() as
+// clips are added, saved, restored, and finally destroyed, and checks which keys can still be
+// resolved to a proxy at each step.
+DEF_GPUTEST_FOR_CONTEXTS(GrClipStack_SWMask,
+                         sk_gpu_test::GrContextFactory::IsRenderingContext,
+                         r, ctxInfo, only_allow_default) {
+    GrDirectContext* context = ctxInfo.directContext();
+    std::unique_ptr<GrRenderTargetContext> rtc = GrRenderTargetContext::Make(
+            context, GrColorType::kRGBA_8888, nullptr, SkBackingFit::kExact, kDeviceBounds.size());
+
+    // The stack is heap-allocated so that it can be destroyed explicitly at the end of the test
+    // (see "Release on destruction" below).
+    SkSimpleMatrixProvider matrixProvider = SkMatrix::I();
+    std::unique_ptr<GrClipStack> cs(new GrClipStack(kDeviceBounds, &matrixProvider, false));
+
+    // Intersects the stack with an AA, even-odd path made of two circles of the same radius,
+    // the second one offset by radius/2 in both x and y. Per the helper's name, this is meant
+    // to be a clip that can only be applied with a mask.
+    auto addMaskRequiringClip = [&](SkScalar x, SkScalar y, SkScalar radius) {
+        SkPath path;
+        path.addCircle(x, y, radius);
+        path.addCircle(x + radius / 2.f, y + radius / 2.f, radius);
+        path.setFillType(SkPathFillType::kEvenOdd);
+
+        // Use AA so that clip application does not route through the stencil buffer
+        cs->clipPath(SkMatrix::I(), path, GrAA::kYes, SkClipOp::kIntersect);
+    };
+
+    // Draws an opaque white, AA rect covering 'drawBounds' into rtc, clipped by the stack.
+    auto drawRect = [&](SkRect drawBounds) {
+        GrPaint paint;
+        paint.setColor4f({1.f, 1.f, 1.f, 1.f});
+        rtc->drawRect(cs.get(), std::move(paint), GrAA::kYes, SkMatrix::I(), drawBounds);
+    };
+
+    // Draws 'drawBounds' and asserts that the stack's last SW mask key changed as a result.
+    // Returns the key observed after the draw.
+    auto generateMask = [&](SkRect drawBounds) {
+        GrUniqueKey priorKey = cs->testingOnly_getLastSWMaskKey();
+        drawRect(drawBounds);
+        GrUniqueKey newKey = cs->testingOnly_getLastSWMaskKey();
+        REPORTER_ASSERT(r, priorKey != newKey, "Did not generate a new SW mask key as expected");
+        return newKey;
+    };
+
+    // Flushes the context, then asserts that every key in 'expectedKeys' resolves to a proxy
+    // and that no key in 'releasedKeys' does. In debug builds it additionally checks that the
+    // resource cache holds exactly expectedKeys.size() unique keys with the masks' tag.
+    // At least one of the two lists must be non-empty in debug builds, since the tag is read
+    // from the first available key.
+    auto verifyKeys = [&](const std::vector<GrUniqueKey>& expectedKeys,
+                          const std::vector<GrUniqueKey>& releasedKeys) {
+        context->flush();
+        GrProxyProvider* proxyProvider = context->priv().proxyProvider();
+
+#ifdef SK_DEBUG
+        // The proxy providers key count fluctuates based on proxy lifetime, but we want to
+        // verify the resource count, and that requires using key tags that are debug-only.
+        SkASSERT(expectedKeys.size() > 0 || releasedKeys.size() > 0);
+        const char* tag = expectedKeys.size() > 0 ? expectedKeys[0].tag() : releasedKeys[0].tag();
+        GrResourceCache* cache = context->priv().getResourceCache();
+        int numProxies = cache->countUniqueKeysWithTag(tag);
+        REPORTER_ASSERT(r, (int) expectedKeys.size() == numProxies,
+                        "Unexpected proxy count, got %d, not %d",
+                        numProxies, (int) expectedKeys.size());
+#endif
+
+        // Expected keys must still resolve to a proxy...
+        for (const auto& key : expectedKeys) {
+            auto proxy = proxyProvider->findOrCreateProxyByUniqueKey(key);
+            REPORTER_ASSERT(r, SkToBool(proxy), "Unable to find resource for expected mask key");
+        }
+        // ...while released keys must not.
+        for (const auto& key : releasedKeys) {
+            auto proxy = proxyProvider->findOrCreateProxyByUniqueKey(key);
+            REPORTER_ASSERT(r, !SkToBool(proxy), "SW mask not released as expected");
+        }
+    };
+
+    // Creates a mask for a complex clip
+    // (two draws with different bounds are each expected to produce their own key)
+    cs->save();
+    addMaskRequiringClip(5.f, 5.f, 20.f);
+    GrUniqueKey keyADepth1 = generateMask({0.f, 0.f, 20.f, 20.f});
+    GrUniqueKey keyBDepth1 = generateMask({10.f, 10.f, 30.f, 30.f});
+    verifyKeys({keyADepth1, keyBDepth1}, {});
+
+    // Creates a new mask for a new save record, but doesn't delete the old records
+    cs->save();
+    addMaskRequiringClip(6.f, 6.f, 15.f);
+    GrUniqueKey keyADepth2 = generateMask({0.f, 0.f, 20.f, 20.f});
+    GrUniqueKey keyBDepth2 = generateMask({10.f, 10.f, 30.f, 30.f});
+    verifyKeys({keyADepth1, keyBDepth1, keyADepth2, keyBDepth2}, {});
+
+    // Release after modifying the current record (even if we don't draw anything)
+    // Adding another clip at depth 2 is expected to drop that record's earlier keys; the
+    // depth 1 keys must survive.
+    addMaskRequiringClip(4.f, 4.f, 15.f);
+    GrUniqueKey keyCDepth2 = generateMask({4.f, 4.f, 16.f, 20.f});
+    verifyKeys({keyADepth1, keyBDepth1, keyCDepth2}, {keyADepth2, keyBDepth2});
+
+    // Release after restoring an older record
+    cs->restore();
+    verifyKeys({keyADepth1, keyBDepth1}, {keyCDepth2});
+
+    // Drawing finds the old masks at depth 1 still w/o making new ones
+    // (drawRect is used directly here, rather than generateMask, because no key change is
+    // expected)
+    drawRect({0.f, 0.f, 20.f, 20.f});
+    drawRect({10.f, 10.f, 30.f, 30.f});
+    verifyKeys({keyADepth1, keyBDepth1}, {});
+
+    // Drawing something contained within a previous mask also does not make a new one
+    drawRect({5.f, 5.f, 15.f, 15.f});
+    verifyKeys({keyADepth1, keyBDepth1}, {});
+
+    // Release on destruction
+    // (resetting the unique_ptr destroys the GrClipStack; its remaining keys must then be gone)
+    cs = nullptr;
+    verifyKeys({}, {keyADepth1, keyBDepth1});
+}