Coverage counting path renderer

Initial implementation of a GPU path renderer that draws antialiased
paths by counting coverage in an offscreen buffer.

Initially disabled until it has had time to soak.

Bug: skia:
Change-Id: I003d8cfdf8dc62641581b5ea2dc4f0aa00108df6
Reviewed-on: https://skia-review.googlesource.com/21541
Commit-Queue: Chris Dalton <csmartdalton@google.com>
Reviewed-by: Greg Daniel <egdaniel@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
Reviewed-by: Robert Phillips <robertphillips@google.com>
diff --git a/gn/gpu.gni b/gn/gpu.gni
index f7160f0..06f9908 100644
--- a/gn/gpu.gni
+++ b/gn/gpu.gni
@@ -289,6 +289,24 @@
   "$_src/gpu/ops/GrTessellatingPathRenderer.cpp",
   "$_src/gpu/ops/GrTessellatingPathRenderer.h",
 
+  # coverage counting path renderer
+  "$_src/gpu/ccpr/GrCCPRAtlas.cpp",
+  "$_src/gpu/ccpr/GrCCPRAtlas.h",
+  "$_src/gpu/ccpr/GrCCPRCoverageOpsBuilder.cpp",
+  "$_src/gpu/ccpr/GrCCPRCoverageOpsBuilder.h",
+  "$_src/gpu/ccpr/GrCCPRCoverageProcessor.cpp",
+  "$_src/gpu/ccpr/GrCCPRCoverageProcessor.h",
+  "$_src/gpu/ccpr/GrCCPRCubicProcessor.cpp",
+  "$_src/gpu/ccpr/GrCCPRCubicProcessor.h",
+  "$_src/gpu/ccpr/GrCCPRPathProcessor.cpp",
+  "$_src/gpu/ccpr/GrCCPRPathProcessor.h",
+  "$_src/gpu/ccpr/GrCCPRQuadraticProcessor.cpp",
+  "$_src/gpu/ccpr/GrCCPRQuadraticProcessor.h",
+  "$_src/gpu/ccpr/GrCCPRTriangleProcessor.cpp",
+  "$_src/gpu/ccpr/GrCCPRTriangleProcessor.h",
+  "$_src/gpu/ccpr/GrCoverageCountingPathRenderer.cpp",
+  "$_src/gpu/ccpr/GrCoverageCountingPathRenderer.h",
+
   "$_src/gpu/effects/Gr1DKernelEffect.h",
   "$_src/gpu/effects/GrBlurredEdgeFragmentProcessor.cpp",
   "$_src/gpu/effects/GrBlurredEdgeFragmentProcessor.h",
diff --git a/gn/samples.gni b/gn/samples.gni
index f388e79..58f8972 100644
--- a/gn/samples.gni
+++ b/gn/samples.gni
@@ -26,6 +26,7 @@
   "$_samplecode/SampleBigGradient.cpp",
   "$_samplecode/SampleBitmapRect.cpp",
   "$_samplecode/SampleBlur.cpp",
+  "$_samplecode/SampleCCPRGeometry.cpp",
   "$_samplecode/SampleCamera.cpp",
   "$_samplecode/SampleChart.cpp",
   "$_samplecode/SampleCircle.cpp",
diff --git a/include/gpu/GrContextOptions.h b/include/gpu/GrContextOptions.h
index 5337138..f560b31 100644
--- a/include/gpu/GrContextOptions.h
+++ b/include/gpu/GrContextOptions.h
@@ -91,10 +91,12 @@
         kAAConvex          = 1 << 4,
         kAALinearizing     = 1 << 5,
         kSmall             = 1 << 6,
-        kTessellating      = 1 << 7,
-        kDefault           = 1 << 8,
+        kCoverageCounting  = 1 << 7,
+        kTessellating      = 1 << 8,
+        kDefault           = 1 << 9,
 
-        kAll               = kDefault | (kDefault - 1),
+        // Temporarily disabling CCPR by default until it has had time to soak.
+        kAll               = (kDefault | (kDefault - 1)) & ~kCoverageCounting,
 
         // For legacy. To be removed when updated in Android.
         kDistanceField     = kSmall
diff --git a/samplecode/SampleCCPRGeometry.cpp b/samplecode/SampleCCPRGeometry.cpp
new file mode 100644
index 0000000..a44f03b
--- /dev/null
+++ b/samplecode/SampleCCPRGeometry.cpp
@@ -0,0 +1,342 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "SkTypes.h"
+
+#if SK_SUPPORT_GPU
+
+#include "GrContextPriv.h"
+#include "GrRenderTargetContext.h"
+#include "GrRenderTargetContextPriv.h"
+#include "GrResourceProvider.h"
+#include "SampleCode.h"
+#include "SkCanvas.h"
+#include "SkGeometry.h"
+#include "SkMakeUnique.h"
+#include "SkPaint.h"
+#include "SkPath.h"
+#include "SkView.h"
+#include "ccpr/GrCCPRCoverageProcessor.h"
+#include "gl/GrGLGpu.cpp"
+#include "ops/GrDrawOp.h"
+
+using PrimitiveInstance = GrCCPRCoverageProcessor::PrimitiveInstance;
+using Mode = GrCCPRCoverageProcessor::Mode;
+
+static int num_points(Mode mode) {  // Only the cubic modes use all four points.
+    return (mode < Mode::kSerpentineInsets) ? 3 : 4;
+}
+
+static bool is_curve(Mode mode) {  // True for every mode at or past kQuadraticHulls.
+    return mode >= Mode::kQuadraticHulls;
+}
+
+/**
+ * This sample visualizes the AA bloat geometry generated by the ccpr geometry shaders. It
+ * increases the AA bloat by 50x and outputs color instead of coverage (coverage=+1 -> green,
+ * coverage=0 -> black, coverage=-1 -> red). Use the keys 1-7 to cycle through the different
+ * geometry processors, or 'D' to print the current control points with SkDebugf.
+ */
+class CCPRGeometryView : public SampleView {
+public:
+    CCPRGeometryView() { this->updateGpuData(); }
+    void onDrawContent(SkCanvas*) override;
+
+    SkView::Click* onFindClickHandler(SkScalar x, SkScalar y, unsigned) override;
+    bool onClick(SampleView::Click*) override;
+    bool onQuery(SkEvent* evt) override;
+
+private:
+    class Click;
+    class Op;
+
+    void updateAndInval() {  // Rebuild the GPU-side arrays, then invalidate the view.
+        this->updateGpuData();
+        this->inval(nullptr);
+    }
+
+    void updateGpuData();
+
+    Mode fMode = Mode::kTriangleHulls;
+
+    SkPoint fPoints[4] = {  // Draggable points; fPoints[2] is unused by the 3-point modes.
+        {100.05f, 100.05f},
+        {100.05f, 300.95f},
+        {400.75f, 300.95f},
+        {400.75f, 100.05f}
+    };
+
+    SkSTArray<16, SkPoint>            fGpuPoints;     // Source data for the texel buffer.
+    SkSTArray<3, PrimitiveInstance>   fGpuInstances;  // Source data for the instance buffer.
+
+    typedef SampleView INHERITED;
+};
+
+class CCPRGeometryView::Op : public GrDrawOp {  // Draws the view's current GPU data.
+    DEFINE_OP_CLASS_ID
+
+public:
+    Op(CCPRGeometryView* view)
+            : INHERITED(ClassID())
+            , fView(view) {
+        this->setBounds(SkRect::MakeLargest(), GrOp::HasAABloat::kNo, GrOp::IsZeroArea::kNo);
+    }
+
+    const char* name() const override { return "[Testing/Sample code] CCPRGeometryView::Op"; }
+
+private:
+    FixedFunctionFlags fixedFunctionFlags() const override { return FixedFunctionFlags::kNone; }
+    RequiresDstTexture finalize(const GrCaps&, const GrAppliedClip*) override {
+        return RequiresDstTexture::kNo;
+    }
+    bool onCombineIfPossible(GrOp* other, const GrCaps& caps) override { return false; }
+    void onPrepare(GrOpFlushState*) override {}  // All GPU buffers are built in onExecute().
+    void onExecute(GrOpFlushState*) override;
+
+    CCPRGeometryView* fView;  // Not deleted by the op. NOTE(review): must outlive it -- confirm.
+
+    typedef GrDrawOp INHERITED;
+};
+
+void CCPRGeometryView::onDrawContent(SkCanvas* canvas) {
+    SkAutoCanvasRestore acr(canvas, true);
+    canvas->setMatrix(SkMatrix::I());  // Draw everything below with an identity matrix.
+
+    SkPath outline;  // Closed outline through the points that are active in the current mode.
+    outline.moveTo(fPoints[0]);
+    if (4 == num_points(fMode)) {
+        outline.cubicTo(fPoints[1], fPoints[2], fPoints[3]);
+    } else if (is_curve(fMode)) {
+        outline.quadTo(fPoints[1], fPoints[3]);
+    } else {
+        outline.lineTo(fPoints[1]);
+        outline.lineTo(fPoints[3]);
+    }
+    outline.close();
+
+    SkPaint outlinePaint;
+    outlinePaint.setColor(0x30000000);
+    outlinePaint.setStyle(SkPaint::kStroke_Style);
+    outlinePaint.setStrokeWidth(0);  // Zero stroke width requests a hairline.
+    outlinePaint.setAntiAlias(true);
+
+    canvas->drawPath(outline, outlinePaint);
+
+    const char* caption = "Use GPU backend to visualize geometry.";  // Shown if there is no rtc.
+
+    if (GrRenderTargetContext* rtc =
+        canvas->internal_private_accessTopLayerRenderTargetContext()) {
+        rtc->priv().testingOnly_addDrawOp(skstd::make_unique<Op>(this));
+        caption = GrCCPRCoverageProcessor::GetProcessorName(fMode);
+    }
+
+    SkPaint pointsPaint;
+    pointsPaint.setColor(SK_ColorBLUE);
+    pointsPaint.setStrokeWidth(8);
+    pointsPaint.setAntiAlias(true);
+
+    if (4 == num_points(fMode)) {
+        canvas->drawPoints(SkCanvas::kPoints_PointMode, 4, fPoints, pointsPaint);
+    } else {  // Three-point modes: draw fPoints[0], [1] and [3], skipping [2].
+        canvas->drawPoints(SkCanvas::kPoints_PointMode, 2, fPoints, pointsPaint);
+        canvas->drawPoints(SkCanvas::kPoints_PointMode, 1, fPoints + 3, pointsPaint);
+    }
+
+    SkPaint captionPaint;
+    captionPaint.setTextSize(20);
+    captionPaint.setColor(SK_ColorBLACK);
+    captionPaint.setAntiAlias(true);
+    canvas->drawText(caption, strlen(caption), 10, 30, captionPaint);
+}
+
+void CCPRGeometryView::updateGpuData() {  // Rebuilds fGpuPoints/fGpuInstances from fPoints.
+    int vertexCount = num_points(fMode);
+    int instanceCount = 1;
+
+    fGpuPoints.reset();
+    fGpuInstances.reset();
+
+    if (4 == vertexCount) {
+        double t[2], s[2];
+        SkCubicType type = SkClassifyCubic(fPoints, t, s);
+        SkSTArray<2, float> chops;
+        for (int i = 0; i < 2; ++i) {
+            float chop = t[i] / s[i];
+            if (chop > 0 && chop < 1) {  // Keep only chop parameters strictly inside (0, 1).
+                chops.push_back(chop);
+            }
+        }
+
+        instanceCount = chops.count() + 1;
+        SkPoint chopped[10];  // At most 2 chops -> 3 cubics sharing endpoints -> 10 points.
+        SkChopCubicAt(fPoints, chopped, chops.begin(), chops.count());
+
+        // Endpoints first, then control points.
+        for (int i = 0; i <= instanceCount; ++i) {
+            fGpuPoints.push_back(chopped[3*i]);
+        }
+        if (3 == instanceCount && SkCubicType::kLoop == type) {
+            fGpuPoints[2] = fGpuPoints[1]; // Account for floating point error.
+        }
+        for (int i = 0; i < instanceCount; ++i) {  // Four entries are appended per cubic.
+            fGpuPoints.push_back(chopped[3*i + 1]);
+            fGpuPoints.push_back(chopped[3*i + 2]);
+            // FIXME: we don't bother to send down the correct KLM t,s roots.
+            fGpuPoints.push_back({0, 0});
+            fGpuPoints.push_back({0, 0});
+        }
+
+        if (fMode < Mode::kLoopInsets && SkCubicType::kLoop == type) {  // Loop: move up 2.
+            fMode = (Mode) ((int) fMode + 2);
+        }
+        if (fMode >= Mode::kLoopInsets && SkCubicType::kLoop != type) {  // Not a loop: back 2.
+            fMode = (Mode) ((int) fMode - 2);
+        }
+    } else {
+        // Endpoints.
+        fGpuPoints.push_back(fPoints[0]);
+        fGpuPoints.push_back(fPoints[3]);
+        // Control points.
+        fGpuPoints.push_back(fPoints[1]);
+    }
+
+    if (4 == vertexCount) {
+        int controlPointsIdx = instanceCount + 1;  // Skip the instanceCount + 1 endpoints.
+        for (int i = 0; i < instanceCount; ++i) {  // Each cubic owns 4 consecutive entries.
+            fGpuInstances.push_back().fCubicData = {controlPointsIdx + i * 4, i};
+        }
+    } else if (is_curve(fMode)) {
+        fGpuInstances.push_back().fQuadraticData = {2, 0};
+    } else {
+        fGpuInstances.push_back().fTriangleData = {0, 2, 1}; // Texel buffer has endpoints first.
+    }
+
+    for (PrimitiveInstance& instance : fGpuInstances) {
+        instance.fPackedAtlasOffset = 0;  // The sample always uses a zero atlas offset.
+    }
+}
+
+void CCPRGeometryView::Op::onExecute(GrOpFlushState* state) {
+    GrResourceProvider* rp = state->resourceProvider();
+    GrContext* context = state->gpu()->getContext();
+    GrGLGpu* glGpu = kOpenGL_GrBackend == context->contextPriv().getBackend() ?
+                     static_cast<GrGLGpu*>(state->gpu()) : nullptr;  // Null on non-GL backends.
+    int vertexCount = num_points(fView->fMode);
+
+    sk_sp<GrBuffer> pointsBuffer(rp->createBuffer(fView->fGpuPoints.count() * sizeof(SkPoint),
+                                                  kTexel_GrBufferType, kDynamic_GrAccessPattern,
+                                                  GrResourceProvider::kNoPendingIO_Flag |
+                                                  GrResourceProvider::kRequireGpuMemory_Flag,
+                                                  fView->fGpuPoints.begin()));
+    if (!pointsBuffer) {  // Allocation failed: draw nothing.
+        return;
+    }
+
+    sk_sp<GrBuffer> instanceBuffer(rp->createBuffer(fView->fGpuInstances.count() * 4 * sizeof(int),
+                                                    kVertex_GrBufferType, kDynamic_GrAccessPattern,
+                                                    GrResourceProvider::kNoPendingIO_Flag |
+                                                    GrResourceProvider::kRequireGpuMemory_Flag,
+                                                    fView->fGpuInstances.begin()));
+    if (!instanceBuffer) {  // Allocation failed: draw nothing.
+        return;
+    }
+
+    GrPipeline pipeline(state->drawOpArgs().fRenderTarget, GrPipeline::ScissorState::kDisabled,
+                        SkBlendMode::kSrcOver);
+
+    GrCCPRCoverageProcessor ccprProc(fView->fMode, pointsBuffer.get());
+    SkDEBUGCODE(ccprProc.enableDebugVisualizations();)  // Only compiled inside SkDEBUGCODE.
+
+    GrMesh mesh(4 == vertexCount ?  GrPrimitiveType::kLinesAdjacency : GrPrimitiveType::kTriangles);
+    mesh.setInstanced(instanceBuffer.get(), fView->fGpuInstances.count(), 0, vertexCount);
+
+    if (glGpu) {  // GL only: switch to smoothed line polygon mode for the draw below.
+        glGpu->handleDirtyContext();
+        GR_GL_CALL(glGpu->glInterface(), PolygonMode(GR_GL_FRONT_AND_BACK, GR_GL_LINE));
+        GR_GL_CALL(glGpu->glInterface(), Enable(GR_GL_LINE_SMOOTH));
+    }
+
+    state->commandBuffer()->draw(pipeline, ccprProc, &mesh, nullptr, 1, this->bounds());
+
+    if (glGpu) {  // NOTE(review): presumably makes Skia re-sync the GL state changed above.
+        context->resetContext(kMisc_GrGLBackendState);
+    }
+}
+
+class CCPRGeometryView::Click : public SampleView::Click {  // Drags one point or all four.
+public:
+    Click(SkView* target, int ptIdx) : SampleView::Click(target), fPtIdx(ptIdx) {}
+
+    void doClick(SkPoint points[]) {
+        if (fPtIdx >= 0) {
+            this->dragPoint(points, fPtIdx);
+        } else {
+            for (int i = 0; i < 4; ++i) {  // No single point selected: move all four together.
+                this->dragPoint(points, i);
+            }
+        }
+    }
+
+private:
+    void dragPoint(SkPoint points[], int idx)  {
+        SkIPoint delta = fICurr - fIPrev;  // Cursor delta from the base class's fICurr/fIPrev.
+        points[idx] += SkPoint::Make(delta.x(), delta.y());
+    }
+
+    int fPtIdx;  // Index of the point to drag, or negative to drag every point.
+};
+
+SkView::Click* CCPRGeometryView::onFindClickHandler(SkScalar x, SkScalar y, unsigned) {
+    const bool hasFourPoints = (4 == num_points(fMode));
+    for (int idx = 0; idx < 4; ++idx) {
+        // fPoints[2] can only be grabbed in the four-point modes.
+        const bool selectable = hasFourPoints || 2 != idx;
+        if (selectable && fabsf(x - fPoints[idx].x()) < 20 && fabsf(y - fPoints[idx].y()) < 20) {
+            return new Click(this, idx);  // First point within 20 units on both axes wins.
+        }
+    }
+    return new Click(this, -1);  // Nothing hit: the click will drag every point.
+}
+
+bool CCPRGeometryView::onClick(SampleView::Click* click) {
+    // onFindClickHandler() only ever hands out CCPRGeometryView::Click objects.
+    static_cast<Click*>(click)->doClick(fPoints);
+    this->updateAndInval();
+    return true;
+}
+
+bool CCPRGeometryView::onQuery(SkEvent* evt) {  // Handles the title query and key presses.
+    if (SampleCode::TitleQ(*evt)) {
+        SampleCode::TitleR(evt, "CCPRGeometry");
+        return true;
+    }
+    SkUnichar unichar;
+    if (SampleCode::CharQ(*evt, &unichar)) {
+        if (unichar >= '1' && unichar <= '7') {  // Keys '1'..'7' map to mode values 0..6 ...
+            fMode = Mode(unichar - '1');
+            if (fMode >= Mode::kCombinedTriangleHullsAndEdges) {  // ... skipping this mode.
+                fMode = Mode(int(fMode) + 1);
+            }
+            this->updateAndInval();
+            return true;
+        }
+        if (unichar == 'D') {  // Dump fPoints in the form of the in-class initializer.
+            SkDebugf("    SkPoint fPoints[4] = {\n");
+            SkDebugf("        {%f, %f},\n", fPoints[0].x(), fPoints[0].y());
+            SkDebugf("        {%f, %f},\n", fPoints[1].x(), fPoints[1].y());
+            SkDebugf("        {%f, %f},\n", fPoints[2].x(), fPoints[2].y());
+            SkDebugf("        {%f, %f}\n", fPoints[3].x(), fPoints[3].y());
+            SkDebugf("    };\n");
+            return true;
+        }
+    }
+    return this->INHERITED::onQuery(evt);  // Everything else goes to the base class.
+}
+
+DEF_SAMPLE( return new CCPRGeometryView; )
+
+#endif // SK_SUPPORT_GPU
diff --git a/src/gpu/GrPathRendererChain.cpp b/src/gpu/GrPathRendererChain.cpp
index 076dea7..eda7a65 100644
--- a/src/gpu/GrPathRendererChain.cpp
+++ b/src/gpu/GrPathRendererChain.cpp
@@ -12,8 +12,11 @@
 #include "GrShaderCaps.h"
 #include "gl/GrGLCaps.h"
 #include "GrContext.h"
+#include "GrContextPriv.h"
 #include "GrGpu.h"
 
+#include "ccpr/GrCoverageCountingPathRenderer.h"
+
 #include "ops/GrAAConvexPathRenderer.h"
 #include "ops/GrAAHairLinePathRenderer.h"
 #include "ops/GrAALinearizingConvexPathRenderer.h"
@@ -56,6 +59,12 @@
     if (options.fGpuPathRenderers & GpuPathRenderers::kSmall) {
         fChain.push_back(sk_make_sp<GrSmallPathRenderer>());
     }
+    if (options.fGpuPathRenderers & Options::GpuPathRenderers::kCoverageCounting) {
+        if (auto ccpr = GrCoverageCountingPathRenderer::CreateIfSupported(*context->caps())) {
+            context->contextPriv().addOnFlushCallbackObject(ccpr.get());
+            fChain.push_back(std::move(ccpr));
+        }
+    }
     if (options.fGpuPathRenderers & GpuPathRenderers::kTessellating) {
         fChain.push_back(sk_make_sp<GrTessellatingPathRenderer>());
     }
diff --git a/src/gpu/GrRenderTargetContext.h b/src/gpu/GrRenderTargetContext.h
index 4485d63..21f9018 100644
--- a/src/gpu/GrRenderTargetContext.h
+++ b/src/gpu/GrRenderTargetContext.h
@@ -20,7 +20,9 @@
 #include "SkSurfaceProps.h"
 
 class GrBackendSemaphore;
+class GrCCPRAtlas;
 class GrClip;
+class GrCoverageCountingPathRenderer;
 class GrDrawingManager;
 class GrDrawOp;
 class GrFixedClip;
@@ -381,6 +383,8 @@
     friend class GrMSAAPathRenderer;                 // for access to add[Mesh]DrawOp
     friend class GrStencilAndCoverPathRenderer;      // for access to add[Mesh]DrawOp
     friend class GrTessellatingPathRenderer;         // for access to add[Mesh]DrawOp
+    friend class GrCCPRAtlas;                        // for access to addDrawOp
+    friend class GrCoverageCountingPathRenderer;     // for access to addDrawOp
     // for a unit test
     friend void test_draw_op(GrRenderTargetContext*,
                              sk_sp<GrFragmentProcessor>, sk_sp<GrTextureProxy>);
diff --git a/src/gpu/ccpr/GrCCPRAtlas.cpp b/src/gpu/ccpr/GrCCPRAtlas.cpp
new file mode 100644
index 0000000..8eb3086
--- /dev/null
+++ b/src/gpu/ccpr/GrCCPRAtlas.cpp
@@ -0,0 +1,125 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "GrCCPRAtlas.h"
+
+#include "GrOnFlushResourceProvider.h"
+#include "GrClip.h"
+#include "GrRectanizer_skyline.h"
+#include "GrTextureProxy.h"
+#include "GrRenderTargetContext.h"
+#include "SkMakeUnique.h"
+#include "SkMathPriv.h"
+#include "ccpr/GrCCPRCoverageProcessor.h"
+#include "ops/GrDrawOp.h"
+
+class GrCCPRAtlas::Node {  // One rectangular atlas region with its own skyline rectanizer.
+public:
+    Node(std::unique_ptr<Node> previous, int l, int t, int r, int b)
+            : fPrevious(std::move(previous))
+            , fX(l), fY(t)
+            , fRectanizer(r - l, b - t) {}
+
+    Node* previous() const { return fPrevious.get(); }
+
+    bool addRect(int w, int h, SkIPoint16* loc) {
+        static constexpr int kPad = 1;  // Extra space reserved beyond each rect's w and h.
+
+        if (!fRectanizer.addRect(w + kPad, h + kPad, loc)) {
+            return false;
+        }
+        loc->fX += fX;  // Convert from node-local to atlas coordinates.
+        loc->fY += fY;
+        return true;
+    }
+
+private:
+    const std::unique_ptr<Node>   fPrevious;  // Older region, owned; null for the first node.
+    const int                     fX, fY;     // Top-left corner of this region in the atlas.
+    GrRectanizerSkyline           fRectanizer;
+};
+
+GrCCPRAtlas::GrCCPRAtlas(const GrCaps& caps, int minWidth, int minHeight)
+        : fMaxAtlasSize(caps.maxRenderTargetSize())
+        , fDrawBounds{0, 0} {
+    SkASSERT(fMaxAtlasSize <= caps.maxTextureSize());
+    SkASSERT(SkTMax(minWidth, minHeight) <= fMaxAtlasSize);
+    int initialSize = GrNextPow2(SkTMax(minWidth, minHeight));  // Start square, power of two.
+    initialSize = SkTMax(int(kMinSize), initialSize);  // Never smaller than kMinSize ...
+    initialSize = SkTMin(initialSize, fMaxAtlasSize);  // ... nor larger than the maximum.
+    fHeight = fWidth = initialSize;
+    fTopNode = skstd::make_unique<Node>(nullptr, 0, 0, initialSize, initialSize);
+}
+
+GrCCPRAtlas::~GrCCPRAtlas() {  // Defined in this file, where Node is a complete type.
+}
+
+bool GrCCPRAtlas::addRect(int w, int h, SkIPoint16* loc) {
+    // This can't be called anymore once finalize() has been called.
+    SkASSERT(!fTextureProxy);
+
+    if (!this->internalPlaceRect(w, h, loc)) {  // No room, even after growing the atlas.
+        return false;
+    }
+
+    fDrawBounds.fWidth = SkTMax(fDrawBounds.width(), loc->x() + w);  // Grow the used extent.
+    fDrawBounds.fHeight = SkTMax(fDrawBounds.height(), loc->y() + h);
+    return true;
+}
+
+bool GrCCPRAtlas::internalPlaceRect(int w, int h, SkIPoint16* loc) {
+    SkASSERT(SkTMax(w, h) < fMaxAtlasSize);
+
+    for (Node* node = fTopNode.get(); node; node = node->previous()) {  // Newest region first.
+        if (node->addRect(w, h, loc)) {
+            return true;
+        }
+    }
+
+    // The rect didn't fit. Grow the atlas and try again.
+    do {
+        SkASSERT(SkTMax(fWidth, fHeight) <= fMaxAtlasSize);
+        if (fWidth == fMaxAtlasSize && fHeight == fMaxAtlasSize) {  // Cannot grow any further.
+            return false;
+        }
+        if (fHeight <= fWidth) {  // Double the shorter side (height on ties), capped at the max.
+            int top = fHeight;
+            fHeight = SkTMin(fHeight * 2, fMaxAtlasSize);
+            fTopNode = skstd::make_unique<Node>(std::move(fTopNode), 0, top, fWidth, fHeight);
+        } else {
+            int left = fWidth;
+            fWidth = SkTMin(fWidth * 2, fMaxAtlasSize);
+            fTopNode = skstd::make_unique<Node>(std::move(fTopNode), left, 0, fWidth, fHeight);
+        }
+    } while (!fTopNode->addRect(w, h, loc));  // Retry only in the newly added strip.
+
+    return true;
+}
+
+sk_sp<GrRenderTargetContext> GrCCPRAtlas::finalize(GrOnFlushResourceProvider* onFlushRP,
+                                                     std::unique_ptr<GrDrawOp> atlasOp) {
+    SkASSERT(!fTextureProxy);  // finalize() may only run once per atlas.
+
+    GrSurfaceDesc desc;
+    desc.fOrigin = GrCCPRCoverageProcessor::kAtlasOrigin;
+    desc.fWidth = fWidth;
+    desc.fHeight = fHeight;
+    desc.fConfig = kAlpha_half_GrPixelConfig;
+    sk_sp<GrRenderTargetContext> rtc = onFlushRP->makeRenderTargetContext(desc, nullptr, nullptr);
+    if (!rtc) {  // Allocation failed; fTextureProxy stays null.
+        SkDebugf("WARNING: failed to allocate a %ix%i atlas. Some paths will not be drawn.\n",
+                 fWidth, fHeight);
+        return nullptr;
+    }
+
+    SkIRect clearRect = SkIRect::MakeSize(fDrawBounds);  // Clear only the area actually used.
+    rtc->clear(&clearRect, 0, true);
+    rtc->addDrawOp(GrNoClip(), std::move(atlasOp));
+
+    fTextureProxy = sk_ref_sp(rtc->asTextureProxy());
+    return rtc;
+}
diff --git a/src/gpu/ccpr/GrCCPRAtlas.h b/src/gpu/ccpr/GrCCPRAtlas.h
new file mode 100644
index 0000000..a9ccd73
--- /dev/null
+++ b/src/gpu/ccpr/GrCCPRAtlas.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef GrCCPRAtlas_DEFINED
+#define GrCCPRAtlas_DEFINED
+
+#include "SkRefCnt.h"
+#include "SkSize.h"
+
+class GrCaps;
+class GrDrawOp;
+class GrOnFlushResourceProvider;
+class GrRenderTargetContext;
+class GrTextureProxy;
+struct SkIPoint16;
+
+/**
+ * This class implements a dynamic size GrRectanizer that grows until it reaches the implementation-
+ * dependent max render target size. When finalized, it also creates and stores a GrTextureProxy
+ * for the underlying atlas.
+ */
+class GrCCPRAtlas {
+public:
+    static constexpr int kMinSize = 1024;  // Smallest initial width/height of the atlas.
+
+    GrCCPRAtlas(const GrCaps&, int minWidth, int minHeight);
+    ~GrCCPRAtlas();
+
+    bool addRect(int devWidth, int devHeight, SkIPoint16* loc);  // False if it cannot fit.
+    const SkISize& drawBounds() const { return fDrawBounds; }
+
+    sk_sp<GrRenderTargetContext> SK_WARN_UNUSED_RESULT finalize(GrOnFlushResourceProvider*,
+                                                                std::unique_ptr<GrDrawOp> atlasOp);
+
+    sk_sp<GrTextureProxy> textureProxy() const { return fTextureProxy; }  // Null until finalized.
+
+private:
+    class Node;
+
+    bool internalPlaceRect(int w, int h, SkIPoint16* loc);
+
+    const int                                fMaxAtlasSize;
+
+    int                                      fWidth;
+    int                                      fHeight;
+    SkISize                                  fDrawBounds;  // Extent covered by added rects.
+    std::unique_ptr<Node>                    fTopNode;     // Most recently added region.
+
+    sk_sp<GrTextureProxy>                    fTextureProxy;
+};
+
+#endif
diff --git a/src/gpu/ccpr/GrCCPRCoverageOpsBuilder.cpp b/src/gpu/ccpr/GrCCPRCoverageOpsBuilder.cpp
new file mode 100644
index 0000000..d14cf1e
--- /dev/null
+++ b/src/gpu/ccpr/GrCCPRCoverageOpsBuilder.cpp
@@ -0,0 +1,640 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "GrCCPRCoverageOpsBuilder.h"
+
+#include "GrBuffer.h"
+#include "GrGpuCommandBuffer.h"
+#include "GrOnFlushResourceProvider.h"
+#include "GrOpFlushState.h"
+#include "SkGeometry.h"
+#include "SkMakeUnique.h"
+#include "SkMathPriv.h"
+#include "SkPath.h"
+#include "SkPathPriv.h"
+#include "SkPoint.h"
+#include "SkNx.h"
+#include "ops/GrDrawOp.h"
+#include "../pathops/SkPathOpsCubic.h"
+#include <numeric>
+
+class GrCCPRCoverageOpsBuilder::CoverageOp : public GrDrawOp {
+public:
+    using PrimitiveTallies = GrCCPRCoverageOpsBuilder::PrimitiveTallies;
+
+    DEFINE_OP_CLASS_ID
+
+    CoverageOp(const SkISize& drawBounds, sk_sp<GrBuffer> pointsBuffer,
+               sk_sp<GrBuffer> trianglesBuffer,
+               const PrimitiveTallies baseInstances[kNumScissorModes],
+               const PrimitiveTallies endInstances[kNumScissorModes], SkTArray<ScissorBatch>&&);
+
+    // GrDrawOp interface.
+    const char* name() const override { return "GrCCPRCoverageOpsBuilder::CoverageOp"; }
+    FixedFunctionFlags fixedFunctionFlags() const override { return FixedFunctionFlags::kNone; }
+    RequiresDstTexture finalize(const GrCaps&, const GrAppliedClip*) override {
+        return RequiresDstTexture::kNo;
+    }
+    bool onCombineIfPossible(GrOp* other, const GrCaps& caps) override { return false; }
+    void onPrepare(GrOpFlushState*) override {}  // Nothing to prepare.
+    void onExecute(GrOpFlushState*) override;
+
+private:
+    void drawMaskPrimitives(GrOpFlushState*, const GrPipeline&, const GrCCPRCoverageProcessor::Mode,
+                            GrPrimitiveType, int vertexCount,
+                            int PrimitiveTallies::* instanceType) const;
+
+    const SkISize                  fDrawBounds;
+    const sk_sp<GrBuffer>          fPointsBuffer;
+    const sk_sp<GrBuffer>          fTrianglesBuffer;
+    const PrimitiveTallies         fBaseInstances[GrCCPRCoverageOpsBuilder::kNumScissorModes];
+    const PrimitiveTallies         fInstanceCounts[GrCCPRCoverageOpsBuilder::kNumScissorModes];
+    const SkTArray<ScissorBatch>   fScissorBatches;
+
+    mutable SkTArray<GrMesh>                     fMeshesScratchBuffer;  // Mutable: see below.
+    mutable SkTArray<GrPipeline::DynamicState>   fDynamicStatesScratchBuffer;  // Const-usable.
+
+    typedef GrDrawOp INHERITED;
+};
+
+/**
+ * This is a view matrix that accumulates two bounding boxes as it maps points: device-space bounds
+ * and "45 degree" device-space bounds (| 1 -1 | * devCoords).
+ *                                      | 1  1 |
+ */
+class GrCCPRCoverageOpsBuilder::AccumulatingViewMatrix {
+public:
+    AccumulatingViewMatrix(const SkMatrix& m, const SkPoint& initialPoint);
+
+    SkPoint transform(const SkPoint& pt);  // Non-const: mapping a point updates the bounds.
+    void getAccumulatedBounds(SkRect* devBounds, SkRect* devBounds45) const;
+
+private:
+    Sk4f fX;  // NOTE(review): fX/fY/fT presumably hold the matrix terms -- confirm in the .cpp.
+    Sk4f fY;
+    Sk4f fT;
+
+    Sk4f fTopLeft;      // NOTE(review): presumably running minima for both boxes -- confirm.
+    Sk4f fBottomRight;  // NOTE(review): presumably running maxima for both boxes -- confirm.
+};
+
+// Number of entries a verb consumes from the path's point array (see parsePath()).
+static int num_pts(uint8_t verb) {
+    switch (verb) {
+        case SkPath::kMove_Verb:
+        case SkPath::kLine_Verb:
+            return 1;
+        case SkPath::kQuad_Verb:
+        case SkPath::kConic_Verb:
+            return 2;
+        case SkPath::kCubic_Verb:
+            return 3;
+        case SkPath::kClose_Verb:
+        case SkPath::kDone_Verb:
+        default:
+            SkFAIL("Path verb does not have an endpoint.");
+            return 0;
+    }
+}
+
+static SkPoint to_skpoint(double x, double y) {  // Narrows double coordinates to SkScalar.
+    return SkPoint::Make(static_cast<SkScalar>(x), static_cast<SkScalar>(y));
+}
+
+static SkPoint to_skpoint(const SkDPoint& dpoint) {  // Same conversion for an SkDPoint.
+    return to_skpoint(dpoint.fX, dpoint.fY);
+}
+
+bool GrCCPRCoverageOpsBuilder::init(GrOnFlushResourceProvider* onFlushRP,
+                                    const MaxBufferItems& maxBufferItems) {
+    const int maxPoints = maxBufferItems.fMaxFanPoints + maxBufferItems.fMaxControlPoints;
+    fPointsBuffer = onFlushRP->makeBuffer(kTexel_GrBufferType, maxPoints * 2 * sizeof(float));
+    if (!fPointsBuffer) {
+        return false;
+    }
+
+    const MaxPrimitives* const maxPrimitives = maxBufferItems.fMaxPrimitives;
+    const int maxInstances = (maxPrimitives[0].sum() + maxPrimitives[1].sum());
+    fInstanceBuffer = onFlushRP->makeBuffer(kVertex_GrBufferType, maxInstances * 4 * sizeof(int));
+    if (!fInstanceBuffer) {
+        fPointsBuffer.reset();  // Leave neither buffer allocated on failure.
+        return false;
+    }
+
+    fFanPtsIdx = 0;
+    fControlPtsIdx = maxBufferItems.fMaxFanPoints;  // Control points follow the fan points.
+    SkDEBUGCODE(fMaxFanPoints = maxBufferItems.fMaxFanPoints);
+    SkDEBUGCODE(fMaxControlPoints = maxBufferItems.fMaxControlPoints);
+
+    int baseInstance = 0;  // Running start offset of each primitive type's instance range.
+    for (int i = 0; i < kNumScissorModes; ++i) {
+        fBaseInstances[i].fTriangles = baseInstance;
+        baseInstance += maxPrimitives[i].fMaxTriangles;
+
+        fBaseInstances[i].fQuadratics = baseInstance;
+        baseInstance += maxPrimitives[i].fMaxQuadratics;
+
+        fBaseInstances[i].fSerpentines = baseInstance;
+        baseInstance += maxPrimitives[i].fMaxCubics;  // Serpentines and loops share this range.
+
+        // Loops grow backwards.
+        fBaseInstances[i].fLoops = baseInstance;
+
+        fInstanceIndices[i] = fBaseInstances[i];
+    }
+
+    fPointsData = static_cast<SkPoint*>(fPointsBuffer->map());
+    SkASSERT(fPointsData);  // NOTE(review): a failed map() is only caught in debug builds.
+    GR_STATIC_ASSERT(SK_SCALAR_IS_FLOAT);
+    GR_STATIC_ASSERT(8 == sizeof(SkPoint));
+
+    fInstanceData = static_cast<PrimitiveInstance*>(fInstanceBuffer->map());
+    SkASSERT(fInstanceData);  // NOTE(review): same as above.
+
+    return true;
+}
+
+void GrCCPRCoverageOpsBuilder::parsePath(ScissorMode scissorMode, const SkMatrix& viewMatrix,
+                                         const SkPath& path, SkRect* devBounds,
+                                         SkRect* devBounds45) {
+    // Make sure they haven't called finalize yet (or not called init).
+    SkASSERT(fPointsData);
+    SkASSERT(fInstanceData);
+
+    fCurrScissorMode = scissorMode;
+    fCurrPathIndices = fInstanceIndices[(int)fCurrScissorMode];  // Working copy for this path.
+    fCurrContourStartIdx = fFanPtsIdx;
+
+    const SkPoint* const pts = SkPathPriv::PointData(path);
+    int ptsIdx = 0;  // Index of the next unconsumed entry in pts.
+
+    SkASSERT(!path.isEmpty());
+    SkASSERT(path.countPoints() > 0);
+    AccumulatingViewMatrix m(viewMatrix, pts[0]);
+
+    for (SkPath::Verb verb : SkPathPriv::Verbs(path)) {
+        switch (verb) {
+            case SkPath::kMove_Verb:
+                this->startContour(m, pts[ptsIdx++]);
+                continue;  // ptsIdx was already advanced above.
+            case SkPath::kClose_Verb:
+                this->closeContour();
+                continue;  // Close consumes no points.
+            case SkPath::kLine_Verb:
+                this->fanTo(m, pts[ptsIdx]);
+                break;
+            case SkPath::kQuad_Verb:
+                SkASSERT(ptsIdx >= 1); // SkPath should have inserted an implicit moveTo if needed.
+                this->quadraticTo(m, &pts[ptsIdx - 1]);  // Pass the curve with its start point.
+                break;
+            case SkPath::kCubic_Verb:
+                SkASSERT(ptsIdx >= 1); // SkPath should have inserted an implicit moveTo if needed.
+                this->cubicTo(m, &pts[ptsIdx - 1]);  // Pass the curve with its start point.
+                break;
+            case SkPath::kConic_Verb:
+                SkFAIL("Conics are not supported.");  // NOTE(review): falls through if it returns.
+            default:
+                SkFAIL("Unexpected path verb.");
+        }
+
+        ptsIdx += num_pts(verb);
+    }
+
+    this->closeContour();
+
+    m.getAccumulatedBounds(devBounds, devBounds45);
+    SkDEBUGCODE(this->validate();)
+}
+
+void GrCCPRCoverageOpsBuilder::saveParsedPath(const SkIRect& clippedDevIBounds,
+                                              int16_t atlasOffsetX, int16_t atlasOffsetY) {
+    const PrimitiveTallies& baseIndices = fInstanceIndices[(int)fCurrScissorMode];
+    // Pack Y into the high 16 bits and X into the low 16. Multiply instead of shifting because
+    // left-shifting a negative (promoted) int16_t is undefined behavior prior to C++20.
+    const int32_t packedAtlasOffset = (atlasOffsetY * (1 << 16)) | (atlasOffsetX & 0xffff);
+    for (int i = baseIndices.fTriangles; i < fCurrPathIndices.fTriangles; ++i) {
+        fInstanceData[i].fPackedAtlasOffset = packedAtlasOffset;
+    }
+    for (int i = baseIndices.fQuadratics; i < fCurrPathIndices.fQuadratics; ++i) {
+        fInstanceData[i].fPackedAtlasOffset = packedAtlasOffset;
+    }
+    for (int i = baseIndices.fSerpentines; i < fCurrPathIndices.fSerpentines; ++i) {
+        fInstanceData[i].fPackedAtlasOffset = packedAtlasOffset;
+    }
+    for (int i = baseIndices.fLoops - 1; i >= fCurrPathIndices.fLoops; --i) {
+        fInstanceData[i].fPackedAtlasOffset = packedAtlasOffset;
+    }
+    if (ScissorMode::kScissored == fCurrScissorMode) {
+        fScissorBatches.push_back() = {fCurrPathIndices - baseIndices,
+                                       clippedDevIBounds.makeOffset(atlasOffsetX, atlasOffsetY)};
+    }
+    fInstanceIndices[(int)fCurrScissorMode] = fCurrPathIndices;
+}
+
+void GrCCPRCoverageOpsBuilder::startContour(AccumulatingViewMatrix& m, const SkPoint& anchorPoint) {
+    this->closeContour(); // Finish whatever contour was open before starting the new one.
+    SkASSERT(fCurrContourStartIdx == fFanPtsIdx);
+    fCurrPathSpaceAnchorPoint = anchorPoint; // Kept in path space; fanTo compares against it.
+    fPointsData[fFanPtsIdx++] = m.transform(anchorPoint);
+}
+
+void GrCCPRCoverageOpsBuilder::fanTo(AccumulatingViewMatrix& m, const SkPoint& pt) {
+    SkASSERT(fCurrContourStartIdx < fFanPtsIdx);
+    if (pt != fCurrPathSpaceAnchorPoint) {
+        fPointsData[fFanPtsIdx++] = m.transform(pt); // Ordinary case: extend the current fan.
+    } else {
+        this->startContour(m, pt); // Back at the anchor: close this fan and begin a new one.
+    }
+}
+
+// Emits a quadratic instance: P[2] is appended to the fan and P[1] becomes a control point.
+void GrCCPRCoverageOpsBuilder::quadraticTo(AccumulatingViewMatrix& m, const SkPoint P[3]) {
+    SkASSERT(fCurrPathIndices.fQuadratics < fBaseInstances[(int)fCurrScissorMode].fSerpentines);
+
+    this->fanTo(m, P[2]);
+    const int32_t controlPtIdx = fControlPtsIdx++;
+    fPointsData[controlPtIdx] = m.transform(P[1]);
+
+    // Record the control point and the index of the fan point that preceded P[2].
+    fInstanceData[fCurrPathIndices.fQuadratics++].fQuadraticData = {controlPtIdx, fFanPtsIdx - 2};
+}
+
+void GrCCPRCoverageOpsBuilder::cubicTo(AccumulatingViewMatrix& m, const SkPoint P[4]) {
+    double t[2], s[2]; // Homogeneous root parameters: root x lies at t[x]/s[x].
+    SkCubicType type = SkClassifyCubic(P, t, s);
+    // Lines and points add no curve geometry; just extend the fan to the endpoint.
+    if (SkCubicType::kLineOrPoint == type) {
+        this->fanTo(m, P[3]);
+        return;
+    }
+    // Quadratics: Q[1] is the intersection of the end tangents (lines P0->P1 and P3->P2).
+    if (SkCubicType::kQuadratic == type) {
+        SkScalar x1 = P[1].y() - P[0].y(),  y1 = P[0].x() - P[1].x(),
+                 k1 = x1 * P[0].x() + y1 * P[0].y();
+        SkScalar x2 = P[2].y() - P[3].y(),  y2 = P[3].x() - P[2].x(),
+                 k2 = x2 * P[3].x() + y2 * P[3].y();
+        SkScalar rdet = 1 / (x1*y2 - y1*x2);
+        SkPoint Q[3] = {P[0], {(y2*k1 - y1*k2) * rdet, (x1*k2 - x2*k1) * rdet}, P[3]};
+        this->quadraticTo(m, Q);
+        return;
+    }
+    // Otherwise, chop at each root strictly inside (0,1), emitting the piece that precedes it.
+    SkDCubic C;
+    C.set(P);
+
+    for (int x = 0; x <= 1; ++x) {
+        if (t[x] * s[x] <= 0) { // This is equivalent to tx/sx <= 0.
+            // This technically also gets taken if tx/sx = infinity, but the code still does
+            // the right thing in that edge case.
+            continue; // The root is not past the start of the curve; nothing to chop.
+        }
+        if (fabs(t[x]) >= fabs(s[x])) { // tx/sx >= 1.
+            break; // NOTE(review): assumes root 1 >= root 0, so it is out of range too.
+        }
+
+        const double chopT = double(t[x]) / double(s[x]);
+        SkASSERT(chopT >= 0 && chopT <= 1);
+        if (chopT <= 0 || chopT >= 1) { // floating-point error.
+            continue;
+        }
+
+        SkDCubicPair chopped = C.chopAt(chopT);
+
+        // Ensure the double points are identical if this is a loop (more workarounds for FP error).
+        if (SkCubicType::kLoop == type && 0 == t[0]) {
+            chopped.pts[3] = chopped.pts[0];
+        }
+
+        // (This might put ts0/ts1 out of order, but it doesn't matter anymore at this point.)
+        this->emitCubicSegment(m, type, chopped.first(),
+                               to_skpoint(t[1 - x], s[1 - x] * chopT), to_skpoint(1, 1));
+        t[x] = 0;
+        s[x] = 1;
+        // Re-express the other root relative to the remaining piece, [chopT, 1].
+        const double r = s[1 - x] * chopT;
+        t[1 - x] -= r;
+        s[1 - x] -= r;
+
+        C = chopped.second();
+    }
+    // Emit whatever remains as the final segment.
+    this->emitCubicSegment(m, type, C, to_skpoint(t[0], s[0]), to_skpoint(t[1], s[1]));
+}
+
+// Emits one cubic segment: two control points, the endpoint (onto the fan), and the two root
+// (t,s) pairs, followed by a serpentine or loop instance that references them.
+void GrCCPRCoverageOpsBuilder::emitCubicSegment(AccumulatingViewMatrix& m,
+                                                SkCubicType type, const SkDCubic& C,
+                                                const SkPoint& ts0, const SkPoint& ts1) {
+    SkASSERT(fCurrPathIndices.fSerpentines < fCurrPathIndices.fLoops);
+
+    const int32_t firstControlPtIdx = fControlPtsIdx;
+    fPointsData[fControlPtsIdx++] = m.transform(to_skpoint(C[1]));
+    fPointsData[fControlPtsIdx++] = m.transform(to_skpoint(C[2]));
+    this->fanTo(m, to_skpoint(C[3]));
+    // The root t,s values ride along untransformed, stored as extra "control points".
+    fPointsData[fControlPtsIdx++] = ts0;
+    fPointsData[fControlPtsIdx++] = ts1;
+
+    // Serpentines fill their range from the front; loops fill theirs from the back.
+    const int instanceIdx = SkCubicType::kLoop == type ? --fCurrPathIndices.fLoops
+                                                       : fCurrPathIndices.fSerpentines++;
+    fInstanceData[instanceIdx].fCubicData = {firstControlPtIdx, fFanPtsIdx - 2};
+}
+
+void GrCCPRCoverageOpsBuilder::closeContour() {
+    const int fanSize = fFanPtsIdx - fCurrContourStartIdx; // Fan points in the open contour.
+    if (fanSize >= 3) {
+        // Technically this can grow to fanSize + log3(fanSize), but we approximate with log2.
+        // Fill the scratch tail too: emitHierarchicalFan reads those slots when it saves them.
+        const int numIndices = fanSize + SkNextLog2(fanSize);
+        SkAutoSTMalloc<300, int32_t> indices(numIndices);
+        std::iota(indices.get(), indices.get() + numIndices, fCurrContourStartIdx);
+        this->emitHierarchicalFan(indices, fanSize);
+    }
+    fCurrContourStartIdx = fFanPtsIdx; // Reset: the next contour starts at the next fan point.
+}
+
+// Recursively triangulates the polygon whose vertices are indices[0..count-1]: emits the triangle
+// joining the vertices at 0, 1/3 and 2/3 of the way around, then recurses on the three leftover
+// sub-polygons. indices[count] must be writable; it is borrowed as scratch and restored.
+void GrCCPRCoverageOpsBuilder::emitHierarchicalFan(int32_t indices[], int count) {
+    if (count < 3) {
+        return;
+    }
+    SkASSERT(fCurrPathIndices.fTriangles < fBaseInstances[(int)fCurrScissorMode].fQuadratics);
+    const int32_t firstCut = count / 3;
+    const int32_t secondCut = (2 * count) / 3;
+    int32_t* const middle = indices + firstCut;
+    int32_t* const last = indices + secondCut;
+    fInstanceData[fCurrPathIndices.fTriangles++].fTriangleData = {indices[0], *middle, *last};
+
+    this->emitHierarchicalFan(indices, firstCut + 1);
+    this->emitHierarchicalFan(middle, secondCut - firstCut + 1);
+
+    // The last sub-polygon wraps around to vertex 0, so lend it a copy in the slot past the end.
+    const int32_t savedIndex = indices[count];
+    indices[count] = indices[0];
+    this->emitHierarchicalFan(last, count - secondCut + 1);
+    indices[count] = savedIndex;
+}
+
+// Wraps the paths saved so far in a CoverageOp; the builder keeps its references to the buffers.
+std::unique_ptr<GrDrawOp> GrCCPRCoverageOpsBuilder::createIntermediateOp(SkISize drawBounds) {
+    std::unique_ptr<GrDrawOp> op = skstd::make_unique<CoverageOp>(
+            drawBounds, fPointsBuffer, fInstanceBuffer, fBaseInstances, fInstanceIndices,
+            std::move(fScissorBatches));
+    SkASSERT(fScissorBatches.empty());
+    fBaseInstances[0] = fInstanceIndices[0]; // The next op's ranges begin where this one's end.
+    fBaseInstances[1] = fInstanceIndices[1];
+    return op;
+}
+
+// Unmaps both GPU buffers and hands them, with the remaining saved paths, to a final CoverageOp.
+std::unique_ptr<GrDrawOp> GrCCPRCoverageOpsBuilder::finalize(SkISize drawBounds) {
+    fPointsBuffer->unmap();
+    fInstanceBuffer->unmap();
+    // Nothing may be written through the data pointers anymore; null them in debug builds.
+    SkDEBUGCODE(fPointsData = nullptr);
+    SkDEBUGCODE(fInstanceData = nullptr);
+    return skstd::make_unique<CoverageOp>(drawBounds, std::move(fPointsBuffer),
+                                          std::move(fInstanceBuffer), fBaseInstances,
+                                          fInstanceIndices, std::move(fScissorBatches));
+}
+
+#ifdef SK_DEBUG
+// Debug-only sanity check: asserts that no write index has run past the end of its range.
+void GrCCPRCoverageOpsBuilder::validate() {
+    SkASSERT(fFanPtsIdx <= fMaxFanPoints);
+    SkASSERT(fControlPtsIdx <= fMaxFanPoints + fMaxControlPoints);
+    for (int mode = 0; mode < kNumScissorModes; ++mode) {
+        const PrimitiveTallies& next = fInstanceIndices[mode];
+        SkASSERT(next.fTriangles <= fBaseInstances[mode].fQuadratics);
+        SkASSERT(next.fQuadratics <= fBaseInstances[mode].fSerpentines);
+        SkASSERT(next.fSerpentines <= next.fLoops);
+    }
+}
+#endif
+
+using MaxBufferItems = GrCCPRCoverageOpsBuilder::MaxBufferItems;
+
+// Adds worst-case counts for everything parsing `path` could emit to this set of tallies.
+void MaxBufferItems::countPathItems(GrCCPRCoverageOpsBuilder::ScissorMode scissorMode,
+                                    const SkPath& path) {
+    MaxPrimitives& maxPrimitives = fMaxPrimitives[(int)scissorMode];
+    int contourFanPts = 0;
+    // Tallies a finished contour: every fan point, and (points - 2) triangles if there are any.
+    const auto flushContour = [&]() {
+        fMaxFanPoints += contourFanPts;
+        maxPrimitives.fMaxTriangles += SkTMax(0, contourFanPts - 2);
+    };
+    for (SkPath::Verb verb : SkPathPriv::Verbs(path)) {
+        switch (verb) {
+        case SkPath::kMove_Verb:
+        case SkPath::kClose_Verb:
+            flushContour();
+            contourFanPts = (SkPath::kMove_Verb == verb) ? 1 : 0; // A moveTo seeds the next fan.
+            continue;
+        case SkPath::kLine_Verb:
+            SkASSERT(contourFanPts > 0);
+            ++contourFanPts;
+            continue;
+        case SkPath::kQuad_Verb:
+            SkASSERT(contourFanPts > 0);
+            ++contourFanPts;
+            ++fMaxControlPoints;
+            ++maxPrimitives.fMaxQuadratics;
+            continue;
+        case SkPath::kCubic_Verb: {
+            SkASSERT(contourFanPts > 0);
+            // Over-allocate for the worst case, where the cubic is chopped into 3 segments.
+            static constexpr int kMaxSegments = 3;
+            contourFanPts += kMaxSegments;
+            // Each segment has two control points, plus two root t,s values stored alongside.
+            fMaxControlPoints += kMaxSegments * (2 + 2);
+            maxPrimitives.fMaxCubics += kMaxSegments;
+            // The cubic may also turn out to be a quadratic. While we over-allocate by a fair
+            // amount, this is still a relatively small amount of space.
+            ++maxPrimitives.fMaxQuadratics;
+            continue;
+        }
+        case SkPath::kConic_Verb:
+            SkASSERT(contourFanPts > 0);
+            SkFAIL("Conics are not supported.");
+        default:
+            SkFAIL("Unexpected path verb.");
+        }
+    }
+
+    flushContour(); // The final contour may not have ended with a close verb.
+    ++fMaxPaths;
+}
+
+using CoverageOp = GrCCPRCoverageOpsBuilder::CoverageOp;
+
+// Holds references to the points and instance buffers and records this op's instance ranges.
+CoverageOp::CoverageOp(const SkISize& drawBounds, sk_sp<GrBuffer> pointsBuffer,
+                       sk_sp<GrBuffer> trianglesBuffer,
+                       const PrimitiveTallies baseInstances[kNumScissorModes],
+                       const PrimitiveTallies endInstances[kNumScissorModes],
+                       SkTArray<ScissorBatch>&& scissorBatches)
+        : INHERITED(ClassID())
+        , fDrawBounds(drawBounds)
+        , fPointsBuffer(std::move(pointsBuffer))
+        , fTrianglesBuffer(std::move(trianglesBuffer))
+        , fBaseInstances{baseInstances[0], baseInstances[1]}
+        , fInstanceCounts{endInstances[0] - baseInstances[0], endInstances[1] - baseInstances[1]}
+        , fScissorBatches(std::move(scissorBatches)) {
+    SkASSERT(fPointsBuffer);
+    SkASSERT(fTrianglesBuffer);
+    this->setBounds(SkRect::MakeIWH(fDrawBounds.width(), fDrawBounds.height()),
+                    GrOp::HasAABloat::kNo, GrOp::IsZeroArea::kNo);
+}
+
+// Draws every coverage pass for this op in a fixed order: triangles, quadratics, then cubics.
+void CoverageOp::onExecute(GrOpFlushState* flushState) {
+    using Mode = GrCCPRCoverageProcessor::Mode;
+    SkDEBUGCODE(GrCCPRCoverageProcessor::Validate(flushState->drawOpArgs().fRenderTarget));
+
+    GrPipeline pipeline(flushState->drawOpArgs().fRenderTarget, GrPipeline::ScissorState::kEnabled,
+                        SkBlendMode::kPlus);
+    // Each pass emits at most one non-scissored mesh plus one mesh per scissor batch.
+    const int maxMeshesPerPass = 1 + fScissorBatches.count();
+    fMeshesScratchBuffer.reserve(maxMeshesPerPass);
+    fDynamicStatesScratchBuffer.reserve(maxMeshesPerPass);
+
+    auto constexpr kTriangles = GrCCPRCoverageProcessor::kTrianglesGrPrimitiveType;
+    auto constexpr kQuadratics = GrCCPRCoverageProcessor::kQuadraticsGrPrimitiveType;
+    auto constexpr kCubics = GrCCPRCoverageProcessor::kCubicsGrPrimitiveType;
+    struct Pass {
+        Mode fMode;
+        GrPrimitiveType fPrimType;
+        int fVertexCount;
+        int PrimitiveTallies::* fInstanceType;
+    };
+    const Pass kPasses[] = {
+        {Mode::kCombinedTriangleHullsAndEdges, kTriangles, 3, &PrimitiveTallies::fTriangles},
+        {Mode::kTriangleCorners, kTriangles, 3, &PrimitiveTallies::fTriangles},
+        {Mode::kQuadraticHulls, kQuadratics, 3, &PrimitiveTallies::fQuadratics},
+        {Mode::kQuadraticFlatEdges, kQuadratics, 3, &PrimitiveTallies::fQuadratics},
+        {Mode::kSerpentineInsets, kCubics, 4, &PrimitiveTallies::fSerpentines},
+        {Mode::kLoopInsets, kCubics, 4, &PrimitiveTallies::fLoops},
+        {Mode::kSerpentineBorders, kCubics, 4, &PrimitiveTallies::fSerpentines},
+        {Mode::kLoopBorders, kCubics, 4, &PrimitiveTallies::fLoops},
+    };
+    for (const Pass& pass : kPasses) {
+        this->drawMaskPrimitives(flushState, pipeline, pass.fMode, pass.fPrimType,
+                                 pass.fVertexCount, pass.fInstanceType);
+    }
+}
+
+// Issues one draw call for `mode`: a mesh for this op's non-scissored instances of the given
+// type (if any) plus a mesh per non-empty scissor batch, each paired with its scissor rect.
+void CoverageOp::drawMaskPrimitives(GrOpFlushState* flushState, const GrPipeline& pipeline,
+                                    GrCCPRCoverageProcessor::Mode mode, GrPrimitiveType primType,
+                                    int vertexCount, int PrimitiveTallies::* instanceType) const {
+    SkASSERT(pipeline.getScissorState().enabled());
+    fMeshesScratchBuffer.reset();
+    fDynamicStatesScratchBuffer.reset();
+    // Loops grow backwards, which is indicated by a negative instance count. Such a mesh begins
+    // |instanceCount| instances before baseInstance.
+    const auto appendMesh = [&](int instanceCount, int baseInstance, const SkIRect& scissor) {
+        GrMesh& mesh = fMeshesScratchBuffer.emplace_back(primType);
+        mesh.setInstanced(fTrianglesBuffer.get(), abs(instanceCount),
+                          baseInstance + SkTMin(instanceCount, 0), vertexCount);
+        fDynamicStatesScratchBuffer.push_back().fScissorRect = scissor;
+    };
+
+    const int kNonScissored = (int)ScissorMode::kNonScissored;
+    if (const int instanceCount = fInstanceCounts[kNonScissored].*instanceType) {
+        appendMesh(instanceCount, fBaseInstances[kNonScissored].*instanceType,
+                   SkIRect::MakeWH(fDrawBounds.width(), fDrawBounds.height()));
+    }
+
+    if (fInstanceCounts[(int)ScissorMode::kScissored].*instanceType) {
+        int baseInstance = fBaseInstances[(int)ScissorMode::kScissored].*instanceType;
+        for (const ScissorBatch& batch : fScissorBatches) {
+            SkASSERT(this->bounds().contains(batch.fScissor));
+            const int instanceCount = batch.fInstanceCounts.*instanceType;
+            if (instanceCount) {
+                appendMesh(instanceCount, baseInstance, batch.fScissor);
+                baseInstance += instanceCount;
+            }
+        }
+    }
+
+    SkASSERT(fMeshesScratchBuffer.count() == fDynamicStatesScratchBuffer.count());
+    if (fMeshesScratchBuffer.empty()) {
+        return;
+    }
+    GrCCPRCoverageProcessor proc(mode, fPointsBuffer.get());
+    flushState->commandBuffer()->draw(pipeline, proc, fMeshesScratchBuffer.begin(),
+                                      fDynamicStatesScratchBuffer.begin(),
+                                      fMeshesScratchBuffer.count(), this->bounds());
+}
+
+using PrimitiveTallies = CoverageOp::PrimitiveTallies;
+
+inline PrimitiveTallies PrimitiveTallies::operator+(const PrimitiveTallies& b) const {
+    // Component-wise sum; each primitive type is tallied independently.
+    PrimitiveTallies total = {fTriangles + b.fTriangles, fQuadratics + b.fQuadratics,
+                              fSerpentines + b.fSerpentines, fLoops + b.fLoops};
+    return total;
+}
+
+inline PrimitiveTallies PrimitiveTallies::operator-(const PrimitiveTallies& b) const {
+    // Component-wise difference; each primitive type is tallied independently.
+    PrimitiveTallies delta = {fTriangles - b.fTriangles, fQuadratics - b.fQuadratics,
+                              fSerpentines - b.fSerpentines, fLoops - b.fLoops};
+    return delta;
+}
+
+inline int PrimitiveTallies::sum() const {
+    return fLoops + fSerpentines + fQuadratics + fTriangles; // Total across all four types.
+}
+
+using AccumulatingViewMatrix = GrCCPRCoverageOpsBuilder::AccumulatingViewMatrix;
+
+inline AccumulatingViewMatrix::AccumulatingViewMatrix(const SkMatrix& m,
+                                                      const SkPoint& initialPoint) {
+    // m45 maps into "45 degree" space, which is where the octagon's diagonals are found. It is
+    // left unnormalized (no SK_ScalarRoot2Over2); that is harmless so long as the shader applies
+    // the matching inverse on the way back to device space.
+    SkMatrix m45;
+    m45.setSinCos(1, 1);
+    m45.preConcat(m);
+
+    // Lanes: {device x, device y, 45-degree x, 45-degree y}. fX/fY scale a point's x/y; fT offsets.
+    fX = Sk4f(m.getScaleX(), m.getSkewY(), m45.getScaleX(), m45.getSkewY());
+    fY = Sk4f(m.getSkewX(), m.getScaleY(), m45.getSkewX(), m45.getScaleY());
+    fT = Sk4f(m.getTranslateX(), m.getTranslateY(), m45.getTranslateX(), m45.getTranslateY());
+    // Both accumulated bounds start out at the transformed initial point.
+    const Sk4f yTerm = SkNx_fma(fY, Sk4f(initialPoint.y()), fT);
+    fTopLeft = fBottomRight = SkNx_fma(fX, Sk4f(initialPoint.x()), yTerm);
+}
+
+inline SkPoint AccumulatingViewMatrix::transform(const SkPoint& pt) {
+    // Maps pt into all four lanes at once (device x/y and 45-degree x/y).
+    const Sk4f yTerm = SkNx_fma(fY, Sk4f(pt.y()), fT);
+    const Sk4f mapped = SkNx_fma(fX, Sk4f(pt.x()), yTerm);
+    // Grow the accumulated bounds to include the new point.
+    fBottomRight = Sk4f::Max(fBottomRight, mapped);
+    fTopLeft = Sk4f::Min(fTopLeft, mapped);
+    // TODO: vst1_lane_f32? (Sk4f::storeLane?)
+    float lanes[4];
+    mapped.store(lanes);
+    return SkPoint::Make(lanes[0], lanes[1]);
+}
+
+inline void AccumulatingViewMatrix::getAccumulatedBounds(SkRect* devBounds,
+                                                         SkRect* devBounds45) const {
+    float mins[4], maxes[4]; // Lanes: device x, device y, 45-degree x, 45-degree y.
+    fTopLeft.store(mins);
+    fBottomRight.store(maxes);
+    devBounds->setLTRB(mins[0], mins[1], maxes[0], maxes[1]);
+    devBounds45->setLTRB(mins[2], mins[3], maxes[2], maxes[3]);
+}
diff --git a/src/gpu/ccpr/GrCCPRCoverageOpsBuilder.h b/src/gpu/ccpr/GrCCPRCoverageOpsBuilder.h
new file mode 100644
index 0000000..92d0203
--- /dev/null
+++ b/src/gpu/ccpr/GrCCPRCoverageOpsBuilder.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef GrCCPRCoverageOpsBuilder_DEFINED
+#define GrCCPRCoverageOpsBuilder_DEFINED
+
+#include "GrBuffer.h"
+#include "SkRefCnt.h"
+#include "SkRect.h"
+#include "ccpr/GrCCPRCoverageProcessor.h"
+
+class GrCCPRCoverageOp;
+class GrDrawOp;
+class GrOnFlushResourceProvider;
+class GrResourceProvider;
+class SkMatrix;
+class SkPath;
+struct SkDCubic;
+enum class SkCubicType;
+
+/**
+ * This class produces GrDrawOps that render coverage count masks and atlases. A path is added to
+ * the current op in two steps:
+ *
+ *   1) parsePath(ScissorMode, viewMatrix, path, &devBounds, &devBounds45);
+ *
+ *   <client decides where to put the mask within an atlas, if wanted>
+ *
+ *   2) saveParsedPath(clippedDevIBounds, atlasOffsetX, atlasOffsetY);
+ *
+ * The client can then produce a GrDrawOp for all currently saved paths by calling either
+ * createIntermediateOp() or finalize().
+ */
+class GrCCPRCoverageOpsBuilder {
+public:
+    // Indicates whether a path should enforce a scissor clip when rendering its mask. (Specified
+    // as an int because these values get used directly as indices into arrays.)
+    enum class ScissorMode : int {
+        kNonScissored = 0,
+        kScissored = 1
+    };
+    static constexpr int kNumScissorModes = 2;
+
+    struct MaxPrimitives { // Upper bounds on primitive counts for one scissor mode.
+        int fMaxTriangles = 0;
+        int fMaxQuadratics = 0;
+        int fMaxCubics = 0; // Cubic segments of either kind (serpentine or loop).
+
+        void operator+=(const MaxPrimitives&);
+        int sum() const;
+    };
+
+    struct MaxBufferItems { // Upper bounds on everything a set of paths can add to the buffers.
+        int             fMaxFanPoints = 0;
+        int             fMaxControlPoints = 0;
+        MaxPrimitives   fMaxPrimitives[kNumScissorModes];
+        int             fMaxPaths = 0;
+
+        void operator+=(const MaxBufferItems&);
+        void countPathItems(ScissorMode, const SkPath&); // Adds one path's worst-case counts.
+    };
+
+    GrCCPRCoverageOpsBuilder() : fScissorBatches(512) {
+        SkDEBUGCODE(fPointsData = nullptr;)
+        SkDEBUGCODE(fInstanceData = nullptr;)
+    }
+
+    bool init(GrOnFlushResourceProvider*, const MaxBufferItems&);
+
+    // Parses an SkPath into a temporary staging area. The path will not yet be included in the next
+    // Op until there is a matching call to saveParsedPath.
+    //
+    // Returns two tight bounding boxes: device space and "45 degree" (| 1 -1 | * devCoords) space.
+    //                                                                 | 1  1 |
+    void parsePath(ScissorMode, const SkMatrix&, const SkPath&, SkRect* devBounds,
+                   SkRect* devBounds45);
+
+    // Commits the currently-parsed path from the staging area to the GPU buffers and next Op. The
+    // atlas offset is a post-device-space translate; a scissored path's scissor is offset by it too.
+    void saveParsedPath(const SkIRect& clippedDevIBounds,
+                        int16_t atlasOffsetX, int16_t atlasOffsetY);
+
+    // Flushes all currently-saved paths to a GrDrawOp and leaves the GPU buffers open to accept
+    // new paths (e.g. for when an atlas runs out of space).
+    // NOTE: if there is a parsed path in the staging area, it will not be included. But the client
+    // may still call saveParsedPath to include it in a future Op.
+    std::unique_ptr<GrDrawOp> SK_WARN_UNUSED_RESULT createIntermediateOp(SkISize drawBounds);
+
+    // Flushes the remaining saved paths to a final GrDrawOp and closes off the GPU buffers. This
+    // must be called before attempting to draw any Ops produced by this class.
+    std::unique_ptr<GrDrawOp> SK_WARN_UNUSED_RESULT finalize(SkISize drawBounds);
+
+    class CoverageOp;
+    class AccumulatingViewMatrix;
+
+private:
+    using PrimitiveInstance = GrCCPRCoverageProcessor::PrimitiveInstance;
+
+    struct PrimitiveTallies { // Per-primitive-type counts or buffer indices, depending on use.
+        int fTriangles;
+        int fQuadratics;
+        int fSerpentines;
+        int fLoops; // Loop instances are written back-to-front, so differences can be negative.
+
+        PrimitiveTallies operator+(const PrimitiveTallies&) const;
+        PrimitiveTallies operator-(const PrimitiveTallies&) const;
+        int sum() const;
+    };
+
+    struct ScissorBatch { // The instances one scissored path added, and the scissor to draw with.
+        PrimitiveTallies   fInstanceCounts;
+        SkIRect            fScissor;
+    };
+
+    void startContour(AccumulatingViewMatrix&, const SkPoint& anchorPoint);
+    void fanTo(AccumulatingViewMatrix&, const SkPoint& pt);
+    void quadraticTo(AccumulatingViewMatrix&, const SkPoint P[3]);
+    void cubicTo(AccumulatingViewMatrix&, const SkPoint P[4]);
+    void emitCubicSegment(AccumulatingViewMatrix&, SkCubicType, const SkDCubic&,
+                          const SkPoint& ts0, const SkPoint& ts1);
+    void closeContour(); // Triangulates the current contour's fan points, then resets it.
+    void emitHierarchicalFan(int32_t indices[], int count);
+    SkDEBUGCODE(void validate();)
+
+    ScissorMode              fCurrScissorMode;
+    PrimitiveTallies         fCurrPathIndices; // Next write index per type for the staged path.
+    int32_t                  fCurrContourStartIdx; // First fan point of the contour being built.
+    SkPoint                  fCurrPathSpaceAnchorPoint;
+
+    sk_sp<GrBuffer>          fPointsBuffer;
+    SkPoint*                 fPointsData; // Where points get written; nulled (debug) by finalize.
+    int32_t                  fFanPtsIdx; // Next free fan-point slot in fPointsData.
+    int32_t                  fControlPtsIdx; // Next free control-point slot in fPointsData.
+    SkDEBUGCODE(int          fMaxFanPoints;) // Debug-only capacity, checked by validate().
+    SkDEBUGCODE(int          fMaxControlPoints;) // Debug-only capacity, checked by validate().
+
+    sk_sp<GrBuffer>          fInstanceBuffer;
+    PrimitiveInstance*       fInstanceData; // Where instances get written; nulled by finalize.
+    PrimitiveTallies         fBaseInstances[kNumScissorModes]; // Start of the next op's ranges.
+    PrimitiveTallies         fInstanceIndices[kNumScissorModes]; // End of the saved paths.
+
+    SkTArray<ScissorBatch>   fScissorBatches; // One entry per saved scissored path.
+};
+
+inline void GrCCPRCoverageOpsBuilder::MaxBufferItems::operator+=(const MaxBufferItems& b) {
+    fMaxPaths += b.fMaxPaths;
+    fMaxFanPoints += b.fMaxFanPoints;
+    fMaxControlPoints += b.fMaxControlPoints;
+    fMaxPrimitives[0] += b.fMaxPrimitives[0]; // ScissorMode::kNonScissored
+    fMaxPrimitives[1] += b.fMaxPrimitives[1]; // ScissorMode::kScissored
+}
+
+inline void GrCCPRCoverageOpsBuilder::MaxPrimitives::operator+=(const MaxPrimitives& b) {
+    fMaxCubics += b.fMaxCubics; // Each primitive type accumulates independently.
+    fMaxQuadratics += b.fMaxQuadratics;
+    fMaxTriangles += b.fMaxTriangles;
+}
+
+inline int GrCCPRCoverageOpsBuilder::MaxPrimitives::sum() const {
+    return fMaxCubics + fMaxQuadratics + fMaxTriangles; // Total across all primitive types.
+}
+
+#endif
diff --git a/src/gpu/ccpr/GrCCPRCoverageProcessor.cpp b/src/gpu/ccpr/GrCCPRCoverageProcessor.cpp
new file mode 100644
index 0000000..5f1833a
--- /dev/null
+++ b/src/gpu/ccpr/GrCCPRCoverageProcessor.cpp
@@ -0,0 +1,355 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "GrCCPRCoverageProcessor.h"
+
+#include "ccpr/GrCCPRTriangleProcessor.h"
+#include "ccpr/GrCCPRQuadraticProcessor.h"
+#include "ccpr/GrCCPRCubicProcessor.h"
+#include "glsl/GrGLSLFragmentShaderBuilder.h"
+#include "glsl/GrGLSLGeometryShaderBuilder.h"
+#include "glsl/GrGLSLProgramBuilder.h"
+#include "glsl/GrGLSLVertexShaderBuilder.h"
+
+const char* GrCCPRCoverageProcessor::GetProcessorName(Mode mode) { // Name string for `mode`.
+    switch (mode) { // No default case, so the compiler can flag any Mode left unhandled.
+        case Mode::kTriangleHulls:
+            return "GrCCPRTriangleHullAndEdgeProcessor (hulls)";
+        case Mode::kTriangleEdges:
+            return "GrCCPRTriangleHullAndEdgeProcessor (edges)";
+        case Mode::kCombinedTriangleHullsAndEdges:
+            return "GrCCPRTriangleHullAndEdgeProcessor (combined hulls & edges)";
+        case Mode::kTriangleCorners:
+            return "GrCCPRTriangleCornerProcessor";
+        case Mode::kQuadraticHulls:
+            return "GrCCPRQuadraticHullProcessor";
+        case Mode::kQuadraticFlatEdges:
+            return "GrCCPRQuadraticSharedEdgeProcessor";
+        case Mode::kSerpentineInsets:
+            return "GrCCPRCubicInsetProcessor (serpentine)";
+        case Mode::kSerpentineBorders:
+            return "GrCCPRCubicBorderProcessor (serpentine)";
+        case Mode::kLoopInsets:
+            return "GrCCPRCubicInsetProcessor (loop)";
+        case Mode::kLoopBorders:
+            return "GrCCPRCubicBorderProcessor (loop)";
+    }
+    SkFAIL("Unexpected ccpr coverage processor mode."); // `mode` matched no enumerator.
+    return nullptr;
+}
+
+GrCCPRCoverageProcessor::GrCCPRCoverageProcessor(Mode mode, GrBuffer* pointsBuffer)
+        : fMode(mode)
+        , fInstanceAttrib(this->addInstanceAttrib("instance", kVec4i_GrVertexAttribType,
+                                                  kHigh_GrSLPrecision)) {
+    fPointsBufferAccess.reset(kRG_float_GrPixelConfig, pointsBuffer, kVertex_GrShaderFlag);
+    this->addBufferAccess(&fPointsBufferAccess); // Points are read as RG-float texels in the VS.
+    // onEmitCode emits a geometry shader for every mode.
+    this->setWillUseGeoShader();
+
+    this->initClassID<GrCCPRCoverageProcessor>();
+}
+
+void GrCCPRCoverageProcessor::getGLSLProcessorKey(const GrShaderCaps&,
+                                                  GrProcessorKeyBuilder* b) const {
+    b->add32(static_cast<int>(fMode)); // createGLSLInstance picks the GLSL processor from fMode.
+}
+
+GrGLSLPrimitiveProcessor* GrCCPRCoverageProcessor::createGLSLInstance(const GrShaderCaps&) const {
+    using GeometryType = GrCCPRTriangleHullAndEdgeProcessor::GeometryType;
+    using CubicType = GrCCPRCubicProcessor::Type;
+    switch (fMode) { // No default case, so the compiler can flag any Mode left unhandled.
+        case Mode::kTriangleHulls:
+            return new GrCCPRTriangleHullAndEdgeProcessor(GeometryType::kHulls);
+        case Mode::kTriangleEdges:
+            return new GrCCPRTriangleHullAndEdgeProcessor(GeometryType::kEdges);
+        case Mode::kCombinedTriangleHullsAndEdges:
+            return new GrCCPRTriangleHullAndEdgeProcessor(GeometryType::kHullsAndEdges);
+        case Mode::kTriangleCorners:
+            return new GrCCPRTriangleCornerProcessor();
+        case Mode::kQuadraticHulls:
+            return new GrCCPRQuadraticHullProcessor();
+        case Mode::kQuadraticFlatEdges:
+            return new GrCCPRQuadraticSharedEdgeProcessor();
+        case Mode::kSerpentineInsets:
+            return new GrCCPRCubicInsetProcessor(CubicType::kSerpentine);
+        case Mode::kSerpentineBorders:
+            return new GrCCPRCubicBorderProcessor(CubicType::kSerpentine);
+        case Mode::kLoopInsets:
+            return new GrCCPRCubicInsetProcessor(CubicType::kLoop);
+        case Mode::kLoopBorders:
+            return new GrCCPRCubicBorderProcessor(CubicType::kLoop);
+    }
+    SkFAIL("Unexpected ccpr coverage processor mode.");
+    return nullptr;
+}
+
+using PrimitiveProcessor = GrCCPRCoverageProcessor::PrimitiveProcessor;
+
+// Declares the coverage varying, then emits the vertex, geometry and fragment (coverage) code.
+void PrimitiveProcessor::onEmitCode(EmitArgs& args, GrGPArgs* gpArgs) {
+    const GrCCPRCoverageProcessor& proc = args.fGP.cast<GrCCPRCoverageProcessor>();
+    GrGLSLVaryingHandler* const varyings = args.fVaryingHandler;
+
+    switch (fCoverageType) {
+        case CoverageType::kInterpolated:
+            varyings->addVarying("coverage_times_wind", &fFragCoverageTimesWind,
+                                 kMedium_GrSLPrecision);
+            break;
+        case CoverageType::kOne:
+        case CoverageType::kShader:
+            varyings->addFlatVarying("wind", &fFragWind, kLow_GrSLPrecision);
+            break;
+    }
+    this->resetVaryings(varyings);
+    varyings->emitAttributes(proc);
+
+    this->emitVertexShader(proc, args.fVertBuilder, args.fTexelBuffers[0], args.fRTAdjustName,
+                           gpArgs);
+    this->emitGeometryShader(proc, args.fGeomBuilder, args.fRTAdjustName);
+    this->emitCoverage(proc, args.fFragBuilder, args.fOutputColor, args.fOutputCoverage);
+
+    SkASSERT(!args.fFPCoordTransformHandler->nextCoordTransform());
+}
+
+void PrimitiveProcessor::emitVertexShader(const GrCCPRCoverageProcessor& proc,
+                                          GrGLSLVertexBuilder* v,
+                                          const TexelBufferHandle& pointsBuffer,
+                                          const char* rtAdjust, GrGPArgs* gpArgs) const {
+    v->codeAppendf("int packedoffset = %s.w;", proc.instanceAttrib());
+    v->codeAppend ("highp vec2 atlasoffset = vec2((packedoffset<<16) >> 16, packedoffset >> 16);");
+    // ^ x is the sign-extended low 16 bits of the packed offset and y is its high 16 bits.
+    this->onEmitVertexShader(proc, v, pointsBuffer, "atlasoffset", rtAdjust, gpArgs);
+}
+
+void PrimitiveProcessor::emitGeometryShader(const GrCCPRCoverageProcessor& proc,
+                                            GrGLSLGeometryBuilder* g, const char* rtAdjust) const {
+    g->declareGlobal(fGeomWind);
+    this->emitWind(g, rtAdjust, fGeomWind.c_str());
+    // Build a GLSL helper, emitVertex(position, coverage), that sets the varyings and emits.
+    SkString emitVertexFn;
+    SkSTArray<2, GrShaderVar> emitArgs; // Holds emitVertex's two parameters.
+    const char* position = emitArgs.emplace_back("position", kVec2f_GrSLType,
+                                                 GrShaderVar::kNonArray,
+                                                 kHigh_GrSLPrecision).c_str();
+    const char* coverage = emitArgs.emplace_back("coverage", kFloat_GrSLType,
+                                                 GrShaderVar::kNonArray,
+                                                 kHigh_GrSLPrecision).c_str();
+    g->emitFunction(kVoid_GrSLType, "emitVertex", emitArgs.count(), emitArgs.begin(), [&]() {
+        SkString fnBody;
+        this->emitPerVertexGeometryCode(&fnBody, position, coverage, fGeomWind.c_str());
+        if (fFragWind.gsOut()) {
+            fnBody.appendf("%s = %s;", fFragWind.gsOut(), fGeomWind.c_str());
+        }
+        if (fFragCoverageTimesWind.gsOut()) {
+            fnBody.appendf("%s = %s * %s;",
+                           fFragCoverageTimesWind.gsOut(), coverage, fGeomWind.c_str());
+        }
+        fnBody.append ("gl_Position = vec4(position, 0, 1);"); // Literal name of emitArgs[0].
+        fnBody.append ("EmitVertex();");
+        return fnBody;
+    }().c_str(), &emitVertexFn);
+
+    g->codeAppendf("highp vec2 bloat = %f * abs(%s.xz);", kAABloatRadius, rtAdjust);
+    // Debug visualizations scale the bloat by kDebugBloat.
+#ifdef SK_DEBUG
+    if (proc.debugVisualizations()) {
+        g->codeAppendf("bloat *= %f;", GrCCPRCoverageProcessor::kDebugBloat);
+    }
+#endif
+
+    return this->onEmitGeometryShader(g, emitVertexFn.c_str(), fGeomWind.c_str(), rtAdjust);
+}
+
+int PrimitiveProcessor::emitHullGeometry(GrGLSLGeometryBuilder* g, const char* emitVertexFn,
+                                         const char* polygonPts, int numSides,
+                                         const char* wedgeIdx, const char* insetPts) const {
+    SkASSERT(numSides >= 3);
+    // With no inset polygon, the wedge's inner vertex is the average of the polygon's points.
+    if (!insetPts) {
+        g->codeAppendf("highp vec2 centroidpt = %s * vec%i(%f);",
+                       polygonPts, numSides, 1.0 / numSides);
+    }
+
+    g->codeAppendf("int previdx = (%s + %i) %% %i, "
+                       "nextidx = (%s + 1) %% %i;",
+                   wedgeIdx, numSides - 1, numSides, wedgeIdx, numSides);
+    // Which neighbor is "left" and which is "right" depends on the winding direction.
+    g->codeAppendf("highp vec2 self = %s[%s];"
+                   "highp int leftidx = %s > 0 ? previdx : nextidx;"
+                   "highp int rightidx = %s > 0 ? nextidx : previdx;",
+                   polygonPts, wedgeIdx, fGeomWind.c_str(), fGeomWind.c_str());
+
+    // Which quadrant does the vector from self -> right fall into?
+    g->codeAppendf("highp vec2 right = %s[rightidx];", polygonPts);
+    if (3 == numSides) {
+        // TODO: evaluate perf gains.
+        g->codeAppend ("highp vec2 qsr = sign(right - self);");
+    } else {
+        SkASSERT(4 == numSides);
+        g->codeAppendf("highp vec2 diag = %s[(%s + 2) %% 4];", polygonPts, wedgeIdx);
+        g->codeAppend ("highp vec2 qsr = sign((right != self ? right : diag) - self);");
+    }
+
+    // Which quadrant does the vector from left -> self fall into?
+    g->codeAppendf("highp vec2 qls = sign(self - %s[leftidx]);", polygonPts);
+
+    // dr2 just helps us reduce triangle counts with orthogonal, axis-aligned lines.
+    // TODO: evaluate perf gains.
+    const char* dr2 = "dr";
+    if (3 == numSides) {
+        // TODO: evaluate perf gains.
+        g->codeAppend ("highp vec2 dr = vec2(qsr.y != 0 ? +qsr.y : +qsr.x, "
+                                            "qsr.x != 0 ? -qsr.x : +qsr.y);");
+        g->codeAppend ("highp vec2 dr2 = vec2(qsr.y != 0 ? +qsr.y : -qsr.x, "
+                                             "qsr.x != 0 ? -qsr.x : -qsr.y);");
+        g->codeAppend ("highp vec2 dl = vec2(qls.y != 0 ? +qls.y : +qls.x, "
+                                            "qls.x != 0 ? -qls.x : +qls.y);");
+        dr2 = "dr2";
+    } else {
+        g->codeAppend ("highp vec2 dr = vec2(qsr.y != 0 ? +qsr.y : 1, "
+                                            "qsr.x != 0 ? -qsr.x : 1);");
+        g->codeAppend ("highp vec2 dl = (qls == vec2(0)) ? dr : vec2(qls.y != 0 ? +qls.y : 1, "
+                                                                    "qls.x != 0 ? -qls.x : 1);");
+    }
+    g->codeAppendf("bvec2 dnotequal = notEqual(%s, dl);", dr2);
+
+    // Emit one wedge (1/numSides) of the convex hull of pixel-size boxes centered on the vertices.
+    // Each invocation emits a different wedge.
+    if (insetPts) {
+        g->codeAppendf("%s(%s[rightidx], 1);", emitVertexFn, insetPts);
+    }
+    g->codeAppendf("%s(right + bloat * dr, 1);", emitVertexFn);
+    if (insetPts) {
+        g->codeAppendf("%s(%s[%s], 1);", emitVertexFn, insetPts, wedgeIdx);
+    } else {
+        g->codeAppendf("%s(centroidpt, 1);", emitVertexFn);
+    }
+    g->codeAppendf("%s(self + bloat * %s, 1);", emitVertexFn, dr2);
+    g->codeAppend ("if (any(dnotequal)) {");
+    g->codeAppendf(    "%s(self + bloat * dl, 1);", emitVertexFn);
+    g->codeAppend ("}");
+    g->codeAppend ("if (all(dnotequal)) {");
+    g->codeAppendf(    "%s(self + bloat * vec2(-dl.y, dl.x), 1);", emitVertexFn);
+    g->codeAppend ("}");
+    g->codeAppend ("EndPrimitive();");
+
+    return insetPts ? 6 : 5; // Max emitVertexFn calls above, with and without the inset border.
+}
+
+int PrimitiveProcessor::emitEdgeGeometry(GrGLSLGeometryBuilder* g, const char* emitVertexFn,
+                                         const char* leftPt, const char* rightPt,
+                                         const char* distanceEquation) const {
+    if (!distanceEquation) { // The caller has not emitted a distance equation; emit one here.
+        this->emitEdgeDistanceEquation(g, leftPt, rightPt, "highp vec3 edge_distance_equation");
+        distanceEquation = "edge_distance_equation";
+    }
+
+    // qlr is defined in emitEdgeDistanceEquation.
+    g->codeAppendf("highp mat2 endpts = mat2(%s - bloat * qlr, %s + bloat * qlr);",
+                   leftPt, rightPt);
+    g->codeAppendf("mediump vec2 endpts_coverage = %s.xy * endpts + %s.z;",
+                   distanceEquation, distanceEquation);
+
+    // d1 is defined in emitEdgeDistanceEquation. Axis-aligned edges become a plain rectangle.
+    g->codeAppend ("highp vec2 d2 = d1;");
+    g->codeAppend ("bool aligned = qlr.x == 0 || qlr.y == 0;");
+    g->codeAppend ("if (aligned) {");
+    g->codeAppend (    "d1 -= qlr;");
+    g->codeAppend (    "d2 += qlr;");
+    g->codeAppend ("}");
+
+    // Emit the convex hull of 2 pixel-size boxes centered on the endpoints of the edge. Each
+    // invocation emits a different edge. Emit negative coverage that subtracts the appropriate
+    // amount back out from the hull we drew above.
+    g->codeAppend ("if (!aligned) {");
+    g->codeAppendf(    "%s(endpts[0], endpts_coverage[0]);", emitVertexFn);
+    g->codeAppend ("}");
+    g->codeAppendf("%s(%s + bloat * d1, -1);", emitVertexFn, leftPt);
+    g->codeAppendf("%s(%s - bloat * d2, 0);", emitVertexFn, leftPt);
+    g->codeAppendf("%s(%s + bloat * d2, -1);", emitVertexFn, rightPt);
+    g->codeAppendf("%s(%s - bloat * d1, 0);", emitVertexFn, rightPt);
+    g->codeAppend ("if (!aligned) {");
+    g->codeAppendf(    "%s(endpts[1], endpts_coverage[1]);", emitVertexFn);
+    g->codeAppend ("}");
+    g->codeAppend ("EndPrimitive();");
+
+    return 6; // 4 unconditional vertices + 2 that only non-axis-aligned edges emit.
+}
+
+void PrimitiveProcessor::emitEdgeDistanceEquation(GrGLSLGeometryBuilder* g,
+                                                  const char* leftPt, const char* rightPt,
+                                                  const char* outputDistanceEquation) const {
+    // Which quadrant does the vector from left -> right fall into?
+    g->codeAppendf("highp vec2 qlr = sign(%s - %s);", rightPt, leftPt);
+    g->codeAppend ("highp vec2 d1 = vec2(qlr.y, -qlr.x);");
+    // n is perpendicular to the edge; kk holds dot(n, pt) at the two points left +/- bloat * d1.
+    g->codeAppendf("highp vec2 n = vec2(%s.y - %s.y, %s.x - %s.x);",
+                   rightPt, leftPt, leftPt, rightPt);
+    g->codeAppendf("highp vec2 kk = n * mat2(%s + bloat * d1, %s - bloat * d1);", leftPt, leftPt);
+    // Clamp for when n=0. wind=0 when n=0 so as long as we don't get Inf or NaN we are fine.
+    g->codeAppendf("highp float scale = 1 / max(kk[0] - kk[1], 1e-30);");
+    // For n != 0 the result is -1 at (left + bloat * d1) and 0 at (left - bloat * d1).
+    g->codeAppendf("%s = vec3(-n, kk[1]) * scale;", outputDistanceEquation);
+}
+
+void PrimitiveProcessor::emitCoverage(const GrCCPRCoverageProcessor& proc, GrGLSLFragmentBuilder* f,
+                                      const char* outputColor, const char* outputCoverage) const {
+    switch (fCoverageType) {
+        case CoverageType::kOne:
+        case CoverageType::kInterpolated:
+            // Both modes forward a varying that the geometry shader already signed by wind.
+            f->codeAppendf("%s.a = %s;", outputColor, (CoverageType::kOne == fCoverageType)
+                           ? fFragWind.fsIn() : fFragCoverageTimesWind.fsIn());
+            break;
+        case CoverageType::kShader:
+            f->codeAppendf("mediump float coverage = 0;");
+            this->emitShaderCoverage(f, "coverage");
+            f->codeAppendf("%s.a = coverage * %s;", outputColor, fFragWind.fsIn());
+            break;
+    }
+
+    f->codeAppendf("%s = vec4(1);", outputCoverage);
+
+#ifdef SK_DEBUG
+    if (proc.debugVisualizations()) {
+        f->codeAppendf("%s = vec4(-%s.a, %s.a, 0, 1);", outputColor, outputColor, outputColor);
+    }
+#endif
+}
+
+int PrimitiveProcessor::defineSoftSampleLocations(GrGLSLFragmentBuilder* f,
+                                                  const char* samplesName) const {
+    // Standard DX11 sample locations (offsets from pixel center): 8 on Android/iOS, 16 otherwise.
+#if defined(SK_BUILD_FOR_ANDROID) || defined(SK_BUILD_FOR_IOS)
+    f->defineConstant("highp vec2[8]", samplesName, "vec2[8]("
+        "vec2(+1, -3)/16, vec2(-1, +3)/16, vec2(+5, +1)/16, vec2(-3, -5)/16, "
+        "vec2(-5, +5)/16, vec2(-7, -1)/16, vec2(+3, +7)/16, vec2(+7, -7)/16."
+    ")");
+    return 8;
+#else
+    f->defineConstant("highp vec2[16]", samplesName, "vec2[16]("
+        "vec2(+1, +1)/16, vec2(-1, -3)/16, vec2(-3, +2)/16, vec2(+4, -1)/16, "
+        "vec2(-5, -2)/16, vec2(+2, +5)/16, vec2(+5, +3)/16, vec2(+3, -5)/16, "
+        "vec2(-2, +6)/16, vec2( 0, -7)/16, vec2(-4, -6)/16, vec2(-6, +4)/16, "
+        "vec2(-8,  0)/16, vec2(+7, -4)/16, vec2(+6, +7)/16, vec2(-7, -8)/16."
+    ")");
+    return 16;
+#endif
+}
+
+#ifdef SK_DEBUG
+
+#include "GrRenderTarget.h"
+
+void GrCCPRCoverageProcessor::Validate(GrRenderTarget* atlasTexture) {
+    SkASSERT(kAtlasOrigin == atlasTexture->origin());
+    SkASSERT(GrPixelConfigIsAlphaOnly(atlasTexture->config())); // Coverage is single-channel.
+    SkASSERT(GrPixelConfigIsFloatingPoint(atlasTexture->config())); // Counts can be negative.
+}
+
+#endif
diff --git a/src/gpu/ccpr/GrCCPRCoverageProcessor.h b/src/gpu/ccpr/GrCCPRCoverageProcessor.h
new file mode 100644
index 0000000..86f7d46
--- /dev/null
+++ b/src/gpu/ccpr/GrCCPRCoverageProcessor.h
@@ -0,0 +1,253 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef GrCCPRCoverageProcessor_DEFINED
+#define GrCCPRCoverageProcessor_DEFINED
+
+#include "GrGeometryProcessor.h"
+#include "glsl/GrGLSLGeometryProcessor.h"
+#include "glsl/GrGLSLVarying.h"
+
+class GrGLSLFragmentBuilder;
+
+/**
+ * This is the geometry processor for the simple convex primitive shapes (triangles and closed curve
+ * segments) from which ccpr paths are composed. The output is a single-channel alpha value,
+ * positive for clockwise primitives and negative for counter-clockwise, that indicates coverage.
+ *
+ * The caller is responsible for rendering all modes for all applicable primitives into a cleared,
+ * floating point, alpha-only render target using SkBlendMode::kPlus. Once all of a path's
+ * primitives have been drawn, the render target contains a composite coverage count that can then
+ * be used to draw the path (see GrCCPRPathProcessor).
+ *
+ * Caller provides the primitives' (x,y) points in an fp32x2 (RG) texel buffer, and an instance
+ * buffer with a single int32x4 attrib for each primitive (defined below). There are no vertex
+ * attribs.
+ *
+ * Draw calls are instanced, with one vertex per bezier point (3 for triangles). They use the
+ * corresponding GrPrimitiveType as defined below.
+ */
+class GrCCPRCoverageProcessor : public GrGeometryProcessor {
+public:
+    // Use top-left to avoid a uniform access in the fragment shader.
+    static constexpr GrSurfaceOrigin kAtlasOrigin = kTopLeft_GrSurfaceOrigin;
+
+    static constexpr GrPrimitiveType kTrianglesGrPrimitiveType = GrPrimitiveType::kTriangles;
+    static constexpr GrPrimitiveType kQuadraticsGrPrimitiveType = GrPrimitiveType::kTriangles;
+    static constexpr GrPrimitiveType kCubicsGrPrimitiveType = GrPrimitiveType::kLinesAdjacency;
+
+    struct PrimitiveInstance {
+        union {
+            struct {
+                int32_t fPt0Idx;
+                int32_t fPt1Idx;
+                int32_t fPt2Idx;
+            } fTriangleData;
+
+            struct {
+                int32_t fControlPtIdx;
+                int32_t fEndPtsIdx; // The endpoints (P0 and P2) are adjacent in the texel buffer.
+            } fQuadraticData;
+
+            struct {
+                int32_t fControlPtsKLMRootsIdx; // The control points (P1 and P2) are adjacent in
+                                                // the texel buffer, followed immediately by the
+                                                // homogeneous KLM roots ({tl,sl}, {tm,sm}).
+                int32_t fEndPtsIdx; // The endpoints (P0 and P3) are adjacent in the texel buffer.
+            } fCubicData;
+        };
+
+        int32_t fPackedAtlasOffset; // (offsetY << 16) | (offsetX & 0xffff)
+    };
+
+    GR_STATIC_ASSERT(4 * 4 == sizeof(PrimitiveInstance));
+
+    enum class Mode {
+        // Triangles.
+        kTriangleHulls,
+        kTriangleEdges,
+        kCombinedTriangleHullsAndEdges,
+        kTriangleCorners,
+
+        // Quadratics.
+        kQuadraticHulls,
+        kQuadraticFlatEdges,
+
+        // Cubics.
+        kSerpentineInsets,
+        kSerpentineBorders,
+        kLoopInsets,
+        kLoopBorders
+    };
+    static const char* GetProcessorName(Mode);
+
+    GrCCPRCoverageProcessor(Mode, GrBuffer* pointsBuffer);
+
+    const char* instanceAttrib() const { return fInstanceAttrib.fName; }
+    const char* name() const override { return GetProcessorName(fMode); }
+    SkString dumpInfo() const override {
+        return SkStringPrintf("%s\n%s", this->name(), this->INHERITED::dumpInfo().c_str());
+    }
+
+    void getGLSLProcessorKey(const GrShaderCaps&, GrProcessorKeyBuilder*) const override;
+    GrGLSLPrimitiveProcessor* createGLSLInstance(const GrShaderCaps&) const override;
+
+#ifdef SK_DEBUG
+    static constexpr float kDebugBloat = 50;
+
+    // Increases the 1/2 pixel AA bloat by a factor of kDebugBloat and outputs color instead of
+    // coverage (coverage=+1 -> green, coverage=0 -> black, coverage=-1 -> red).
+    void enableDebugVisualizations() { fDebugVisualizations = true; }
+    bool debugVisualizations() const { return fDebugVisualizations; }
+
+    static void Validate(GrRenderTarget* atlasTexture);
+#endif
+
+    class PrimitiveProcessor;
+
+private:
+    const Mode         fMode; // The primitive mode this processor was constructed for.
+    const Attribute&   fInstanceAttrib; // The per-instance int32x4 attrib (see PrimitiveInstance).
+    BufferAccess       fPointsBufferAccess; // Presumably wraps the points texel buffer; confirm.
+    SkDEBUGCODE(bool   fDebugVisualizations = false;)
+
+    typedef GrGeometryProcessor INHERITED;
+};
+
+/**
+ * This class represents the actual SKSL implementation for the various primitives and modes of
+ * GrCCPRCoverageProcessor.
+ */
+class GrCCPRCoverageProcessor::PrimitiveProcessor : public GrGLSLGeometryProcessor {
+protected:
+    // Slightly undershoot a bloat radius of 0.5 so vertices that fall on integer boundaries don't
+    // accidentally bleed into neighbor pixels.
+    static constexpr float kAABloatRadius = 0.491111f;
+
+    // Specifies how the fragment shader should calculate sk_FragColor.a.
+    enum class CoverageType {
+        kOne, // Output +1 all around, modulated by wind.
+        kInterpolated, // Interpolate the coverage values that the geometry shader associates with
+                       // each point, modulated by wind.
+        kShader // Call emitShaderCoverage and let the subclass decide, then modulate by wind.
+    };
+
+    PrimitiveProcessor(CoverageType coverageType)
+            : fCoverageType(coverageType)
+            , fGeomWind("wind", kFloat_GrSLType, GrShaderVar::kNonArray, kLow_GrSLPrecision)
+            , fFragWind(kFloat_GrSLType)
+            , fFragCoverageTimesWind(kFloat_GrSLType) {}
+
+    // Called before generating shader code. Subclass should add its custom varyings to the handler
+    // and update its corresponding internal member variables.
+    virtual void resetVaryings(GrGLSLVaryingHandler*) {}
+
+    // Here the subclass fetches its vertex from the texel buffer, translates by atlasOffset, and
+    // sets "fPositionVar" in the GrGPArgs.
+    virtual void onEmitVertexShader(const GrCCPRCoverageProcessor&, GrGLSLVertexBuilder*,
+                                    const TexelBufferHandle& pointsBuffer, const char* atlasOffset,
+                                    const char* rtAdjust, GrGPArgs*) const = 0;
+
+    // Here the subclass determines the winding direction of its primitive. It must write a value of
+    // either -1, 0, or +1 to "outputWind" (e.g. "sign(area)"). Fractional values are not valid.
+    virtual void emitWind(GrGLSLGeometryBuilder*, const char* rtAdjust,
+                          const char* outputWind) const = 0;
+
+    // This is where the subclass generates the actual geometry to be rasterized by hardware:
+    //
+    //   emitVertexFn(point1, coverage);
+    //   emitVertexFn(point2, coverage);
+    //   ...
+    //   EndPrimitive();
+    //
+    // Generally a subclass will want to use emitHullGeometry and/or emitEdgeGeometry rather than
+    // calling emitVertexFn directly.
+    //
+    // Subclass must also call GrGLSLGeometryBuilder::configure.
+    virtual void onEmitGeometryShader(GrGLSLGeometryBuilder*, const char* emitVertexFn,
+                                      const char* wind, const char* rtAdjust) const = 0;
+
+    // This is a hook to inject code in the geometry shader's "emitVertex" function. Subclass
+    // should use this to write values to its custom varyings.
+    // NOTE: even flat varyings should be rewritten at each vertex.
+    virtual void emitPerVertexGeometryCode(SkString* fnBody, const char* position,
+                                           const char* coverage, const char* wind) const {}
+
+    // Called when the subclass has selected CoverageType::kShader. Primitives should produce
+    // coverage values between +0..1. Base class modulates the sign for wind.
+    // TODO: subclasses might have good spots to stuff the winding information without burning a
+    // whole new varying slot. Consider requiring them to generate the correct coverage sign.
+    virtual void emitShaderCoverage(GrGLSLFragmentBuilder*, const char* outputCoverage) const {
+        SkFAIL("Shader coverage not implemented when using CoverageType::kShader.");
+    }
+
+    // Emits one wedge of the conservative raster hull of a convex polygon. The complete hull has
+    // one wedge for each side of the polygon (i.e. call this N times, generally from different
+    // geometry shader invocations). Coverage is +1 all around.
+    //
+    // Logically, the conservative raster hull is equivalent to the convex hull of pixel-size boxes
+    // centered on the vertices.
+    //
+    // If an optional inset polygon is provided, then this emits a border from the inset to the
+    // hull, rather than the entire hull.
+    //
+    // Geometry shader must be configured to output triangle strips.
+    //
+    // Returns the maximum number of vertices that will be emitted.
+    int emitHullGeometry(GrGLSLGeometryBuilder*, const char* emitVertexFn, const char* polygonPts,
+                         int numSides, const char* wedgeIdx, const char* insetPts = nullptr) const;
+
+    // Emits the conservative raster of an edge (i.e. convex hull of two pixel-size boxes centered
+    // on the endpoints). Coverage is -1 on the outside border of the edge geometry and 0 on the
+    // inside. This effectively converts a jagged conservative raster edge into a smooth antialiased
+    // edge when using CoverageType::kInterpolated.
+    //
+    // If the subclass has already called emitEdgeDistanceEquation, then provide the distance
+    // equation. Otherwise this function will call emitEdgeDistanceEquation implicitly.
+    //
+    // Geometry shader must be configured to output triangle strips.
+    //
+    // Returns the maximum number of vertices that will be emitted.
+    int emitEdgeGeometry(GrGLSLGeometryBuilder*, const char* emitVertexFn, const char* leftPt,
+                         const char* rightPt, const char* distanceEquation = nullptr) const;
+
+    // Defines an equation ("dot(vec3(pt, 1), distance_equation)") that is -1 on the outside border
+    // of a conservative raster edge and 0 on the inside (see emitEdgeGeometry).
+    void emitEdgeDistanceEquation(GrGLSLGeometryBuilder*, const char* leftPt, const char* rightPt,
+                                  const char* outputDistanceEquation) const;
+
+    // Defines a global vec2 array that contains MSAA sample locations as offsets from pixel center.
+    // Subclasses can use this for software multisampling.
+    //
+    // Returns the number of samples.
+    int defineSoftSampleLocations(GrGLSLFragmentBuilder*, const char* samplesName) const;
+
+private:
+    void setData(const GrGLSLProgramDataManager& pdman, const GrPrimitiveProcessor&,
+                 FPCoordTransformIter&& transformIter) final {
+        this->setTransformDataHelper(SkMatrix::I(), pdman, &transformIter);
+    }
+
+    void onEmitCode(EmitArgs& args, GrGPArgs* gpArgs) final;
+
+    void emitVertexShader(const GrCCPRCoverageProcessor&, GrGLSLVertexBuilder*,
+                          const TexelBufferHandle& pointsBuffer, const char* rtAdjust,
+                          GrGPArgs* gpArgs) const;
+    void emitGeometryShader(const GrCCPRCoverageProcessor&, GrGLSLGeometryBuilder*,
+                            const char* rtAdjust) const;
+    void emitCoverage(const GrCCPRCoverageProcessor&, GrGLSLFragmentBuilder*,
+                      const char* outputColor, const char* outputCoverage) const;
+
+    const CoverageType   fCoverageType; // How emitCoverage computes the output alpha.
+    GrShaderVar          fGeomWind; // Geometry shader variable ("wind") written by emitWind.
+    GrGLSLGeoToFrag      fFragWind; // Wind, forwarded to the fragment shader (when in use).
+    GrGLSLGeoToFrag      fFragCoverageTimesWind; // Per-vertex coverage * wind (when in use).
+
+    typedef GrGLSLGeometryProcessor INHERITED;
+};
+
+#endif
diff --git a/src/gpu/ccpr/GrCCPRCubicProcessor.cpp b/src/gpu/ccpr/GrCCPRCubicProcessor.cpp
new file mode 100644
index 0000000..9dfa8e1
--- /dev/null
+++ b/src/gpu/ccpr/GrCCPRCubicProcessor.cpp
@@ -0,0 +1,323 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "GrCCPRCubicProcessor.h"
+
+#include "glsl/GrGLSLFragmentShaderBuilder.h"
+#include "glsl/GrGLSLGeometryShaderBuilder.h"
+#include "glsl/GrGLSLVertexShaderBuilder.h"
+
+void GrCCPRCubicProcessor::onEmitVertexShader(const GrCCPRCoverageProcessor& proc,
+                                              GrGLSLVertexBuilder* v,
+                                              const TexelBufferHandle& pointsBuffer,
+                                              const char* atlasOffset, const char* rtAdjust,
+                                              GrGPArgs* gpArgs) const {
+    float inset = 1 - kAABloatRadius;
+#ifdef SK_DEBUG
+    if (proc.debugVisualizations()) {
+        inset *= GrCCPRCoverageProcessor::kDebugBloat;
+    }
+#endif
+
+    // Fetch all 4 cubic bezier points, rotated so that bezierpts[0] is this vertex's own point.
+    v->codeAppendf("ivec4 indices = ivec4(%s.y, %s.x, %s.x + 1, %s.y + 1);",
+                   proc.instanceAttrib(), proc.instanceAttrib(), proc.instanceAttrib(),
+                   proc.instanceAttrib());
+    v->codeAppend ("highp mat4x2 bezierpts = mat4x2(");
+    v->appendTexelFetch(pointsBuffer, "indices[sk_VertexID]");
+    v->codeAppend (".xy, ");
+    v->appendTexelFetch(pointsBuffer, "indices[(sk_VertexID + 1) % 4]");
+    v->codeAppend (".xy, ");
+    v->appendTexelFetch(pointsBuffer, "indices[(sk_VertexID + 2) % 4]");
+    v->codeAppend (".xy, ");
+    v->appendTexelFetch(pointsBuffer, "indices[(sk_VertexID + 3) % 4]");
+    v->codeAppend (".xy);");
+
+    // Find the corner of the inset geometry that corresponds to this bezier vertex (bezierpts[0]).
+    v->codeAppend ("highp mat2 N = mat2(bezierpts[3].y - bezierpts[0].y, "
+                                       "bezierpts[0].x - bezierpts[3].x, "
+                                       "bezierpts[1].y - bezierpts[0].y, "
+                                       "bezierpts[0].x - bezierpts[1].x);");
+    v->codeAppend ("highp mat2 P = mat2(bezierpts[3], bezierpts[1]);");
+    v->codeAppend ("if (abs(determinant(N)) < 2) {"); // Area of [pts[3], pts[0], pts[1]] < 1px.
+                       // The inset corner doesn't exist because we are effectively colinear with
+                       // both neighbor vertices. Just duplicate a neighbor's inset corner.
+    v->codeAppend (    "int smallidx = (dot(N[0], N[0]) > dot(N[1], N[1])) ? 1 : 0;");
+    v->codeAppend (    "N[smallidx] = vec2(bezierpts[2].y - bezierpts[3 - smallidx * 2].y, "
+                                          "bezierpts[3 - smallidx * 2].x - bezierpts[2].x);");
+    v->codeAppend (    "P[smallidx] = bezierpts[2];");
+    v->codeAppend ("}");
+    v->codeAppend ("N[0] *= sign(dot(N[0], P[1] - P[0]));");
+    v->codeAppend ("N[1] *= sign(dot(N[1], P[0] - P[1]));");
+
+    v->codeAppendf("highp vec2 K = vec2(dot(N[0], P[0] + %f * sign(N[0])), "
+                                       "dot(N[1], P[1] + %f * sign(N[1])));", inset, inset);
+    v->codeAppendf("%s.xy = K * inverse(N) + %s;", fInset.vsOut(), atlasOffset);
+    v->codeAppendf("%s.xy = %s.xy * %s.xz + %s.yw;",
+                   fInset.vsOut(), fInset.vsOut(), rtAdjust, rtAdjust);
+
+    // The z component tells the geometry shader how "sharp" this corner is.
+    v->codeAppendf("%s.z = determinant(N) * sign(%s.x) * sign(%s.z);",
+                   fInset.vsOut(), rtAdjust, rtAdjust);
+
+    // Fetch one of the t,s klm root values for the geometry shader.
+    v->codeAppendf("%s = ", fTS.vsOut());
+    v->appendTexelFetch(pointsBuffer,
+                        SkStringPrintf("%s.x + 2 + sk_VertexID/2", proc.instanceAttrib()).c_str());
+    v->codeAppend ("[sk_VertexID % 2];");
+
+    // Emit the vertex position.
+    v->codeAppendf("highp vec2 self = bezierpts[0] + %s;", atlasOffset);
+    gpArgs->fPositionVar.set(kVec2f_GrSLType, "self");
+}
+
+void GrCCPRCubicProcessor::emitWind(GrGLSLGeometryBuilder* g, const char* rtAdjust,
+                                    const char* outputWind) const {
+    // bezierpts is defined by onEmitGeometryShader, which prepends it to the start of the shader.
+    g->codeAppend ("highp float area_times_2 = determinant(mat3(1, bezierpts[0], "
+                                                               "1, bezierpts[2], "
+                                                               "0, bezierpts[3] - bezierpts[1]));");
+    // Drop curves that are nearly flat. The KLM math becomes unstable in this case.
+    g->codeAppendf("if (2 * abs(area_times_2) < length((bezierpts[3] - bezierpts[0]) * %s.zx)) {",
+                   rtAdjust);
+#ifndef SK_BUILD_FOR_MAC
+    g->codeAppend (    "return;");
+#else
+    // Returning from this geometry shader makes Mac very unhappy. Instead we make wind 0.
+    g->codeAppend (    "area_times_2 = 0;");
+#endif
+    g->codeAppend ("}");
+    g->codeAppendf("%s = sign(area_times_2);", outputWind);
+}
+
+void GrCCPRCubicProcessor::onEmitGeometryShader(GrGLSLGeometryBuilder* g, const char* emitVertexFn,
+                                                const char* wind, const char* rtAdjust) const {
+    // Prepend bezierpts at the start of the shader.
+    g->codePrependf("highp mat4x2 bezierpts = mat4x2(sk_in[0].gl_Position.xy, "
+                                                    "sk_in[1].gl_Position.xy, "
+                                                    "sk_in[2].gl_Position.xy, "
+                                                    "sk_in[3].gl_Position.xy);");
+
+    // Evaluate the cubic at t=.5 for an approximate midpoint.
+    g->codeAppendf("highp vec2 midpoint = bezierpts * vec4(.125, .375, .375, .125);");
+
+    // Finish finding the inset geometry we started in the vertex shader. The z component tells us
+    // how "sharp" an inset corner is. And the vertex shader already skips one corner if it is
+    // colinear with its neighbors. So at this point, if a corner is flat, it means the inset
+    // geometry is all empty (it should never be non-convex because the curve gets chopped into
+    // convex segments ahead of time).
+    g->codeAppendf("bool isempty = "
+                       "any(lessThan(vec4(%s[0].z, %s[1].z, %s[2].z, %s[3].z) * %s, vec4(2)));",
+                   fInset.gsIn(), fInset.gsIn(), fInset.gsIn(), fInset.gsIn(), wind);
+    g->codeAppendf("highp vec2 inset[4];");
+    g->codeAppend ("for (int i = 0; i < 4; ++i) {");
+    g->codeAppendf(    "inset[i] = isempty ? midpoint : %s[i].xy;", fInset.gsIn());
+    g->codeAppend ("}");
+
+    // We determine crossover and/or degeneracy by how many inset edges run the opposite direction
+    // of their corresponding bezier edge. If there is one backwards edge, the inset geometry is
+    // actually a triangle with a vertex at the crossover point. If there are >1 backwards edges,
+    // the inset geometry doesn't exist (i.e. the bezier quadrilateral isn't large enough) and we
+    // degenerate to the midpoint.
+    g->codeAppend ("lowp float backwards[4];");
+    g->codeAppend ("lowp int numbackwards = 0;");
+    g->codeAppend ("for (int i = 0; i < 4; ++i) {");
+    g->codeAppend (    "lowp int j = (i + 1) % 4;");
+    g->codeAppendf(    "highp vec2 inner = inset[j] - inset[i];");
+    g->codeAppendf(    "highp vec2 outer = sk_in[j].gl_Position.xy - sk_in[i].gl_Position.xy;");
+    g->codeAppendf(    "backwards[i] = sign(dot(outer, inner));");
+    g->codeAppendf(    "numbackwards += backwards[i] < 0 ? 1 : 0;");
+    g->codeAppend ("}");
+
+    // Find the crossover point. If there actually isn't one, this math is meaningless and will get
+    // dropped on the floor later.
+    g->codeAppend ("lowp int x = (backwards[0] != backwards[2]) ? 1 : 0;");
+    g->codeAppend ("lowp int x3 = (x + 3) % 4;");
+    g->codeAppend ("highp mat2 X = mat2(inset[x].y - inset[x+1].y, "
+                                       "inset[x+1].x - inset[x].x, "
+                                       "inset[x+2].y - inset[x3].y, "
+                                       "inset[x3].x - inset[x+2].x);");
+    g->codeAppend ("highp vec2 KK = vec2(dot(X[0], inset[x]), dot(X[1], inset[x+2]));");
+    g->codeAppend ("highp vec2 crossoverpoint = KK * inverse(X);");
+
+    // Determine what point backwards edges should collapse into. If there is one backwards edge,
+    // it should collapse to the crossover point. If >1, they should all collapse to the midpoint.
+    g->codeAppend ("highp vec2 collapsepoint = numbackwards == 1 ? crossoverpoint : midpoint;");
+
+    // Collapse backwards edges to the "collapse" point.
+    g->codeAppend ("for (int i = 0; i < 4; ++i) {");
+    g->codeAppend (    "if (backwards[i] < 0) {");
+    g->codeAppend (        "inset[i] = inset[(i + 1) % 4] = collapsepoint;");
+    g->codeAppend (    "}");
+    g->codeAppend ("}");
+
+    // Calculate the KLM matrix.
+    g->declareGlobal(fKLMMatrix);
+    g->codeAppend ("highp vec4 K, L, M;");
+    if (Type::kSerpentine == fType) {
+        g->codeAppend ("highp vec2 l,m;");
+        g->codeAppendf("l.ts = vec2(%s[0], %s[1]);", fTS.gsIn(), fTS.gsIn());
+        g->codeAppendf("m.ts = vec2(%s[2], %s[3]);", fTS.gsIn(), fTS.gsIn());
+        g->codeAppend ("K = vec4(0, l.s * m.s, -l.t * m.s - m.t * l.s, l.t * m.t);");
+        g->codeAppend ("L = vec4(-1,3,-3,1) * l.ssst * l.sstt * l.sttt;");
+        g->codeAppend ("M = vec4(-1,3,-3,1) * m.ssst * m.sstt * m.sttt;");
+
+    } else {
+        g->codeAppend ("highp vec2 d,e;");
+        g->codeAppendf("d.ts = vec2(%s[0], %s[1]);", fTS.gsIn(), fTS.gsIn());
+        g->codeAppendf("e.ts = vec2(%s[2], %s[3]);", fTS.gsIn(), fTS.gsIn());
+        g->codeAppend ("highp vec4 dxe = vec4(d.s * e.s, d.s * e.t, d.t * e.s, d.t * e.t);");
+        g->codeAppend ("K = vec4(0, dxe.x, -dxe.y - dxe.z, dxe.w);");
+        g->codeAppend ("L = vec4(-1,1,-1,1) * d.sstt * (dxe.xyzw + vec4(0, 2*dxe.zy, 0));");
+        g->codeAppend ("M = vec4(-1,1,-1,1) * e.sstt * (dxe.xzyw + vec4(0, 2*dxe.yz, 0));");
+    }
+
+    g->codeAppend ("highp mat2x4 C = mat4(-1,  3, -3,  1, "
+                                         " 3, -6,  3,  0, "
+                                         "-3,  3,  0,  0, "
+                                         " 1,  0,  0,  0) * transpose(bezierpts);");
+
+    g->codeAppend ("highp vec2 absdet = abs(C[0].xx * C[1].zy - C[1].xx * C[0].zy);");
+    g->codeAppend ("lowp int middlerow = absdet[0] > absdet[1] ? 2 : 1;");
+
+    g->codeAppend ("highp mat3 CI = inverse(mat3(C[0][0], C[0][middlerow], C[0][3], "
+                                                "C[1][0], C[1][middlerow], C[1][3], "
+                                                "      0,               0,       1));");
+    g->codeAppendf("%s = CI * mat3(K[0], K[middlerow], K[3], "
+                                  "L[0], L[middlerow], L[3], "
+                                  "M[0], M[middlerow], M[3]);", fKLMMatrix.c_str());
+
+    // Orient the KLM matrix so we fill the correct side of the curve.
+    g->codeAppendf("lowp vec2 orientation = sign(vec3(midpoint, 1) * mat2x3(%s[1], %s[2]));",
+                   fKLMMatrix.c_str(), fKLMMatrix.c_str());
+    g->codeAppendf("%s *= mat3(orientation[0] * orientation[1], 0, 0, "
+                              "0, orientation[0], 0, "
+                              "0, 0, orientation[1]);", fKLMMatrix.c_str());
+
+    g->declareGlobal(fKLMDerivatives);
+    g->codeAppendf("%s[0] = %s[0].xy * %s.xz;",
+                   fKLMDerivatives.c_str(), fKLMMatrix.c_str(), rtAdjust);
+    g->codeAppendf("%s[1] = %s[1].xy * %s.xz;",
+                   fKLMDerivatives.c_str(), fKLMMatrix.c_str(), rtAdjust);
+    g->codeAppendf("%s[2] = %s[2].xy * %s.xz;",
+                   fKLMDerivatives.c_str(), fKLMMatrix.c_str(), rtAdjust);
+
+    this->emitCubicGeometry(g, emitVertexFn, wind, rtAdjust);
+}
+
+void GrCCPRCubicInsetProcessor::emitCubicGeometry(GrGLSLGeometryBuilder* g,
+                                                  const char* emitVertexFn, const char* wind,
+                                                  const char* rtAdjust) const {  // wind, rtAdjust: unused here.
+    // Emits the four inset[] points as one primitive. FIXME: clip this geometry at the curve's tip.
+    g->codeAppendf("%s(inset[0], 1);", emitVertexFn);
+    g->codeAppendf("%s(inset[1], 1);", emitVertexFn);
+    g->codeAppendf("%s(inset[3], 1);", emitVertexFn);  // [3] is emitted before [2] in the strip.
+    g->codeAppendf("%s(inset[2], 1);", emitVertexFn);
+    g->codeAppend ("EndPrimitive();");
+
+    g->configure(GrGLSLGeometryBuilder::InputType::kLinesAdjacency,
+                 GrGLSLGeometryBuilder::OutputType::kTriangleStrip,
+                 4, 1);  // NOTE(review): presumably (max vertices, invocations) - confirm in configure().
+}
+
+void GrCCPRCubicInsetProcessor::emitPerVertexGeometryCode(SkString* fnBody, const char* position,
+                                                          const char* /*coverage*/,
+                                                          const char* /*wind*/) const {  // Appends klm + grad_matrix outputs.
+    fnBody->appendf("highp vec3 klm = vec3(%s, 1) * %s;", position, fKLMMatrix.c_str());
+    fnBody->appendf("%s = klm;", fKLM.gsOut());
+    fnBody->appendf("%s[0] = 3 * klm[0] * %s[0];", fGradMatrix.gsOut(), fKLMDerivatives.c_str());
+    fnBody->appendf("%s[1] = -klm[1] * %s[2].xy - klm[2] * %s[1].xy;",  // -l * dM - m * dL
+                    fGradMatrix.gsOut(), fKLMDerivatives.c_str(), fKLMDerivatives.c_str());
+}  // emitShaderCoverage multiplies this matrix by vec2(k, 1), i.e. 3k^2*dK - l*dM - m*dL.
+
+void GrCCPRCubicInsetProcessor::emitShaderCoverage(GrGLSLFragmentBuilder* f,
+                                                   const char* outputCoverage) const {  // Emits the FS coverage code.
+    f->codeAppendf("highp float k = %s.x, l = %s.y, m = %s.z;",
+                   fKLM.fsIn(), fKLM.fsIn(), fKLM.fsIn());
+    f->codeAppend ("highp float f = k*k*k - l*m;");  // implicit function evaluated from the klm varying.
+    f->codeAppendf("highp vec2 grad = %s * vec2(k, 1);", fGradMatrix.fsIn());
+    f->codeAppend ("highp float d = f * inversesqrt(dot(grad, grad));");  // f divided by length(grad).
+    f->codeAppendf("%s = clamp(0.5 - d, 0, 1);", outputCoverage);  // 0.5 where d == 0, clamped to [0, 1].
+}
+
+void GrCCPRCubicBorderProcessor::emitCubicGeometry(GrGLSLGeometryBuilder* g,
+                                                   const char* emitVertexFn, const char* wind,
+                                                   const char* rtAdjust) const {
+    // Emits the border hull plus edge globals; bezierpts comes from onEmitGeometryShader.
+    g->declareGlobal(fEdgeDistanceEquation);
+    g->codeAppendf("int edgeidx0 = %s > 0 ? 3 : 0;", wind);  // wind's sign picks the edge direction.
+    g->codeAppend ("highp vec2 edgept0 = bezierpts[edgeidx0];");
+    g->codeAppend ("highp vec2 edgept1 = bezierpts[3 - edgeidx0];");
+    this->emitEdgeDistanceEquation(g, "edgept0", "edgept1", fEdgeDistanceEquation.c_str());
+    g->codeAppendf("%s.z += 0.5;", fEdgeDistanceEquation.c_str()); // outer = -.5, inner = .5
+
+    g->declareGlobal(fEdgeDistanceDerivatives);
+    g->codeAppendf("%s = %s.xy * %s.xz;",
+                   fEdgeDistanceDerivatives.c_str(), fEdgeDistanceEquation.c_str(), rtAdjust);
+
+    g->declareGlobal(fEdgeSpaceTransform);
+    g->codeAppend ("highp vec4 edgebbox = vec4(min(bezierpts[0], bezierpts[3]) - bloat, "
+                                              "max(bezierpts[0], bezierpts[3]) + bloat);");
+    g->codeAppendf("%s.xy = 2 / vec2(edgebbox.zw - edgebbox.xy);", fEdgeSpaceTransform.c_str());
+    g->codeAppendf("%s.zw = -1 - %s.xy * edgebbox.xy;",  // scale + offset map edgebbox onto [-1, 1].
+                   fEdgeSpaceTransform.c_str(), fEdgeSpaceTransform.c_str());
+
+    int maxVertices = this->emitHullGeometry(g, emitVertexFn, "bezierpts", 4, "sk_InvocationID",
+                                             "inset");
+
+    g->configure(GrGLSLGeometryBuilder::InputType::kLinesAdjacency,
+                 GrGLSLGeometryBuilder::OutputType::kTriangleStrip,
+                 maxVertices, 4);
+}
+
+void GrCCPRCubicBorderProcessor::emitPerVertexGeometryCode(SkString* fnBody, const char* position,
+                                                           const char* /*coverage*/,
+                                                           const char* /*wind*/) const {  // coverage, wind: unused.
+    fnBody->appendf("highp vec3 klm = vec3(%s, 1) * %s;", position, fKLMMatrix.c_str());
+    fnBody->appendf("highp float d = dot(vec3(%s, 1), %s);",
+                    position, fEdgeDistanceEquation.c_str());
+    fnBody->appendf("%s = vec4(klm, d);", fKLMD.gsOut());  // klm and the edge distance, packed together.
+    fnBody->appendf("%s = vec4(%s[0].x, %s[1].x, %s[2].x, %s.x);",  // x components of the derivatives.
+                    fdKLMDdx.gsOut(), fKLMDerivatives.c_str(), fKLMDerivatives.c_str(),
+                    fKLMDerivatives.c_str(), fEdgeDistanceDerivatives.c_str());
+    fnBody->appendf("%s = vec4(%s[0].y, %s[1].y, %s[2].y, %s.y);",  // y components of the derivatives.
+                    fdKLMDdy.gsOut(), fKLMDerivatives.c_str(), fKLMDerivatives.c_str(),
+                    fKLMDerivatives.c_str(), fEdgeDistanceDerivatives.c_str());
+    fnBody->appendf("%s = %s * %s.xy + %s.zw;", fEdgeSpaceCoord.gsOut(), position,
+                    fEdgeSpaceTransform.c_str(), fEdgeSpaceTransform.c_str());
+
+    // Otherwise, fEdgeDistances = fEdgeDistances * sign(wind * rtAdjust.x * rtAdjust.z).
+    GR_STATIC_ASSERT(kTopLeft_GrSurfaceOrigin == GrCCPRCoverageProcessor::kAtlasOrigin);
+}
+
+void GrCCPRCubicBorderProcessor::emitShaderCoverage(GrGLSLFragmentBuilder* f,
+                                                    const char* outputCoverage) const {
+    // Use software msaa to determine coverage.
+    const int sampleCount = this->defineSoftSampleLocations(f, "samples");
+
+    // Along the shared edge, we start with distance-to-edge coverage, then subtract out the
+    // remaining pixel coverage that is still inside the shared edge, but outside the curve.
+    // Outside the shared edge, we just use standard msaa to count samples inside the curve.
+    f->codeAppendf("bool use_edge = all(lessThan(abs(%s), vec2(1)));", fEdgeSpaceCoord.fsIn());
+    f->codeAppendf("%s = (use_edge ? clamp(%s.w + 0.5, 0, 1) : 0) * %i;",  // pre-scaled by sampleCount.
+                   outputCoverage, fKLMD.fsIn(), sampleCount);
+
+    f->codeAppendf("highp mat2x4 grad_klmd = mat2x4(%s, %s);", fdKLMDdx.fsIn(), fdKLMDdy.fsIn());
+
+    f->codeAppendf("for (int i = 0; i < %i; ++i) {", sampleCount);
+    f->codeAppendf(    "highp vec4 klmd = grad_klmd * samples[i] + %s;", fKLMD.fsIn());
+    f->codeAppend (    "lowp float f = klmd.y * klmd.z - klmd.x * klmd.x * klmd.x;");
+    // A sample is inside our cubic sub-section if it is inside the implicit AND L & M are both
+    // positive. This works because the sections get chopped at the K/L and K/M intersections.
+    f->codeAppend (    "bvec4 inside = greaterThan(vec4(f,klmd.yzw), vec4(0));");
+    f->codeAppend (    "lowp float in_curve = all(inside.xyz) ? 1 : 0;");
+    f->codeAppend (    "lowp float in_edge = inside.w ? 1 : 0;");
+    f->codeAppendf(    "%s += use_edge ? in_edge * (in_curve - 1) : in_curve;", outputCoverage);
+    f->codeAppend ("}");  // use_edge case: -1 for each sample inside the edge but outside the curve.
+
+    f->codeAppendf("%s *= %f;", outputCoverage, 1.0 / sampleCount);  // normalize by the sample count.
+}
diff --git a/src/gpu/ccpr/GrCCPRCubicProcessor.h b/src/gpu/ccpr/GrCCPRCubicProcessor.h
new file mode 100644
index 0000000..f31dad7
--- /dev/null
+++ b/src/gpu/ccpr/GrCCPRCubicProcessor.h
@@ -0,0 +1,147 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef GrCCPRCubicProcessor_DEFINED
+#define GrCCPRCubicProcessor_DEFINED
+
+#include "ccpr/GrCCPRCoverageProcessor.h"
+
+class GrGLSLGeometryBuilder;
+
+/**
+ * This class renders the coverage of convex closed cubic segments using the techniques outlined in
+ * "Resolution Independent Curve Rendering using Programmable Graphics Hardware" by Charles Loop and
+ * Jim Blinn:
+ *
+ * https://www.microsoft.com/en-us/research/wp-content/uploads/2005/01/p1000-loop.pdf
+ *
+ * The caller is expected to chop cubics at the KLM roots (a.k.a. inflection points and loop
+ * intersection points, resulting in necessarily convex segments) before feeding them into this
+ * processor.
+ *
+ * The curves are rendered in two passes:
+ *
+ * Pass 1: Draw the (convex) bezier quadrilateral, inset by 1/2 pixel all around, and use the
+ *         gradient-based AA technique outlined in the Loop/Blinn paper to compute coverage.
+ *
+ * Pass 2: Draw a border around the previous inset, up to the bezier quadrilateral's conservative
+ *         raster hull, and compute coverage using pseudo MSAA. This pass is necessary because the
+ *         gradient approach does not work near the L and M lines.
+ *
+ * FIXME: The pseudo MSAA border is slow and ugly. We should investigate an alternate solution of
+ * just approximating the curve with straight lines for short distances across the problem points
+ * instead.
+ */
+class GrCCPRCubicProcessor : public GrCCPRCoverageProcessor::PrimitiveProcessor {
+public:
+    enum class Type {
+        kSerpentine,
+        kLoop
+    };
+
+    GrCCPRCubicProcessor(Type type)
+            : INHERITED(CoverageType::kShader)
+            , fType(type)
+            , fInset(kVec3f_GrSLType)
+            , fTS(kFloat_GrSLType)
+            , fKLMMatrix("klm_matrix", kMat33f_GrSLType, GrShaderVar::kNonArray,
+                         kHigh_GrSLPrecision)
+            , fKLMDerivatives("klm_derivatives", kVec2f_GrSLType, 3, kHigh_GrSLPrecision) {}
+
+    void resetVaryings(GrGLSLVaryingHandler* varyingHandler) override {  // subclasses chain to this.
+        varyingHandler->addVarying("insets", &fInset, kHigh_GrSLPrecision);
+        varyingHandler->addVarying("ts", &fTS, kHigh_GrSLPrecision);
+    }
+
+    void onEmitVertexShader(const GrCCPRCoverageProcessor&, GrGLSLVertexBuilder*,
+                            const TexelBufferHandle& pointsBuffer, const char* atlasOffset,
+                            const char* rtAdjust, GrGPArgs*) const override;
+    void emitWind(GrGLSLGeometryBuilder*, const char* rtAdjust, const char* outputWind) const final;
+    void onEmitGeometryShader(GrGLSLGeometryBuilder*, const char* emitVertexFn, const char* wind,
+                              const char* rtAdjust) const final;
+
+protected:
+    virtual void emitCubicGeometry(GrGLSLGeometryBuilder*, const char* emitVertexFn,
+                                   const char* wind, const char* rtAdjust) const = 0;  // per-pass geometry hook.
+
+    const Type        fType;
+    GrGLSLVertToGeo   fInset;           // vec3 varying, registered as "insets".
+    GrGLSLVertToGeo   fTS;              // float varying, registered as "ts".
+    GrShaderVar       fKLMMatrix;       // highp mat3 shader variable "klm_matrix".
+    GrShaderVar       fKLMDerivatives;  // highp vec2 array of 3, "klm_derivatives".
+
+    typedef GrCCPRCoverageProcessor::PrimitiveProcessor INHERITED;
+};
+
+class GrCCPRCubicInsetProcessor : public GrCCPRCubicProcessor {  // Draws the inset geometry (gradient-based AA).
+public:
+    GrCCPRCubicInsetProcessor(Type type)
+            : INHERITED(type)
+            , fKLM(kVec3f_GrSLType)
+            , fGradMatrix(kMat22f_GrSLType) {}
+
+    void resetVaryings(GrGLSLVaryingHandler* varyingHandler) override {
+        this->INHERITED::resetVaryings(varyingHandler);  // registers the base-class varyings first.
+        varyingHandler->addVarying("klm", &fKLM, kHigh_GrSLPrecision);
+        varyingHandler->addVarying("grad_matrix", &fGradMatrix, kHigh_GrSLPrecision);
+    }
+
+    void emitCubicGeometry(GrGLSLGeometryBuilder*, const char* emitVertexFn,
+                           const char* wind, const char* rtAdjust) const override;
+    void emitPerVertexGeometryCode(SkString* fnBody, const char* position, const char* coverage,
+                                   const char* wind) const override;
+    void emitShaderCoverage(GrGLSLFragmentBuilder*, const char* outputCoverage) const override;
+
+protected:
+    GrGLSLGeoToFrag   fKLM;         // vec3 geometry-to-fragment varying "klm".
+    GrGLSLGeoToFrag   fGradMatrix;  // mat2 geometry-to-fragment varying "grad_matrix".
+
+    typedef GrCCPRCubicProcessor INHERITED;
+};
+
+class GrCCPRCubicBorderProcessor : public GrCCPRCubicProcessor {  // Draws the border hull (software-sampled AA).
+public:
+    GrCCPRCubicBorderProcessor(Type type)
+            : INHERITED(type)
+            , fEdgeDistanceEquation("edge_distance_equation", kVec3f_GrSLType,
+                                    GrShaderVar::kNonArray, kHigh_GrSLPrecision)
+            , fEdgeDistanceDerivatives("edge_distance_derivatives", kVec2f_GrSLType,
+                                        GrShaderVar::kNonArray, kHigh_GrSLPrecision)
+            , fEdgeSpaceTransform("edge_space_transform", kVec4f_GrSLType, GrShaderVar::kNonArray,
+                                  kHigh_GrSLPrecision)
+            , fKLMD(kVec4f_GrSLType)
+            , fdKLMDdx(kVec4f_GrSLType)
+            , fdKLMDdy(kVec4f_GrSLType)
+            , fEdgeSpaceCoord(kVec2f_GrSLType) {}
+
+    void resetVaryings(GrGLSLVaryingHandler* varyingHandler) override {
+        this->INHERITED::resetVaryings(varyingHandler);  // registers the base-class varyings first.
+        varyingHandler->addVarying("klmd", &fKLMD, kHigh_GrSLPrecision);
+        varyingHandler->addFlatVarying("dklmddx", &fdKLMDdx, kHigh_GrSLPrecision);
+        varyingHandler->addFlatVarying("dklmddy", &fdKLMDdy, kHigh_GrSLPrecision);
+        varyingHandler->addVarying("edge_space_coord", &fEdgeSpaceCoord, kHigh_GrSLPrecision);
+    }
+
+    void emitCubicGeometry(GrGLSLGeometryBuilder*, const char* emitVertexFn,
+                           const char* wind, const char* rtAdjust) const override;
+    void emitPerVertexGeometryCode(SkString* fnBody, const char* position, const char* coverage,
+                                   const char* wind) const override;
+    void emitShaderCoverage(GrGLSLFragmentBuilder*, const char* outputCoverage) const override;
+
+protected:
+    GrShaderVar        fEdgeDistanceEquation;     // highp vec3 "edge_distance_equation".
+    GrShaderVar        fEdgeDistanceDerivatives;  // highp vec2 "edge_distance_derivatives".
+    GrShaderVar        fEdgeSpaceTransform;       // highp vec4 "edge_space_transform" (xy scale, zw offset).
+    GrGLSLGeoToFrag    fKLMD;                     // vec4 varying "klmd": klm plus edge distance.
+    GrGLSLGeoToFrag    fdKLMDdx;                  // flat vec4 varying "dklmddx".
+    GrGLSLGeoToFrag    fdKLMDdy;                  // flat vec4 varying "dklmddy".
+    GrGLSLGeoToFrag    fEdgeSpaceCoord;           // vec2 varying "edge_space_coord".
+
+    typedef GrCCPRCubicProcessor INHERITED;
+};
+
+#endif
diff --git a/src/gpu/ccpr/GrCCPRPathProcessor.cpp b/src/gpu/ccpr/GrCCPRPathProcessor.cpp
new file mode 100644
index 0000000..bc2e45c
--- /dev/null
+++ b/src/gpu/ccpr/GrCCPRPathProcessor.cpp
@@ -0,0 +1,206 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "GrCCPRPathProcessor.h"
+
+#include "GrOnFlushResourceProvider.h"
+#include "GrTexture.h"
+#include "glsl/GrGLSLFragmentShaderBuilder.h"
+#include "glsl/GrGLSLGeometryProcessor.h"
+#include "glsl/GrGLSLProgramBuilder.h"
+#include "glsl/GrGLSLVarying.h"
+
+// Slightly undershoot an AA bloat radius of 0.5 so vertices that fall on integer boundaries don't
+// accidentally reach into neighboring path masks within the atlas.
+constexpr float kAABloatRadius = 0.491111f;  // applied to both reference points in onEmitCode.
+
+// Paths are drawn as octagons. Each point on the octagon is the intersection of two lines: one edge
+// from the path's bounding box and one edge from its 45-degree bounding box. The below inputs
+// define a vertex by the two edges that need to be intersected. Normals point out of the octagon,
+// and the bounding boxes are sent in as instance attribs.
+static constexpr float kOctoEdgeNorms[8 * 4] = {
+    // bbox   // bbox45   (one row of four floats per octagon vertex)
+    -1, 0,    -1,+1,
+    -1, 0,    -1,-1,
+     0,-1,    -1,-1,
+     0,-1,    +1,-1,
+    +1, 0,    +1,-1,
+    +1, 0,    +1,+1,
+     0,+1,    +1,+1,
+     0,+1,    -1,+1,
+};
+
+GR_DECLARE_STATIC_UNIQUE_KEY(gVertexBufferKey);
+
+// Index buffer for the octagon defined above: six triangles over the eight octagon vertices.
+static constexpr uint16_t kOctoIndices[GrCCPRPathProcessor::kPerInstanceIndexCount] = {
+    0, 4, 2,
+    0, 6, 4,
+    0, 2, 1,
+    2, 4, 3,
+    4, 6, 5,
+    6, 0, 7,
+};
+
+GR_DECLARE_STATIC_UNIQUE_KEY(gIndexBufferKey);
+
+GrCCPRPathProcessor::GrCCPRPathProcessor(GrResourceProvider* rp, sk_sp<GrTextureProxy> atlas,
+                                         SkPath::FillType fillType, const GrShaderCaps& shaderCaps)
+        : fFillType(fillType) {  // shaderCaps is not referenced in this constructor.
+    this->addInstanceAttrib("devbounds", kVec4f_GrVertexAttribType, kHigh_GrSLPrecision);
+    this->addInstanceAttrib("devbounds45", kVec4f_GrVertexAttribType, kHigh_GrSLPrecision);
+    this->addInstanceAttrib("view_matrix", kVec4f_GrVertexAttribType, kHigh_GrSLPrecision);
+    this->addInstanceAttrib("view_translate", kVec2f_GrVertexAttribType, kHigh_GrSLPrecision);
+    // FIXME: this could be a vector of two shorts if it were supported by Ganesh.
+    this->addInstanceAttrib("atlas_offset", kVec2i_GrVertexAttribType, kHigh_GrSLPrecision);
+    this->addInstanceAttrib("color", kVec4ub_GrVertexAttribType, kLow_GrSLPrecision);
+
+    SkASSERT(offsetof(Instance, fDevBounds) ==  // each attrib offset must match the Instance layout.
+             this->getInstanceAttrib(InstanceAttribs::kDevBounds).fOffsetInRecord);
+    SkASSERT(offsetof(Instance, fDevBounds45) ==
+             this->getInstanceAttrib(InstanceAttribs::kDevBounds45).fOffsetInRecord);
+    SkASSERT(offsetof(Instance, fViewMatrix) ==
+             this->getInstanceAttrib(InstanceAttribs::kViewMatrix).fOffsetInRecord);
+    SkASSERT(offsetof(Instance, fViewTranslate) ==
+             this->getInstanceAttrib(InstanceAttribs::kViewTranslate).fOffsetInRecord);
+    SkASSERT(offsetof(Instance, fAtlasOffset) ==
+             this->getInstanceAttrib(InstanceAttribs::kAtlasOffset).fOffsetInRecord);
+    SkASSERT(offsetof(Instance, fColor) ==
+             this->getInstanceAttrib(InstanceAttribs::kColor).fOffsetInRecord);
+    SkASSERT(sizeof(Instance) == this->getInstanceStride());
+
+    GR_STATIC_ASSERT(6 == kNumInstanceAttribs);  // six addInstanceAttrib calls above.
+
+    this->addVertexAttrib("edge_norms", kVec4f_GrVertexAttribType, kHigh_GrSLPrecision);  // index kNumInstanceAttribs.
+
+    fAtlasAccess.reset(std::move(atlas), GrSamplerParams::FilterMode::kNone_FilterMode,
+                       SkShader::TileMode::kClamp_TileMode, kFragment_GrShaderFlag);
+    fAtlasAccess.instantiate(rp);
+    this->addTextureSampler(&fAtlasAccess);
+
+    this->initClassID<GrCCPRPathProcessor>();
+}
+
+void GrCCPRPathProcessor::getGLSLProcessorKey(const GrShaderCaps&, GrProcessorKeyBuilder* b) const {
+    b->add32((fFillType << 16) | this->atlas()->origin());  // fill type shifted above the atlas origin bits.
+}
+
+class GLSLPathProcessor : public GrGLSLGeometryProcessor {  // GLSL implementation for GrCCPRPathProcessor.
+public:
+    void onEmitCode(EmitArgs& args, GrGPArgs* gpArgs) override;
+
+private:
+    void setData(const GrGLSLProgramDataManager& pdman, const GrPrimitiveProcessor& primProc,
+                 FPCoordTransformIter&& transformIter) override {
+        const GrCCPRPathProcessor& proc = primProc.cast<GrCCPRPathProcessor>();
+        pdman.set2f(fAtlasAdjustUniform, 1.0f / proc.atlas()->width(),  // reciprocal atlas dimensions.
+                    1.0f / proc.atlas()->height());
+        this->setTransformDataHelper(SkMatrix::I(), pdman, &transformIter);
+    }
+
+    GrGLSLUniformHandler::UniformHandle fAtlasAdjustUniform;  // "atlas_adjust" vec2, added in onEmitCode.
+
+    typedef GrGLSLGeometryProcessor INHERITED;
+};
+
+GrGLSLPrimitiveProcessor* GrCCPRPathProcessor::createGLSLInstance(const GrShaderCaps&) const {
+    return new GLSLPathProcessor();  // NOTE(review): raw pointer - presumably the caller takes ownership; confirm.
+}
+
+void GLSLPathProcessor::onEmitCode(EmitArgs& args, GrGPArgs* gpArgs) {  // Emits the octagon VS and coverage FS.
+    using InstanceAttribs = GrCCPRPathProcessor::InstanceAttribs;
+    const GrCCPRPathProcessor& proc = args.fGP.cast<GrCCPRPathProcessor>();
+    GrGLSLUniformHandler* uniHandler = args.fUniformHandler;
+    GrGLSLVaryingHandler* varyingHandler = args.fVaryingHandler;
+
+    const char* atlasAdjust;
+    fAtlasAdjustUniform = uniHandler->addUniform(
+            kVertex_GrShaderFlag,
+            kVec2f_GrSLType, kHigh_GrSLPrecision, "atlas_adjust", &atlasAdjust);
+
+    varyingHandler->emitAttributes(proc);
+
+    GrGLSLVertToFrag texcoord(kVec2f_GrSLType);
+    varyingHandler->addVarying("texcoord", &texcoord, kHigh_GrSLPrecision);
+    // The instance color needs no varying object of its own: it is passed through flat below.
+    varyingHandler->addFlatPassThroughAttribute(&proc.getInstanceAttrib(InstanceAttribs::kColor),
+                                                args.fOutputColor, kLow_GrSLPrecision);
+
+    // Vertex shader.
+    GrGLSLVertexBuilder* v = args.fVertBuilder;
+
+    // Find the intersections of (bloated) devBounds and devBounds45 in order to come up with an
+    // octagon that circumscribes the (bloated) path. A vertex is the intersection of two lines:
+    // one edge from the path's bounding box and one edge from its 45-degree bounding box.
+    v->codeAppendf("highp mat2 N = mat2(%s);", proc.getEdgeNormsAttrib().fName);
+
+    // N[0] is the normal for the edge we are intersecting from the regular bounding box, pointing
+    // out of the octagon.
+    v->codeAppendf("highp vec2 refpt = (min(N[0].x, N[0].y) < 0) ? %s.xy : %s.zw;",
+                   proc.getInstanceAttrib(InstanceAttribs::kDevBounds).fName,
+                   proc.getInstanceAttrib(InstanceAttribs::kDevBounds).fName);
+    v->codeAppendf("refpt += N[0] * %f;", kAABloatRadius); // bloat for AA.
+
+    // N[1] is the normal for the edge we are intersecting from the 45-degree bounding box, pointing
+    // out of the octagon.
+    v->codeAppendf("highp vec2 refpt45 = (N[1].x < 0) ? %s.xy : %s.zw;",
+                   proc.getInstanceAttrib(InstanceAttribs::kDevBounds45).fName,
+                   proc.getInstanceAttrib(InstanceAttribs::kDevBounds45).fName);
+    v->codeAppend ("refpt45 *= mat2(.5,.5,-.5,.5);"); // transform back to device space.
+    v->codeAppendf("refpt45 += N[1] * %f;", kAABloatRadius); // bloat for AA.
+
+    v->codeAppend ("highp vec2 K = vec2(dot(N[0], refpt), dot(N[1], refpt45));");
+    v->codeAppend ("highp vec2 octocoord = K * inverse(N);");
+
+    gpArgs->fPositionVar.set(kVec2f_GrSLType, "octocoord");
+
+    // Convert to atlas coordinates in order to do our texture lookup.
+    v->codeAppendf("highp vec2 atlascoord = octocoord + vec2(%s);",
+                   proc.getInstanceAttrib(InstanceAttribs::kAtlasOffset).fName);
+    if (kTopLeft_GrSurfaceOrigin == proc.atlas()->origin()) {
+        v->codeAppendf("%s = atlascoord * %s;", texcoord.vsOut(), atlasAdjust);
+    } else {
+        SkASSERT(kBottomLeft_GrSurfaceOrigin == proc.atlas()->origin());
+        v->codeAppendf("%s = vec2(atlascoord.x * %s.x, 1 - atlascoord.y * %s.y);",
+                       texcoord.vsOut(), atlasAdjust, atlasAdjust);
+    }
+
+    // Convert to (local) path coordinates.
+    v->codeAppendf("highp vec2 pathcoord = inverse(mat2(%s)) * (octocoord - %s);",
+                   proc.getInstanceAttrib(InstanceAttribs::kViewMatrix).fName,
+                   proc.getInstanceAttrib(InstanceAttribs::kViewTranslate).fName);
+
+    this->emitTransforms(v, varyingHandler, uniHandler, gpArgs->fPositionVar, "pathcoord",
+                         args.fFPCoordTransformHandler);
+
+    // Fragment shader.
+    GrGLSLPPFragmentBuilder* f = args.fFragBuilder;
+
+    f->codeAppend ("mediump float coverage_count = ");
+    f->appendTextureLookup(args.fTexSamplers[0], texcoord.fsIn(), kVec2f_GrSLType);
+    f->codeAppend (".a;");
+
+    if (SkPath::kWinding_FillType == proc.fillType()) {
+        f->codeAppendf("%s = vec4(min(abs(coverage_count), 1));", args.fOutputCoverage);
+    } else {
+        SkASSERT(SkPath::kEvenOdd_FillType == proc.fillType());
+        f->codeAppend ("mediump float t = mod(abs(coverage_count), 2);");
+        f->codeAppendf("%s = vec4(1 - abs(t - 1));", args.fOutputCoverage);
+    }
+}
+
+sk_sp<GrBuffer> GrCCPRPathProcessor::FindOrMakeIndexBuffer(GrOnFlushResourceProvider* onFlushRP) {
+    GR_DEFINE_STATIC_UNIQUE_KEY(gIndexBufferKey);  // key declared at file scope next to kOctoIndices.
+    return onFlushRP->findOrMakeStaticBuffer(gIndexBufferKey, kIndex_GrBufferType,
+                                             sizeof(kOctoIndices), kOctoIndices);  // NOTE(review): presumably cached by key - confirm.
+}
+
+sk_sp<GrBuffer> GrCCPRPathProcessor::FindOrMakeVertexBuffer(GrOnFlushResourceProvider* onFlushRP) {
+    GR_DEFINE_STATIC_UNIQUE_KEY(gVertexBufferKey);  // key declared at file scope next to kOctoEdgeNorms.
+    return onFlushRP->findOrMakeStaticBuffer(gVertexBufferKey, kVertex_GrBufferType,
+                                             sizeof(kOctoEdgeNorms), kOctoEdgeNorms);  // NOTE(review): presumably cached by key - confirm.
+}
diff --git a/src/gpu/ccpr/GrCCPRPathProcessor.h b/src/gpu/ccpr/GrCCPRPathProcessor.h
new file mode 100644
index 0000000..a74455b
--- /dev/null
+++ b/src/gpu/ccpr/GrCCPRPathProcessor.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef GrCCPRPathProcessor_DEFINED
+#define GrCCPRPathProcessor_DEFINED
+
+#include "GrGeometryProcessor.h"
+#include "SkPath.h"
+#include <array>
+
+class GrOnFlushResourceProvider;
+class GrShaderCaps;
+
+/**
+ * This class draws AA paths using the coverage count masks produced by GrCCPRCoverageProcessor.
+ *
+ * Paths are drawn as bloated octagons, and coverage is derived from the coverage count mask and
+ * fill rule.
+ *
+ * The caller must set up an instance buffer as detailed below, then draw indexed-instanced
+ * triangles using the index and vertex buffers provided by this class.
+ */
+class GrCCPRPathProcessor : public GrGeometryProcessor {
+public:
+    static constexpr int kPerInstanceIndexCount = 6 * 3;  // six triangles, three indices each.
+    static sk_sp<GrBuffer> FindOrMakeIndexBuffer(GrOnFlushResourceProvider*);
+    static sk_sp<GrBuffer> FindOrMakeVertexBuffer(GrOnFlushResourceProvider*);
+
+    enum class InstanceAttribs {
+        kDevBounds,
+        kDevBounds45,
+        kViewMatrix, // FIXME: This causes a lot of duplication. It could move to a texel buffer.
+        kViewTranslate,
+        kAtlasOffset,
+        kColor
+    };
+    static constexpr int kNumInstanceAttribs = 1 + (int)InstanceAttribs::kColor;  // kColor must stay last.
+
+    struct Instance {
+        SkRect                   fDevBounds;
+        SkRect                   fDevBounds45; // Bounding box in "| 1  -1 | * devCoords" space.
+                                               //                  | 1   1 |
+        std::array<float, 4>     fViewMatrix;  // {kScaleX, kSkewY, kSkewX, kScaleY}
+        std::array<float, 2>     fViewTranslate;
+        std::array<int32_t, 2>   fAtlasOffset;
+        uint32_t                 fColor;
+
+        GR_STATIC_ASSERT(SK_SCALAR_IS_FLOAT);
+    };
+
+    GR_STATIC_ASSERT(4 * 17 == sizeof(Instance)); // FIXME: 4 * 16 by making fAtlasOffset int16_t's.
+
+    GrCCPRPathProcessor(GrResourceProvider*, sk_sp<GrTextureProxy> atlas, SkPath::FillType,
+                       const GrShaderCaps&);
+
+    const char* name() const override { return "GrCCPRPathProcessor"; }
+    const GrTexture* atlas() const { return fAtlasAccess.peekTexture(); }
+    SkPath::FillType fillType() const { return fFillType; }
+    const Attribute& getInstanceAttrib(InstanceAttribs attribID) const {
+        const Attribute& attrib = this->getAttrib((int)attribID);  // enum value doubles as attrib index.
+        SkASSERT(Attribute::InputRate::kPerInstance == attrib.fInputRate);
+        return attrib;
+    }
+    const Attribute& getEdgeNormsAttrib() const {
+        SkASSERT(1 + kNumInstanceAttribs == this->numAttribs());
+        const Attribute& attrib = this->getAttrib(kNumInstanceAttribs);  // the attrib after the instance ones.
+        SkASSERT(Attribute::InputRate::kPerVertex == attrib.fInputRate);
+        return attrib;
+    }
+
+    void getGLSLProcessorKey(const GrShaderCaps&, GrProcessorKeyBuilder*) const override;
+    GrGLSLPrimitiveProcessor* createGLSLInstance(const GrShaderCaps&) const override;
+
+private:
+    const SkPath::FillType   fFillType;
+    TextureSampler           fAtlasAccess;
+
+    typedef GrGeometryProcessor INHERITED;
+};
+
+#endif
diff --git a/src/gpu/ccpr/GrCCPRQuadraticProcessor.cpp b/src/gpu/ccpr/GrCCPRQuadraticProcessor.cpp
new file mode 100644
index 0000000..8c58ea2
--- /dev/null
+++ b/src/gpu/ccpr/GrCCPRQuadraticProcessor.cpp
@@ -0,0 +1,179 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "GrCCPRQuadraticProcessor.h"
+
+#include "glsl/GrGLSLFragmentShaderBuilder.h"
+#include "glsl/GrGLSLGeometryShaderBuilder.h"
+#include "glsl/GrGLSLVertexShaderBuilder.h"
+
+void GrCCPRQuadraticProcessor::onEmitVertexShader(const GrCCPRCoverageProcessor& proc,
+                                                  GrGLSLVertexBuilder* v,
+                                                  const TexelBufferHandle& pointsBuffer,
+                                                  const char* atlasOffset, const char* rtAdjust,
+                                                  GrGPArgs* gpArgs) const {
+    v->codeAppendf("ivec3 indices = ivec3(%s.y, %s.x, %s.y + 1);", // Point index per vertex.
+                   proc.instanceAttrib(), proc.instanceAttrib(), proc.instanceAttrib());
+    v->codeAppend ("highp vec2 self = "); // Finished by the fetch and offset appended below.
+    v->appendTexelFetch(pointsBuffer, "indices[sk_VertexID]");
+    v->codeAppendf(".xy + %s;", atlasOffset);
+    gpArgs->fPositionVar.set(kVec2f_GrSLType, "self"); // rtAdjust is not used by this stage.
+}
+
+void GrCCPRQuadraticProcessor::emitWind(GrGLSLGeometryBuilder* g, const char* rtAdjust,
+                                        const char* outputWind) const {
+    // bezierpts is not declared here; onEmitGeometryShader prepends its definition.
+    g->codeAppend ("highp float area_times_2 = determinant(mat2(bezierpts[1] - bezierpts[0], "
+                                                               "bezierpts[2] - bezierpts[0]));");
+    // Drop curves that are nearly flat, in favor of the higher quality triangle antialiasing.
+    g->codeAppendf("if (2 * abs(area_times_2) < length((bezierpts[2] - bezierpts[0]) * %s.zx)) {",
+                   rtAdjust);
+#ifndef SK_BUILD_FOR_MAC
+    g->codeAppend (    "return;"); // Exit before the wind is assigned.
+#else
+    // Returning from this geometry shader makes Mac very unhappy. Instead we make wind 0.
+    g->codeAppend (    "area_times_2 = 0;"); // sign(0) below then yields a wind of 0.
+#endif
+    g->codeAppend ("}");
+    g->codeAppendf("%s = sign(area_times_2);", outputWind); // Wind is -1, 0 or +1.
+}
+
+void GrCCPRQuadraticProcessor::onEmitGeometryShader(GrGLSLGeometryBuilder* g,
+                                                    const char* emitVertexFn, const char* wind,
+                                                    const char* rtAdjust) const {
+    // Prepend bezierpts at the start of the shader so code that emitWind appends can read it.
+    g->codePrependf("highp mat3x2 bezierpts = mat3x2(sk_in[0].gl_Position.xy, "
+                                                    "sk_in[1].gl_Position.xy, "
+                                                    "sk_in[2].gl_Position.xy);");
+
+    g->declareGlobal(fCanonicalMatrix);
+    g->codeAppendf("%s = mat3(0.0, 0, 1, " // Maps bezierpts[0..2] to (0,0), (.5,0) and (1,1).
+                             "0.5, 0, 1, "
+                             "1.0, 1, 1) * "
+                        "inverse(mat3(bezierpts[0], 1, "
+                                     "bezierpts[1], 1, "
+                                     "bezierpts[2], 1));",
+                   fCanonicalMatrix.c_str());
+
+    g->declareGlobal(fCanonicalDerivatives);
+    g->codeAppendf("%s = mat2(%s) * mat2(%s.x, 0, 0, %s.z);", // Columns scaled by rtAdjust.xz.
+                   fCanonicalDerivatives.c_str(), fCanonicalMatrix.c_str(), rtAdjust, rtAdjust);
+
+    this->emitQuadraticGeometry(g, emitVertexFn, wind, rtAdjust); // Implemented by subclasses.
+}
+
+void GrCCPRQuadraticProcessor::emitPerVertexGeometryCode(SkString* fnBody, const char* position,
+                                                         const char* /*coverage*/,
+                                                         const char* /*wind*/) const {
+    fnBody->appendf("%s.xy = (%s * vec3(%s, 1)).xy;", // position through canonical_matrix.
+                    fCanonicalCoord.gsOut(), fCanonicalMatrix.c_str(), position);
+    fnBody->appendf("%s.zw = vec2(2 * %s.x * %s[0].x - %s[0].y, " // Gradient of x*x - y.
+                                 "2 * %s.x * %s[1].x - %s[1].y);",
+                    fCanonicalCoord.gsOut(), fCanonicalCoord.gsOut(),
+                    fCanonicalDerivatives.c_str(), fCanonicalDerivatives.c_str(),
+                    fCanonicalCoord.gsOut(), fCanonicalDerivatives.c_str(),
+                    fCanonicalDerivatives.c_str());
+}
+
+void GrCCPRQuadraticProcessor::emitShaderCoverage(GrGLSLFragmentBuilder* f,
+                                                  const char* outputCoverage) const {
+    f->codeAppendf("highp float d = (%s.x * %s.x - %s.y) * inversesqrt(dot(%s.zw, %s.zw));",
+                   fCanonicalCoord.fsIn(), fCanonicalCoord.fsIn(), fCanonicalCoord.fsIn(),
+                   fCanonicalCoord.fsIn(), fCanonicalCoord.fsIn()); // d = (x*x - y) / |zw|.
+    f->codeAppendf("%s = clamp(0.5 - d, 0, 1);", outputCoverage); // 1 at d <= -.5, 0 at d >= .5.
+}
+
+void GrCCPRQuadraticHullProcessor::emitQuadraticGeometry(GrGLSLGeometryBuilder* g,
+                                                         const char* emitVertexFn,
+                                                         const char* wind, // Unused here.
+                                                         const char* rtAdjust) const {
+    // Find the point on the curve whose tangent is halfway between the tangents at the endpoints.
+    // We defined bezierpts in onEmitGeometryShader.
+    g->codeAppend ("highp vec2 n = (normalize(bezierpts[0] - bezierpts[1]) + "
+                                   "normalize(bezierpts[2] - bezierpts[1]));");
+    g->codeAppend ("highp float t = dot(bezierpts[0] - bezierpts[1], n) / "
+                                   "dot(bezierpts[2] - 2 * bezierpts[1] + bezierpts[0], n);");
+    g->codeAppend ("highp vec2 pt = (1 - t) * (1 - t) * bezierpts[0] + "
+                                   "2 * t * (1 - t) * bezierpts[1] + "
+                                   "t * t * bezierpts[2];"); // The curve evaluated at t.
+
+    // Clip the triangle by the tangent line at this halfway point. bloat is declared elsewhere.
+    g->codeAppend ("highp mat2 v = mat2(bezierpts[0] - bezierpts[1], "
+                                       "bezierpts[2] - bezierpts[1]);");
+    g->codeAppend ("highp vec2 nv = n * v;");
+    g->codeAppend ("highp vec2 d = abs(nv[0]) > 0.1 * max(bloat.x, bloat.y) ? "
+                                  "(dot(n, pt - bezierpts[1])) / nv : vec2(0);");
+
+    // Generate a 4-point hull of the curve from the clipped triangle.
+    g->codeAppendf("highp mat4x2 quadratic_hull = mat4x2(bezierpts[0], "
+                                                        "bezierpts[1] + d[0] * v[0], "
+                                                        "bezierpts[1] + d[1] * v[1], "
+                                                        "bezierpts[2]);");
+
+    int maxVerts = this->emitHullGeometry(g, emitVertexFn, "quadratic_hull", 4, "sk_InvocationID");
+
+    g->configure(GrGLSLGeometryBuilder::InputType::kTriangles,
+                 GrGLSLGeometryBuilder::OutputType::kTriangleStrip,
+                 maxVerts, 4);
+}
+
+void GrCCPRQuadraticSharedEdgeProcessor::emitQuadraticGeometry(GrGLSLGeometryBuilder* g,
+                                                               const char* emitVertexFn,
+                                                               const char* wind,
+                                                               const char* rtAdjust) const {
+    // We defined bezierpts in onEmitGeometryShader.
+    g->codeAppendf("int leftidx = %s > 0 ? 2 : 0;", wind); // bezierpts[2] is "left" if wind > 0.
+    g->codeAppendf("highp vec2 left = bezierpts[leftidx];");
+    g->codeAppendf("highp vec2 right = bezierpts[2 - leftidx];"); // The other endpoint.
+    this->emitEdgeDistanceEquation(g, "left", "right", "highp vec3 edge_distance_equation");
+
+    g->declareGlobal(fEdgeDistanceDerivatives);
+    g->codeAppendf("%s = edge_distance_equation.xy * %s.xz;", // Scaled by rtAdjust.xz.
+                   fEdgeDistanceDerivatives.c_str(), rtAdjust);
+
+    int maxVertices = this->emitEdgeGeometry(g, emitVertexFn, "left", "right",
+                                             "edge_distance_equation");
+
+    g->configure(GrGLSLGeometryBuilder::InputType::kTriangles,
+                 GrGLSLGeometryBuilder::OutputType::kTriangleStrip, maxVertices, 1);
+}
+
+void GrCCPRQuadraticSharedEdgeProcessor::emitPerVertexGeometryCode(SkString* fnBody,
+                                                                   const char* position,
+                                                                   const char* coverage,
+                                                                   const char* wind) const {
+    this->INHERITED::emitPerVertexGeometryCode(fnBody, position, coverage, wind); // Base first.
+    fnBody->appendf("%s = %s;", fFragCanonicalDerivatives.gsOut(), fCanonicalDerivatives.c_str());
+    fnBody->appendf("%s.x = %s + 0.5;", fEdgeDistance.gsOut(), coverage); // outer=-.5, inner=+.5.
+    fnBody->appendf("%s.yz = %s;", fEdgeDistance.gsOut(), fEdgeDistanceDerivatives.c_str());
+}
+
+void GrCCPRQuadraticSharedEdgeProcessor::emitShaderCoverage(GrGLSLFragmentBuilder* f,
+                                                            const char* outputCoverage) const {
+    // The base class recomputes the hull coverage; subtract it and add the edge distance + 0.5.
+    this->INHERITED::emitShaderCoverage(f, outputCoverage);
+    f->codeAppendf("%s = %s.x + 0.5 - %s;",
+                   outputCoverage, fEdgeDistance.fsIn(), outputCoverage);
+
+    // Use software msaa to subtract out the remaining pixel coverage that is still inside the
+    // shared edge, but outside the curve.
+    int sampleCount = this->defineSoftSampleLocations(f, "samples");
+
+    f->codeAppendf("highp mat2x3 grad_xyd = mat2x3(%s[0],%s.y, %s[1],%s.z);",
+                   fFragCanonicalDerivatives.fsIn(), fEdgeDistance.fsIn(),
+                   fFragCanonicalDerivatives.fsIn(), fEdgeDistance.fsIn());
+    f->codeAppendf("highp vec3 center_xyd = vec3(%s.xy, %s.x);",
+                   fCanonicalCoord.fsIn(), fEdgeDistance.fsIn()); // Canonical x,y plus edge d.
+
+    f->codeAppendf("for (int i = 0; i < %i; ++i) {", sampleCount); // One pass per sample.
+    f->codeAppend (    "highp vec3 xyd = grad_xyd * samples[i] + center_xyd;");
+    f->codeAppend (    "lowp float f = xyd.x * xyd.x - xyd.y;"); // f > 0 -> outside curve.
+    f->codeAppend (    "bvec2 outside_curve_inside_edge = greaterThan(vec2(f, xyd.z), vec2(0));");
+    f->codeAppendf(    "%s -= all(outside_curve_inside_edge) ? %f : 0;",
+                       outputCoverage, 1.0 / sampleCount); // Each sample weighs 1/sampleCount.
+    f->codeAppendf("}");
+}
diff --git a/src/gpu/ccpr/GrCCPRQuadraticProcessor.h b/src/gpu/ccpr/GrCCPRQuadraticProcessor.h
new file mode 100644
index 0000000..c3e8d17
--- /dev/null
+++ b/src/gpu/ccpr/GrCCPRQuadraticProcessor.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef GrCCPRQuadraticProcessor_DEFINED
+#define GrCCPRQuadraticProcessor_DEFINED
+
+#include "ccpr/GrCCPRCoverageProcessor.h"
+
+/**
+ * This class renders the coverage of closed quadratic curves using the techniques outlined in
+ * "Resolution Independent Curve Rendering using Programmable Graphics Hardware" by Charles Loop and
+ * Jim Blinn:
+ *
+ * https://www.microsoft.com/en-us/research/wp-content/uploads/2005/01/p1000-loop.pdf
+ *
+ * The curves are rendered in two passes, each a subclass implementing emitQuadraticGeometry():
+ *
+ * Pass 1: Draw a conservative raster hull around the quadratic bezier points, and compute the
+ *         curve's coverage using the gradient-based AA technique outlined in the Loop/Blinn paper.
+ *
+ * Pass 2: Touch up and antialias the flat edge from P2 back to P0.
+ */
+class GrCCPRQuadraticProcessor : public GrCCPRCoverageProcessor::PrimitiveProcessor {
+public:
+    GrCCPRQuadraticProcessor()
+            : INHERITED(CoverageType::kShader)
+            , fCanonicalMatrix("canonical_matrix", kMat33f_GrSLType, GrShaderVar::kNonArray,
+                               kHigh_GrSLPrecision)
+            , fCanonicalDerivatives("canonical_derivatives", kMat22f_GrSLType,
+                                    GrShaderVar::kNonArray, kHigh_GrSLPrecision)
+            , fCanonicalCoord(kVec4f_GrSLType) {}
+
+    void resetVaryings(GrGLSLVaryingHandler* varyingHandler) override {
+        varyingHandler->addVarying("canonical_coord", &fCanonicalCoord, kHigh_GrSLPrecision);
+    }
+
+    void onEmitVertexShader(const GrCCPRCoverageProcessor&, GrGLSLVertexBuilder*,
+                            const TexelBufferHandle& pointsBuffer, const char* atlasOffset,
+                            const char* rtAdjust, GrGPArgs*) const override;
+    void emitWind(GrGLSLGeometryBuilder*, const char* rtAdjust, const char* outputWind) const final;
+    void onEmitGeometryShader(GrGLSLGeometryBuilder*, const char* emitVertexFn, const char* wind,
+                              const char* rtAdjust) const final;
+    void emitPerVertexGeometryCode(SkString* fnBody, const char* position, const char* coverage,
+                                   const char* wind) const override;
+    void emitShaderCoverage(GrGLSLFragmentBuilder* f, const char* outputCoverage) const override;
+
+protected:
+    virtual void emitQuadraticGeometry(GrGLSLGeometryBuilder*, const char* emitVertexFn,
+                                       const char* wind, const char* rtAdjust) const = 0;
+
+    GrShaderVar       fCanonicalMatrix; // Maps the control points to (0,0), (.5,0), (1,1).
+    GrShaderVar       fCanonicalDerivatives; // mat2 of fCanonicalMatrix times diag(rtAdjust.xz).
+    GrGLSLGeoToFrag   fCanonicalCoord; // xy: canonical coords; zw: gradient of x*x - y.
+
+    typedef GrCCPRCoverageProcessor::PrimitiveProcessor INHERITED;
+};
+
+class GrCCPRQuadraticHullProcessor : public GrCCPRQuadraticProcessor { // Pass 1: the hull.
+public:
+    void emitQuadraticGeometry(GrGLSLGeometryBuilder*, const char* emitVertexFn,
+                               const char* wind, const char* rtAdjust) const override;
+
+private:
+    typedef GrCCPRQuadraticProcessor INHERITED;
+};
+
+/**
+ * This pass touches up the flat edge (P2 -> P0) of a closed quadratic segment as follows:
+ *
+ *   1) Erase what the previous hull shader estimated for coverage.
+ *   2) Replace coverage with distance to the curve's flat edge (this is necessary when the edge
+ *      is shared and must create a "water-tight" seam).
+ *   3) Use pseudo MSAA to subtract out the remaining pixel coverage that is still inside the flat
+ *      edge, but outside the curve.
+ */
+class GrCCPRQuadraticSharedEdgeProcessor : public GrCCPRQuadraticProcessor {
+public:
+    GrCCPRQuadraticSharedEdgeProcessor()
+            // The base class is default-constructed; only this pass's own variables are set up.
+            : fEdgeDistanceDerivatives("edge_distance_derivatives", kVec2f_GrSLType,
+                                       GrShaderVar::kNonArray, kHigh_GrSLPrecision)
+            , fFragCanonicalDerivatives(kMat22f_GrSLType)
+            , fEdgeDistance(kVec3f_GrSLType) {}
+
+    void resetVaryings(GrGLSLVaryingHandler* varyingHandler) override {
+        this->INHERITED::resetVaryings(varyingHandler);
+        varyingHandler->addFlatVarying("canonical_derivatives", &fFragCanonicalDerivatives,
+                                       kHigh_GrSLPrecision);
+        varyingHandler->addVarying("edge_distance", &fEdgeDistance, kHigh_GrSLPrecision);
+    }
+
+    void emitQuadraticGeometry(GrGLSLGeometryBuilder*, const char* emitVertexFn,
+                               const char* wind, const char* rtAdjust) const override;
+    void emitPerVertexGeometryCode(SkString* fnBody, const char* position, const char* coverage,
+                                   const char* wind) const override;
+    void emitShaderCoverage(GrGLSLFragmentBuilder*, const char* outputCoverage) const override;
+
+private:
+    // Geometry shader global: edge_distance_equation.xy * rtAdjust.xz.
+    GrShaderVar       fEdgeDistanceDerivatives;
+    GrGLSLGeoToFrag   fFragCanonicalDerivatives; // Flat copy of canonical_derivatives.
+    GrGLSLGeoToFrag   fEdgeDistance; // x: coverage + .5; yz: edge distance derivatives.
+
+    typedef GrCCPRQuadraticProcessor INHERITED;
+};
+
+#endif
diff --git a/src/gpu/ccpr/GrCCPRTriangleProcessor.cpp b/src/gpu/ccpr/GrCCPRTriangleProcessor.cpp
new file mode 100644
index 0000000..23f7b14
--- /dev/null
+++ b/src/gpu/ccpr/GrCCPRTriangleProcessor.cpp
@@ -0,0 +1,201 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "GrCCPRTriangleProcessor.h"
+
+#include "glsl/GrGLSLFragmentShaderBuilder.h"
+#include "glsl/GrGLSLGeometryShaderBuilder.h"
+#include "glsl/GrGLSLVertexShaderBuilder.h"
+
+void GrCCPRTriangleProcessor::onEmitVertexShader(const GrCCPRCoverageProcessor& proc,
+                                                 GrGLSLVertexBuilder* v,
+                                                 const TexelBufferHandle& pointsBuffer,
+                                                 const char* atlasOffset, const char* rtAdjust,
+                                                 GrGPArgs* gpArgs) const {
+    v->codeAppend ("highp vec2 self = "); // Finished by the fetch and offset appended below.
+    v->appendTexelFetch(pointsBuffer,
+                        SkStringPrintf("%s[sk_VertexID]", proc.instanceAttrib()).c_str());
+    v->codeAppendf(".xy + %s;", atlasOffset);
+    gpArgs->fPositionVar.set(kVec2f_GrSLType, "self"); // rtAdjust is not used by this stage.
+}
+
+void GrCCPRTriangleProcessor::defineInputVertices(GrGLSLGeometryBuilder* g) const {
+    // Prepend in_vertices, one column per input vertex, at the start of the shader.
+    g->codePrependf("highp mat3x2 in_vertices = mat3x2(sk_in[0].gl_Position.xy, "
+                                                      "sk_in[1].gl_Position.xy, "
+                                                      "sk_in[2].gl_Position.xy);");
+}
+
+void GrCCPRTriangleProcessor::emitWind(GrGLSLGeometryBuilder* g, const char* /*rtAdjust*/,
+                                       const char* outputWind) const {
+    // in_vertices is not declared here; defineInputVertices prepends it. Wind is -1, 0 or +1.
+    g->codeAppendf("%s = sign(determinant(mat2(in_vertices[1] - in_vertices[0], "
+                                              "in_vertices[2] - in_vertices[0])));", outputWind);
+}
+
+void GrCCPRTriangleHullAndEdgeProcessor::onEmitGeometryShader(GrGLSLGeometryBuilder* g,
+                                                              const char* emitVertexFn,
+                                                              const char* wind,
+                                                              const char* rtAdjust) const {
+    this->defineInputVertices(g); // Prepends in_vertices.
+    int maxOutputVertices = 0; // Summed over the geometry types emitted below.
+
+    if (GeometryType::kEdges != fGeometryType) { // kHulls or kHullsAndEdges.
+        maxOutputVertices += this->emitHullGeometry(g, emitVertexFn, "in_vertices", 3,
+                                                    "sk_InvocationID");
+    }
+
+    if (GeometryType::kHulls != fGeometryType) { // kEdges or kHullsAndEdges.
+        g->codeAppend ("int edgeidx0 = sk_InvocationID, "
+                           "edgeidx1 = (edgeidx0 + 1) % 3;"); // Next vertex, wrapping.
+        g->codeAppendf("highp vec2 edgept0 = in_vertices[%s > 0 ? edgeidx0 : edgeidx1];", wind);
+        g->codeAppendf("highp vec2 edgept1 = in_vertices[%s > 0 ? edgeidx1 : edgeidx0];", wind);
+
+        maxOutputVertices += this->emitEdgeGeometry(g, emitVertexFn, "edgept0", "edgept1");
+    }
+
+    g->configure(GrGLSLGeometryBuilder::InputType::kTriangles,
+                 GrGLSLGeometryBuilder::OutputType::kTriangleStrip,
+                 maxOutputVertices, 3);
+}
+
+void GrCCPRTriangleCornerProcessor::onEmitVertexShader(const GrCCPRCoverageProcessor& proc,
+                                                       GrGLSLVertexBuilder* v,
+                                                       const TexelBufferHandle& pointsBuffer,
+                                                       const char* atlasOffset,
+                                                       const char* rtAdjust,
+                                                       GrGPArgs* gpArgs) const {
+    this->INHERITED::onEmitVertexShader(proc, v, pointsBuffer, atlasOffset, rtAdjust, gpArgs);
+
+    // Fetch and transform the next point in the triangle.
+    v->codeAppend ("highp vec2 next = ");
+    v->appendTexelFetch(pointsBuffer,
+                        SkStringPrintf("%s[(sk_VertexID+1) %% 3]", proc.instanceAttrib()).c_str());
+    v->codeAppendf(".xy + %s;", atlasOffset);
+
+    // Find the plane that gives distance from the [self -> next] edge, normalized to its AA
+    // bloat width.
+    v->codeAppend ("highp vec2 n = vec2(next.y - self.y, self.x - next.x);"); // Edge normal.
+    v->codeAppendf("highp vec2 d = n * mat2(self + %f * sign(n), "
+                                           "self - %f * sign(n));", kAABloatRadius, kAABloatRadius);
+
+    // Clamp for when n=0. (wind=0 when n=0, so as long as we don't get Inf or NaN we are fine.)
+    v->codeAppendf("%s.xy = n / max(d[0] - d[1], 1e-30);", fEdgeDistance.vsOut());
+    v->codeAppendf("%s.z = -dot(%s.xy, self);", fEdgeDistance.vsOut(), fEdgeDistance.vsOut());
+
+    // Emit device coords to geo shader.
+    v->codeAppendf("%s = self;", fDevCoord.vsOut());
+}
+
+void GrCCPRTriangleCornerProcessor::onEmitGeometryShader(GrGLSLGeometryBuilder* g,
+                                                         const char* emitVertexFn, const char* wind,
+                                                         const char* rtAdjust) const {
+    this->defineInputVertices(g); // Prepends in_vertices.
+
+    g->codeAppend ("highp vec2 self = in_vertices[sk_InvocationID];"); // This invocation's corner.
+    g->codeAppendf("%s(self + vec2(-bloat.x, -bloat.y), 1);", emitVertexFn); // Box of +/-bloat.
+    g->codeAppendf("%s(self + vec2(-bloat.x, +bloat.y), 1);", emitVertexFn);
+    g->codeAppendf("%s(self + vec2(+bloat.x, -bloat.y), 1);", emitVertexFn);
+    g->codeAppendf("%s(self + vec2(+bloat.x, +bloat.y), 1);", emitVertexFn);
+    g->codeAppend ("EndPrimitive();"); // One 4-vertex strip per corner.
+
+    g->configure(GrGLSLGeometryBuilder::InputType::kTriangles,
+                 GrGLSLGeometryBuilder::OutputType::kTriangleStrip,
+                 4, 3); // 4 vertices out; 3 is presumably the invocation count (TODO confirm).
+}
+
+void GrCCPRTriangleCornerProcessor::emitPerVertexGeometryCode(SkString* fnBody,
+                                                              const char* position,
+                                                              const char* /*coverage*/,
+                                                              const char* wind) const {
+    fnBody->appendf("%s.xy = %s[(sk_InvocationID + 1) %% 3];",
+                    fNeighbors.gsOut(), fDevCoord.gsIn()); // xy = next corner's device coord.
+    fnBody->appendf("%s.zw = %s[(sk_InvocationID + 2) %% 3];",
+                    fNeighbors.gsOut(), fDevCoord.gsIn()); // zw = previous corner's device coord.
+    fnBody->appendf("%s = mat3(%s[(sk_InvocationID + 2) %% 3], "
+                              "%s[sk_InvocationID], "
+                              "%s[(sk_InvocationID + 1) %% 3]) * %s;",
+                    fEdgeDistances.gsOut(), fEdgeDistance.gsIn(), fEdgeDistance.gsIn(),
+                    fEdgeDistance.gsIn(), wind); // [0], [1]: edges at this corner; [2]: opposite.
+
+    // Otherwise, fEdgeDistances = mat3(...) * sign(wind * rtAdjust.x * rtAdjust.z).
+    GR_STATIC_ASSERT(kTopLeft_GrSurfaceOrigin == GrCCPRCoverageProcessor::kAtlasOrigin);
+
+    fnBody->appendf("%s = sk_InvocationID;", fCornerIdx.gsOut());
+}
+
+void GrCCPRTriangleCornerProcessor::emitShaderCoverage(GrGLSLFragmentBuilder* f,
+                                                       const char* outputCoverage) const {
+    // FIXME: Adreno breaks if we don't put the frag coord in an intermediate highp variable.
+    f->codeAppendf("highp vec2 fragcoord = sk_FragCoord.xy;");
+
+    // Approximate coverage by tracking where 4 horizontal lines enter and leave the triangle.
+    GrShaderVar samples("samples", kVec4f_GrSLType, GrShaderVar::kNonArray,
+                        kHigh_GrSLPrecision); // Global so that sampleEdge can read it.
+    f->declareGlobal(samples);
+    f->codeAppendf("%s = fragcoord.y + vec4(-0.375, -0.125, 0.125, 0.375);", samples.c_str());
+
+    GrShaderVar leftedge("leftedge", kVec4f_GrSLType, GrShaderVar::kNonArray,
+                         kHigh_GrSLPrecision);
+    f->declareGlobal(leftedge);
+    f->codeAppendf("%s = vec4(fragcoord.x - 0.5);", leftedge.c_str()); // Pixel's left side.
+
+    GrShaderVar rightedge("rightedge", kVec4f_GrSLType, GrShaderVar::kNonArray,
+                          kHigh_GrSLPrecision);
+    f->declareGlobal(rightedge);
+    f->codeAppendf("%s = vec4(fragcoord.x + 0.5);", rightedge.c_str()); // Pixel's right side.
+
+    SkString sampleEdgeFn;
+    GrShaderVar edgeArg("edge_distance", kVec3f_GrSLType, GrShaderVar::kNonArray,
+                        kHigh_GrSLPrecision);
+    f->emitFunction(kVoid_GrSLType, "sampleEdge", 1, &edgeArg, [&]() {
+        SkString b;
+        b.appendf("highp float m = abs(%s.x) < 1e-3 ? 1e18 : -1 / %s.x;",
+                  edgeArg.c_str(), edgeArg.c_str()); // Huge m when the edge is near horizontal.
+        b.appendf("highp vec4 edge = m * (%s.y * samples + %s.z);",
+                  edgeArg.c_str(), edgeArg.c_str()); // X where the edge crosses each sample line.
+        b.appendf("if (%s.x <= 1e-3 || (abs(%s.x) < 1e-3 && %s.y > 0)) {",
+                  edgeArg.c_str(), edgeArg.c_str(), edgeArg.c_str());
+        b.appendf(    "%s = max(%s, edge);", leftedge.c_str(), leftedge.c_str());
+        b.append ("} else {");
+        b.appendf(    "%s = min(%s, edge);", rightedge.c_str(), rightedge.c_str());
+        b.append ("}");
+        return b;
+    }().c_str(), &sampleEdgeFn); // The lambda builds the function body.
+
+    // See if the previous neighbor already handled this pixel.
+    f->codeAppendf("if (all(lessThan(abs(fragcoord - %s.zw), vec2(%f)))) {",
+                   fNeighbors.fsIn(), kAABloatRadius);
+    // Handle the case where all 3 corners defer to the previous neighbor.
+    f->codeAppendf(    "if (%s != 0 || !all(lessThan(abs(fragcoord - %s.xy), vec2(%f)))) {",
+                       fCornerIdx.fsIn(), fNeighbors.fsIn(), kAABloatRadius);
+    f->codeAppend (        "discard;");
+    f->codeAppend (    "}");
+    f->codeAppend ("}");
+
+    // Erase what the hull and two edges wrote at this corner in previous shaders (the two .5's
+    // for the edges and the -1 for the hull cancel each other out).
+    f->codeAppendf("%s = dot(vec3(fragcoord, 1) * mat2x3(%s), vec2(1));",
+                   outputCoverage, fEdgeDistances.fsIn());
+
+    // Sample the two edges at this corner.
+    f->codeAppendf("%s(%s[0]);", sampleEdgeFn.c_str(), fEdgeDistances.fsIn());
+    f->codeAppendf("%s(%s[1]);", sampleEdgeFn.c_str(), fEdgeDistances.fsIn());
+
+    // Handle the opposite edge if the next neighbor will defer to us.
+    f->codeAppendf("if (all(lessThan(abs(fragcoord - %s.xy), vec2(%f)))) {",
+                   fNeighbors.fsIn(), kAABloatRadius);
+    // Erase the coverage the opposite edge wrote to this corner.
+    f->codeAppendf(    "%s += dot(%s[2], vec3(fragcoord, 1)) + 0.5;",
+                       outputCoverage, fEdgeDistances.fsIn());
+    // Sample the opposite edge.
+    f->codeAppendf(    "%s(%s[2]);", sampleEdgeFn.c_str(), fEdgeDistances.fsIn());
+    f->codeAppend ("}");
+
+    f->codeAppendf("highp vec4 widths = max(%s - %s, 0);", rightedge.c_str(), leftedge.c_str());
+    f->codeAppendf("%s += dot(widths, vec4(0.25));", outputCoverage); // Mean of the 4 widths.
+}
diff --git a/src/gpu/ccpr/GrCCPRTriangleProcessor.h b/src/gpu/ccpr/GrCCPRTriangleProcessor.h
new file mode 100644
index 0000000..1e52d51
--- /dev/null
+++ b/src/gpu/ccpr/GrCCPRTriangleProcessor.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef GrCCPRTriangleProcessor_DEFINED
+#define GrCCPRTriangleProcessor_DEFINED
+
+#include "ccpr/GrCCPRCoverageProcessor.h"
+
+/**
+ * This class renders the coverage of triangles.
+ *
+ * Triangles are rendered in three passes:
+ *
+ * Pass 1: Draw the triangle's conservative raster hull with a coverage of 1. (Conservative raster
+ *         is drawn by considering 3 pixel size boxes, one centered at each vertex, and drawing the
+ *         convex hull of those boxes.)
+ *
+ * Pass 2: Smooth the edges that were over-rendered during Pass 1. Draw the conservative raster of
+ *         each edge (i.e. convex hull of two pixel-size boxes at the endpoints), interpolating from
+ *         coverage=-1 on the outside edge to coverage=0 on the inside edge.
+ *
+ * Pass 3: Touch up the corner pixels to have the correct coverage.
+ */
+class GrCCPRTriangleProcessor : public GrCCPRCoverageProcessor::PrimitiveProcessor {
+public:
+    explicit GrCCPRTriangleProcessor(CoverageType initialCoverage) : INHERITED(initialCoverage) {}
+
+    void onEmitVertexShader(const GrCCPRCoverageProcessor&, GrGLSLVertexBuilder*,
+                            const TexelBufferHandle& pointsBuffer, const char* atlasOffset,
+                            const char* rtAdjust, GrGPArgs*) const override;
+    void emitWind(GrGLSLGeometryBuilder*, const char* rtAdjust, const char* outputWind) const final;
+
+protected:
+    void defineInputVertices(GrGLSLGeometryBuilder*) const; // Prepends the in_vertices mat3x2.
+
+private:
+    typedef GrCCPRCoverageProcessor::PrimitiveProcessor INHERITED;
+};
+
+class GrCCPRTriangleHullAndEdgeProcessor : public GrCCPRTriangleProcessor {
+public:
+    enum class GeometryType {
+        kHulls, // Hull geometry only; constructed with CoverageType::kOne.
+        kEdges, // Edge geometry only; constructed with CoverageType::kInterpolated.
+        kHullsAndEdges // Both geometries from one shader; CoverageType::kInterpolated.
+    };
+
+    explicit GrCCPRTriangleHullAndEdgeProcessor(GeometryType geometryType)
+            : INHERITED(GeometryType::kHulls == geometryType ?
+                        CoverageType::kOne : CoverageType::kInterpolated)
+            , fGeometryType(geometryType) {}
+
+    void onEmitGeometryShader(GrGLSLGeometryBuilder*, const char* emitVertexFn, const char* wind,
+                              const char* rtAdjust) const override;
+
+private:
+    const GeometryType fGeometryType; // Selects which geometry onEmitGeometryShader emits.
+
+    typedef GrCCPRTriangleProcessor INHERITED;
+};
+
+/**
+ * This pass fixes the corner pixels of a triangle. It erases the (incorrect) coverage that was
+ * written at the corners during the previous hull and edge passes, and then approximates the true
+ * coverage by sampling the triangle with horizontal lines.
+ */
+class GrCCPRTriangleCornerProcessor : public GrCCPRTriangleProcessor {
+public:
+    GrCCPRTriangleCornerProcessor()
+            : INHERITED(CoverageType::kShader)
+            , fEdgeDistance(kVec3f_GrSLType)
+            , fDevCoord(kVec2f_GrSLType)
+            , fNeighbors(kVec4f_GrSLType)
+            , fEdgeDistances(kMat33f_GrSLType)
+            , fCornerIdx(kInt_GrSLType) {}
+
+    void resetVaryings(GrGLSLVaryingHandler* varyingHandler) override {
+        this->INHERITED::resetVaryings(varyingHandler);
+        varyingHandler->addFlatVarying("edge_distance", &fEdgeDistance, kHigh_GrSLPrecision);
+        varyingHandler->addFlatVarying("devcoord", &fDevCoord, kHigh_GrSLPrecision);
+        varyingHandler->addFlatVarying("neighbors", &fNeighbors, kHigh_GrSLPrecision);
+        varyingHandler->addFlatVarying("edge_distances", &fEdgeDistances, kHigh_GrSLPrecision);
+        varyingHandler->addFlatVarying("corner_idx", &fCornerIdx, kLow_GrSLPrecision);
+    }
+
+    void onEmitVertexShader(const GrCCPRCoverageProcessor&, GrGLSLVertexBuilder*,
+                            const TexelBufferHandle& pointsBuffer, const char* atlasOffset,
+                            const char* rtAdjust, GrGPArgs*) const override;
+    void onEmitGeometryShader(GrGLSLGeometryBuilder*, const char* emitVertexFn, const char* wind,
+                              const char* rtAdjust) const override;
+    void emitPerVertexGeometryCode(SkString* fnBody, const char* position, const char* coverage,
+                                   const char* wind) const override;
+    void emitShaderCoverage(GrGLSLFragmentBuilder*, const char* outputCoverage) const override;
+
+private:
+    GrGLSLVertToGeo fEdgeDistance; // Per-vertex distance equation of the [self -> next] edge.
+    GrGLSLVertToGeo fDevCoord; // Per-vertex device coordinate ("self").
+    GrGLSLGeoToFrag fNeighbors; // xy/zw: device coords of the next/previous corner.
+    GrGLSLGeoToFrag fEdgeDistances; // mat3 of the three edge equations, multiplied by wind.
+    GrGLSLGeoToFrag fCornerIdx; // sk_InvocationID of the corner being drawn.
+
+    typedef GrCCPRTriangleProcessor INHERITED;
+};
+
+#endif
diff --git a/src/gpu/ccpr/GrCoverageCountingPathRenderer.cpp b/src/gpu/ccpr/GrCoverageCountingPathRenderer.cpp
new file mode 100644
index 0000000..45fad1f
--- /dev/null
+++ b/src/gpu/ccpr/GrCoverageCountingPathRenderer.cpp
@@ -0,0 +1,338 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "GrCoverageCountingPathRenderer.h"
+
+#include "GrCaps.h"
+#include "GrClip.h"
+#include "GrGpu.h"
+#include "GrGpuCommandBuffer.h"
+#include "SkMakeUnique.h"
+#include "SkMatrix.h"
+#include "GrOpFlushState.h"
+#include "GrRenderTargetOpList.h"
+#include "GrStyle.h"
+#include "ccpr/GrCCPRPathProcessor.h"
+
+using DrawPathsOp = GrCoverageCountingPathRenderer::DrawPathsOp;
+using ScissorMode = GrCCPRCoverageOpsBuilder::ScissorMode;
+
+// Reports whether the given caps advertise every feature this path renderer checks for. The
+// checks run in a fixed order and stop at the first one that fails.
+bool GrCoverageCountingPathRenderer::IsSupported(const GrCaps& caps) {
+    const GrShaderCaps& shaders = *caps.shaderCaps();
+
+    // Shader-level requirements.
+    if (!shaders.geometryShaderSupport() ||
+        !shaders.texelBufferSupport() ||
+        !shaders.integerSupport() ||
+        !shaders.flatInterpolationSupport() ||
+        shaders.maxVertexSamplers() < 1) {
+        return false;
+    }
+
+    // Instanced vertex attributes are required as well.
+    if (!caps.instanceAttribSupport()) {
+        return false;
+    }
+
+    // The half-float alpha config must be usable both as a texture and as a non-MSAA render
+    // target.
+    if (!caps.isConfigTexturable(kAlpha_half_GrPixelConfig)) {
+        return false;
+    }
+    return caps.isConfigRenderable(kAlpha_half_GrPixelConfig, /*withMSAA=*/false);
+}
+
+// Factory: returns a new renderer when IsSupported(caps) holds, and a null sk_sp otherwise.
+sk_sp<GrCoverageCountingPathRenderer>
+GrCoverageCountingPathRenderer::CreateIfSupported(const GrCaps& caps) {
+    if (!IsSupported(caps)) {
+        return sk_sp<GrCoverageCountingPathRenderer>(nullptr);
+    }
+    return sk_sp<GrCoverageCountingPathRenderer>(new GrCoverageCountingPathRenderer);
+}
+
+// Accepts only simple, non-inverse fills drawn with coverage AA under a non-perspective view
+// matrix, and only when the path contains no conic segments.
+bool GrCoverageCountingPathRenderer::onCanDrawPath(const CanDrawPathArgs& args) const {
+    const auto& shape = *args.fShape;
+
+    if (!shape.style().isSimpleFill()) {
+        return false;
+    }
+    if (shape.inverseFilled()) {
+        return false;
+    }
+    if (args.fViewMatrix->hasPerspective()) {
+        return false;
+    }
+    if (GrAAType::kCoverage != args.fAAType) {
+        return false;
+    }
+
+    // Reject any path that carries conic weights.
+    SkPath shapePath;
+    shape.asPath(&shapePath);
+    return 0 == SkPathPriv::ConicWeightCnt(shapePath);
+}
+
+// Wraps the path draw in a DrawPathsOp and hands it to the render target context. Always
+// returns true.
+bool GrCoverageCountingPathRenderer::onDrawPath(const DrawPathArgs& args) {
+    SkASSERT(!fFlushing);
+    SkASSERT(!args.fShape->isEmpty());
+
+    // Read the color before building the op: the op's constructor moves the paint out of args.
+    const GrColor paintColor = args.fPaint.getColor();
+    auto drawOp = skstd::make_unique<DrawPathsOp>(this, args, paintColor);
+    args.fRenderTargetContext->addDrawOp(*args.fClip, std::move(drawOp));
+
+    return true;
+}
+
+// Builds an op holding a single path draw. The paint in args is moved into fProcessors, which
+// is why the color arrives as a separate argument.
+GrCoverageCountingPathRenderer::DrawPathsOp::DrawPathsOp(GrCoverageCountingPathRenderer* ccpr,
+                                                         const DrawPathArgs& args, GrColor color)
+        : INHERITED(ClassID())
+        , fCCPR(ccpr)
+        , fSRGBFlags(GrPipeline::SRGBFlagsFromPaint(args.fPaint))
+        , fProcessors(std::move(args.fPaint))
+        , fTailDraw(&fHeadDraw)
+        , fOwningRTPendingOps(nullptr) {
+    SkDEBUGCODE(fBaseInstance = -1);
+    SkDEBUGCODE(fDebugInstanceCount = 1;)
+
+    // Bounds of the shape after mapping through the view matrix.
+    SkRect shapeDevBounds;
+    args.fViewMatrix->mapRect(&shapeDevBounds, args.fShape->bounds());
+
+    // Populate the one draw this op starts out with.
+    SingleDraw& head = fHeadDraw;
+
+    GrRenderTargetContext* const target = args.fRenderTargetContext;
+    args.fClip->getConservativeBounds(target->width(), target->height(), &head.fClipBounds,
+                                      nullptr);
+
+    // The draw only needs a scissor when the clip bounds do not fully contain the shape.
+    if (head.fClipBounds.contains(shapeDevBounds)) {
+        head.fScissorMode = ScissorMode::kNonScissored;
+    } else {
+        head.fScissorMode = ScissorMode::kScissored;
+    }
+
+    head.fMatrix = *args.fViewMatrix;
+    args.fShape->asPath(&head.fPath);
+    head.fColor = color; // args.fPaint.getColor() is off limits here: the paint was moved from.
+
+    // FIXME: intersect with clip bounds to (hopefully) improve batching.
+    // (This is nontrivial due to assumptions in generating the octagon cover geometry.)
+    this->setBounds(shapeDevBounds, GrOp::HasAABloat::kYes, GrOp::IsZeroArea::kNo);
+}
+
+// Finalizes the processor set against the op's single draw. The draw's color is passed both as
+// the input color and as the location for the finalized color to be written back to.
+GrDrawOp::RequiresDstTexture DrawPathsOp::finalize(const GrCaps& caps, const GrAppliedClip* clip) {
+    SingleDraw& draw = this->getOnlyPathDraw();
+    GrColor* const colorInOut = &draw.fColor;
+
+    GrProcessorSet::Analysis analysis =
+            fProcessors.finalize(*colorInOut, GrProcessorAnalysisCoverage::kSingleChannel, clip,
+                                 false, caps, colorInOut);
+
+    if (analysis.requiresDstTexture()) {
+        return RequiresDstTexture::kYes;
+    }
+    return RequiresDstTexture::kNo;
+}
+
+// Attempts to absorb the draws of another DrawPathsOp into this one. Returns false (leaving both
+// ops untouched) when the fill types, sRGB flags, or processor sets differ; otherwise appends the
+// other op's draw list to this op's list and returns true.
+bool DrawPathsOp::onCombineIfPossible(GrOp* op, const GrCaps& caps) {
+    DrawPathsOp* that = op->cast<DrawPathsOp>();
+    SkASSERT(fCCPR == that->fCCPR);
+    SkASSERT(fOwningRTPendingOps);
+    SkASSERT(fDebugInstanceCount);
+    SkASSERT(that->fDebugInstanceCount);
+
+    if (this->getFillType() != that->getFillType() ||
+        fSRGBFlags != that->fSRGBFlags ||
+        fProcessors != that->fProcessors) {
+        return false;
+    }
+
+    if (RTPendingOps* owningRTPendingOps = that->fOwningRTPendingOps) {
+        // The other op was already recorded (into the same pending-ops entry as this one), so
+        // its path items are already counted. It just needs to leave the pending op list.
+        SkASSERT(owningRTPendingOps == fOwningRTPendingOps);
+        owningRTPendingOps->fOpList.remove(that);
+    } else {
+        // wasRecorded is not called when the op gets combined first. Count path items here instead.
+        SingleDraw& onlyDraw = that->getOnlyPathDraw();
+        fOwningRTPendingOps->fMaxBufferItems.countPathItems(onlyDraw.fScissorMode, onlyDraw.fPath);
+    }
+
+    // Copy the other op's head draw into the pending-ops allocator and link it after our tail.
+    // The copy keeps the other head's fNext pointer, so any draws it had chained come along.
+    fTailDraw->fNext = &fOwningRTPendingOps->fDrawsAllocator.push_back(that->fHeadDraw);
+    // If the other op held only its head draw, the copy is the new tail; otherwise the other
+    // op's existing tail is.
+    fTailDraw = that->fTailDraw == &that->fHeadDraw ? fTailDraw->fNext : that->fTailDraw;
+
+    this->joinBounds(*that);
+
+    // Debug-only bookkeeping: the absorbed op no longer accounts for any instances.
+    SkDEBUGCODE(fDebugInstanceCount += that->fDebugInstanceCount;)
+    SkDEBUGCODE(that->fDebugInstanceCount = 0);
+    return true;
+}
+
+// Called once the op has been recorded into an op list. Registers the op with the renderer's
+// pending-ops entry for that op list (creating the entry if needed) and counts the buffer items
+// its single path will require.
+void DrawPathsOp::wasRecorded(GrRenderTargetOpList* opList) {
+    SkASSERT(!fOwningRTPendingOps);
+    SingleDraw& draw = this->getOnlyPathDraw();
+
+    RTPendingOps& pending = fCCPR->fRTPendingOpsMap[opList->uniqueID()];
+    fOwningRTPendingOps = &pending;
+
+    pending.fOpList.addToTail(this);
+    pending.fMaxBufferItems.countPathItems(draw.fScissorMode, draw.fPath);
+}
+
+// Runs at the start of a flush. Gathers the pending DrawPathsOps for the op lists being flushed,
+// fills the per-flush path instance buffer, places every path in a coverage count atlas, and
+// appends the render target context of each successfully finalized atlas to 'results'.
+//
+// onFlushRP     - provider used to create the per-flush buffers and finalize atlases.
+// opListIDs     - unique IDs of the op lists taking part in this flush.
+// numOpListIDs  - number of entries in opListIDs.
+// results       - receives the atlas render target contexts that need to be rendered.
+//
+// On an allocation or mapping failure a warning is logged and the method returns early. In that
+// case fPerFlushInstanceBuffer is left null, which DrawPathsOp::onExecute treats as "setup
+// failed".
+void GrCoverageCountingPathRenderer::preFlush(GrOnFlushResourceProvider* onFlushRP,
+                                              const uint32_t* opListIDs, int numOpListIDs,
+                                              SkTArray<sk_sp<GrRenderTargetContext>>* results) {
+    using PathInstance = GrCCPRPathProcessor::Instance;
+
+    SkASSERT(!fPerFlushIndexBuffer);
+    SkASSERT(!fPerFlushVertexBuffer);
+    SkASSERT(!fPerFlushInstanceBuffer);
+    SkASSERT(fPerFlushAtlases.empty());
+    SkASSERT(!fFlushing);
+    SkDEBUGCODE(fFlushing = true;)
+
+    if (fRTPendingOpsMap.empty()) {
+        return; // Nothing to draw.
+    }
+
+    SkTInternalLList<DrawPathsOp> flushingOps;
+    GrCCPRCoverageOpsBuilder::MaxBufferItems maxBufferItems;
+
+    // Collect the pending ops, and their combined buffer requirements, for the flushing op lists.
+    for (int i = 0; i < numOpListIDs; ++i) {
+        auto it = fRTPendingOpsMap.find(opListIDs[i]);
+        if (fRTPendingOpsMap.end() != it) {
+            RTPendingOps& rtPendingOps = it->second;
+            SkASSERT(!rtPendingOps.fOpList.isEmpty());
+            flushingOps.concat(std::move(rtPendingOps.fOpList));
+            maxBufferItems += rtPendingOps.fMaxBufferItems;
+        }
+    }
+
+    SkASSERT(flushingOps.isEmpty() == !maxBufferItems.fMaxPaths);
+    if (flushingOps.isEmpty()) {
+        return; // Still nothing to draw.
+    }
+
+    fPerFlushIndexBuffer = GrCCPRPathProcessor::FindOrMakeIndexBuffer(onFlushRP);
+    if (!fPerFlushIndexBuffer) {
+        SkDebugf("WARNING: failed to allocate ccpr path index buffer.\n");
+        return;
+    }
+
+    fPerFlushVertexBuffer = GrCCPRPathProcessor::FindOrMakeVertexBuffer(onFlushRP);
+    if (!fPerFlushVertexBuffer) {
+        SkDebugf("WARNING: failed to allocate ccpr path vertex buffer.\n");
+        return;
+    }
+
+    GrCCPRCoverageOpsBuilder atlasOpsBuilder;
+    if (!atlasOpsBuilder.init(onFlushRP, maxBufferItems)) {
+        SkDebugf("WARNING: failed to allocate buffers for coverage ops. No paths will be drawn.\n");
+        return;
+    }
+
+    fPerFlushInstanceBuffer = onFlushRP->makeBuffer(kVertex_GrBufferType,
+                                                   maxBufferItems.fMaxPaths * sizeof(PathInstance));
+    if (!fPerFlushInstanceBuffer) {
+        SkDebugf("WARNING: failed to allocate path instance buffer. No paths will be drawn.\n");
+        return;
+    }
+
+    PathInstance* pathInstanceData = static_cast<PathInstance*>(fPerFlushInstanceBuffer->map());
+    if (!pathInstanceData) {
+        // Mapping can fail in release builds too. Drop the buffer so that onExecute sees the
+        // setup as failed instead of this method writing through a null pointer.
+        SkDebugf("WARNING: failed to map path instance buffer. No paths will be drawn.\n");
+        fPerFlushInstanceBuffer.reset();
+        return;
+    }
+    int pathInstanceIdx = 0;
+
+    // Atlas currently being filled. Stays null until the first path that survives clipping.
+    GrCCPRAtlas* atlas = nullptr;
+    SkDEBUGCODE(int skippedPaths = 0;)
+
+    SkTInternalLList<DrawPathsOp>::Iter iter;
+    iter.init(flushingOps, SkTInternalLList<DrawPathsOp>::Iter::kHead_IterStart);
+    while (DrawPathsOp* op = iter.get()) {
+        SkASSERT(op->fDebugInstanceCount > 0);
+        SkASSERT(-1 == op->fBaseInstance);
+        op->fBaseInstance = pathInstanceIdx;
+
+        for (const DrawPathsOp::SingleDraw* draw = &op->fHeadDraw; draw; draw = draw->fNext) {
+            // parsePath gives us two tight bounding boxes: one in device space, as well as a second
+            // one rotated an additional 45 degrees. The path vertex shader uses these two bounding
+            // boxes to generate an octagon that circumscribes the path.
+            SkRect devBounds, devBounds45;
+            atlasOpsBuilder.parsePath(draw->fScissorMode, draw->fMatrix, draw->fPath, &devBounds,
+                                      &devBounds45);
+
+            // A scissored path that falls entirely outside its clip bounds is skipped.
+            SkRect clippedDevBounds = devBounds;
+            if (ScissorMode::kScissored == draw->fScissorMode &&
+                !clippedDevBounds.intersect(devBounds, SkRect::Make(draw->fClipBounds))) {
+                SkDEBUGCODE(--op->fDebugInstanceCount);
+                SkDEBUGCODE(++skippedPaths;)
+                continue;
+            }
+
+            SkIRect clippedDevIBounds;
+            clippedDevBounds.roundOut(&clippedDevIBounds);
+            const int h = clippedDevIBounds.height(), w = clippedDevIBounds.width();
+
+            SkIPoint16 atlasLocation;
+            if (atlas && !atlas->addRect(w, h, &atlasLocation)) {
+                // The atlas is out of room and can't grow any bigger.
+                auto atlasOp = atlasOpsBuilder.createIntermediateOp(atlas->drawBounds());
+                if (auto rtc = atlas->finalize(onFlushRP, std::move(atlasOp))) {
+                    results->push_back(std::move(rtc));
+                }
+                // Close out the instances this op has already placed in the full atlas.
+                if (pathInstanceIdx > op->fBaseInstance) {
+                    op->addAtlasBatch(atlas, pathInstanceIdx);
+                }
+                atlas = nullptr;
+            }
+
+            if (!atlas) {
+                atlas = &fPerFlushAtlases.emplace_back(*onFlushRP->caps(), w, h);
+                SkAssertResult(atlas->addRect(w, h, &atlasLocation));
+            }
+
+            const SkMatrix& m = draw->fMatrix;
+            const int16_t offsetX = atlasLocation.x() - static_cast<int16_t>(clippedDevIBounds.x()),
+                          offsetY = atlasLocation.y() - static_cast<int16_t>(clippedDevIBounds.y());
+
+            pathInstanceData[pathInstanceIdx++] = {
+                devBounds,
+                devBounds45,
+                {{m.getScaleX(), m.getSkewY(), m.getSkewX(), m.getScaleY()}},
+                {{m.getTranslateX(), m.getTranslateY()}},
+                {{offsetX, offsetY}},
+                draw->fColor
+            };
+
+            atlasOpsBuilder.saveParsedPath(clippedDevIBounds, offsetX, offsetY);
+        }
+
+        SkASSERT(pathInstanceIdx == op->fBaseInstance + op->fDebugInstanceCount);
+        // If every draw in this op was clipped out there is nothing to batch. Recording a batch
+        // anyway would create a zero-instance batch and, when no path has been placed yet, one
+        // that points at a null atlas. An instance past fBaseInstance implies a non-null atlas.
+        if (pathInstanceIdx > op->fBaseInstance) {
+            SkASSERT(atlas);
+            op->addAtlasBatch(atlas, pathInstanceIdx);
+        }
+
+        iter.next();
+    }
+
+    SkASSERT(pathInstanceIdx == maxBufferItems.fMaxPaths - skippedPaths);
+    fPerFlushInstanceBuffer->unmap();
+
+    // 'atlas' is still null when every path in the flush was clipped out. In that case there is
+    // no coverage to render and nothing to finalize.
+    if (atlas) {
+        std::unique_ptr<GrDrawOp> atlasOp = atlasOpsBuilder.finalize(atlas->drawBounds());
+        if (auto rtc = atlas->finalize(onFlushRP, std::move(atlasOp))) {
+            results->push_back(std::move(rtc));
+        }
+    }
+
+    // Erase these last, once we are done accessing data from the SingleDraw allocators.
+    // NOTE(review): the early returns above skip this cleanup, so the flushed entries stay in
+    // the map after a failed setup -- confirm whether that is intended.
+    for (int i = 0; i < numOpListIDs; ++i) {
+        fRTPendingOpsMap.erase(opListIDs[i]);
+    }
+}
+
+// Issues one instanced draw per atlas batch recorded for this op during preFlush. Does nothing
+// when the renderer's per-flush instance buffer is missing, and skips any batch whose atlas has
+// no texture proxy.
+void DrawPathsOp::onExecute(GrOpFlushState* flushState) {
+    SkASSERT(fCCPR->fFlushing);
+
+    if (!fCCPR->fPerFlushInstanceBuffer) {
+        return; // Setup failed.
+    }
+
+    // One pipeline is shared by every batch of this op.
+    GrPipeline::InitArgs initArgs;
+    initArgs.fFlags = fSRGBFlags;
+    initArgs.fProcessors = &fProcessors;
+    initArgs.fCaps = &flushState->caps();
+    initArgs.fAppliedClip = flushState->drawOpArgs().fAppliedClip;
+    initArgs.fRenderTarget = flushState->drawOpArgs().fRenderTarget;
+    initArgs.fDstProxy = flushState->drawOpArgs().fDstProxy;
+
+    GrPipeline pipeline;
+    pipeline.init(initArgs);
+
+    // Each batch covers the instances [firstInstance, batch.fEndInstanceIdx).
+    int firstInstance = fBaseInstance;
+
+    for (int i = 0; i < fAtlasBatches.count(); ++i) {
+        const AtlasBatch& batch = fAtlasBatches[i];
+        const int endInstance = batch.fEndInstanceIdx;
+        SkASSERT(endInstance > firstInstance);
+
+        // A batch whose atlas failed to allocate is not drawn, but its instances are still
+        // stepped over below.
+        if (batch.fAtlas->textureProxy()) {
+            GrCCPRPathProcessor coverProc(flushState->resourceProvider(),
+                                          batch.fAtlas->textureProxy(), this->getFillType(),
+                                          *flushState->gpu()->caps()->shaderCaps());
+
+            GrMesh mesh(GrPrimitiveType::kTriangles);
+            mesh.setIndexedInstanced(fCCPR->fPerFlushIndexBuffer.get(),
+                                     GrCCPRPathProcessor::kPerInstanceIndexCount,
+                                     fCCPR->fPerFlushInstanceBuffer.get(),
+                                     endInstance - firstInstance, firstInstance);
+            mesh.setVertexData(fCCPR->fPerFlushVertexBuffer.get());
+
+            flushState->commandBuffer()->draw(pipeline, coverProc, &mesh, nullptr, 1,
+                                              this->bounds());
+        }
+
+        firstInstance = endInstance;
+    }
+
+    SkASSERT(firstInstance == fBaseInstance + fDebugInstanceCount);
+}
+
+// Runs at the end of a flush. Drops the per-flush atlases and GPU buffers so that the next
+// preFlush starts from a clean state (preFlush asserts that all of these are empty).
+void GrCoverageCountingPathRenderer::postFlush() {
+    SkASSERT(fFlushing);
+    fPerFlushAtlases.reset();
+    fPerFlushInstanceBuffer.reset();
+    fPerFlushVertexBuffer.reset();
+    fPerFlushIndexBuffer.reset();
+    // Debug-only flag; pairs with the assignment at the top of preFlush.
+    SkDEBUGCODE(fFlushing = false;)
+}
diff --git a/src/gpu/ccpr/GrCoverageCountingPathRenderer.h b/src/gpu/ccpr/GrCoverageCountingPathRenderer.h
new file mode 100644
index 0000000..f55d0e1
--- /dev/null
+++ b/src/gpu/ccpr/GrCoverageCountingPathRenderer.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef GrCoverageCountingPathRenderer_DEFINED
+#define GrCoverageCountingPathRenderer_DEFINED
+
+#include "GrAllocator.h"
+#include "GrOnFlushResourceProvider.h"
+#include "GrPathRenderer.h"
+#include "SkTInternalLList.h"
+#include "ccpr/GrCCPRAtlas.h"
+#include "ccpr/GrCCPRCoverageOpsBuilder.h"
+#include "ops/GrDrawOp.h"
+#include <map>
+
+/**
+ * This is a path renderer that draws antialiased paths by counting coverage in an offscreen
+ * buffer. (See GrCCPRCoverageProcessor, GrCCPRPathProcessor)
+ *
+ * It also serves as the per-render-target tracker for pending path draws, and at the start of
+ * flush, it compiles GPU buffers and renders a "coverage count atlas" for the upcoming paths.
+ */
+class GrCoverageCountingPathRenderer
+    : public GrPathRenderer
+    , public GrOnFlushCallbackObject {
+
+    struct RTPendingOps;
+
+public:
+    // Returns true when the caps report every feature this renderer checks for.
+    static bool IsSupported(const GrCaps&);
+    // Returns a new renderer when IsSupported() holds for the caps, otherwise null.
+    static sk_sp<GrCoverageCountingPathRenderer> CreateIfSupported(const GrCaps&);
+
+    // GrPathRenderer overrides.
+    StencilSupport onGetStencilSupport(const GrShape&) const override {
+        return GrPathRenderer::kNoSupport_StencilSupport;
+    }
+    bool onCanDrawPath(const CanDrawPathArgs& args) const override;
+    bool onDrawPath(const DrawPathArgs&) final;
+
+    // GrOnFlushCallbackObject overrides.
+    void preFlush(GrOnFlushResourceProvider*, const uint32_t* opListIDs, int numOpListIDs,
+                  SkTArray<sk_sp<GrRenderTargetContext>>* results) override;
+    void postFlush() override;
+
+    // This is the Op that ultimately draws a path into its final destination, using the atlas we
+    // generate at flush time.
+    class DrawPathsOp : public GrDrawOp {
+    public:
+        DEFINE_OP_CLASS_ID
+        SK_DECLARE_INTERNAL_LLIST_INTERFACE(DrawPathsOp);
+
+        // The color is passed separately because the constructor moves the paint out of the
+        // DrawPathArgs.
+        DrawPathsOp(GrCoverageCountingPathRenderer*, const DrawPathArgs&, GrColor);
+
+        // GrDrawOp overrides.
+        const char* name() const override { return "GrCoverageCountingPathRenderer::DrawPathsOp"; }
+        FixedFunctionFlags fixedFunctionFlags() const override { return FixedFunctionFlags::kNone; }
+        RequiresDstTexture finalize(const GrCaps&, const GrAppliedClip*) override;
+        void wasRecorded(GrRenderTargetOpList*) override;
+        bool onCombineIfPossible(GrOp* other, const GrCaps& caps) override;
+        void onPrepare(GrOpFlushState*) override {}
+        void onExecute(GrOpFlushState*) override;
+
+    private:
+        // Fill type of the head draw's path. Ops are only combined when their fill types match,
+        // so this stands for every draw in the op.
+        SkPath::FillType getFillType() const {
+            SkASSERT(fDebugInstanceCount >= 1);
+            return fHeadDraw.fPath.getFillType();
+        }
+
+        // One path draw. Draws belonging to the same op form a singly linked list through fNext.
+        struct SingleDraw  {
+            using ScissorMode = GrCCPRCoverageOpsBuilder::ScissorMode;
+            SkIRect       fClipBounds;
+            ScissorMode   fScissorMode;
+            SkMatrix      fMatrix;
+            SkPath        fPath;
+            GrColor       fColor;
+            SingleDraw*   fNext = nullptr;
+        };
+
+        // Returns the head draw; only valid while the op still holds exactly one draw.
+        SingleDraw& getOnlyPathDraw() {
+            SkASSERT(&fHeadDraw == fTailDraw);
+            SkASSERT(1 == fDebugInstanceCount);
+            return fHeadDraw;
+        }
+
+        // A run of this op's path instances that all live in the same atlas. The run starts
+        // where the previous batch ended (or at fBaseInstance) and stops before fEndInstanceIdx.
+        struct AtlasBatch {
+            const GrCCPRAtlas*   fAtlas;
+            int                  fEndInstanceIdx;
+        };
+
+        // Appends a batch. The end index must lie beyond fBaseInstance and beyond the end of the
+        // previously added batch.
+        void addAtlasBatch(const GrCCPRAtlas* atlas, int endInstanceIdx) {
+            SkASSERT(endInstanceIdx > fBaseInstance);
+            SkASSERT(fAtlasBatches.empty() ||
+                     endInstanceIdx > fAtlasBatches.back().fEndInstanceIdx);
+            fAtlasBatches.push_back() = {atlas, endInstanceIdx};
+        }
+
+        GrCoverageCountingPathRenderer* const   fCCPR;
+        const uint32_t                          fSRGBFlags;
+        GrProcessorSet                          fProcessors;
+        // First draw, stored inline; draws added by combining live in the owning RTPendingOps.
+        SingleDraw                              fHeadDraw;
+        SingleDraw*                             fTailDraw;
+        // Set by wasRecorded(); null until then.
+        RTPendingOps*                           fOwningRTPendingOps;
+        // Index of this op's first instance in the per-flush instance buffer; assigned in
+        // preFlush. Only initialized (to -1) at construction in debug builds.
+        int                                     fBaseInstance;
+        SkDEBUGCODE(int                         fDebugInstanceCount;)
+        SkSTArray<1, AtlasBatch, true>          fAtlasBatches;
+
+        friend class GrCoverageCountingPathRenderer;
+
+        typedef GrDrawOp INHERITED;
+    };
+
+private:
+    // Instances are created through CreateIfSupported().
+    GrCoverageCountingPathRenderer() = default;
+
+    // Pending path ops for a single op list, plus the storage and buffer-size totals they need.
+    struct RTPendingOps {
+        SkTInternalLList<DrawPathsOp>                 fOpList;
+        GrCCPRCoverageOpsBuilder::MaxBufferItems      fMaxBufferItems;
+        GrSTAllocator<256, DrawPathsOp::SingleDraw>   fDrawsAllocator;
+    };
+
+    // Map from render target ID to the individual render target's pending path ops.
+    std::map<uint32_t, RTPendingOps>   fRTPendingOpsMap;
+
+    // Per-flush state: populated in preFlush() and released in postFlush().
+    sk_sp<GrBuffer>                    fPerFlushIndexBuffer;
+    sk_sp<GrBuffer>                    fPerFlushVertexBuffer;
+    sk_sp<GrBuffer>                    fPerFlushInstanceBuffer;
+    GrSTAllocator<4, GrCCPRAtlas>      fPerFlushAtlases;
+    SkDEBUGCODE(bool                   fFlushing = false;)
+};
+
+#endif
diff --git a/tools/flags/SkCommonFlagsPathRenderer.h b/tools/flags/SkCommonFlagsPathRenderer.h
index 12755dc..ac293a1 100644
--- a/tools/flags/SkCommonFlagsPathRenderer.h
+++ b/tools/flags/SkCommonFlagsPathRenderer.h
@@ -40,6 +40,8 @@
         return GpuPathRenderers::kAALinearizing;
     } else if (!strcmp(name, "small")) {
         return GpuPathRenderers::kSmall;
+    } else if (!strcmp(name, "ccpr")) {
+        return GpuPathRenderers::kCoverageCounting;
     } else if (!strcmp(name, "tess")) {
         return GpuPathRenderers::kTessellating;
     } else if (!strcmp(name, "grdefault")) {
diff --git a/tools/viewer/Viewer.cpp b/tools/viewer/Viewer.cpp
index 29e5b70..6753b6b 100644
--- a/tools/viewer/Viewer.cpp
+++ b/tools/viewer/Viewer.cpp
@@ -37,6 +37,8 @@
 
 #include "imgui.h"
 
+#include "ccpr/GrCoverageCountingPathRenderer.h"
+
 #include <stdlib.h>
 #include <map>
 
@@ -262,6 +264,7 @@
         gPathRendererNames[GpuPathRenderers::kStencilAndCover] = "NV_path_rendering";
         gPathRendererNames[GpuPathRenderers::kMSAA] = "Sample shading";
         gPathRendererNames[GpuPathRenderers::kSmall] = "Small paths (cached sdf or alpha masks)";
+        gPathRendererNames[GpuPathRenderers::kCoverageCounting] = "Coverage counting";
         gPathRendererNames[GpuPathRenderers::kTessellating] = "Tessellating";
         gPathRendererNames[GpuPathRenderers::kDefault] = "Original Ganesh path renderer";
         gPathRendererNames[GpuPathRenderers::kNone] = "Software masks";
@@ -1069,6 +1072,9 @@
                         prButton(GpuPathRenderers::kNone);
                     } else {
                         prButton(GpuPathRenderers::kAll);
+                        if (GrCoverageCountingPathRenderer::IsSupported(*ctx->caps())) {
+                            prButton(GpuPathRenderers::kCoverageCounting);
+                        }
                         prButton(GpuPathRenderers::kSmall);
                         prButton(GpuPathRenderers::kTessellating);
                         prButton(GpuPathRenderers::kNone);
@@ -1305,6 +1311,9 @@
         prState[kOptions].append(gPathRendererNames[GpuPathRenderers::kNone]);
     } else {
         prState[kOptions].append(gPathRendererNames[GpuPathRenderers::kAll]);
+        if (GrCoverageCountingPathRenderer::IsSupported(*ctx->caps())) {
+            prState[kOptions].append(gPathRendererNames[GpuPathRenderers::kCoverageCounting]);
+        }
         prState[kOptions].append(gPathRendererNames[GpuPathRenderers::kSmall]);
         prState[kOptions].append(gPathRendererNames[GpuPathRenderers::kTessellating]);
         prState[kOptions].append(gPathRendererNames[GpuPathRenderers::kNone]);