Move DstCopy on gpu into the GrXferProcessor.

BUG=skia:

Review URL: https://codereview.chromium.org/885923002
diff --git a/gyp/gpu.gypi b/gyp/gpu.gypi
index 1fbbec8..2036b89 100644
--- a/gyp/gpu.gypi
+++ b/gyp/gpu.gypi
@@ -175,6 +175,7 @@
       '<(skia_src_path)/gpu/GrTextureAccess.cpp',
       '<(skia_src_path)/gpu/GrTRecorder.h',
       '<(skia_src_path)/gpu/GrVertexBuffer.h',
+      '<(skia_src_path)/gpu/GrXferProcessor.cpp',
 
       '<(skia_src_path)/gpu/effects/Gr1DKernelEffect.h',
       '<(skia_src_path)/gpu/effects/GrConfigConversionEffect.cpp',
@@ -274,6 +275,8 @@
       '<(skia_src_path)/gpu/gl/GrGLVertexArray.h',
       '<(skia_src_path)/gpu/gl/GrGLVertexBuffer.cpp',
       '<(skia_src_path)/gpu/gl/GrGLVertexBuffer.h',
+      '<(skia_src_path)/gpu/gl/GrGLXferProcessor.cpp',
+      '<(skia_src_path)/gpu/gl/GrGLXferProcessor.h',
 
       # Files for building GLSL shaders
       '<(skia_src_path)/gpu/gl/builders/GrGLProgramBuilder.cpp',
diff --git a/include/gpu/GrXferProcessor.h b/include/gpu/GrXferProcessor.h
index 13e4d26..929eb6b 100644
--- a/include/gpu/GrXferProcessor.h
+++ b/include/gpu/GrXferProcessor.h
@@ -10,6 +10,7 @@
 
 #include "GrColor.h"
 #include "GrProcessor.h"
+#include "GrTexture.h"
 #include "GrTypes.h"
 #include "SkXfermode.h"
 
@@ -34,11 +35,10 @@
 class GrXferProcessor : public GrProcessor {
 public:
     /**
-     * Sets a unique key on the GrProcessorKeyBuilder that is directly associated with this xfer
-     * processor's GL backend implementation.
-     */
-    virtual void getGLProcessorKey(const GrGLCaps& caps,
-                                   GrProcessorKeyBuilder* b) const = 0;
+     * Sets a unique key on the GrProcessorKeyBuilder calls onGetGLProcessorKey(...) to get the
+     * specific subclass's key.
+     */ 
+    void getGLProcessorKey(const GrGLCaps& caps, GrProcessorKeyBuilder* b) const;
 
     /** Returns a new instance of the appropriate *GL* implementation class
         for the given GrXferProcessor; caller is responsible for deleting
@@ -103,9 +103,21 @@
 
     virtual void getBlendInfo(BlendInfo* blendInfo) const = 0;
 
-    /** Will this prceossor read the destination pixel value? */
     bool willReadDstColor() const { return fWillReadDstColor; }
 
+    /**
+     * Returns the texture to be used as the destination when reading the dst in the fragment
+     * shader. If the returned texture is NULL then the XP is either not reading the dst or we have
+     * extentions that support framebuffer fetching and thus don't need a copy of the dst texture.
+     */
+    const GrTexture* getDstCopyTexture() const { return fDstCopy.getTexture(); }
+
+    /**
+     * Returns the offset into the DstCopyTexture to use when reading it in the shader. This value
+     * is only valid if getDstCopyTexture() != NULL.
+     */
+    const SkIPoint& dstCopyTextureOffset() const { return fDstCopyTextureOffset; }
+
     /** 
      * Returns whether or not this xferProcossor will set a secondary output to be used with dual
      * source blending.
@@ -123,29 +135,43 @@
         if (this->classID() != that.classID()) {
             return false;
         }
+        if (this->fWillReadDstColor != that.fWillReadDstColor) {
+            return false;
+        }
+        if (this->fDstCopy.getTexture() != that.fDstCopy.getTexture()) {
+            return false;
+        }
+        if (this->fDstCopyTextureOffset != that.fDstCopyTextureOffset) {
+            return false;
+        }
         return this->onIsEqual(that);
     }
    
 protected:
-    GrXferProcessor() : fWillReadDstColor(false) {}
-
-    /**
-     * If the prceossor subclass will read the destination pixel value then it must call this
-     * function from its constructor. Otherwise, when its generated backend-specific prceossor class
-     * attempts to generate code that reads the destination pixel it will fail.
-     */
-    void setWillReadDstColor() { fWillReadDstColor = true; }
+    GrXferProcessor();
+    GrXferProcessor(const GrDeviceCoordTexture* dstCopy, bool willReadDstColor);
 
 private:
+    /**
+     * Sets a unique key on the GrProcessorKeyBuilder that is directly associated with this xfer
+     * processor's GL backend implementation.
+     */
+    virtual void onGetGLProcessorKey(const GrGLCaps& caps,
+                                     GrProcessorKeyBuilder* b) const = 0;
+
     virtual bool onIsEqual(const GrXferProcessor&) const = 0;
 
-    bool         fWillReadDstColor;
+    bool                    fWillReadDstColor;
+    SkIPoint                fDstCopyTextureOffset;
+    GrTextureAccess         fDstCopy;
 
     typedef GrFragmentProcessor INHERITED;
 };
 
 GR_MAKE_BITFIELD_OPS(GrXferProcessor::OptFlags);
 
+///////////////////////////////////////////////////////////////////////////////
+
 /**
  * We install a GrXPFactory (XPF) early on in the pipeline before all the final draw information is
  * known (e.g. whether there is fractional pixel coverage, will coverage be 1 or 4 channel, is the
@@ -159,8 +185,10 @@
  */
 class GrXPFactory : public SkRefCnt {
 public:
-    virtual GrXferProcessor* createXferProcessor(const GrProcOptInfo& colorPOI,
-                                                 const GrProcOptInfo& coveragePOI) const = 0;
+    GrXferProcessor* createXferProcessor(const GrProcOptInfo& colorPOI,
+                                         const GrProcOptInfo& coveragePOI,
+                                         const GrDeviceCoordTexture* dstCopy,
+                                         const GrDrawTargetCaps& caps) const;
 
     /**
      * This function returns true if the GrXferProcessor generated from this factory will be able to
@@ -202,10 +230,7 @@
      */
     virtual bool canTweakAlphaForCoverage() const = 0;
 
-    /**
-     *  Returns true if the XP generated by this factory will read dst.
-     */
-    virtual bool willReadDst() const = 0;
+    bool willNeedDstCopy(const GrDrawTargetCaps& caps) const; 
 
     bool isEqual(const GrXPFactory& that) const {
         if (this->classID() != that.classID()) {
@@ -232,6 +257,15 @@
     uint32_t fClassID;
 
 private:
+    virtual GrXferProcessor* onCreateXferProcessor(const GrProcOptInfo& colorPOI,
+                                                   const GrProcOptInfo& coveragePOI,
+                                                   const GrDeviceCoordTexture* dstCopy) const = 0;
+    /**
+     *  Returns true if the XP generated by this factory will explicitly read dst in the fragment
+     *  shader.
+     */
+    virtual bool willReadDstColor() const = 0;
+
     virtual bool onIsEqual(const GrXPFactory&) const = 0;
 
     static uint32_t GenClassID() {
diff --git a/include/gpu/effects/GrPorterDuffXferProcessor.h b/include/gpu/effects/GrPorterDuffXferProcessor.h
index af10fa2..e47ec1e 100644
--- a/include/gpu/effects/GrPorterDuffXferProcessor.h
+++ b/include/gpu/effects/GrPorterDuffXferProcessor.h
@@ -12,21 +12,22 @@
 #include "GrXferProcessor.h"
 #include "SkXfermode.h"
 
+class GrDrawTargetCaps;
 class GrProcOptInfo;
 
 class GrPorterDuffXferProcessor : public GrXferProcessor {
 public:
     static GrXferProcessor* Create(GrBlendCoeff srcBlend, GrBlendCoeff dstBlend,
-                                   GrColor constant = 0) {
-        return SkNEW_ARGS(GrPorterDuffXferProcessor, (srcBlend, dstBlend, constant));
+                                   GrColor constant, const GrDeviceCoordTexture* dstCopy,
+                                   bool willReadDstColor) {
+        return SkNEW_ARGS(GrPorterDuffXferProcessor, (srcBlend, dstBlend, constant, dstCopy,
+                                                      willReadDstColor));
     }
 
     ~GrPorterDuffXferProcessor() SK_OVERRIDE;
 
     const char* name() const SK_OVERRIDE { return "Porter Duff"; }
 
-    void getGLProcessorKey(const GrGLCaps& caps, GrProcessorKeyBuilder* b) const SK_OVERRIDE;
-
     GrGLXferProcessor* createGLInstance() const SK_OVERRIDE;
 
     bool hasSecondaryOutput() const SK_OVERRIDE;
@@ -75,7 +76,10 @@
     }
 
 private:
-    GrPorterDuffXferProcessor(GrBlendCoeff srcBlend, GrBlendCoeff dstBlend, GrColor constant);
+    GrPorterDuffXferProcessor(GrBlendCoeff srcBlend, GrBlendCoeff dstBlend, GrColor constant,
+                              const GrDeviceCoordTexture* dstCopy, bool willReadDstColor);
+
+    void onGetGLProcessorKey(const GrGLCaps& caps, GrProcessorKeyBuilder* b) const SK_OVERRIDE;
 
     bool onIsEqual(const GrXferProcessor& xpBase) const SK_OVERRIDE {
         const GrPorterDuffXferProcessor& xp = xpBase.cast<GrPorterDuffXferProcessor>();
@@ -119,9 +123,6 @@
         return SkNEW_ARGS(GrPorterDuffXPFactory, (src, dst));
     }
 
-    GrXferProcessor* createXferProcessor(const GrProcOptInfo& colorPOI,
-                                         const GrProcOptInfo& coveragePOI) const SK_OVERRIDE;
-
     bool supportsRGBCoverage(GrColor knownColor, uint32_t knownColorFlags) const SK_OVERRIDE;
 
     bool canApplyCoverage(const GrProcOptInfo& colorPOI,
@@ -132,11 +133,15 @@
     void getInvariantOutput(const GrProcOptInfo& colorPOI, const GrProcOptInfo& coveragePOI,
                             GrXPFactory::InvariantOutput*) const SK_OVERRIDE;
 
-    bool willReadDst() const SK_OVERRIDE { return false; }
-
 private:
     GrPorterDuffXPFactory(GrBlendCoeff src, GrBlendCoeff dst); 
 
+    GrXferProcessor* onCreateXferProcessor(const GrProcOptInfo& colorPOI,
+                                           const GrProcOptInfo& coveragePOI,
+                                           const GrDeviceCoordTexture* dstCopy) const SK_OVERRIDE;
+
+    bool willReadDstColor() const SK_OVERRIDE;
+
     bool onIsEqual(const GrXPFactory& xpfBase) const SK_OVERRIDE {
         const GrPorterDuffXPFactory& xpf = xpfBase.cast<GrPorterDuffXPFactory>();
         return (fSrcCoeff == xpf.fSrcCoeff && fDstCoeff == xpf.fDstCoeff);
diff --git a/src/effects/SkArithmeticMode_gpu.cpp b/src/effects/SkArithmeticMode_gpu.cpp
index 48b7bcd..8022d66 100644
--- a/src/effects/SkArithmeticMode_gpu.cpp
+++ b/src/effects/SkArithmeticMode_gpu.cpp
@@ -166,7 +166,15 @@
 
     ~GLArithmeticXP() SK_OVERRIDE {}
 
-    void emitCode(const EmitArgs& args) SK_OVERRIDE {
+    static void GenKey(const GrProcessor& processor, const GrGLCaps& caps,
+                       GrProcessorKeyBuilder* b) {
+        const GrArithmeticXP& arith = processor.cast<GrArithmeticXP>();
+        uint32_t key = arith.enforcePMColor() ? 1 : 0;
+        b->add32(key);
+    }
+
+private:
+    void onEmitCode(const EmitArgs& args) SK_OVERRIDE {
         GrGLFPFragmentBuilder* fsBuilder = args.fPB->getFragmentShaderBuilder();
 
         const char* dstColor = fsBuilder->dstColor();
@@ -184,21 +192,13 @@
                                args.fInputCoverage, dstColor);
     }
 
-    void setData(const GrGLProgramDataManager& pdman,
-                 const GrXferProcessor& processor) SK_OVERRIDE {
+    void onSetData(const GrGLProgramDataManager& pdman,
+                   const GrXferProcessor& processor) SK_OVERRIDE {
         const GrArithmeticXP& arith = processor.cast<GrArithmeticXP>();
         pdman.set4f(fKUni, arith.k1(), arith.k2(), arith.k3(), arith.k4());
         fEnforcePMColor = arith.enforcePMColor();
     };
 
-    static void GenKey(const GrProcessor& processor, const GrGLCaps& caps,
-                       GrProcessorKeyBuilder* b) {
-        const GrArithmeticXP& arith = processor.cast<GrArithmeticXP>();
-        uint32_t key = arith.enforcePMColor() ? 1 : 0;
-        b->add32(key);
-    }
-
-private:
     GrGLProgramDataManager::UniformHandle fKUni;
     bool fEnforcePMColor;
 
@@ -207,17 +207,18 @@
 
 ///////////////////////////////////////////////////////////////////////////////
 
-GrArithmeticXP::GrArithmeticXP(float k1, float k2, float k3, float k4, bool enforcePMColor)
-    : fK1(k1)
+GrArithmeticXP::GrArithmeticXP(float k1, float k2, float k3, float k4, bool enforcePMColor,
+                               const GrDeviceCoordTexture* dstCopy, bool willReadDstColor)
+    : INHERITED(dstCopy, willReadDstColor)
+    , fK1(k1)
     , fK2(k2)
     , fK3(k3)
     , fK4(k4)
     , fEnforcePMColor(enforcePMColor) {
     this->initClassID<GrPorterDuffXferProcessor>();
-    this->setWillReadDstColor();
 }
 
-void GrArithmeticXP::getGLProcessorKey(const GrGLCaps& caps, GrProcessorKeyBuilder* b) const {
+void GrArithmeticXP::onGetGLProcessorKey(const GrGLCaps& caps, GrProcessorKeyBuilder* b) const {
     GLArithmeticXP::GenKey(*this, caps, b);
 }
 
diff --git a/src/effects/SkArithmeticMode_gpu.h b/src/effects/SkArithmeticMode_gpu.h
index 2998eab..b282e22 100644
--- a/src/effects/SkArithmeticMode_gpu.h
+++ b/src/effects/SkArithmeticMode_gpu.h
@@ -13,6 +13,7 @@
 #if SK_SUPPORT_GPU
 
 #include "GrCoordTransform.h"
+#include "GrDrawTargetCaps.h"
 #include "GrFragmentProcessor.h"
 #include "GrTextureAccess.h"
 #include "GrTypes.h"
@@ -72,16 +73,17 @@
 
 class GrArithmeticXP : public GrXferProcessor {
 public:
-    static GrXferProcessor* Create(float k1, float k2, float k3, float k4, bool enforcePMColor) {
-        return SkNEW_ARGS(GrArithmeticXP, (k1, k2, k3, k4, enforcePMColor));
+    static GrXferProcessor* Create(float k1, float k2, float k3, float k4, bool enforcePMColor,
+                                         const GrDeviceCoordTexture* dstCopy,
+                                         bool willReadDstColor) {
+        return SkNEW_ARGS(GrArithmeticXP, (k1, k2, k3, k4, enforcePMColor, dstCopy,
+                                           willReadDstColor));
     }
 
     ~GrArithmeticXP() SK_OVERRIDE {};
 
     const char* name() const SK_OVERRIDE { return "Arithmetic"; }
 
-    void getGLProcessorKey(const GrGLCaps& caps, GrProcessorKeyBuilder* b) const SK_OVERRIDE;
-
     GrGLXferProcessor* createGLInstance() const SK_OVERRIDE;
 
     bool hasSecondaryOutput() const SK_OVERRIDE { return false; }
@@ -105,7 +107,10 @@
     bool enforcePMColor() const { return fEnforcePMColor; }
 
 private:
-    GrArithmeticXP(float k1, float k2, float k3, float k4, bool enforcePMColor);
+    GrArithmeticXP(float k1, float k2, float k3, float k4, bool enforcePMColor,
+                   const GrDeviceCoordTexture* dstCopy, bool willReadDstColor);
+
+    void onGetGLProcessorKey(const GrGLCaps& caps, GrProcessorKeyBuilder* b) const SK_OVERRIDE;
 
     bool onIsEqual(const GrXferProcessor& xpBase) const SK_OVERRIDE {
         const GrArithmeticXP& xp = xpBase.cast<GrArithmeticXP>();
@@ -133,11 +138,6 @@
         return SkNEW_ARGS(GrArithmeticXPFactory, (k1, k2, k3, k4, enforcePMColor));
     }
 
-    GrXferProcessor* createXferProcessor(const GrProcOptInfo& colorPOI,
-                                         const GrProcOptInfo& coveragePOI) const SK_OVERRIDE {
-        return GrArithmeticXP::Create(fK1, fK2, fK3, fK4, fEnforcePMColor);
-    }
-
     bool supportsRGBCoverage(GrColor knownColor, uint32_t knownColorFlags) const SK_OVERRIDE {
         return true;
     }
@@ -154,11 +154,18 @@
     void getInvariantOutput(const GrProcOptInfo& colorPOI, const GrProcOptInfo& coveragePOI,
                             GrXPFactory::InvariantOutput*) const SK_OVERRIDE;
 
-    bool willReadDst() const SK_OVERRIDE { return true; }
-
 private:
     GrArithmeticXPFactory(float k1, float k2, float k3, float k4, bool enforcePMColor); 
 
+    GrXferProcessor* onCreateXferProcessor(const GrProcOptInfo& colorPOI,
+                                           const GrProcOptInfo& coveragePOI,
+                                           const GrDeviceCoordTexture* dstCopy) const SK_OVERRIDE {
+        return GrArithmeticXP::Create(fK1, fK2, fK3, fK4, fEnforcePMColor, dstCopy,
+                                      this->willReadDstColor());
+    }
+
+    bool willReadDstColor() const SK_OVERRIDE { return true; }
+
     bool onIsEqual(const GrXPFactory& xpfBase) const SK_OVERRIDE {
         const GrArithmeticXPFactory& xpf = xpfBase.cast<GrArithmeticXPFactory>();
         if (fK1 != xpf.fK1 ||
diff --git a/src/gpu/GrBatchTarget.cpp b/src/gpu/GrBatchTarget.cpp
index 6a31353..d5d8d8f 100644
--- a/src/gpu/GrBatchTarget.cpp
+++ b/src/gpu/GrBatchTarget.cpp
@@ -21,8 +21,7 @@
         BufferedFlush* bf = iter.get();
         const GrPipeline* pipeline = bf->fPipeline;
         const GrPrimitiveProcessor* primProc = bf->fPrimitiveProcessor.get();
-        fGpu->buildProgramDesc(&desc, *primProc, *pipeline, pipeline->descInfo(),
-                               bf->fBatchTracker);
+        fGpu->buildProgramDesc(&desc, *primProc, *pipeline, bf->fBatchTracker);
 
         GrGpu::DrawArgs args(primProc, pipeline, &desc, &bf->fBatchTracker);
         for (int i = 0; i < bf->fDraws.count(); i++) {
@@ -38,8 +37,7 @@
     BufferedFlush* bf = fIter.get();
     const GrPipeline* pipeline = bf->fPipeline;
     const GrPrimitiveProcessor* primProc = bf->fPrimitiveProcessor.get();
-    fGpu->buildProgramDesc(&desc, *primProc, *pipeline, pipeline->descInfo(),
-                           bf->fBatchTracker);
+    fGpu->buildProgramDesc(&desc, *primProc, *pipeline, bf->fBatchTracker);
 
     GrGpu::DrawArgs args(primProc, pipeline, &desc, &bf->fBatchTracker);
     for (int i = 0; i < bf->fDraws.count(); i++) {
diff --git a/src/gpu/GrDrawTarget.cpp b/src/gpu/GrDrawTarget.cpp
index b89d70e..b4cd5a3 100644
--- a/src/gpu/GrDrawTarget.cpp
+++ b/src/gpu/GrDrawTarget.cpp
@@ -384,15 +384,15 @@
     return true;
 }
 
-bool GrDrawTarget::setupDstReadIfNecessary(GrPipelineBuilder* pipelineBuilder,
+bool GrDrawTarget::setupDstReadIfNecessary(const GrPipelineBuilder& pipelineBuilder,
                                            GrDeviceCoordTexture* dstCopy,
                                            const SkRect* drawBounds) {
-    if (this->caps()->dstReadInShaderSupport() || !pipelineBuilder->willEffectReadDstColor()) {
+    if (!pipelineBuilder.willXPNeedDstCopy(*this->caps())) {
         return true;
     }
     SkIRect copyRect;
     const GrClipData* clip = this->getClip();
-    GrRenderTarget* rt = pipelineBuilder->getRenderTarget();
+    GrRenderTarget* rt = pipelineBuilder.getRenderTarget();
     clip->getConservativeBounds(rt, &copyRect);
 
     if (drawBounds) {
@@ -416,6 +416,9 @@
     this->initCopySurfaceDstDesc(rt, &desc);
     desc.fWidth = copyRect.width();
     desc.fHeight = copyRect.height();
+    // Only xfer processors can use dst copies, and the contract with the XP is that we always
+    // supply a copy texture with origin in the top left.
+    desc.fOrigin = kTopLeft_GrSurfaceOrigin;
 
     SkAutoTUnref<GrTexture> copy(
         fContext->refScratchTexture(desc, GrContext::kApprox_ScratchTexMatch));
@@ -470,14 +473,9 @@
             info.setDevBounds(*devBounds);
         }
 
-        // TODO: We should continue with incorrect blending.
-        GrDeviceCoordTexture dstCopy;
-        if (!this->setupDstReadIfNecessary(pipelineBuilder, &dstCopy, devBounds)) {
-            return;
-        }
         this->setDrawBuffers(&info, gp->getVertexStride());
 
-        this->onDraw(*pipelineBuilder, gp, info, scissorState, dstCopy.texture() ? &dstCopy : NULL);
+        this->onDraw(*pipelineBuilder, gp, info, scissorState);
     }
 }
 
@@ -514,15 +512,9 @@
             info.setDevBounds(*devBounds);
         }
 
-        // TODO: We should continue with incorrect blending.
-        GrDeviceCoordTexture dstCopy;
-        if (!this->setupDstReadIfNecessary(pipelineBuilder, &dstCopy, devBounds)) {
-            return;
-        }
-
         this->setDrawBuffers(&info, gp->getVertexStride());
 
-        this->onDraw(*pipelineBuilder, gp, info, scissorState, dstCopy.texture() ? &dstCopy : NULL);
+        this->onDraw(*pipelineBuilder, gp, info, scissorState);
     }
 }
 
@@ -541,12 +533,7 @@
         return;
     }
 
-    GrDeviceCoordTexture dstCopy;
-    if (!this->setupDstReadIfNecessary(pipelineBuilder, &dstCopy, devBounds)) {
-        return;
-    }
-
-    this->onDrawBatch(batch, *pipelineBuilder, scissorState, dstCopy.texture() ? &dstCopy : NULL);
+    this->onDrawBatch(batch, *pipelineBuilder, scissorState, devBounds);
 }
 
 static const GrStencilSettings& winding_path_stencil_settings() {
@@ -636,13 +623,8 @@
                                             pipelineBuilder->getRenderTarget()->getStencilBuffer(),
                                             &stencilSettings);
 
-    GrDeviceCoordTexture dstCopy;
-    if (!this->setupDstReadIfNecessary(pipelineBuilder, &dstCopy, &devBounds)) {
-        return;
-    }
-
     this->onDrawPath(*pipelineBuilder, pathProc, path, scissorState, stencilSettings,
-                     dstCopy.texture() ? &dstCopy : NULL);
+                     &devBounds);
 }
 
 void GrDrawTarget::drawPaths(GrPipelineBuilder* pipelineBuilder,
@@ -676,18 +658,12 @@
                                             pipelineBuilder->getRenderTarget()->getStencilBuffer(),
                                             &stencilSettings);
 
-    // Don't compute a bounding box for setupDstReadIfNecessary(), we'll opt
+    // Don't compute a bounding box for dst copy texture, we'll opt
     // instead for it to just copy the entire dst. Realistically this is a moot
     // point, because any context that supports NV_path_rendering will also
     // support NV_blend_equation_advanced.
-    GrDeviceCoordTexture dstCopy;
-    if (!this->setupDstReadIfNecessary(pipelineBuilder, &dstCopy, NULL)) {
-        return;
-    }
-
     this->onDrawPaths(*pipelineBuilder, pathProc, pathRange, indices, indexType, transformValues,
-                      transformType, count, scissorState, stencilSettings,
-                      dstCopy.texture() ? &dstCopy : NULL);
+                      transformType, count, scissorState, stencilSettings, NULL);
 }
 
 void GrDrawTarget::clear(const SkIRect* rect,
@@ -793,12 +769,6 @@
         info.setDevBounds(*devBounds);
     }
 
-    // TODO: We should continue with incorrect blending.
-    GrDeviceCoordTexture dstCopy;
-    if (!this->setupDstReadIfNecessary(pipelineBuilder, &dstCopy, devBounds)) {
-        return;
-    }
-
     while (instanceCount) {
         info.fInstanceCount = SkTMin(instanceCount, maxInstancesPerDraw);
         info.fVertexCount = info.fInstanceCount * verticesPerInstance;
@@ -812,8 +782,7 @@
                             info.fVertexCount,
                             info.fIndexCount)) {
             this->setDrawBuffers(&info, gp->getVertexStride());
-            this->onDraw(*pipelineBuilder, gp, info, scissorState,
-                         dstCopy.texture() ? &dstCopy : NULL);
+            this->onDraw(*pipelineBuilder, gp, info, scissorState);
         }
         info.fStartVertex += info.fVertexCount;
         instanceCount -= info.fInstanceCount;
diff --git a/src/gpu/GrDrawTarget.h b/src/gpu/GrDrawTarget.h
index 18265c8..1b66517 100644
--- a/src/gpu/GrDrawTarget.h
+++ b/src/gpu/GrDrawTarget.h
@@ -677,7 +677,7 @@
     // Makes a copy of the dst if it is necessary for the draw. Returns false if a copy is required
     // but couldn't be made. Otherwise, returns true.  This method needs to be protected because it
     // needs to be accessed by GLPrograms to setup a correct drawstate
-    bool setupDstReadIfNecessary(GrPipelineBuilder*,
+    bool setupDstReadIfNecessary(const GrPipelineBuilder&,
                                  GrDeviceCoordTexture* dstCopy,
                                  const SkRect* drawBounds);
 
@@ -720,12 +720,11 @@
     virtual void onDraw(const GrPipelineBuilder&,
                         const GrGeometryProcessor*,
                         const DrawInfo&,
-                        const GrScissorState&,
-                        const GrDeviceCoordTexture* dstCopy) = 0;
+                        const GrScissorState&) = 0;
     virtual void onDrawBatch(GrBatch*,
                              const GrPipelineBuilder&,
                              const GrScissorState&,
-                             const GrDeviceCoordTexture* dstCopy) = 0;
+                             const SkRect* devBounds) = 0;
     // TODO copy in order drawbuffer onDrawRect to here
     virtual void onDrawRect(GrPipelineBuilder*,
                             GrColor color,
@@ -744,7 +743,7 @@
                             const GrPath*,
                             const GrScissorState&,
                             const GrStencilSettings&,
-                            const GrDeviceCoordTexture* dstCopy) = 0;
+                            const SkRect* devBounds) = 0;
     virtual void onDrawPaths(const GrPipelineBuilder&,
                              const GrPathProcessor*,
                              const GrPathRange*,
@@ -755,7 +754,7 @@
                              int count,
                              const GrScissorState&,
                              const GrStencilSettings&,
-                             const GrDeviceCoordTexture*) = 0;
+                             const SkRect* devBounds) = 0;
 
     virtual void onClear(const SkIRect* rect, GrColor color, bool canIgnoreRect,
                          GrRenderTarget* renderTarget) = 0;
diff --git a/src/gpu/GrGpu.h b/src/gpu/GrGpu.h
index 8c458f1..8a3c32b 100644
--- a/src/gpu/GrGpu.h
+++ b/src/gpu/GrGpu.h
@@ -298,7 +298,6 @@
     virtual void buildProgramDesc(GrProgramDesc*,
                                   const GrPrimitiveProcessor&,
                                   const GrPipeline&,
-                                  const GrProgramDesc::DescInfo&,
                                   const GrBatchTracker&) const = 0;
 
     // Called to determine whether a copySurface call would succeed or not. Derived
diff --git a/src/gpu/GrInOrderDrawBuffer.cpp b/src/gpu/GrInOrderDrawBuffer.cpp
index a760dcb..4629d4f 100644
--- a/src/gpu/GrInOrderDrawBuffer.cpp
+++ b/src/gpu/GrInOrderDrawBuffer.cpp
@@ -247,14 +247,13 @@
 void GrInOrderDrawBuffer::onDraw(const GrPipelineBuilder& pipelineBuilder,
                                  const GrGeometryProcessor* gp,
                                  const DrawInfo& info,
-                                 const GrScissorState& scissorState,
-                                 const GrDeviceCoordTexture* dstCopy) {
+                                 const GrScissorState& scissorState) {
     SkASSERT(info.vertexBuffer() && (!info.isIndexed() || info.indexBuffer()));
 
     // This closeBatch call is required because we may introduce new draws when we setup clip
     this->closeBatch();
 
-    if (!this->recordStateAndShouldDraw(pipelineBuilder, gp, scissorState, dstCopy)) {
+    if (!this->recordStateAndShouldDraw(pipelineBuilder, gp, scissorState, info.getDevBounds())) {
         return;
     }
 
@@ -276,8 +275,8 @@
 void GrInOrderDrawBuffer::onDrawBatch(GrBatch* batch,
                                       const GrPipelineBuilder& pipelineBuilder,
                                       const GrScissorState& scissorState,
-                                      const GrDeviceCoordTexture* dstCopy) {
-    if (!this->recordStateAndShouldDraw(batch, pipelineBuilder, scissorState, dstCopy)) {
+                                      const SkRect* devBounds) {
+    if (!this->recordStateAndShouldDraw(batch, pipelineBuilder, scissorState, devBounds)) {
         return;
     }
 
@@ -316,11 +315,11 @@
                                      const GrPath* path,
                                      const GrScissorState& scissorState,
                                      const GrStencilSettings& stencilSettings,
-                                     const GrDeviceCoordTexture* dstCopy) {
+                                     const SkRect* devBounds) {
     this->closeBatch();
 
     // TODO: Only compare the subset of GrPipelineBuilder relevant to path covering?
-    if (!this->recordStateAndShouldDraw(pipelineBuilder, pathProc, scissorState, dstCopy)) {
+    if (!this->recordStateAndShouldDraw(pipelineBuilder, pathProc, scissorState, devBounds)) {
         return;
     }
     DrawPath* dp = GrNEW_APPEND_TO_RECORDER(fCmdBuffer, DrawPath, (path));
@@ -338,14 +337,14 @@
                                       int count,
                                       const GrScissorState& scissorState,
                                       const GrStencilSettings& stencilSettings,
-                                      const GrDeviceCoordTexture* dstCopy) {
+                                      const SkRect* devBounds) {
     SkASSERT(pathRange);
     SkASSERT(indices);
     SkASSERT(transformValues);
 
     this->closeBatch();
 
-    if (!this->recordStateAndShouldDraw(pipelineBuilder, pathProc, scissorState, dstCopy)) {
+    if (!this->recordStateAndShouldDraw(pipelineBuilder, pathProc, scissorState, devBounds)) {
         return;
     }
 
@@ -486,7 +485,6 @@
             if (ss->fPrimitiveProcessor) {
                 this->getGpu()->buildProgramDesc(&ss->fDesc, *ss->fPrimitiveProcessor,
                                                  ss->fPipeline,
-                                                 ss->fPipeline.descInfo(),
                                                  ss->fBatchTracker);
             }
             currentState = ss;
@@ -581,10 +579,14 @@
 bool GrInOrderDrawBuffer::recordStateAndShouldDraw(const GrPipelineBuilder& pipelineBuilder,
                                                    const GrPrimitiveProcessor* primProc,
                                                    const GrScissorState& scissor,
-                                                   const GrDeviceCoordTexture* dstCopy) {
+                                                   const SkRect* devBounds) {
+    GrDeviceCoordTexture dstCopy;
+    if (!this->setupDstReadIfNecessary(pipelineBuilder, &dstCopy, devBounds)) {
+        return false;
+    }
     SetState* ss = GrNEW_APPEND_TO_RECORDER(fCmdBuffer, SetState,
                                             (pipelineBuilder, primProc, *this->getGpu()->caps(),
-                                             scissor, dstCopy));
+                                             scissor, &dstCopy));
     if (ss->fPipeline.mustSkip()) {
         fCmdBuffer.pop_back();
         return false;
@@ -609,13 +611,17 @@
 bool GrInOrderDrawBuffer::recordStateAndShouldDraw(GrBatch* batch,
                                                    const GrPipelineBuilder& pipelineBuilder,
                                                    const GrScissorState& scissor,
-                                                   const GrDeviceCoordTexture* dstCopy) {
+                                                   const SkRect* devBounds) {
+    GrDeviceCoordTexture dstCopy;
+    if (!this->setupDstReadIfNecessary(pipelineBuilder, &dstCopy, devBounds)) {
+        return false;
+    }
     // TODO this gets much simpler when we have batches everywhere.
     // If the previous command is also a set state, then we check to see if it has a Batch.  If so,
     // and we can make the two batches equal, and we can combine the states, then we make them equal
     SetState* ss = GrNEW_APPEND_TO_RECORDER(fCmdBuffer, SetState,
                                             (batch, pipelineBuilder, *this->getGpu()->caps(), scissor,
-                                             dstCopy));
+                                             &dstCopy));
     if (ss->fPipeline.mustSkip()) {
         fCmdBuffer.pop_back();
         return false;
diff --git a/src/gpu/GrInOrderDrawBuffer.h b/src/gpu/GrInOrderDrawBuffer.h
index bf9237d..3d1bd7c 100644
--- a/src/gpu/GrInOrderDrawBuffer.h
+++ b/src/gpu/GrInOrderDrawBuffer.h
@@ -233,12 +233,11 @@
     void onDraw(const GrPipelineBuilder&,
                 const GrGeometryProcessor*,
                 const DrawInfo&,
-                const GrScissorState&,
-                const GrDeviceCoordTexture* dstCopy) SK_OVERRIDE;
+                const GrScissorState&) SK_OVERRIDE;
     void onDrawBatch(GrBatch*,
                      const GrPipelineBuilder&,
                      const GrScissorState&,
-                     const GrDeviceCoordTexture* dstCopy) SK_OVERRIDE;
+                     const SkRect* devBounds) SK_OVERRIDE;
     void onDrawRect(GrPipelineBuilder*,
                     GrColor,
                     const SkMatrix& viewMatrix,
@@ -256,7 +255,7 @@
                     const GrPath*,
                     const GrScissorState&,
                     const GrStencilSettings&,
-                    const GrDeviceCoordTexture* dstCopy) SK_OVERRIDE;
+                    const SkRect* devBounds) SK_OVERRIDE;
     void onDrawPaths(const GrPipelineBuilder&,
                      const GrPathProcessor*,
                      const GrPathRange*,
@@ -267,7 +266,7 @@
                      int count,
                      const GrScissorState&,
                      const GrStencilSettings&,
-                     const GrDeviceCoordTexture*) SK_OVERRIDE;
+                     const SkRect* devBounds) SK_OVERRIDE;
     void onClear(const SkIRect* rect,
                  GrColor color,
                  bool canIgnoreRect,
@@ -288,11 +287,11 @@
     bool SK_WARN_UNUSED_RESULT recordStateAndShouldDraw(const GrPipelineBuilder&,
                                                         const GrPrimitiveProcessor*,
                                                         const GrScissorState&,
-                                                        const GrDeviceCoordTexture*);
+                                                        const SkRect*);
     bool SK_WARN_UNUSED_RESULT recordStateAndShouldDraw(GrBatch*,
                                                         const GrPipelineBuilder&,
                                                         const GrScissorState&,
-                                                        const GrDeviceCoordTexture*);
+                                                        const SkRect*);
 
     // We lazily record clip changes in order to skip clips that have no effect.
     void recordClipIfNecessary();
diff --git a/src/gpu/GrPipeline.cpp b/src/gpu/GrPipeline.cpp
index 911169e..bd508e9 100644
--- a/src/gpu/GrPipeline.cpp
+++ b/src/gpu/GrPipeline.cpp
@@ -48,7 +48,7 @@
                                          const GrDeviceCoordTexture* dstCopy) {
     // Create XferProcessor from DS's XPFactory
     SkAutoTUnref<GrXferProcessor> xferProcessor(
-        pipelineBuilder.getXPFactory()->createXferProcessor(colorPOI, coveragePOI));
+        pipelineBuilder.getXPFactory()->createXferProcessor(colorPOI, coveragePOI, dstCopy, caps));
 
     GrColor overrideColor = GrColor_ILLEGAL;
     if (colorPOI.firstEffectiveStageIndex() != 0) {
@@ -82,10 +82,6 @@
     fScissorState = scissorState;
     fStencilSettings = pipelineBuilder.getStencil();
     fDrawFace = pipelineBuilder.getDrawFace();
-    // TODO move this out of GrPipeline
-    if (dstCopy) {
-        fDstCopy = *dstCopy;
-    }
 
     fFlags = 0;
     if (pipelineBuilder.isHWAntialias()) {
@@ -107,8 +103,6 @@
     this->adjustProgramFromOptimizations(pipelineBuilder, optFlags, colorPOI, coveragePOI,
                                          &firstColorStageIdx, &firstCoverageStageIdx);
 
-    fDescInfo.fReadsDst = fXferProcessor->willReadDstColor();
-
     bool usesLocalCoords = false;
 
     // Copy Stages from PipelineBuilder to Pipeline
@@ -142,20 +136,20 @@
                                                 const GrProcOptInfo& coveragePOI,
                                                 int* firstColorStageIdx,
                                                 int* firstCoverageStageIdx) {
-    fDescInfo.fReadsFragPosition = false;
+    fReadsFragPosition = false;
 
     if ((flags & GrXferProcessor::kIgnoreColor_OptFlag) ||
         (flags & GrXferProcessor::kOverrideColor_OptFlag)) {
         *firstColorStageIdx = pipelineBuilder.numColorStages();
     } else {
-        fDescInfo.fReadsFragPosition = colorPOI.readsFragPosition();
+        fReadsFragPosition = colorPOI.readsFragPosition();
     }
 
     if (flags & GrXferProcessor::kIgnoreCoverage_OptFlag) {
         *firstCoverageStageIdx = pipelineBuilder.numCoverageStages();
     } else {
         if (coveragePOI.readsFragPosition()) {
-            fDescInfo.fReadsFragPosition = true;
+            fReadsFragPosition = true;
         }
     }
 }
@@ -169,8 +163,7 @@
         this->fScissorState != that.fScissorState ||
         this->fFlags != that.fFlags ||
         this->fStencilSettings != that.fStencilSettings ||
-        this->fDrawFace != that.fDrawFace ||
-        this->fDstCopy.texture() != that.fDstCopy.texture()) {
+        this->fDrawFace != that.fDrawFace) {
         return false;
     }
 
diff --git a/src/gpu/GrPipeline.h b/src/gpu/GrPipeline.h
index 24d0b62..8480c00 100644
--- a/src/gpu/GrPipeline.h
+++ b/src/gpu/GrPipeline.h
@@ -132,9 +132,7 @@
 
     ///////////////////////////////////////////////////////////////////////////
 
-    const GrDeviceCoordTexture* getDstCopy() const { return fDstCopy.texture() ? &fDstCopy : NULL; }
-
-    const GrProgramDesc::DescInfo& descInfo() const { return fDescInfo; }
+    bool readsFragPosition() const { return fReadsFragPosition; }
 
     const GrPipelineInfo& getInitBatchTracker() const { return fInitBT; }
 
@@ -177,11 +175,10 @@
     GrScissorState                      fScissorState;
     GrStencilSettings                   fStencilSettings;
     GrPipelineBuilder::DrawFace         fDrawFace;
-    GrDeviceCoordTexture                fDstCopy;
     uint32_t                            fFlags;
     ProgramXferProcessor                fXferProcessor;
     FragmentStageArray                  fFragmentStages;
-    GrProgramDesc::DescInfo             fDescInfo;
+    bool                                fReadsFragPosition;
     GrPipelineInfo                      fInitBT;
 
     // This function is equivalent to the offset into fFragmentStages where coverage stages begin.
diff --git a/src/gpu/GrPipelineBuilder.cpp b/src/gpu/GrPipelineBuilder.cpp
index 90aee74..54d357f 100644
--- a/src/gpu/GrPipelineBuilder.cpp
+++ b/src/gpu/GrPipelineBuilder.cpp
@@ -101,8 +101,8 @@
 
 //////////////////////////////////////////////////////////////////////////////s
 
-bool GrPipelineBuilder::willEffectReadDstColor() const {
-    return this->getXPFactory()->willReadDst();
+bool GrPipelineBuilder::willXPNeedDstCopy(const GrDrawTargetCaps& caps) const {
+    return this->getXPFactory()->willNeedDstCopy(caps);
 }
 
 void GrPipelineBuilder::AutoRestoreEffects::set(GrPipelineBuilder* pipelineBuilder) {
diff --git a/src/gpu/GrPipelineBuilder.h b/src/gpu/GrPipelineBuilder.h
index df9a1c8..5743c39 100644
--- a/src/gpu/GrPipelineBuilder.h
+++ b/src/gpu/GrPipelineBuilder.h
@@ -102,10 +102,9 @@
     const GrFragmentStage& getCoverageStage(int idx) const { return fCoverageStages[idx]; }
 
     /**
-     * Checks whether the xp will read the dst pixel color.
-     * TODO: remove when we have dstCpy contained inside of GrXP
+     * Checks whether the xp will need a copy of the destination to correctly blend.
      */
-    bool willEffectReadDstColor() const;
+    bool willXPNeedDstCopy(const GrDrawTargetCaps& caps) const;
 
     /**
      * The xfer processor factory.
diff --git a/src/gpu/GrProgramDesc.h b/src/gpu/GrProgramDesc.h
index e07e116..66e0e06 100644
--- a/src/gpu/GrProgramDesc.h
+++ b/src/gpu/GrProgramDesc.h
@@ -55,9 +55,6 @@
     }
 
     struct KeyHeader {
-        uint8_t                     fDstReadKey;   // set by GrGLShaderBuilder if there
-                                                   // are effects that must read the dst.
-                                                   // Otherwise, 0.
         uint8_t                     fFragPosKey;   // set by GrGLShaderBuilder if there are
                                                    // effects that read the fragment position.
                                                    // Otherwise, 0.
@@ -79,20 +76,6 @@
     // This should really only be used internally, base classes should return their own headers
     const KeyHeader& header() const { return *this->atOffset<KeyHeader, kHeaderOffset>(); }
 
-    // A struct to communicate descriptor information to the program descriptor builder
-    struct DescInfo {
-        bool operator==(const DescInfo& that) const {
-            return fReadsDst == that.fReadsDst &&
-                   fReadsFragPosition == that.fReadsFragPosition;
-        }
-        bool operator!=(const DescInfo& that) const { return !(*this == that); };
-
-        // These flags give aggregated info on the processor stages that are used when building
-        // programs.
-        bool            fReadsDst;
-        bool            fReadsFragPosition;
-    };
-
 private:
     template<typename T, size_t OFFSET> T* atOffset() {
         return reinterpret_cast<T*>(reinterpret_cast<intptr_t>(fKey.begin()) + OFFSET);
diff --git a/src/gpu/GrTest.cpp b/src/gpu/GrTest.cpp
index 05a4a5f..2f32eb5 100644
--- a/src/gpu/GrTest.cpp
+++ b/src/gpu/GrTest.cpp
@@ -142,7 +142,6 @@
                                    size_t rowBytes) const SK_OVERRIDE { return false; }
     void buildProgramDesc(GrProgramDesc*,const GrPrimitiveProcessor&,
                           const GrPipeline&,
-                          const GrProgramDesc::DescInfo&,
                           const GrBatchTracker&) const SK_OVERRIDE {}
 
     void discard(GrRenderTarget*) SK_OVERRIDE {}
diff --git a/src/gpu/GrXferProcessor.cpp b/src/gpu/GrXferProcessor.cpp
new file mode 100644
index 0000000..87d5fe6
--- /dev/null
+++ b/src/gpu/GrXferProcessor.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2015 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "GrXferProcessor.h"
+#include "gl/GrGLCaps.h"
+
+GrXferProcessor::GrXferProcessor() : fWillReadDstColor(false), fDstCopyTextureOffset() {
+}
+
+GrXferProcessor::GrXferProcessor(const GrDeviceCoordTexture* dstCopy, bool willReadDstColor)
+    : fWillReadDstColor(willReadDstColor)
+    , fDstCopyTextureOffset() {
+    if (dstCopy && dstCopy->texture()) {
+        fDstCopy.reset(dstCopy->texture());
+        fDstCopyTextureOffset = dstCopy->offset();
+        SkASSERT(kTopLeft_GrSurfaceOrigin == fDstCopy.getTexture()->origin());
+        this->addTextureAccess(&fDstCopy);
+    }
+}
+
+void GrXferProcessor::getGLProcessorKey(const GrGLCaps& caps, GrProcessorKeyBuilder* b) const {
+    uint32_t key = this->willReadDstColor() ? 0x1 : 0x0;
+    b->add32(key);
+    this->onGetGLProcessorKey(caps, b);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+GrXferProcessor* GrXPFactory::createXferProcessor(const GrProcOptInfo& colorPOI,
+                                                  const GrProcOptInfo& coveragePOI,
+                                                  const GrDeviceCoordTexture* dstCopy,
+                                                  const GrDrawTargetCaps& caps) const {
+#ifdef SK_DEBUG
+    if (this->willReadDstColor()) {
+        if (!caps.dstReadInShaderSupport()) {
+            SkASSERT(dstCopy && dstCopy->texture());
+        } else {
+            SkASSERT(!dstCopy || !dstCopy->texture()); 
+        }
+    } else {
+        SkASSERT(!dstCopy || !dstCopy->texture()); 
+
+    }
+#endif
+    return this->onCreateXferProcessor(colorPOI, coveragePOI, dstCopy);
+}
+
+bool GrXPFactory::willNeedDstCopy(const GrDrawTargetCaps& caps) const {
+    return (this->willReadDstColor() && !caps.dstReadInShaderSupport());
+}
+
diff --git a/src/gpu/effects/GrCoverageSetOpXP.cpp b/src/gpu/effects/GrCoverageSetOpXP.cpp
index fe286f6..7025d20 100644
--- a/src/gpu/effects/GrCoverageSetOpXP.cpp
+++ b/src/gpu/effects/GrCoverageSetOpXP.cpp
@@ -21,7 +21,15 @@
 
     ~GrGLCoverageSetOpXP() SK_OVERRIDE {}
 
-    void emitCode(const EmitArgs& args) SK_OVERRIDE {
+    static void GenKey(const GrProcessor& processor, const GrGLCaps& caps,
+                       GrProcessorKeyBuilder* b) {
+        const GrCoverageSetOpXP& xp = processor.cast<GrCoverageSetOpXP>();
+        uint32_t key = xp.invertCoverage() ?  0x0 : 0x1;
+        b->add32(key);
+    };
+
+private:
+    void onEmitCode(const EmitArgs& args) SK_OVERRIDE {
         const GrCoverageSetOpXP& xp = args.fXP.cast<GrCoverageSetOpXP>();
         GrGLFPFragmentBuilder* fsBuilder = args.fPB->getFragmentShaderBuilder();
 
@@ -32,16 +40,8 @@
         }
     }
 
-    void setData(const GrGLProgramDataManager&, const GrXferProcessor&) SK_OVERRIDE {};
+    void onSetData(const GrGLProgramDataManager&, const GrXferProcessor&) SK_OVERRIDE {};
 
-    static void GenKey(const GrProcessor& processor, const GrGLCaps& caps,
-                       GrProcessorKeyBuilder* b) {
-        const GrCoverageSetOpXP& xp = processor.cast<GrCoverageSetOpXP>();
-        uint32_t key = xp.invertCoverage() ?  0x0 : 0x1;
-        b->add32(key);
-    };
-
-private:
     typedef GrGLXferProcessor INHERITED;
 };
 
@@ -56,7 +56,7 @@
 GrCoverageSetOpXP::~GrCoverageSetOpXP() {
 }
 
-void GrCoverageSetOpXP::getGLProcessorKey(const GrGLCaps& caps, GrProcessorKeyBuilder* b) const {
+void GrCoverageSetOpXP::onGetGLProcessorKey(const GrGLCaps& caps, GrProcessorKeyBuilder* b) const {
     GrGLCoverageSetOpXP::GenKey(*this, caps, b);
 }
 
@@ -179,8 +179,10 @@
     }
 }
 
-GrXferProcessor* GrCoverageSetOpXPFactory::createXferProcessor(const GrProcOptInfo& /* colorPOI*/,
-                                                               const GrProcOptInfo& covPOI) const {
+GrXferProcessor*
+GrCoverageSetOpXPFactory::onCreateXferProcessor(const GrProcOptInfo& colorPOI,
+                                                const GrProcOptInfo& covPOI,
+                                                const GrDeviceCoordTexture* dstCopy) const {
     return GrCoverageSetOpXP::Create(fRegionOp, fInvertCoverage);
 }
 
diff --git a/src/gpu/effects/GrCoverageSetOpXP.h b/src/gpu/effects/GrCoverageSetOpXP.h
index a144f97..0bc9e91 100644
--- a/src/gpu/effects/GrCoverageSetOpXP.h
+++ b/src/gpu/effects/GrCoverageSetOpXP.h
@@ -29,8 +29,6 @@
 
     const char* name() const SK_OVERRIDE { return "Coverage Set Op"; }
 
-    void getGLProcessorKey(const GrGLCaps& caps, GrProcessorKeyBuilder* b) const SK_OVERRIDE;
-
     GrGLXferProcessor* createGLInstance() const SK_OVERRIDE;
 
     bool hasSecondaryOutput() const SK_OVERRIDE { return false; }
@@ -48,6 +46,8 @@
 private:
     GrCoverageSetOpXP(SkRegion::Op regionOp, bool fInvertCoverage);
 
+    void onGetGLProcessorKey(const GrGLCaps& caps, GrProcessorKeyBuilder* b) const SK_OVERRIDE;
+
     bool onIsEqual(const GrXferProcessor& xpBase) const SK_OVERRIDE {
         const GrCoverageSetOpXP& xp = xpBase.cast<GrCoverageSetOpXP>();
         return (fRegionOp == xp.fRegionOp &&
@@ -66,9 +66,6 @@
 public:
     static GrXPFactory* Create(SkRegion::Op regionOp, bool invertCoverage = false);
 
-    GrXferProcessor* createXferProcessor(const GrProcOptInfo& colorPOI,
-                                         const GrProcOptInfo& coveragePOI) const SK_OVERRIDE;
-
     bool supportsRGBCoverage(GrColor knownColor, uint32_t knownColorFlags) const SK_OVERRIDE {
         return true;
     }
@@ -83,11 +80,15 @@
     void getInvariantOutput(const GrProcOptInfo& colorPOI, const GrProcOptInfo& coveragePOI,
                             GrXPFactory::InvariantOutput*) const SK_OVERRIDE;
 
-    bool willReadDst() const SK_OVERRIDE { return false; }
-
 private:
     GrCoverageSetOpXPFactory(SkRegion::Op regionOp, bool invertCoverage);
 
+    GrXferProcessor* onCreateXferProcessor(const GrProcOptInfo& colorPOI,
+                                           const GrProcOptInfo& coveragePOI,
+                                           const GrDeviceCoordTexture* dstCopy) const SK_OVERRIDE;
+
+    bool willReadDstColor() const SK_OVERRIDE { return false; }
+
     bool onIsEqual(const GrXPFactory& xpfBase) const SK_OVERRIDE {
         const GrCoverageSetOpXPFactory& xpf = xpfBase.cast<GrCoverageSetOpXPFactory>();
         return fRegionOp == xpf.fRegionOp;
diff --git a/src/gpu/effects/GrCustomXfermode.cpp b/src/gpu/effects/GrCustomXfermode.cpp
index 9aa7153..b77bf2f 100644
--- a/src/gpu/effects/GrCustomXfermode.cpp
+++ b/src/gpu/effects/GrCustomXfermode.cpp
@@ -499,7 +499,15 @@
     GLCustomXP(const GrXferProcessor&) {}
     ~GLCustomXP() SK_OVERRIDE {}
 
-    void emitCode(const EmitArgs& args) SK_OVERRIDE {
+    static void GenKey(const GrXferProcessor& proc, const GrGLCaps&, GrProcessorKeyBuilder* b) {
+        uint32_t key = proc.numTextures();
+        SkASSERT(key <= 1);
+        key |= proc.cast<GrCustomXP>().mode() << 1;
+        b->add32(key);
+    }
+
+private:
+    void onEmitCode(const EmitArgs& args) SK_OVERRIDE {
         SkXfermode::Mode mode = args.fXP.cast<GrCustomXP>().mode();
         GrGLFPFragmentBuilder* fsBuilder = args.fPB->getFragmentShaderBuilder();
         const char* dstColor = fsBuilder->dstColor();
@@ -511,28 +519,20 @@
                                args.fInputCoverage, dstColor);
     }
 
-    void setData(const GrGLProgramDataManager&, const GrXferProcessor&) SK_OVERRIDE {}
+    void onSetData(const GrGLProgramDataManager&, const GrXferProcessor&) SK_OVERRIDE {}
 
-    static void GenKey(const GrXferProcessor& proc, const GrGLCaps&, GrProcessorKeyBuilder* b) {
-        uint32_t key = proc.numTextures();
-        SkASSERT(key <= 1);
-        key |= proc.cast<GrCustomXP>().mode() << 1;
-        b->add32(key);
-    }
-
-private:
     typedef GrGLFragmentProcessor INHERITED;
 };
 
 ///////////////////////////////////////////////////////////////////////////////
 
-GrCustomXP::GrCustomXP(SkXfermode::Mode mode)
-    : fMode(mode) {
+GrCustomXP::GrCustomXP(SkXfermode::Mode mode, const GrDeviceCoordTexture* dstCopy,
+                       bool willReadDstColor)
+    : INHERITED(dstCopy, willReadDstColor), fMode(mode) {
     this->initClassID<GrCustomXP>();
-    this->setWillReadDstColor();
 }
 
-void GrCustomXP::getGLProcessorKey(const GrGLCaps& caps, GrProcessorKeyBuilder* b) const {
+void GrCustomXP::onGetGLProcessorKey(const GrGLCaps& caps, GrProcessorKeyBuilder* b) const {
     GLCustomXP::GenKey(*this, caps, b);
 }
 
diff --git a/src/gpu/effects/GrCustomXfermodePriv.h b/src/gpu/effects/GrCustomXfermodePriv.h
index 45a9d30..681c805 100644
--- a/src/gpu/effects/GrCustomXfermodePriv.h
+++ b/src/gpu/effects/GrCustomXfermodePriv.h
@@ -9,6 +9,7 @@
 #define GrCustomXfermodePriv_DEFINED
 
 #include "GrCoordTransform.h"
+#include "GrDrawTargetCaps.h"
 #include "GrFragmentProcessor.h"
 #include "GrTextureAccess.h"
 #include "GrXferProcessor.h"
@@ -57,11 +58,12 @@
 
 class GrCustomXP : public GrXferProcessor {
 public:
-    static GrXferProcessor* Create(SkXfermode::Mode mode) {
+    static GrXferProcessor* Create(SkXfermode::Mode mode, const GrDeviceCoordTexture* dstCopy,
+                                   bool willReadDstColor) {
         if (!GrCustomXfermode::IsSupportedMode(mode)) {
             return NULL;
         } else {
-            return SkNEW_ARGS(GrCustomXP, (mode));
+            return SkNEW_ARGS(GrCustomXP, (mode, dstCopy, willReadDstColor));
         }
     }
 
@@ -69,8 +71,6 @@
 
     const char* name() const SK_OVERRIDE { return "Custom Xfermode"; }
 
-    void getGLProcessorKey(const GrGLCaps& caps, GrProcessorKeyBuilder* b) const SK_OVERRIDE;
-
     GrGLXferProcessor* createGLInstance() const SK_OVERRIDE;
 
     bool hasSecondaryOutput() const SK_OVERRIDE { return false; }
@@ -90,7 +90,9 @@
     SkXfermode::Mode mode() const { return fMode; }
 
 private:
-    GrCustomXP(SkXfermode::Mode mode);
+    GrCustomXP(SkXfermode::Mode mode, const GrDeviceCoordTexture* dstCopy, bool willReadDstColor);
+
+    void onGetGLProcessorKey(const GrGLCaps& caps, GrProcessorKeyBuilder* b) const SK_OVERRIDE;
 
     bool onIsEqual(const GrXferProcessor& xpBase) const SK_OVERRIDE;
 
@@ -105,11 +107,6 @@
 public:
     GrCustomXPFactory(SkXfermode::Mode mode); 
 
-    GrXferProcessor* createXferProcessor(const GrProcOptInfo& colorPOI,
-                                         const GrProcOptInfo& coveragePOI) const SK_OVERRIDE {
-        return GrCustomXP::Create(fMode);
-    }
-
     bool supportsRGBCoverage(GrColor knownColor, uint32_t knownColorFlags) const SK_OVERRIDE {
         return true;
     }
@@ -126,9 +123,15 @@
     void getInvariantOutput(const GrProcOptInfo& colorPOI, const GrProcOptInfo& coveragePOI,
                             GrXPFactory::InvariantOutput*) const SK_OVERRIDE;
 
-    bool willReadDst() const SK_OVERRIDE { return true; }
-
 private:
+    GrXferProcessor* onCreateXferProcessor(const GrProcOptInfo& colorPOI,
+                                           const GrProcOptInfo& coveragePOI,
+                                           const GrDeviceCoordTexture* dstCopy) const SK_OVERRIDE {
+        return GrCustomXP::Create(fMode, dstCopy, this->willReadDstColor());
+    }
+
+    bool willReadDstColor() const SK_OVERRIDE { return true; }
+
     bool onIsEqual(const GrXPFactory& xpfBase) const SK_OVERRIDE {
         const GrCustomXPFactory& xpf = xpfBase.cast<GrCustomXPFactory>();
         return fMode == xpf.fMode;
diff --git a/src/gpu/effects/GrDisableColorXP.cpp b/src/gpu/effects/GrDisableColorXP.cpp
index 9383385..83b6d231 100644
--- a/src/gpu/effects/GrDisableColorXP.cpp
+++ b/src/gpu/effects/GrDisableColorXP.cpp
@@ -17,7 +17,10 @@
 
     ~GrGLDisableColorXP() SK_OVERRIDE {}
 
-    void emitCode(const EmitArgs& args) SK_OVERRIDE {
+    static void GenKey(const GrProcessor&, const GrGLCaps&, GrProcessorKeyBuilder*) {}
+
+private:
+    void onEmitCode(const EmitArgs& args) SK_OVERRIDE {
         // This emit code should be empty. However, on the nexus 6 there is a driver bug where if
         // you do not give gl_FragColor a value, the gl context is lost and we end up drawing
         // nothing. So this fix just sets the gl_FragColor arbitrarily to 0.
@@ -25,11 +28,8 @@
         fsBuilder->codeAppendf("%s = vec4(0);", args.fOutputPrimary);
     }
 
-    void setData(const GrGLProgramDataManager&, const GrXferProcessor&) SK_OVERRIDE {}
+    void onSetData(const GrGLProgramDataManager&, const GrXferProcessor&) SK_OVERRIDE {}
 
-    static void GenKey(const GrProcessor&, const GrGLCaps&, GrProcessorKeyBuilder*) {}
-
-private:
     typedef GrGLXferProcessor INHERITED;
 };
 
@@ -39,7 +39,7 @@
     this->initClassID<GrDisableColorXP>();
 }
 
-void GrDisableColorXP::getGLProcessorKey(const GrGLCaps& caps, GrProcessorKeyBuilder* b) const {
+void GrDisableColorXP::onGetGLProcessorKey(const GrGLCaps& caps, GrProcessorKeyBuilder* b) const {
     GrGLDisableColorXP::GenKey(*this, caps, b);
 }
 
@@ -57,8 +57,10 @@
     this->initClassID<GrDisableColorXPFactory>();
 }
 
-GrXferProcessor* GrDisableColorXPFactory::createXferProcessor(const GrProcOptInfo& colorPOI,
-                                                              const GrProcOptInfo& covPOI) const {
+GrXferProcessor*
+GrDisableColorXPFactory::onCreateXferProcessor(const GrProcOptInfo& colorPOI,
+                                               const GrProcOptInfo& covPOI,
+                                               const GrDeviceCoordTexture* dstCopy) const {
     return GrDisableColorXP::Create();
 }
 
diff --git a/src/gpu/effects/GrDisableColorXP.h b/src/gpu/effects/GrDisableColorXP.h
index d62c320..da6fa45 100644
--- a/src/gpu/effects/GrDisableColorXP.h
+++ b/src/gpu/effects/GrDisableColorXP.h
@@ -27,8 +27,6 @@
 
     const char* name() const SK_OVERRIDE { return "Disable Color"; }
 
-    void getGLProcessorKey(const GrGLCaps& caps, GrProcessorKeyBuilder* b) const SK_OVERRIDE;
-
     GrGLXferProcessor* createGLInstance() const SK_OVERRIDE;
 
     bool hasSecondaryOutput() const SK_OVERRIDE { return false; }
@@ -46,6 +44,8 @@
 private:
     GrDisableColorXP();
 
+    void onGetGLProcessorKey(const GrGLCaps& caps, GrProcessorKeyBuilder* b) const SK_OVERRIDE;
+
     bool onIsEqual(const GrXferProcessor& xpBase) const SK_OVERRIDE {
         return true;
     }
@@ -61,9 +61,6 @@
         return SkNEW(GrDisableColorXPFactory);
     }
 
-    GrXferProcessor* createXferProcessor(const GrProcOptInfo& colorPOI,
-                                         const GrProcOptInfo& coveragePOI) const SK_OVERRIDE;
-
     bool supportsRGBCoverage(GrColor knownColor, uint32_t knownColorFlags) const SK_OVERRIDE {
         return true;
     }
@@ -81,11 +78,15 @@
         output->fWillBlendWithDst = 0;
     }
 
-    bool willReadDst() const SK_OVERRIDE { return false; }
-
 private:
     GrDisableColorXPFactory();
 
+    GrXferProcessor* onCreateXferProcessor(const GrProcOptInfo& colorPOI,
+                                           const GrProcOptInfo& coveragePOI,
+                                           const GrDeviceCoordTexture* dstCopy) const SK_OVERRIDE;
+
+    bool willReadDstColor() const SK_OVERRIDE { return false; }
+
     bool onIsEqual(const GrXPFactory& xpfBase) const SK_OVERRIDE {
         return true;
     }
diff --git a/src/gpu/effects/GrPorterDuffXferProcessor.cpp b/src/gpu/effects/GrPorterDuffXferProcessor.cpp
index 5038aa3..320d943 100644
--- a/src/gpu/effects/GrPorterDuffXferProcessor.cpp
+++ b/src/gpu/effects/GrPorterDuffXferProcessor.cpp
@@ -38,7 +38,15 @@
 
     virtual ~GrGLPorterDuffXferProcessor() {}
 
-    void emitCode(const EmitArgs& args) SK_OVERRIDE {
+    static void GenKey(const GrProcessor& processor, const GrGLCaps& caps,
+                       GrProcessorKeyBuilder* b) {
+        const GrPorterDuffXferProcessor& xp = processor.cast<GrPorterDuffXferProcessor>();
+        b->add32(xp.primaryOutputType());
+        b->add32(xp.secondaryOutputType());
+    };
+
+private:
+    void onEmitCode(const EmitArgs& args) SK_OVERRIDE {
         const GrPorterDuffXferProcessor& xp = args.fXP.cast<GrPorterDuffXferProcessor>();
         GrGLFPFragmentBuilder* fsBuilder = args.fPB->getFragmentShaderBuilder();
         if (xp.hasSecondaryOutput()) {
@@ -80,23 +88,18 @@
         }
     }
 
-    void setData(const GrGLProgramDataManager&, const GrXferProcessor&) SK_OVERRIDE {};
+    void onSetData(const GrGLProgramDataManager&, const GrXferProcessor&) SK_OVERRIDE {};
 
-    static void GenKey(const GrProcessor& processor, const GrGLCaps& caps,
-                       GrProcessorKeyBuilder* b) {
-        const GrPorterDuffXferProcessor& xp = processor.cast<GrPorterDuffXferProcessor>();
-        b->add32(xp.primaryOutputType());
-        b->add32(xp.secondaryOutputType());
-    };
-
-private:
     typedef GrGLXferProcessor INHERITED;
 };
 
 ///////////////////////////////////////////////////////////////////////////////
 
-GrPorterDuffXferProcessor::GrPorterDuffXferProcessor(GrBlendCoeff srcBlend, GrBlendCoeff dstBlend,
-                                                     GrColor constant)
+GrPorterDuffXferProcessor::GrPorterDuffXferProcessor(GrBlendCoeff srcBlend,
+                                                     GrBlendCoeff dstBlend,
+                                                     GrColor constant,
+                                                     const GrDeviceCoordTexture* dstCopy,
+                                                     bool willReadDstColor)
     : fSrcBlend(srcBlend)
     , fDstBlend(dstBlend)
     , fBlendConstant(constant)
@@ -108,8 +111,8 @@
 GrPorterDuffXferProcessor::~GrPorterDuffXferProcessor() {
 }
 
-void GrPorterDuffXferProcessor::getGLProcessorKey(const GrGLCaps& caps,
-                                                  GrProcessorKeyBuilder* b) const {
+void GrPorterDuffXferProcessor::onGetGLProcessorKey(const GrGLCaps& caps,
+                                                    GrProcessorKeyBuilder* b) const {
     GrGLPorterDuffXferProcessor::GenKey(*this, caps, b);
 }
 
@@ -353,16 +356,20 @@
     }
 }
 
-GrXferProcessor* GrPorterDuffXPFactory::createXferProcessor(const GrProcOptInfo& colorPOI,
-                                                            const GrProcOptInfo& covPOI) const {
+GrXferProcessor*
+GrPorterDuffXPFactory::onCreateXferProcessor(const GrProcOptInfo& colorPOI,
+                                             const GrProcOptInfo& covPOI,
+                                             const GrDeviceCoordTexture* dstCopy) const {
     if (!covPOI.isFourChannelOutput()) {
-        return GrPorterDuffXferProcessor::Create(fSrcCoeff, fDstCoeff);
+        return GrPorterDuffXferProcessor::Create(fSrcCoeff, fDstCoeff, 0, dstCopy,
+                                                 this->willReadDstColor());
     } else {
         if (this->supportsRGBCoverage(colorPOI.color(), colorPOI.validFlags())) {
             SkASSERT(kRGBA_GrColorComponentFlags == colorPOI.validFlags());
             GrColor blendConstant = GrUnPreMulColor(colorPOI.color());
             return GrPorterDuffXferProcessor::Create(kConstC_GrBlendCoeff, kISC_GrBlendCoeff,
-                                                     blendConstant);
+                                                     blendConstant, dstCopy,
+                                                     this->willReadDstColor());
         } else {
             return NULL;
         }
@@ -484,6 +491,10 @@
     output->fWillBlendWithDst = false;
 }
 
+bool GrPorterDuffXPFactory::willReadDstColor() const {
+    return false;
+}
+
 GR_DEFINE_XP_FACTORY_TEST(GrPorterDuffXPFactory);
 
 GrXPFactory* GrPorterDuffXPFactory::TestCreate(SkRandom* random,
diff --git a/src/gpu/gl/GrGLGpu.cpp b/src/gpu/gl/GrGLGpu.cpp
index dd55abb..6419aa6 100644
--- a/src/gpu/gl/GrGLGpu.cpp
+++ b/src/gpu/gl/GrGLGpu.cpp
@@ -1451,10 +1451,8 @@
 void GrGLGpu::buildProgramDesc(GrProgramDesc* desc,
                                const GrPrimitiveProcessor& primProc,
                                const GrPipeline& pipeline,
-                               const GrProgramDesc::DescInfo& descInfo,
                                const GrBatchTracker& batchTracker) const {
-    if (!GrGLProgramDescBuilder::Build(desc, primProc, pipeline, descInfo, this,
-                                       batchTracker)) {
+    if (!GrGLProgramDescBuilder::Build(desc, primProc, pipeline, this, batchTracker)) {
         SkDEBUGFAIL("Failed to generate GL program descriptor");
     }
 }
diff --git a/src/gpu/gl/GrGLGpu.h b/src/gpu/gl/GrGLGpu.h
index 7eda014..18f0f74 100644
--- a/src/gpu/gl/GrGLGpu.h
+++ b/src/gpu/gl/GrGLGpu.h
@@ -108,7 +108,6 @@
     void buildProgramDesc(GrProgramDesc*,
                           const GrPrimitiveProcessor&,
                           const GrPipeline&,
-                          const GrProgramDesc::DescInfo&,
                           const GrBatchTracker&) const SK_OVERRIDE;
 
 private:
diff --git a/src/gpu/gl/GrGLProgram.cpp b/src/gpu/gl/GrGLProgram.cpp
index ad63dc4..eebcf6b 100644
--- a/src/gpu/gl/GrGLProgram.cpp
+++ b/src/gpu/gl/GrGLProgram.cpp
@@ -61,10 +61,6 @@
 void GrGLProgram::initSamplerUniforms() {
     GL_CALL(UseProgram(fProgramID));
     GrGLint texUnitIdx = 0;
-    if (fBuiltinUniformHandles.fDstCopySamplerUni.isValid()) {
-        fProgramDataManager.setSampler(fBuiltinUniformHandles.fDstCopySamplerUni, texUnitIdx);
-        fDstCopyTexUnit = texUnitIdx++;
-    }
     this->initSamplers(fGeometryProcessor.get(), &texUnitIdx);
     if (fXferProcessor.get()) {
         this->initSamplers(fXferProcessor.get(), &texUnitIdx);
@@ -107,38 +103,15 @@
                           const GrBatchTracker& batchTracker) {
     this->setRenderTargetState(primProc, pipeline);
 
-    const GrDeviceCoordTexture* dstCopy = pipeline.getDstCopy();
-    if (dstCopy) {
-        if (fBuiltinUniformHandles.fDstCopyTopLeftUni.isValid()) {
-            fProgramDataManager.set2f(fBuiltinUniformHandles.fDstCopyTopLeftUni,
-                                       static_cast<GrGLfloat>(dstCopy->offset().fX),
-                                       static_cast<GrGLfloat>(dstCopy->offset().fY));
-            fProgramDataManager.set2f(fBuiltinUniformHandles.fDstCopyScaleUni,
-                                       1.f / dstCopy->texture()->width(),
-                                       1.f / dstCopy->texture()->height());
-            GrGLTexture* texture = static_cast<GrGLTexture*>(dstCopy->texture());
-            static GrTextureParams kParams; // the default is clamp, nearest filtering.
-            fGpu->bindTexture(fDstCopyTexUnit, kParams, texture);
-        } else {
-            SkASSERT(!fBuiltinUniformHandles.fDstCopyScaleUni.isValid());
-            SkASSERT(!fBuiltinUniformHandles.fDstCopySamplerUni.isValid());
-        }
-    } else {
-        SkASSERT(!fBuiltinUniformHandles.fDstCopyTopLeftUni.isValid());
-        SkASSERT(!fBuiltinUniformHandles.fDstCopyScaleUni.isValid());
-        SkASSERT(!fBuiltinUniformHandles.fDstCopySamplerUni.isValid());
-    }
-
     // we set the textures, and uniforms for installed processors in a generic way, but subclasses
     // of GLProgram determine how to set coord transforms
     fGeometryProcessor->fGLProc->setData(fProgramDataManager, primProc, batchTracker);
     this->bindTextures(fGeometryProcessor.get(), primProc);
 
-    if (fXferProcessor.get()) {
-        const GrXferProcessor& xp = *pipeline.getXferProcessor();
-        fXferProcessor->fGLProc->setData(fProgramDataManager, xp);
-        this->bindTextures(fXferProcessor.get(), xp);
-    }
+    const GrXferProcessor& xp = *pipeline.getXferProcessor();
+    fXferProcessor->fGLProc->setData(fProgramDataManager, xp);
+    this->bindTextures(fXferProcessor.get(), xp);
+
     this->setFragmentData(primProc, pipeline);
 
     // Some of GrGLProgram subclasses need to update state here
diff --git a/src/gpu/gl/GrGLProgramDesc.cpp b/src/gpu/gl/GrGLProgramDesc.cpp
index 32f831e..abe2439 100644
--- a/src/gpu/gl/GrGLProgramDesc.cpp
+++ b/src/gpu/gl/GrGLProgramDesc.cpp
@@ -89,7 +89,6 @@
 bool GrGLProgramDescBuilder::Build(GrProgramDesc* desc,
                                    const GrPrimitiveProcessor& primProc,
                                    const GrPipeline& pipeline,
-                                   const GrProgramDesc::DescInfo& descInfo,
                                    const GrGLGpu* gpu,
                                    const GrBatchTracker& batchTracker) {
     // The descriptor is used as a cache key. Thus when a field of the
@@ -135,21 +134,7 @@
     // make sure any padding in the header is zeroed.
     memset(header, 0, kHeaderSize);
 
-    if (descInfo.fReadsDst) {
-        const GrDeviceCoordTexture* dstCopy = pipeline.getDstCopy();
-        SkASSERT(dstCopy || gpu->caps()->dstReadInShaderSupport());
-        const GrTexture* dstCopyTexture = NULL;
-        if (dstCopy) {
-            dstCopyTexture = dstCopy->texture();
-        }
-        header->fDstReadKey = GrGLFragmentShaderBuilder::KeyForDstRead(dstCopyTexture,
-                                                                       gpu->glCaps());
-        SkASSERT(0 != header->fDstReadKey);
-    } else {
-        header->fDstReadKey = 0;
-    }
-
-    if (descInfo.fReadsFragPosition) {
+    if (pipeline.readsFragPosition()) {
         header->fFragPosKey =
                 GrGLFragmentShaderBuilder::KeyForFragmentPosition(pipeline.getRenderTarget(),
                                                                   gpu->glCaps());
diff --git a/src/gpu/gl/GrGLProgramDesc.h b/src/gpu/gl/GrGLProgramDesc.h
index e4db4c3..f237155 100644
--- a/src/gpu/gl/GrGLProgramDesc.h
+++ b/src/gpu/gl/GrGLProgramDesc.h
@@ -47,8 +47,6 @@
      *                        general draw information, as well as the specific color, geometry,
      *                        and coverage stages which will be used to generate the GL Program for
      *                        this optstate.
-     * @param DescInfo  A descriptor info struct, generated by the optstate, which contains a number
-     *                  of important facts about the program the built descriptor will represent
      * @param GrGLGpu  A GL Gpu, the caps and Gpu object are used to output processor specific
      *                 parts of the descriptor.
      * @param GrDeviceCoordTexture  A dstCopy texture, which may be null if frame buffer fetch is
@@ -58,7 +56,6 @@
     static bool Build(GrProgramDesc*,
                       const GrPrimitiveProcessor&,
                       const GrPipeline&,
-                      const GrProgramDesc::DescInfo&,
                       const GrGLGpu*,
                       const GrBatchTracker&);
 };
diff --git a/src/gpu/gl/GrGLXferProcessor.cpp b/src/gpu/gl/GrGLXferProcessor.cpp
new file mode 100644
index 0000000..5e8c00a
--- /dev/null
+++ b/src/gpu/gl/GrGLXferProcessor.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2014 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "gl/GrGLXferProcessor.h"
+
+#include "GrXferProcessor.h"
+#include "gl/builders/GrGLFragmentShaderBuilder.h"
+#include "gl/builders/GrGLProgramBuilder.h"
+
+void GrGLXferProcessor::emitCode(const EmitArgs& args) {
+    if (args.fXP.getDstCopyTexture()) {
+        GrGLFPFragmentBuilder* fsBuilder = args.fPB->getFragmentShaderBuilder();
+        const char* dstColor = fsBuilder->dstColor();
+
+        const char* dstCopyTopLeftName;
+        const char* dstCopyCoordScaleName;
+
+        fDstCopyTopLeftUni = args.fPB->addUniform(GrGLProgramBuilder::kFragment_Visibility,
+                                                   kVec2f_GrSLType,
+                                                   kDefault_GrSLPrecision,
+                                                   "DstCopyUpperLeft",
+                                                   &dstCopyTopLeftName);
+        fDstCopyScaleUni = args.fPB->addUniform(GrGLProgramBuilder::kFragment_Visibility,
+                                                kVec2f_GrSLType,
+                                                kDefault_GrSLPrecision,
+                                                "DstCopyCoordScale",
+                                                &dstCopyCoordScaleName);
+        const char* fragPos = fsBuilder->fragmentPosition();
+
+        fsBuilder->codeAppend("// Read color from copy of the destination.\n");
+        fsBuilder->codeAppendf("vec2 _dstTexCoord = (%s.xy - %s) * %s;",
+                               fragPos, dstCopyTopLeftName, dstCopyCoordScaleName);
+        fsBuilder->codeAppendf("vec4 %s = ", dstColor);
+        fsBuilder->appendTextureLookup(args.fSamplers[0], "_dstTexCoord", kVec2f_GrSLType);
+        fsBuilder->codeAppend(";");
+    }
+
+    this->onEmitCode(args);
+}
+
+void GrGLXferProcessor::setData(const GrGLProgramDataManager& pdm, const GrXferProcessor& xp) {
+    if (xp.getDstCopyTexture()) {
+        if (fDstCopyTopLeftUni.isValid()) {
+            pdm.set2f(fDstCopyTopLeftUni, static_cast<GrGLfloat>(xp.dstCopyTextureOffset().fX),
+                      static_cast<GrGLfloat>(xp.dstCopyTextureOffset().fY));
+            pdm.set2f(fDstCopyScaleUni, 1.f / xp.getDstCopyTexture()->width(),
+                      1.f / xp.getDstCopyTexture()->height());
+        } else {
+            SkASSERT(!fDstCopyScaleUni.isValid());
+        }
+    } else {
+        SkASSERT(!fDstCopyTopLeftUni.isValid());
+        SkASSERT(!fDstCopyScaleUni.isValid());
+    }
+    this->onSetData(pdm, xp);
+}
+
diff --git a/src/gpu/gl/GrGLXferProcessor.h b/src/gpu/gl/GrGLXferProcessor.h
index 5c92559..7f60f32 100644
--- a/src/gpu/gl/GrGLXferProcessor.h
+++ b/src/gpu/gl/GrGLXferProcessor.h
@@ -11,6 +11,7 @@
 #include "GrGLProcessor.h"
 
 class GrGLXPBuilder;
+class GrXferProcessor;
 
 class GrGLXferProcessor {
 public:
@@ -46,16 +47,25 @@
      * This is similar to emitCode() in the base class, except it takes a full shader builder.
      * This allows the effect subclass to emit vertex code.
      */
-    virtual void emitCode(const EmitArgs&) = 0;
+    void emitCode(const EmitArgs&);
 
     /** A GrGLXferProcessor instance can be reused with any GrGLXferProcessor that produces
         the same stage key; this function reads data from a GrGLXferProcessor and uploads any
         uniform variables required  by the shaders created in emitCode(). The GrXferProcessor
         parameter is guaranteed to be of the same type that created this GrGLXferProcessor and
-        to have an identical processor key as the one that created this GrGLXferProcessor.  */
-    virtual void setData(const GrGLProgramDataManager&,
-                         const GrXferProcessor&) = 0;
+        to have an identical processor key as the one that created this GrGLXferProcessor. This
+        function calls onSetData on the subclass of GrGLXferProcessor
+     */
+    void setData(const GrGLProgramDataManager& pdm, const GrXferProcessor& xp);
+
 private:
+    virtual void onEmitCode(const EmitArgs&) = 0;
+
+    virtual void onSetData(const GrGLProgramDataManager&, const GrXferProcessor&) = 0;
+
+    GrGLProgramDataManager::UniformHandle fDstCopyTopLeftUni;
+    GrGLProgramDataManager::UniformHandle fDstCopyScaleUni;
+
     typedef GrGLProcessor INHERITED;
 };
 #endif
diff --git a/src/gpu/gl/builders/GrGLFragmentShaderBuilder.cpp b/src/gpu/gl/builders/GrGLFragmentShaderBuilder.cpp
index 61932f3..86c622d 100644
--- a/src/gpu/gl/builders/GrGLFragmentShaderBuilder.cpp
+++ b/src/gpu/gl/builders/GrGLFragmentShaderBuilder.cpp
@@ -179,56 +179,9 @@
             fbFetchColorName = declared_color_output_name();
         }
         return fbFetchColorName;
-    } else if (fProgramBuilder->fUniformHandles.fDstCopySamplerUni.isValid()) {
+    } else {
         return kDstCopyColorName;
-    } else {
-        return "";
-    }
-}
-
-void GrGLFragmentShaderBuilder::emitCodeToReadDstTexture() {
-    bool topDown = SkToBool(kTopLeftOrigin_DstReadKeyBit & fProgramBuilder->header().fDstReadKey);
-    const char* dstCopyTopLeftName;
-    const char* dstCopyCoordScaleName;
-    const char* dstCopySamplerName;
-    uint32_t configMask;
-    if (SkToBool(kUseAlphaConfig_DstReadKeyBit & fProgramBuilder->header().fDstReadKey)) {
-        configMask = kA_GrColorComponentFlag;
-    } else {
-        configMask = kRGBA_GrColorComponentFlags;
-    }
-    fProgramBuilder->fUniformHandles.fDstCopySamplerUni =
-            fProgramBuilder->addUniform(GrGLProgramBuilder::kFragment_Visibility,
-                                        kSampler2D_GrSLType,
-                                        kDefault_GrSLPrecision,
-                                        "DstCopySampler",
-                                        &dstCopySamplerName);
-    fProgramBuilder->fUniformHandles.fDstCopyTopLeftUni =
-            fProgramBuilder->addUniform(GrGLProgramBuilder::kFragment_Visibility,
-                                        kVec2f_GrSLType,
-                                        kDefault_GrSLPrecision,
-                                        "DstCopyUpperLeft",
-                                        &dstCopyTopLeftName);
-    fProgramBuilder->fUniformHandles.fDstCopyScaleUni =
-            fProgramBuilder->addUniform(GrGLProgramBuilder::kFragment_Visibility,
-                                        kVec2f_GrSLType,
-                                        kDefault_GrSLPrecision,
-                                        "DstCopyCoordScale",
-                                        &dstCopyCoordScaleName);
-    const char* fragPos = this->fragmentPosition();
-
-    this->codeAppend("// Read color from copy of the destination.\n");
-    this->codeAppendf("vec2 _dstTexCoord = (%s.xy - %s) * %s;",
-                      fragPos, dstCopyTopLeftName, dstCopyCoordScaleName);
-    if (!topDown) {
-        this->codeAppend("_dstTexCoord.y = 1.0 - _dstTexCoord.y;");
-    }
-    this->codeAppendf("vec4 %s = ", GrGLFragmentShaderBuilder::kDstCopyColorName);
-    this->appendTextureLookup(dstCopySamplerName,
-                              "_dstTexCoord",
-                              configMask,
-                              "rgba");
-    this->codeAppend(";");
+    } 
 }
 
 void GrGLFragmentShaderBuilder::enableCustomOutput() {
diff --git a/src/gpu/gl/builders/GrGLFragmentShaderBuilder.h b/src/gpu/gl/builders/GrGLFragmentShaderBuilder.h
index 688bbe6..903c5e1 100644
--- a/src/gpu/gl/builders/GrGLFragmentShaderBuilder.h
+++ b/src/gpu/gl/builders/GrGLFragmentShaderBuilder.h
@@ -97,7 +97,6 @@
 
 private:
     // Private public interface, used by GrGLProgramBuilder to build a fragment shader
-    void emitCodeToReadDstTexture();
     void enableCustomOutput();
     void enableSecondaryOutput();
     const char* getPrimaryColorOutputName() const;
diff --git a/src/gpu/gl/builders/GrGLProgramBuilder.cpp b/src/gpu/gl/builders/GrGLProgramBuilder.cpp
index 0881696..bb278be 100644
--- a/src/gpu/gl/builders/GrGLProgramBuilder.cpp
+++ b/src/gpu/gl/builders/GrGLProgramBuilder.cpp
@@ -56,12 +56,6 @@
 
     GrGLProgramBuilder* pb = builder.get();
 
-    // emit code to read the dst copy texture, if necessary
-    if (GrGLFragmentShaderBuilder::kNoDstRead_DstReadKey != pb->header().fDstReadKey &&
-        !gpu->glCaps().fbFetchSupport()) {
-        pb->fFS.emitCodeToReadDstTexture();
-    }
-
     // TODO: Once all stages can handle taking a float or vec4 and correctly handling them we can
     // seed correctly here
     GrGLSLExpr4 inputColor;
diff --git a/src/gpu/gl/builders/GrGLProgramBuilder.h b/src/gpu/gl/builders/GrGLProgramBuilder.h
index 4b40cef..a9288cc 100644
--- a/src/gpu/gl/builders/GrGLProgramBuilder.h
+++ b/src/gpu/gl/builders/GrGLProgramBuilder.h
@@ -267,11 +267,6 @@
         // We use the render target height to provide a y-down frag coord when specifying
         // origin_upper_left is not supported.
         UniformHandle       fRTHeightUni;
-
-        // Uniforms for computing texture coords to do the dst-copy lookup
-        UniformHandle       fDstCopyTopLeftUni;
-        UniformHandle       fDstCopyScaleUni;
-        UniformHandle       fDstCopySamplerUni;
     };
 
 protected:
diff --git a/tests/GLProgramsTest.cpp b/tests/GLProgramsTest.cpp
index 16d44fd..aee12d7 100644
--- a/tests/GLProgramsTest.cpp
+++ b/tests/GLProgramsTest.cpp
@@ -298,7 +298,7 @@
         } else {
             primProc = pathProc.get();
         }
-        if (!this->setupDstReadIfNecessary(&pipelineBuilder, &dstCopy, NULL)) {
+        if (!this->setupDstReadIfNecessary(pipelineBuilder, &dstCopy, NULL)) {
             SkDebugf("Couldn't setup dst read texture");
             return false;
         }
@@ -313,7 +313,7 @@
         primProc->initBatchTracker(&bt, pipeline.getInitBatchTracker());
 
         GrProgramDesc desc;
-        gpu->buildProgramDesc(&desc, *primProc, pipeline, pipeline.descInfo(), bt);
+        gpu->buildProgramDesc(&desc, *primProc, pipeline, bt);
 
         GrGpu::DrawArgs args(primProc, &pipeline, &desc, &bt);
         SkAutoTUnref<GrGLProgram> program(GrGLProgramBuilder::CreateProgram(args, gpu));