Oval and stroke AA rect now batch

BUG=skia:

Review URL: https://codereview.chromium.org/664193002
diff --git a/expectations/gm/ignored-tests.txt b/expectations/gm/ignored-tests.txt
index 5cd4f6b..2075850 100644
--- a/expectations/gm/ignored-tests.txt
+++ b/expectations/gm/ignored-tests.txt
@@ -85,3 +85,22 @@
 testimagefilters
 tileimagefilter
 xfermodeimagefilter
+
+# joshualitt batching oval renderer and AA rect
+testimagefilters
+strokes_round
+rrect
+ovals
+mixed_xfermodes
+hairmodes
+circles
+blurs
+aarectmodes
+peekpixels
+shadows
+stroke-fill
+srcmode_gpu
+rrect_draw_aa
+twopointconical
+image-surface
+
diff --git a/src/gpu/GrAAHairLinePathRenderer.cpp b/src/gpu/GrAAHairLinePathRenderer.cpp
index 7347f9c..6990b7e 100644
--- a/src/gpu/GrAAHairLinePathRenderer.cpp
+++ b/src/gpu/GrAAHairLinePathRenderer.cpp
@@ -21,12 +21,9 @@
 
 #include "effects/GrBezierEffect.h"
 
-namespace {
 // quadratics are rendered as 5-sided polys in order to bound the
 // AA stroke around the center-curve. See comments in push_quad_index_buffer and
 // bloat_quad. Quadratics and conics share an index buffer
-static const int kVertsPerQuad = 5;
-static const int kIdxsPerQuad = 9;
 
 // lines are rendered as:
 //      *______________*
@@ -36,127 +33,66 @@
 //      | /  ______/ \ |
 //      */_-__________\*
 // For: 6 vertices and 18 indices (for 6 triangles)
-static const int kVertsPerLineSeg = 6;
-static const int kIdxsPerLineSeg = 18;
 
-static const int kNumQuadsInIdxBuffer = 256;
-static const size_t kQuadIdxSBufize = kIdxsPerQuad *
-                                      sizeof(uint16_t) *
-                                      kNumQuadsInIdxBuffer;
+// Each quadratic is rendered as a five sided polygon. This poly bounds
+// the quadratic's bounding triangle but has been expanded so that the
+// 1-pixel wide area around the curve is inside the poly.
+// If a,b,c are the original control points then the poly a0,b0,c0,c1,a1
+// that is rendered would look like this:
+//              b0
+//              b
+//
+//     a0              c0
+//      a            c
+//       a1       c1
+// Each is drawn as three triangles specified by these 9 indices:
+static const uint16_t kQuadIdxBufPattern[] = {
+    0, 1, 2,
+    2, 4, 3,
+    1, 4, 2
+};
 
-static const int kNumLineSegsInIdxBuffer = 256;
-static const size_t kLineSegIdxSBufize = kIdxsPerLineSeg *
-                                         sizeof(uint16_t) *
-                                         kNumLineSegsInIdxBuffer;
+static const int kIdxsPerQuad = SK_ARRAY_COUNT(kQuadIdxBufPattern);
+static const int kQuadNumVertices = 5;
+static const int kQuadsNumInIdxBuffer = 256;
 
-static bool push_quad_index_data(GrIndexBuffer* qIdxBuffer) {
-    uint16_t* data = (uint16_t*) qIdxBuffer->map();
-    bool tempData = NULL == data;
-    if (tempData) {
-        data = SkNEW_ARRAY(uint16_t, kNumQuadsInIdxBuffer * kIdxsPerQuad);
-    }
-    for (int i = 0; i < kNumQuadsInIdxBuffer; ++i) {
 
-        // Each quadratic is rendered as a five sided polygon. This poly bounds
-        // the quadratic's bounding triangle but has been expanded so that the
-        // 1-pixel wide area around the curve is inside the poly.
-        // If a,b,c are the original control points then the poly a0,b0,c0,c1,a1
-        // that is rendered would look like this:
-        //              b0
-        //              b
-        //
-        //     a0              c0
-        //      a            c
-        //       a1       c1
-        // Each is drawn as three triangles specified by these 9 indices:
-        int baseIdx = i * kIdxsPerQuad;
-        uint16_t baseVert = (uint16_t)(i * kVertsPerQuad);
-        data[0 + baseIdx] = baseVert + 0; // a0
-        data[1 + baseIdx] = baseVert + 1; // a1
-        data[2 + baseIdx] = baseVert + 2; // b0
-        data[3 + baseIdx] = baseVert + 2; // b0
-        data[4 + baseIdx] = baseVert + 4; // c1
-        data[5 + baseIdx] = baseVert + 3; // c0
-        data[6 + baseIdx] = baseVert + 1; // a1
-        data[7 + baseIdx] = baseVert + 4; // c1
-        data[8 + baseIdx] = baseVert + 2; // b0
-    }
-    if (tempData) {
-        bool ret = qIdxBuffer->updateData(data, kQuadIdxSBufize);
-        delete[] data;
-        return ret;
-    } else {
-        qIdxBuffer->unmap();
-        return true;
-    }
-}
+// Each line segment is rendered as two quads and two triangles.
+// p0 and p1 have alpha = 1 while all other points have alpha = 0.
+// The four external points are offset 1 pixel perpendicular to the
+// line and half a pixel parallel to the line.
+//
+// p4                  p5
+//      p0         p1
+// p2                  p3
+//
+// Each is drawn as six triangles specified by these 18 indices:
 
-static bool push_line_index_data(GrIndexBuffer* lIdxBuffer) {
-    uint16_t* data = (uint16_t*) lIdxBuffer->map();
-    bool tempData = NULL == data;
-    if (tempData) {
-        data = SkNEW_ARRAY(uint16_t, kNumLineSegsInIdxBuffer * kIdxsPerLineSeg);
-    }
-    for (int i = 0; i < kNumLineSegsInIdxBuffer; ++i) {
-        // Each line segment is rendered as two quads and two triangles.
-        // p0 and p1 have alpha = 1 while all other points have alpha = 0.
-        // The four external points are offset 1 pixel perpendicular to the
-        // line and half a pixel parallel to the line.
-        //
-        // p4                  p5
-        //      p0         p1
-        // p2                  p3
-        //
-        // Each is drawn as six triangles specified by these 18 indices:
-        int baseIdx = i * kIdxsPerLineSeg;
-        uint16_t baseVert = (uint16_t)(i * kVertsPerLineSeg);
-        data[0 + baseIdx] = baseVert + 0;
-        data[1 + baseIdx] = baseVert + 1;
-        data[2 + baseIdx] = baseVert + 3;
+static const uint16_t kLineSegIdxBufPattern[] = {
+    0, 1, 3,
+    0, 3, 2,
+    0, 4, 5,
+    0, 5, 1,
+    0, 2, 4,
+    1, 5, 3
+};
 
-        data[3 + baseIdx] = baseVert + 0;
-        data[4 + baseIdx] = baseVert + 3;
-        data[5 + baseIdx] = baseVert + 2;
-
-        data[6 + baseIdx] = baseVert + 0;
-        data[7 + baseIdx] = baseVert + 4;
-        data[8 + baseIdx] = baseVert + 5;
-
-        data[9 + baseIdx] = baseVert + 0;
-        data[10+ baseIdx] = baseVert + 5;
-        data[11+ baseIdx] = baseVert + 1;
-
-        data[12 + baseIdx] = baseVert + 0;
-        data[13 + baseIdx] = baseVert + 2;
-        data[14 + baseIdx] = baseVert + 4;
-
-        data[15 + baseIdx] = baseVert + 1;
-        data[16 + baseIdx] = baseVert + 5;
-        data[17 + baseIdx] = baseVert + 3;
-    }
-    if (tempData) {
-        bool ret = lIdxBuffer->updateData(data, kLineSegIdxSBufize);
-        delete[] data;
-        return ret;
-    } else {
-        lIdxBuffer->unmap();
-        return true;
-    }
-}
-}
+static const int kIdxsPerLineSeg = SK_ARRAY_COUNT(kLineSegIdxBufPattern);
+static const int kLineSegNumVertices = 6;
+static const int kLineSegsNumInIdxBuffer = 256;
 
 GrPathRenderer* GrAAHairLinePathRenderer::Create(GrContext* context) {
     GrGpu* gpu = context->getGpu();
-    GrIndexBuffer* qIdxBuf = gpu->createIndexBuffer(kQuadIdxSBufize, false);
+    GrIndexBuffer* qIdxBuf = gpu->createInstancedIndexBuffer(kQuadIdxBufPattern,
+                                                             kIdxsPerQuad,
+                                                             kQuadsNumInIdxBuffer,
+                                                             kQuadNumVertices);
     SkAutoTUnref<GrIndexBuffer> qIdxBuffer(qIdxBuf);
-    if (NULL == qIdxBuf || !push_quad_index_data(qIdxBuf)) {
-        return NULL;
-    }
-    GrIndexBuffer* lIdxBuf = gpu->createIndexBuffer(kLineSegIdxSBufize, false);
+    GrIndexBuffer* lIdxBuf = gpu->createInstancedIndexBuffer(kLineSegIdxBufPattern,
+                                                             kIdxsPerLineSeg,
+                                                             kLineSegsNumInIdxBuffer,
+                                                             kLineSegNumVertices);
     SkAutoTUnref<GrIndexBuffer> lIdxBuffer(lIdxBuf);
-    if (NULL == lIdxBuf || !push_line_index_data(lIdxBuf)) {
-        return NULL;
-    }
     return SkNEW_ARGS(GrAAHairLinePathRenderer,
                       (context, lIdxBuf, qIdxBuf));
 }
@@ -525,14 +461,14 @@
     result->fY = SkScalarMul(result->fY, wInv);
 }
 
-void set_uv_quad(const SkPoint qpts[3], BezierVertex verts[kVertsPerQuad]) {
+void set_uv_quad(const SkPoint qpts[3], BezierVertex verts[kQuadNumVertices]) {
     // this should be in the src space, not dev coords, when we have perspective
     GrPathUtils::QuadUVMatrix DevToUV(qpts);
-    DevToUV.apply<kVertsPerQuad, sizeof(BezierVertex), sizeof(SkPoint)>(verts);
+    DevToUV.apply<kQuadNumVertices, sizeof(BezierVertex), sizeof(SkPoint)>(verts);
 }
 
 void bloat_quad(const SkPoint qpts[3], const SkMatrix* toDevice,
-                const SkMatrix* toSrc, BezierVertex verts[kVertsPerQuad],
+                const SkMatrix* toSrc, BezierVertex verts[kQuadNumVertices],
                 SkRect* devBounds) {
     SkASSERT(!toDevice == !toSrc);
     // original quad is specified by tri a,b,c
@@ -598,10 +534,10 @@
     c1.fPos -= cbN;
 
     intersect_lines(a0.fPos, abN, c0.fPos, cbN, &b0.fPos);
-    devBounds->growToInclude(&verts[0].fPos, sizeof(BezierVertex), kVertsPerQuad);
+    devBounds->growToInclude(&verts[0].fPos, sizeof(BezierVertex), kQuadNumVertices);
 
     if (toSrc) {
-        toSrc->mapPointsWithStride(&verts[0].fPos, sizeof(BezierVertex), kVertsPerQuad);
+        toSrc->mapPointsWithStride(&verts[0].fPos, sizeof(BezierVertex), kQuadNumVertices);
     }
 }
 
@@ -612,13 +548,13 @@
 // f(x, y, w) = f(P) = K^2 - LM
 // K = dot(k, P), L = dot(l, P), M = dot(m, P)
 // k, l, m are calculated in function GrPathUtils::getConicKLM
-void set_conic_coeffs(const SkPoint p[3], BezierVertex verts[kVertsPerQuad],
+void set_conic_coeffs(const SkPoint p[3], BezierVertex verts[kQuadNumVertices],
                       const SkScalar weight) {
     SkScalar klm[9];
 
     GrPathUtils::getConicKLM(p, weight, klm);
 
-    for (int i = 0; i < kVertsPerQuad; ++i) {
+    for (int i = 0; i < kQuadNumVertices; ++i) {
         const SkPoint pnt = verts[i].fPos;
         verts[i].fConic.fK = pnt.fX * klm[0] + pnt.fY * klm[1] + klm[2];
         verts[i].fConic.fL = pnt.fX * klm[3] + pnt.fY * klm[4] + klm[5];
@@ -634,7 +570,7 @@
                 SkRect* devBounds) {
     bloat_quad(p, toDevice, toSrc, *vert, devBounds);
     set_conic_coeffs(p, *vert, weight);
-    *vert += kVertsPerQuad;
+    *vert += kQuadNumVertices;
 }
 
 void add_quads(const SkPoint p[3],
@@ -652,7 +588,7 @@
     } else {
         bloat_quad(p, toDevice, toSrc, *vert, devBounds);
         set_uv_quad(p, *vert);
-        *vert += kVertsPerQuad;
+        *vert += kQuadNumVertices;
     }
 }
 
@@ -687,16 +623,16 @@
         if (toSrc) {
             toSrc->mapPointsWithStride(&(*vert)->fPos,
                                        sizeof(LineVertex),
-                                       kVertsPerLineSeg);
+                                       kLineSegNumVertices);
         }
     } else {
         // just make it degenerate and likely offscreen
-        for (int i = 0; i < kVertsPerLineSeg; ++i) {
+        for (int i = 0; i < kLineSegNumVertices; ++i) {
             (*vert)[i].fPos.set(SK_ScalarMax, SK_ScalarMax);
         }
     }
 
-    *vert += kVertsPerLineSeg;
+    *vert += kLineSegNumVertices;
 }
 
 }
@@ -729,7 +665,7 @@
 
     const SkMatrix& viewM = drawState->getViewMatrix();
 
-    int vertCnt = kVertsPerLineSeg * lineCnt;
+    int vertCnt = kLineSegNumVertices * lineCnt;
 
     drawState->setVertexAttribs<gHairlineLineAttribs>(SK_ARRAY_COUNT(gHairlineLineAttribs),
                                                       sizeof(LineVertex));
@@ -776,7 +712,7 @@
 
     const SkMatrix& viewM = drawState->getViewMatrix();
 
-    int vertCnt = kVertsPerQuad * quadCnt + kVertsPerQuad * conicCnt;
+    int vertCnt = kQuadNumVertices * quadCnt + kQuadNumVertices * conicCnt;
 
     int vAttribCnt = SK_ARRAY_COUNT(gHairlineBezierAttribs);
     target->drawState()->setVertexAttribs<gHairlineBezierAttribs>(vAttribCnt, sizeof(BezierVertex));
@@ -942,19 +878,19 @@
 
         // Check devBounds
         SkASSERT(check_bounds<LineVertex>(drawState, devBounds, arg.vertices(),
-                                          kVertsPerLineSeg * lineCnt));
+                                          kLineSegNumVertices * lineCnt));
 
         {
             GrDrawState::AutoRestoreEffects are(drawState);
             target->setIndexSourceToBuffer(fLinesIndexBuffer);
             int lines = 0;
             while (lines < lineCnt) {
-                int n = SkTMin(lineCnt - lines, kNumLineSegsInIdxBuffer);
+                int n = SkTMin(lineCnt - lines, kLineSegsNumInIdxBuffer);
                 target->drawIndexed(kTriangles_GrPrimitiveType,
-                                    kVertsPerLineSeg*lines,     // startV
-                                    0,                          // startI
-                                    kVertsPerLineSeg*n,         // vCount
-                                    kIdxsPerLineSeg*n,          // iCount
+                                    kLineSegNumVertices*lines,     // startV
+                                    0,                             // startI
+                                    kLineSegNumVertices*n,         // vCount
+                                    kIdxsPerLineSeg*n,             // iCount
                                     &devBounds);
                 lines += n;
             }
@@ -992,7 +928,7 @@
 
         // Check devBounds
         SkASSERT(check_bounds<BezierVertex>(drawState, devBounds, arg.vertices(),
-                                            kVertsPerQuad * quadCnt + kVertsPerQuad * conicCnt));
+                                            kQuadNumVertices * quadCnt + kQuadNumVertices * conicCnt));
 
         if (quadCnt > 0) {
             GrGeometryProcessor* hairQuadProcessor =
@@ -1003,12 +939,12 @@
             drawState->setGeometryProcessor(hairQuadProcessor)->unref();
             int quads = 0;
             while (quads < quadCnt) {
-                int n = SkTMin(quadCnt - quads, kNumQuadsInIdxBuffer);
+                int n = SkTMin(quadCnt - quads, kQuadsNumInIdxBuffer);
                 target->drawIndexed(kTriangles_GrPrimitiveType,
-                                    kVertsPerQuad*quads,               // startV
-                                    0,                                 // startI
-                                    kVertsPerQuad*n,                   // vCount
-                                    kIdxsPerQuad*n,                    // iCount
+                                    kQuadNumVertices*quads,               // startV
+                                    0,                                    // startI
+                                    kQuadNumVertices*n,                   // vCount
+                                    kIdxsPerQuad*n,                       // iCount
                                     &devBounds);
                 quads += n;
             }
@@ -1022,12 +958,12 @@
             drawState->setGeometryProcessor(hairConicProcessor)->unref();
             int conics = 0;
             while (conics < conicCnt) {
-                int n = SkTMin(conicCnt - conics, kNumQuadsInIdxBuffer);
+                int n = SkTMin(conicCnt - conics, kQuadsNumInIdxBuffer);
                 target->drawIndexed(kTriangles_GrPrimitiveType,
-                                    kVertsPerQuad*(quadCnt + conics),  // startV
-                                    0,                                 // startI
-                                    kVertsPerQuad*n,                   // vCount
-                                    kIdxsPerQuad*n,                    // iCount
+                                    kQuadNumVertices*(quadCnt + conics),  // startV
+                                    0,                                    // startI
+                                    kQuadNumVertices*n,                   // vCount
+                                    kIdxsPerQuad*n,                       // iCount
                                     &devBounds);
                 conics += n;
             }
diff --git a/src/gpu/GrAARectRenderer.cpp b/src/gpu/GrAARectRenderer.cpp
index 3b4fd63..e5d1593 100644
--- a/src/gpu/GrAARectRenderer.cpp
+++ b/src/gpu/GrAARectRenderer.cpp
@@ -311,42 +311,6 @@
 static const int kVertsPerAAFillRect = 8;
 static const int kNumAAFillRectsInIndexBuffer = 256;
 
-GrIndexBuffer* GrAARectRenderer::aaFillRectIndexBuffer(GrGpu* gpu) {
-    static const size_t kAAFillRectIndexBufferSize = kIndicesPerAAFillRect *
-                                                     sizeof(uint16_t) *
-                                                     kNumAAFillRectsInIndexBuffer;
-
-    if (NULL == fAAFillRectIndexBuffer) {
-        fAAFillRectIndexBuffer = gpu->createIndexBuffer(kAAFillRectIndexBufferSize, false);
-        if (fAAFillRectIndexBuffer) {
-            uint16_t* data = (uint16_t*) fAAFillRectIndexBuffer->map();
-            bool useTempData = (NULL == data);
-            if (useTempData) {
-                data = SkNEW_ARRAY(uint16_t, kNumAAFillRectsInIndexBuffer * kIndicesPerAAFillRect);
-            }
-            for (int i = 0; i < kNumAAFillRectsInIndexBuffer; ++i) {
-                // Each AA filled rect is drawn with 8 vertices and 10 triangles (8 around
-                // the inner rect (for AA) and 2 for the inner rect.
-                int baseIdx = i * kIndicesPerAAFillRect;
-                uint16_t baseVert = (uint16_t)(i * kVertsPerAAFillRect);
-                for (int j = 0; j < kIndicesPerAAFillRect; ++j) {
-                    data[baseIdx+j] = baseVert + gFillAARectIdx[j];
-                }
-            }
-            if (useTempData) {
-                if (!fAAFillRectIndexBuffer->updateData(data, kAAFillRectIndexBufferSize)) {
-                    SkFAIL("Can't get AA Fill Rect indices into buffer!");
-                }
-                SkDELETE_ARRAY(data);
-            } else {
-                fAAFillRectIndexBuffer->unmap();
-            }
-        }
-    }
-
-    return fAAFillRectIndexBuffer;
-}
-
 static const uint16_t gMiterStrokeAARectIdx[] = {
     0 + 0, 1 + 0, 5 + 0, 5 + 0, 4 + 0, 0 + 0,
     1 + 0, 2 + 0, 6 + 0, 6 + 0, 5 + 0, 1 + 0,
@@ -364,6 +328,10 @@
     3 + 8, 0 + 8, 4 + 8, 4 + 8, 7 + 8, 3 + 8,
 };
 
+static const int kIndicesPerMiterStrokeRect = SK_ARRAY_COUNT(gMiterStrokeAARectIdx);
+static const int kVertsPerMiterStrokeRect = 16;
+static const int kNumMiterStrokeRectsInIndexBuffer = 256;
+
 /**
  * As in miter-stroke, index = a + b, and a is the current index, b is the shift
  * from the first index. The index layout:
@@ -421,6 +389,10 @@
     3 + 16, 0 + 16, 4 + 16, 4 + 16, 7 + 16, 3 + 16,
 };
 
+static const int kIndicesPerBevelStrokeRect = SK_ARRAY_COUNT(gBevelStrokeAARectIdx);
+static const int kVertsPerBevelStrokeRect = 24;
+static const int kNumBevelStrokeRectsInIndexBuffer = 256;
+
 int GrAARectRenderer::aaStrokeRectIndexCount(bool miterStroke) {
     return miterStroke ? SK_ARRAY_COUNT(gMiterStrokeAARectIdx) :
                          SK_ARRAY_COUNT(gBevelStrokeAARectIdx);
@@ -430,29 +402,19 @@
     if (miterStroke) {
         if (NULL == fAAMiterStrokeRectIndexBuffer) {
             fAAMiterStrokeRectIndexBuffer =
-                gpu->createIndexBuffer(sizeof(gMiterStrokeAARectIdx), false);
-            if (fAAMiterStrokeRectIndexBuffer) {
-#ifdef SK_DEBUG
-                bool updated =
-#endif
-                fAAMiterStrokeRectIndexBuffer->updateData(gMiterStrokeAARectIdx,
-                                                          sizeof(gMiterStrokeAARectIdx));
-                GR_DEBUGASSERT(updated);
-            }
+                    gpu->createInstancedIndexBuffer(gMiterStrokeAARectIdx,
+                                                    kIndicesPerMiterStrokeRect,
+                                                    kNumMiterStrokeRectsInIndexBuffer,
+                                                    kVertsPerMiterStrokeRect);
         }
         return fAAMiterStrokeRectIndexBuffer;
     } else {
         if (NULL == fAABevelStrokeRectIndexBuffer) {
             fAABevelStrokeRectIndexBuffer =
-                gpu->createIndexBuffer(sizeof(gBevelStrokeAARectIdx), false);
-            if (fAABevelStrokeRectIndexBuffer) {
-#ifdef SK_DEBUG
-                bool updated =
-#endif
-                fAABevelStrokeRectIndexBuffer->updateData(gBevelStrokeAARectIdx,
-                                                          sizeof(gBevelStrokeAARectIdx));
-                GR_DEBUGASSERT(updated);
-            }
+                    gpu->createInstancedIndexBuffer(gBevelStrokeAARectIdx,
+                                                    kIndicesPerBevelStrokeRect,
+                                                    kNumBevelStrokeRectsInIndexBuffer,
+                                                    kVertsPerBevelStrokeRect);
         }
         return fAABevelStrokeRectIndexBuffer;
     }
@@ -478,7 +440,13 @@
         return;
     }
 
-    GrIndexBuffer* indexBuffer = this->aaFillRectIndexBuffer(gpu);
+    if (NULL == fAAFillRectIndexBuffer) {
+        fAAFillRectIndexBuffer = gpu->createInstancedIndexBuffer(gFillAARectIdx,
+                                                                 kIndicesPerAAFillRect,
+                                                                 kNumAAFillRectsInIndexBuffer,
+                                                                 kVertsPerAAFillRect);
+    }
+    GrIndexBuffer* indexBuffer = fAAFillRectIndexBuffer;
     if (NULL == indexBuffer) {
         GrPrintf("Failed to create index buffer!\n");
         return;
@@ -933,8 +901,9 @@
     }
 
     target->setIndexSourceToBuffer(indexBuffer);
-    target->drawIndexed(kTriangles_GrPrimitiveType, 0, 0,
-                        totalVertexNum, aaStrokeRectIndexCount(miterStroke));
+    target->drawIndexedInstances(kTriangles_GrPrimitiveType, 1,
+                                 totalVertexNum, aaStrokeRectIndexCount(miterStroke));
+    target->resetIndexSource();
 }
 
 void GrAARectRenderer::fillAANestedRects(GrGpu* gpu,
diff --git a/src/gpu/GrAARectRenderer.h b/src/gpu/GrAARectRenderer.h
index bfa295b..b276823 100644
--- a/src/gpu/GrAARectRenderer.h
+++ b/src/gpu/GrAARectRenderer.h
@@ -75,8 +75,6 @@
     GrIndexBuffer*              fAAMiterStrokeRectIndexBuffer;
     GrIndexBuffer*              fAABevelStrokeRectIndexBuffer;
 
-    GrIndexBuffer* aaFillRectIndexBuffer(GrGpu* gpu);
-
     static int aaStrokeRectIndexCount(bool miterStroke);
     GrIndexBuffer* aaStrokeRectIndexBuffer(GrGpu* gpu, bool miterStroke);
 
diff --git a/src/gpu/GrGpu.cpp b/src/gpu/GrGpu.cpp
index 1f01e12..6510048 100644
--- a/src/gpu/GrGpu.cpp
+++ b/src/gpu/GrGpu.cpp
@@ -164,6 +164,39 @@
     return this->onCreateIndexBuffer(size, dynamic);
 }
 
+GrIndexBuffer* GrGpu::createInstancedIndexBuffer(const uint16_t* pattern,
+                                                 int patternSize,
+                                                 int reps,
+                                                 int vertCount,
+                                                 bool isDynamic) {
+    size_t bufferSize = patternSize * reps * sizeof(uint16_t);
+    GrGpu* me = const_cast<GrGpu*>(this);
+    GrIndexBuffer* buffer = me->createIndexBuffer(bufferSize, isDynamic);
+    if (buffer) {
+        uint16_t* data = (uint16_t*) buffer->map();
+        bool useTempData = (NULL == data);
+        if (useTempData) {
+            data = SkNEW_ARRAY(uint16_t, reps * patternSize);
+        }
+        for (int i = 0; i < reps; ++i) {
+            int baseIdx = i * patternSize;
+            uint16_t baseVert = (uint16_t)(i * vertCount);
+            for (int j = 0; j < patternSize; ++j) {
+                data[baseIdx+j] = baseVert + pattern[j];
+            }
+        }
+        if (useTempData) {
+            if (!buffer->updateData(data, bufferSize)) {
+                SkFAIL("Can't get indices into buffer!");
+            }
+            SkDELETE_ARRAY(data);
+        } else {
+            buffer->unmap();
+        }
+    }
+    return buffer;
+}
+
 void GrGpu::clear(const SkIRect* rect,
                   GrColor color,
                   bool canIgnoreRect,
@@ -246,39 +279,18 @@
 
 GR_STATIC_ASSERT(4 * MAX_QUADS <= 65535);
 
-static inline void fill_indices(uint16_t* indices, int quadCount) {
-    for (int i = 0; i < quadCount; ++i) {
-        indices[6 * i + 0] = 4 * i + 0;
-        indices[6 * i + 1] = 4 * i + 1;
-        indices[6 * i + 2] = 4 * i + 2;
-        indices[6 * i + 3] = 4 * i + 0;
-        indices[6 * i + 4] = 4 * i + 2;
-        indices[6 * i + 5] = 4 * i + 3;
-    }
-}
+static const uint16_t gQuadIndexPattern[] = {
+  0, 1, 2, 0, 2, 3
+};
 
 const GrIndexBuffer* GrGpu::getQuadIndexBuffer() const {
     if (NULL == fQuadIndexBuffer || fQuadIndexBuffer->wasDestroyed()) {
         SkSafeUnref(fQuadIndexBuffer);
-        static const int SIZE = sizeof(uint16_t) * 6 * MAX_QUADS;
         GrGpu* me = const_cast<GrGpu*>(this);
-        fQuadIndexBuffer = me->createIndexBuffer(SIZE, false);
-        if (fQuadIndexBuffer) {
-            uint16_t* indices = (uint16_t*)fQuadIndexBuffer->map();
-            if (indices) {
-                fill_indices(indices, MAX_QUADS);
-                fQuadIndexBuffer->unmap();
-            } else {
-                indices = (uint16_t*)sk_malloc_throw(SIZE);
-                fill_indices(indices, MAX_QUADS);
-                if (!fQuadIndexBuffer->updateData(indices, SIZE)) {
-                    fQuadIndexBuffer->unref();
-                    fQuadIndexBuffer = NULL;
-                    SkFAIL("Can't get indices into buffer!");
-                }
-                sk_free(indices);
-            }
-        }
+        fQuadIndexBuffer = me->createInstancedIndexBuffer(gQuadIndexPattern,
+                                                          6,
+                                                          MAX_QUADS,
+                                                          4);
     }
 
     return fQuadIndexBuffer;
diff --git a/src/gpu/GrGpu.h b/src/gpu/GrGpu.h
index 54fe471..e4669a2 100644
--- a/src/gpu/GrGpu.h
+++ b/src/gpu/GrGpu.h
@@ -143,6 +143,25 @@
     GrIndexBuffer* createIndexBuffer(size_t size, bool dynamic);
 
     /**
+     * Creates an index buffer for instance drawing with a specific pattern.
+     *
+     * @param pattern     the pattern to repeat
+     * @param patternSize number of uint16_t indices in the pattern (a count, not bytes)
+     * @param reps        number of times to repeat the pattern
+     * @param vertCount   number of vertices the pattern references
+     * @param isDynamic   hints whether the data will be frequently changed
+     *                    by either GrIndexBuffer::map() or
+     *                    GrIndexBuffer::updateData().
+     *
+     * @return The index buffer if successful, otherwise NULL.
+     */
+    GrIndexBuffer* createInstancedIndexBuffer(const uint16_t* pattern,
+                                              int patternSize,
+                                              int reps,
+                                              int vertCount,
+                                              bool isDynamic = false);
+
+    /**
      * Returns an index buffer that can be used to render quads.
      * Six indices per quad: 0, 1, 2, 0, 2, 3, etc.
      * The max number of quads can be queried using GrIndexBuffer::maxQuads().
diff --git a/src/gpu/GrOvalRenderer.cpp b/src/gpu/GrOvalRenderer.cpp
index 85a5389..0622994 100644
--- a/src/gpu/GrOvalRenderer.cpp
+++ b/src/gpu/GrOvalRenderer.cpp
@@ -26,7 +26,7 @@
 #include "effects/GrRRectEffect.h"
 
 namespace {
-
+// TODO(joshualitt) add per vertex colors
 struct CircleVertex {
     SkPoint  fPos;
     SkPoint  fOffset;
@@ -478,13 +478,13 @@
     // we can draw circles
     if (SkScalarNearlyEqual(oval.width(), oval.height())
         && circle_stays_circle(vm)) {
-        this->drawCircle(target, useCoverageAA, oval, stroke);
+        this->drawCircle(target, context, useCoverageAA, oval, stroke);
     // if we have shader derivative support, render as device-independent
     } else if (target->caps()->shaderDerivativeSupport()) {
-        return this->drawDIEllipse(target, useCoverageAA, oval, stroke);
+        return this->drawDIEllipse(target, context, useCoverageAA, oval, stroke);
     // otherwise axis-aligned ellipses only
     } else if (vm.rectStaysRect()) {
-        return this->drawEllipse(target, useCoverageAA, oval, stroke);
+        return this->drawEllipse(target, context, useCoverageAA, oval, stroke);
     } else {
         return false;
     }
@@ -501,6 +501,7 @@
 };
 
 void GrOvalRenderer::drawCircle(GrDrawTarget* target,
+                                const GrContext* context,
                                 bool useCoverageAA,
                                 const SkRect& circle,
                                 const SkStrokeRec& stroke)
@@ -572,22 +573,24 @@
     verts[0].fOuterRadius = outerRadius;
     verts[0].fInnerRadius = innerRadius;
 
-    verts[1].fPos = SkPoint::Make(bounds.fRight, bounds.fTop);
-    verts[1].fOffset = SkPoint::Make(outerRadius, -outerRadius);
+    verts[1].fPos = SkPoint::Make(bounds.fLeft,  bounds.fBottom);
+    verts[1].fOffset = SkPoint::Make(-outerRadius, outerRadius);
     verts[1].fOuterRadius = outerRadius;
     verts[1].fInnerRadius = innerRadius;
 
-    verts[2].fPos = SkPoint::Make(bounds.fLeft,  bounds.fBottom);
-    verts[2].fOffset = SkPoint::Make(-outerRadius, outerRadius);
+    verts[2].fPos = SkPoint::Make(bounds.fRight, bounds.fBottom);
+    verts[2].fOffset = SkPoint::Make(outerRadius, outerRadius);
     verts[2].fOuterRadius = outerRadius;
     verts[2].fInnerRadius = innerRadius;
 
-    verts[3].fPos = SkPoint::Make(bounds.fRight, bounds.fBottom);
-    verts[3].fOffset = SkPoint::Make(outerRadius, outerRadius);
+    verts[3].fPos = SkPoint::Make(bounds.fRight, bounds.fTop);
+    verts[3].fOffset = SkPoint::Make(outerRadius, -outerRadius);
     verts[3].fOuterRadius = outerRadius;
     verts[3].fInnerRadius = innerRadius;
 
-    target->drawNonIndexed(kTriangleStrip_GrPrimitiveType, 0, 4, &bounds);
+    target->setIndexSourceToBuffer(context->getGpu()->getQuadIndexBuffer());
+    target->drawIndexedInstances(kTriangles_GrPrimitiveType, 1, 4, 6, &bounds);
+    target->resetIndexSource();
 }
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -607,6 +610,7 @@
 };
 
 bool GrOvalRenderer::drawEllipse(GrDrawTarget* target,
+                                 const GrContext* context,
                                  bool useCoverageAA,
                                  const SkRect& ellipse,
                                  const SkStrokeRec& stroke)
@@ -718,27 +722,30 @@
     verts[0].fOuterRadii = SkPoint::Make(xRadRecip, yRadRecip);
     verts[0].fInnerRadii = SkPoint::Make(xInnerRadRecip, yInnerRadRecip);
 
-    verts[1].fPos = SkPoint::Make(bounds.fRight, bounds.fTop);
-    verts[1].fOffset = SkPoint::Make(xRadius, -yRadius);
+    verts[1].fPos = SkPoint::Make(bounds.fLeft,  bounds.fBottom);
+    verts[1].fOffset = SkPoint::Make(-xRadius, yRadius);
     verts[1].fOuterRadii = SkPoint::Make(xRadRecip, yRadRecip);
     verts[1].fInnerRadii = SkPoint::Make(xInnerRadRecip, yInnerRadRecip);
 
-    verts[2].fPos = SkPoint::Make(bounds.fLeft,  bounds.fBottom);
-    verts[2].fOffset = SkPoint::Make(-xRadius, yRadius);
+    verts[2].fPos = SkPoint::Make(bounds.fRight, bounds.fBottom);
+    verts[2].fOffset = SkPoint::Make(xRadius, yRadius);
     verts[2].fOuterRadii = SkPoint::Make(xRadRecip, yRadRecip);
     verts[2].fInnerRadii = SkPoint::Make(xInnerRadRecip, yInnerRadRecip);
 
-    verts[3].fPos = SkPoint::Make(bounds.fRight, bounds.fBottom);
-    verts[3].fOffset = SkPoint::Make(xRadius, yRadius);
+    verts[3].fPos = SkPoint::Make(bounds.fRight, bounds.fTop);
+    verts[3].fOffset = SkPoint::Make(xRadius, -yRadius);
     verts[3].fOuterRadii = SkPoint::Make(xRadRecip, yRadRecip);
     verts[3].fInnerRadii = SkPoint::Make(xInnerRadRecip, yInnerRadRecip);
 
-    target->drawNonIndexed(kTriangleStrip_GrPrimitiveType, 0, 4, &bounds);
+    target->setIndexSourceToBuffer(context->getGpu()->getQuadIndexBuffer());
+    target->drawIndexedInstances(kTriangles_GrPrimitiveType, 1, 4, 6, &bounds);
+    target->resetIndexSource();
 
     return true;
 }
 
 bool GrOvalRenderer::drawDIEllipse(GrDrawTarget* target,
+                                   const GrContext* context,
                                    bool useCoverageAA,
                                    const SkRect& ellipse,
                                    const SkStrokeRec& stroke)
@@ -832,19 +839,21 @@
     verts[0].fOuterOffset = SkPoint::Make(-1.0f - offsetDx, -1.0f - offsetDy);
     verts[0].fInnerOffset = SkPoint::Make(-innerRatioX - offsetDx, -innerRatioY - offsetDy);
 
-    verts[1].fPos = SkPoint::Make(bounds.fRight, bounds.fTop);
-    verts[1].fOuterOffset = SkPoint::Make(1.0f + offsetDx, -1.0f - offsetDy);
-    verts[1].fInnerOffset = SkPoint::Make(innerRatioX + offsetDx, -innerRatioY - offsetDy);
+    verts[1].fPos = SkPoint::Make(bounds.fLeft,  bounds.fBottom);
+    verts[1].fOuterOffset = SkPoint::Make(-1.0f - offsetDx, 1.0f + offsetDy);
+    verts[1].fInnerOffset = SkPoint::Make(-innerRatioX - offsetDx, innerRatioY + offsetDy);
 
-    verts[2].fPos = SkPoint::Make(bounds.fLeft,  bounds.fBottom);
-    verts[2].fOuterOffset = SkPoint::Make(-1.0f - offsetDx, 1.0f + offsetDy);
-    verts[2].fInnerOffset = SkPoint::Make(-innerRatioX - offsetDx, innerRatioY + offsetDy);
+    verts[2].fPos = SkPoint::Make(bounds.fRight, bounds.fBottom);
+    verts[2].fOuterOffset = SkPoint::Make(1.0f + offsetDx, 1.0f + offsetDy);
+    verts[2].fInnerOffset = SkPoint::Make(innerRatioX + offsetDx, innerRatioY + offsetDy);
 
-    verts[3].fPos = SkPoint::Make(bounds.fRight, bounds.fBottom);
-    verts[3].fOuterOffset = SkPoint::Make(1.0f + offsetDx, 1.0f + offsetDy);
-    verts[3].fInnerOffset = SkPoint::Make(innerRatioX + offsetDx, innerRatioY + offsetDy);
+    verts[3].fPos = SkPoint::Make(bounds.fRight, bounds.fTop);
+    verts[3].fOuterOffset = SkPoint::Make(1.0f + offsetDx, -1.0f - offsetDy);
+    verts[3].fInnerOffset = SkPoint::Make(innerRatioX + offsetDx, -innerRatioY - offsetDy);
 
-    target->drawNonIndexed(kTriangleStrip_GrPrimitiveType, 0, 4, &bounds);
+    target->setIndexSourceToBuffer(context->getGpu()->getQuadIndexBuffer());
+    target->drawIndexedInstances(kTriangles_GrPrimitiveType, 1, 4, 6, &bounds);
+    target->resetIndexSource();
 
     return true;
 }
@@ -869,21 +878,29 @@
     5, 6, 10, 5, 10, 9
 };
 
+static const int kIndicesPerStrokeRRect = SK_ARRAY_COUNT(gRRectIndices) - 6;
+static const int kIndicesPerRRect = SK_ARRAY_COUNT(gRRectIndices);
+static const int kVertsPerRRect = 16;
+static const int kNumRRectsInIndexBuffer = 256;
 
-GrIndexBuffer* GrOvalRenderer::rRectIndexBuffer(GrGpu* gpu) {
-    if (NULL == fRRectIndexBuffer) {
-        fRRectIndexBuffer =
-        gpu->createIndexBuffer(sizeof(gRRectIndices), false);
-        if (fRRectIndexBuffer) {
-#ifdef SK_DEBUG
-            bool updated =
-#endif
-            fRRectIndexBuffer->updateData(gRRectIndices,
-                                          sizeof(gRRectIndices));
-            GR_DEBUGASSERT(updated);
+GrIndexBuffer* GrOvalRenderer::rRectIndexBuffer(bool isStrokeOnly, GrGpu* gpu) {
+    if (isStrokeOnly) {
+        if (NULL == fStrokeRRectIndexBuffer) {
+            fStrokeRRectIndexBuffer = gpu->createInstancedIndexBuffer(gRRectIndices,
+                                                                      kIndicesPerStrokeRRect,
+                                                                      kNumRRectsInIndexBuffer,
+                                                                      kVertsPerRRect);
         }
+        return fStrokeRRectIndexBuffer;
+    } else {
+        if (NULL == fRRectIndexBuffer) {
+            fRRectIndexBuffer = gpu->createInstancedIndexBuffer(gRRectIndices,
+                                                                kIndicesPerRRect,
+                                                                kNumRRectsInIndexBuffer,
+                                                                kVertsPerRRect);
+        }
+        return fRRectIndexBuffer;
     }
-    return fRRectIndexBuffer;
 }
 
 bool GrOvalRenderer::drawDRRect(GrDrawTarget* target, GrContext* context, bool useAA,
@@ -1018,7 +1035,7 @@
         return false;
     }
 
-    GrIndexBuffer* indexBuffer = this->rRectIndexBuffer(context->getGpu());
+    GrIndexBuffer* indexBuffer = this->rRectIndexBuffer(isStrokeOnly, context->getGpu());
     if (NULL == indexBuffer) {
         GrPrintf("Failed to create index buffer!\n");
         return false;
@@ -1110,7 +1127,7 @@
         int indexCnt = isStrokeOnly ? SK_ARRAY_COUNT(gRRectIndices) - 6 :
                                       SK_ARRAY_COUNT(gRRectIndices);
         target->setIndexSourceToBuffer(indexBuffer);
-        target->drawIndexed(kTriangles_GrPrimitiveType, 0, 0, 16, indexCnt, &bounds);
+        target->drawIndexedInstances(kTriangles_GrPrimitiveType, 1, 16, indexCnt, &bounds);
 
     // otherwise we use the ellipse renderer
     } else {
@@ -1217,8 +1234,9 @@
         int indexCnt = isStrokeOnly ? SK_ARRAY_COUNT(gRRectIndices) - 6 :
                                       SK_ARRAY_COUNT(gRRectIndices);
         target->setIndexSourceToBuffer(indexBuffer);
-        target->drawIndexed(kTriangles_GrPrimitiveType, 0, 0, 16, indexCnt, &bounds);
+        target->drawIndexedInstances(kTriangles_GrPrimitiveType, 1, 16, indexCnt, &bounds);
     }
 
+    target->resetIndexSource();
     return true;
 }
diff --git a/src/gpu/GrOvalRenderer.h b/src/gpu/GrOvalRenderer.h
index 92f6ff0..96f9b3a 100644
--- a/src/gpu/GrOvalRenderer.h
+++ b/src/gpu/GrOvalRenderer.h
@@ -24,7 +24,7 @@
 public:
     SK_DECLARE_INST_COUNT(GrOvalRenderer)
 
-    GrOvalRenderer() : fRRectIndexBuffer(NULL) {}
+    GrOvalRenderer() : fRRectIndexBuffer(NULL), fStrokeRRectIndexBuffer(NULL) {}
     ~GrOvalRenderer() {
         this->reset();
     }
@@ -39,19 +39,20 @@
                     const SkRRect& outer, const SkRRect& inner);
 
 private:
-    bool drawEllipse(GrDrawTarget* target, bool useCoverageAA,
+    bool drawEllipse(GrDrawTarget* target, const GrContext* context, bool useCoverageAA,
                      const SkRect& ellipse,
                      const SkStrokeRec& stroke);
-    bool drawDIEllipse(GrDrawTarget* target, bool useCoverageAA,
+    bool drawDIEllipse(GrDrawTarget* target, const GrContext* context, bool useCoverageAA,
                        const SkRect& ellipse,
                        const SkStrokeRec& stroke);
-    void drawCircle(GrDrawTarget* target, bool useCoverageAA,
+    void drawCircle(GrDrawTarget* target, const GrContext* context, bool useCoverageAA,
                     const SkRect& circle,
                     const SkStrokeRec& stroke);
 
-    GrIndexBuffer* rRectIndexBuffer(GrGpu* gpu);
+    GrIndexBuffer* rRectIndexBuffer(bool isStrokeOnly, GrGpu* gpu);
 
     GrIndexBuffer* fRRectIndexBuffer;
+    GrIndexBuffer* fStrokeRRectIndexBuffer;
 
     typedef SkRefCnt INHERITED;
 };