Reland "Use glDraw.*BaseInstance calls to avoid deferred buffer binding"

This is a reland of e8c963d474b7a672bb1292c02b7a774ab29b2cef

Original change's description:
> Use glDraw.*BaseInstance calls to avoid deferred buffer binding
> 
> Change-Id: I968dab317673051acc65f87ea76a0d657d89b3d2
> Reviewed-on: https://skia-review.googlesource.com/c/skia/+/279538
> Commit-Queue: Chris Dalton <csmartdalton@google.com>
> Reviewed-by: Michael Ludwig <michaelludwig@google.com>
> Reviewed-by: Brian Salomon <bsalomon@google.com>

Change-Id: I79b2d23e5e66d47214898a9068079b6fe2269599
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/280806
Commit-Queue: Chris Dalton <csmartdalton@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
diff --git a/src/gpu/gl/GrGLCaps.cpp b/src/gpu/gl/GrGLCaps.cpp
index a0f7957..9cc18c4 100644
--- a/src/gpu/gl/GrGLCaps.cpp
+++ b/src/gpu/gl/GrGLCaps.cpp
@@ -37,7 +37,7 @@
     fDrawIndirectSupport = false;
     fDrawRangeElementsSupport = false;
     fMultiDrawIndirectSupport = false;
-    fBaseInstanceSupport = false;
+    fBaseVertexBaseInstanceSupport = false;
     fUseNonVBOVertexAndIndexDynamicData = false;
     fIsCoreProfile = false;
     fBindFragDataLocationSupport = false;
@@ -625,18 +625,17 @@
     if (GR_IS_GR_GL(standard)) {
         fDrawIndirectSupport = version >= GR_GL_VER(4,0) ||
                                ctxInfo.hasExtension("GL_ARB_draw_indirect");
-        fBaseInstanceSupport = version >= GR_GL_VER(4,2);
+        fBaseVertexBaseInstanceSupport = version >= GR_GL_VER(4,2) ||
+                                         ctxInfo.hasExtension("GL_ARB_base_instance");
         fMultiDrawIndirectSupport = version >= GR_GL_VER(4,3) ||
-                                    (fDrawIndirectSupport &&
-                                     !fBaseInstanceSupport && // The ARB extension has no base inst.
-                                     ctxInfo.hasExtension("GL_ARB_multi_draw_indirect"));
+                                    ctxInfo.hasExtension("GL_ARB_multi_draw_indirect");
         fDrawRangeElementsSupport = version >= GR_GL_VER(2,0);
     } else if (GR_IS_GR_GL_ES(standard)) {
         fDrawIndirectSupport = version >= GR_GL_VER(3,1);
         fMultiDrawIndirectSupport = fDrawIndirectSupport &&
                                     ctxInfo.hasExtension("GL_EXT_multi_draw_indirect");
-        fBaseInstanceSupport = ctxInfo.hasExtension("GL_EXT_base_instance") ||
-                               ctxInfo.hasExtension("GL_ANGLE_base_vertex_base_instance");
+        fBaseVertexBaseInstanceSupport = ctxInfo.hasExtension("GL_EXT_base_instance") ||
+                                         ctxInfo.hasExtension("GL_ANGLE_base_vertex_base_instance");
         fDrawRangeElementsSupport = version >= GR_GL_VER(3,0);
     } else if (GR_IS_GR_WEBGL(standard)) {
         // WebGL lacks indirect support, but drawRange was added in WebGL 2.0
@@ -1184,7 +1183,7 @@
     writer->appendBool("Debug support", fDebugSupport);
     writer->appendBool("Draw indirect support", fDrawIndirectSupport);
     writer->appendBool("Multi draw indirect support", fMultiDrawIndirectSupport);
-    writer->appendBool("Base instance support", fBaseInstanceSupport);
+    writer->appendBool("Base (vertex base) instance support", fBaseVertexBaseInstanceSupport);
     writer->appendBool("RGBA 8888 pixel ops are slow", fRGBA8888PixelsOpsAreSlow);
     writer->appendBool("Partial FBO read is slow", fPartialFBOReadIsSlow);
     writer->appendBool("Bind uniform location support", fBindUniformLocationSupport);
@@ -3544,7 +3543,7 @@
     // http://anglebug.com/4536
     if (ctxInfo.driver() == kANGLE_GrGLDriver &&
         ctxInfo.angleBackend() != GrGLANGLEBackend::kOpenGL) {
-        fBaseInstanceSupport = false;
+        fBaseVertexBaseInstanceSupport = false;
     }
 
     // Currently the extension is advertised but fb fetch is broken on 500 series Adrenos like the
diff --git a/src/gpu/gl/GrGLCaps.h b/src/gpu/gl/GrGLCaps.h
index 3468e04..b4efd5d 100644
--- a/src/gpu/gl/GrGLCaps.h
+++ b/src/gpu/gl/GrGLCaps.h
@@ -324,8 +324,9 @@
     /// Is there support for glDrawRangeElements?
     bool drawRangeElementsSupport() const { return fDrawRangeElementsSupport; }
 
-    /// Are the baseInstance fields supported in indirect draw commands?
-    bool baseInstanceSupport() const { return fBaseInstanceSupport; }
+    /// Are the glDraw*Base(VertexBase)Instance methods, and baseInstance fields in indirect draw
+    //commands supported?
+    bool baseVertexBaseInstanceSupport() const { return fBaseVertexBaseInstanceSupport; }
 
     /// Use indices or vertices in CPU arrays rather than VBOs for dynamic content.
     bool useNonVBOVertexAndIndexDynamicData() const { return fUseNonVBOVertexAndIndexDynamicData; }
@@ -528,7 +529,7 @@
     bool fDrawIndirectSupport : 1;
     bool fDrawRangeElementsSupport : 1;
     bool fMultiDrawIndirectSupport : 1;
-    bool fBaseInstanceSupport : 1;
+    bool fBaseVertexBaseInstanceSupport : 1;
     bool fUseNonVBOVertexAndIndexDynamicData : 1;
     bool fIsCoreProfile : 1;
     bool fBindFragDataLocationSupport : 1;
diff --git a/src/gpu/gl/GrGLGpu.cpp b/src/gpu/gl/GrGLGpu.cpp
index 1c7c3e6..1dc61c4 100644
--- a/src/gpu/gl/GrGLGpu.cpp
+++ b/src/gpu/gl/GrGLGpu.cpp
@@ -2274,19 +2274,43 @@
 
 void GrGLGpu::drawArraysInstanced(GrPrimitiveType primitiveType, GrGLint baseVertex,
                                   GrGLsizei vertexCount, GrGLsizei instanceCount) {
+    SkASSERT(this->glCaps().instanceAttribSupport());
     SkASSERT(instanceCount <= this->glCaps().maxInstancesPerDrawWithoutCrashing(instanceCount));
     GrGLenum glPrimType = this->prepareToDraw(primitiveType);
     GL_CALL(DrawArraysInstanced(glPrimType, baseVertex, vertexCount, instanceCount));
 }
 
+void GrGLGpu::drawArraysInstancedBaseInstance(GrPrimitiveType primitiveType, GrGLint baseVertex,
+                                              GrGLsizei vertexCount, GrGLsizei instanceCount,
+                                              GrGLuint baseInstance) {
+    SkASSERT(this->glCaps().instanceAttribSupport());
+    SkASSERT(this->glCaps().baseVertexBaseInstanceSupport());
+    SkASSERT(instanceCount <= this->glCaps().maxInstancesPerDrawWithoutCrashing(instanceCount));
+    GrGLenum glPrimType = this->prepareToDraw(primitiveType);
+    GL_CALL(DrawArraysInstancedBaseInstance(glPrimType, baseVertex, vertexCount, instanceCount,
+                                            baseInstance));
+}
+
 void GrGLGpu::drawElementsInstanced(GrPrimitiveType primitiveType, GrGLsizei indexCount,
                                     GrGLenum indexType, const void* indices,
                                     GrGLsizei instanceCount) {
+    SkASSERT(this->glCaps().instanceAttribSupport());
     SkASSERT(instanceCount <= this->glCaps().maxInstancesPerDrawWithoutCrashing(instanceCount));
     GrGLenum glPrimType = this->prepareToDraw(primitiveType);
     GL_CALL(DrawElementsInstanced(glPrimType, indexCount, indexType, indices, instanceCount));
 }
 
+void GrGLGpu::drawElementsInstancedBaseVertexBaseInstance(
+        GrPrimitiveType primitiveType, GrGLsizei indexCount, GrGLenum indexType,
+        const void* indices, GrGLsizei instanceCount, GrGLint baseVertex, GrGLuint baseInstance) {
+    SkASSERT(this->glCaps().instanceAttribSupport());
+    SkASSERT(this->glCaps().baseVertexBaseInstanceSupport());
+    SkASSERT(instanceCount <= this->glCaps().maxInstancesPerDrawWithoutCrashing(instanceCount));
+    GrGLenum glPrimType = this->prepareToDraw(primitiveType);
+    GL_CALL(DrawElementsInstancedBaseVertexBaseInstance(glPrimType, indexCount, indexType, indices,
+                                                        instanceCount, baseVertex, baseInstance));
+}
+
 void GrGLGpu::onResolveRenderTarget(GrRenderTarget* target, const SkIRect& resolveRect,
                                     ForExternalIO) {
     // Some extensions automatically resolves the texture when it is read.
diff --git a/src/gpu/gl/GrGLGpu.h b/src/gpu/gl/GrGLGpu.h
index 7fdcdf1..5c52d39 100644
--- a/src/gpu/gl/GrGLGpu.h
+++ b/src/gpu/gl/GrGLGpu.h
@@ -113,8 +113,13 @@
                            GrGLsizei indexCount, GrGLenum indexType, const void* indices);
     void drawArraysInstanced(GrPrimitiveType, GrGLint baseVertex, GrGLsizei vertexCount,
                              GrGLsizei instanceCount);
+    void drawArraysInstancedBaseInstance(GrPrimitiveType, GrGLint baseVertex, GrGLsizei vertexCount,
+                                         GrGLsizei instanceCount, GrGLuint baseInstance);
     void drawElementsInstanced(GrPrimitiveType, GrGLsizei indexCount, GrGLenum indexType,
                                const void* indices, GrGLsizei instanceCount);
+    void drawElementsInstancedBaseVertexBaseInstance(
+            GrPrimitiveType, GrGLsizei indexCount, GrGLenum indexType, const void* indices,
+            GrGLsizei instanceCount, GrGLint baseVertex, GrGLuint baseInstance);
 
     // The GrGLOpsRenderPass does not buffer up draws before submitting them to the gpu.
     // Thus this is the implementation of the clear call for the corresponding passthrough function
diff --git a/src/gpu/gl/GrGLInterfaceAutogen.cpp b/src/gpu/gl/GrGLInterfaceAutogen.cpp
index bcf8c1f..c465fe6 100644
--- a/src/gpu/gl/GrGLInterfaceAutogen.cpp
+++ b/src/gpu/gl/GrGLInterfaceAutogen.cpp
@@ -240,7 +240,10 @@
        (GR_IS_GR_GL_ES(fStandard) && (
           fExtensions.has("GL_EXT_base_instance") ||
           fExtensions.has("GL_ANGLE_base_vertex_base_instance")))) {
-        // all functions were marked optional or test_only
+        if (!fFunctions.fDrawArraysInstancedBaseInstance ||
+            !fFunctions.fDrawElementsInstancedBaseVertexBaseInstance) {
+            RETURN_FALSE_INTERFACE;
+        }
     }
 
     if (GR_IS_GR_GL(fStandard) ||
diff --git a/src/gpu/gl/GrGLOpsRenderPass.cpp b/src/gpu/gl/GrGLOpsRenderPass.cpp
index 260376e..c427158 100644
--- a/src/gpu/gl/GrGLOpsRenderPass.cpp
+++ b/src/gpu/gl/GrGLOpsRenderPass.cpp
@@ -48,9 +48,7 @@
                                        const GrSurfaceProxy* const primProcTextures[],
                                        const GrPipeline& pipeline) {
     GrGLProgram* program = fGpu->currentProgram();
-    if (!program) {
-        return false;
-    }
+    SkASSERT(program);
     program->bindTextures(primProc, primProcTextures, pipeline);
     return true;
 }
@@ -60,9 +58,7 @@
                                       GrPrimitiveRestart primitiveRestart) {
     SkASSERT((primitiveRestart == GrPrimitiveRestart::kNo) || indexBuffer);
     GrGLProgram* program = fGpu->currentProgram();
-    if (!program) {
-        return;
-    }
+    SkASSERT(program);
 
     int numAttribs = program->numVertexAttributes() + program->numInstanceAttributes();
     fAttribArrayState = fGpu->bindInternalVertexArray(indexBuffer, numAttribs, primitiveRestart);
@@ -76,32 +72,27 @@
         }
     }
 
-    // We defer binding of instance and vertex buffers because GL does not (always) support base
-    // instance and/or base vertex.
-    fActiveInstanceBuffer = sk_ref_sp(instanceBuffer);
-    fActiveVertexBuffer = sk_ref_sp(vertexBuffer);
+    if (!fGpu->glCaps().baseVertexBaseInstanceSupport()) {
+        // This platform does not support baseInstance. Defer binding of the instance buffer.
+        fActiveInstanceBuffer = sk_ref_sp(instanceBuffer);
+    } else {
+        this->bindInstanceBuffer(instanceBuffer, 0);
+    }
+    if (!indexBuffer && fGpu->glCaps().drawArraysBaseVertexIsBroken()) {
+        // There is a driver bug affecting glDrawArrays. Defer binding of the vertex buffer.
+        fActiveVertexBuffer = sk_ref_sp(vertexBuffer);
+    } else if (indexBuffer && !fGpu->glCaps().baseVertexBaseInstanceSupport()) {
+        // This platform does not support baseVertex with indexed draws. Defer binding of the
+        // vertex buffer.
+        fActiveVertexBuffer = sk_ref_sp(vertexBuffer);
+    } else {
+        this->bindVertexBuffer(vertexBuffer, 0);
+    }
 }
 
-void GrGLOpsRenderPass::setupGeometry(const GrBuffer* vertexBuffer, int baseVertex,
-                                      const GrBuffer* instanceBuffer, int baseInstance) {
+void GrGLOpsRenderPass::bindInstanceBuffer(const GrBuffer* instanceBuffer, int baseInstance) {
     GrGLProgram* program = fGpu->currentProgram();
-    if (!program) {
-        return;
-    }
-
-    if (int vertexStride = program->vertexStride()) {
-        SkASSERT(vertexBuffer);
-        SkASSERT(vertexBuffer->isCpuBuffer() ||
-                 !static_cast<const GrGpuBuffer*>(vertexBuffer)->isMapped());
-        size_t bufferOffset = baseVertex * static_cast<size_t>(vertexStride);
-        for (int i = 0; i < program->numVertexAttributes(); ++i) {
-            const auto& attrib = program->vertexAttribute(i);
-            static constexpr int kDivisor = 0;
-            fAttribArrayState->set(fGpu, attrib.fLocation, vertexBuffer, attrib.fCPUType,
-                                   attrib.fGPUType, vertexStride, bufferOffset + attrib.fOffset,
-                                   kDivisor);
-        }
-    }
+    SkASSERT(program);
     if (int instanceStride = program->instanceStride()) {
         SkASSERT(instanceBuffer);
         SkASSERT(instanceBuffer->isCpuBuffer() ||
@@ -118,20 +109,47 @@
     }
 }
 
-void GrGLOpsRenderPass::onDraw(int vertexCount, int baseVertex) {
-    if (fGpu->glCaps().drawArraysBaseVertexIsBroken()) {
-        this->setupGeometry(fActiveVertexBuffer.get(), baseVertex, nullptr, 0);
-        fGpu->drawArrays(fPrimitiveType, 0, vertexCount);
-        return;
+void GrGLOpsRenderPass::bindVertexBuffer(const GrBuffer* vertexBuffer, int baseVertex) {
+    GrGLProgram* program = fGpu->currentProgram();
+    SkASSERT(program);
+    if (int vertexStride = program->vertexStride()) {
+        SkASSERT(vertexBuffer);
+        SkASSERT(vertexBuffer->isCpuBuffer() ||
+                 !static_cast<const GrGpuBuffer*>(vertexBuffer)->isMapped());
+        size_t bufferOffset = baseVertex * static_cast<size_t>(vertexStride);
+        for (int i = 0; i < program->numVertexAttributes(); ++i) {
+            const auto& attrib = program->vertexAttribute(i);
+            static constexpr int kDivisor = 0;
+            fAttribArrayState->set(fGpu, attrib.fLocation, vertexBuffer, attrib.fCPUType,
+                                   attrib.fGPUType, vertexStride, bufferOffset + attrib.fOffset,
+                                   kDivisor);
+        }
     }
+}
 
-    this->setupGeometry(fActiveVertexBuffer.get(), 0, nullptr, 0);
+void GrGLOpsRenderPass::onDraw(int vertexCount, int baseVertex) {
+    SkASSERT(!fActiveVertexBuffer || fGpu->glCaps().drawArraysBaseVertexIsBroken());
+    if (fGpu->glCaps().drawArraysBaseVertexIsBroken()) {
+        this->bindVertexBuffer(fActiveVertexBuffer.get(), baseVertex);
+        baseVertex = 0;
+    }
     fGpu->drawArrays(fPrimitiveType, baseVertex, vertexCount);
 }
 
 void GrGLOpsRenderPass::onDrawIndexed(int indexCount, int baseIndex, uint16_t minIndexValue,
                                       uint16_t maxIndexValue, int baseVertex) {
-    this->setupGeometry(fActiveVertexBuffer.get(), baseVertex, nullptr, 0);
+    if (fGpu->glCaps().baseVertexBaseInstanceSupport()) {
+        SkASSERT(!fActiveVertexBuffer);
+        if (baseVertex != 0) {
+            fGpu->drawElementsInstancedBaseVertexBaseInstance(
+                    fPrimitiveType, indexCount, GR_GL_UNSIGNED_SHORT,
+                    this->offsetForBaseIndex(baseIndex), 1, baseVertex, 0);
+            return;
+        }
+    } else {
+        this->bindVertexBuffer(fActiveVertexBuffer.get(), baseVertex);
+    }
+
     if (fGpu->glCaps().drawRangeElementsSupport()) {
         fGpu->drawRangeElements(fPrimitiveType, minIndexValue, maxIndexValue, indexCount,
                                 GR_GL_UNSIGNED_SHORT, this->offsetForBaseIndex(baseIndex));
@@ -143,12 +161,25 @@
 
 void GrGLOpsRenderPass::onDrawInstanced(int instanceCount, int baseInstance, int vertexCount,
                                         int baseVertex) {
+    SkASSERT(!fActiveVertexBuffer || fGpu->glCaps().drawArraysBaseVertexIsBroken());
+    if (fGpu->glCaps().drawArraysBaseVertexIsBroken()) {
+        // We weren't able to bind the vertex buffer during onBindBuffers because of a driver bug
+        // affecting glDrawArrays.
+        this->bindVertexBuffer(fActiveVertexBuffer.get(), 0);
+    }
     int maxInstances = fGpu->glCaps().maxInstancesPerDrawWithoutCrashing(instanceCount);
     for (int i = 0; i < instanceCount; i += maxInstances) {
-        this->setupGeometry(fActiveVertexBuffer.get(), 0, fActiveInstanceBuffer.get(),
-                            baseInstance + i);
-        fGpu->drawArraysInstanced(fPrimitiveType, baseVertex, vertexCount,
-                                  std::min(instanceCount - i, maxInstances));
+        int instanceCountForDraw = std::min(instanceCount - i, maxInstances);
+        int baseInstanceForDraw = baseInstance + i;
+        if (fGpu->glCaps().baseVertexBaseInstanceSupport()) {
+            SkASSERT(!fActiveInstanceBuffer);
+            fGpu->drawArraysInstancedBaseInstance(fPrimitiveType, baseVertex, vertexCount,
+                                                  instanceCountForDraw, baseInstanceForDraw);
+        } else {
+            this->bindInstanceBuffer(fActiveInstanceBuffer.get(), baseInstanceForDraw);
+            fGpu->drawArraysInstanced(fPrimitiveType, baseVertex, vertexCount,
+                                      instanceCountForDraw);
+        }
     }
 }
 
@@ -156,11 +187,21 @@
                                                int baseInstance, int baseVertex) {
     int maxInstances = fGpu->glCaps().maxInstancesPerDrawWithoutCrashing(instanceCount);
     for (int i = 0; i < instanceCount; i += maxInstances) {
-        this->setupGeometry(fActiveVertexBuffer.get(), baseVertex, fActiveInstanceBuffer.get(),
-                            baseInstance + i);
-        fGpu->drawElementsInstanced(fPrimitiveType, indexCount, GR_GL_UNSIGNED_SHORT,
-                                    this->offsetForBaseIndex(baseIndex),
-                                    std::min(instanceCount - i, maxInstances));
+        int instanceCountForDraw = std::min(instanceCount - i, maxInstances);
+        int baseInstanceForDraw = baseInstance + i;
+        if (fGpu->glCaps().baseVertexBaseInstanceSupport()) {
+            SkASSERT(!fActiveInstanceBuffer);
+            SkASSERT(!fActiveVertexBuffer);
+            fGpu->drawElementsInstancedBaseVertexBaseInstance(
+                    fPrimitiveType, indexCount, GR_GL_UNSIGNED_SHORT,
+                    this->offsetForBaseIndex(baseIndex), instanceCountForDraw, baseVertex,
+                    baseInstanceForDraw);
+        } else {
+            this->bindInstanceBuffer(fActiveInstanceBuffer.get(), baseInstanceForDraw);
+            this->bindVertexBuffer(fActiveVertexBuffer.get(), baseVertex);
+            fGpu->drawElementsInstanced(fPrimitiveType, indexCount, GR_GL_UNSIGNED_SHORT,
+                                        this->offsetForBaseIndex(baseIndex), instanceCountForDraw);
+        }
     }
 }
 
diff --git a/src/gpu/gl/GrGLOpsRenderPass.h b/src/gpu/gl/GrGLOpsRenderPass.h
index b186237..af808560 100644
--- a/src/gpu/gl/GrGLOpsRenderPass.h
+++ b/src/gpu/gl/GrGLOpsRenderPass.h
@@ -40,8 +40,8 @@
 private:
     GrGpu* gpu() override { return fGpu; }
 
-    void setupGeometry(const GrBuffer* vertexBuffer, int baseVertex, const GrBuffer* instanceBuffer,
-                       int baseInstance);
+    void bindInstanceBuffer(const GrBuffer*, int baseInstance);
+    void bindVertexBuffer(const GrBuffer*, int baseVertex);
 
     const void* offsetForBaseIndex(int baseIndex) const {
         if (!fIndexPointer) {
diff --git a/tools/gpu/gl/interface/interface.json5 b/tools/gpu/gl/interface/interface.json5
index 9d5390e..284fde8 100644
--- a/tools/gpu/gl/interface/interface.json5
+++ b/tools/gpu/gl/interface/interface.json5
@@ -179,9 +179,6 @@
     "functions": [
       "DrawArraysInstancedBaseInstance", "DrawElementsInstancedBaseVertexBaseInstance"
     ],
-    "optional": [
-      "DrawArraysInstancedBaseInstance", "DrawElementsInstancedBaseVertexBaseInstance"
-    ],
   },
 
   { // ES 3.0 has glDrawBuffers but not glDrawBuffer