Add basic D3D mipmap support.

This adds support for mipmap generation in the simplest case, i.e. textures that are:
* power-of-two in size
* non-sRGB
* not BGR

The mipmaps are generated with a compute shader that reads from an SRV
of one miplevel and writes to a UAV of miplevel+1. Miplevel+1 is then
transitioned to a resource state that can be read from, and the process
repeats for miplevel+1 and miplevel+2, etc.

Change-Id: Id467d0e56d5408559d3cacec1514f1b43d76d640
Bug: skia:10446
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/392917
Reviewed-by: Greg Daniel <egdaniel@google.com>
Commit-Queue: Jim Van Verth <jvanverth@google.com>
diff --git a/src/gpu/d3d/GrD3DCpuDescriptorManager.cpp b/src/gpu/d3d/GrD3DCpuDescriptorManager.cpp
index 7c6d296..ae84027 100644
--- a/src/gpu/d3d/GrD3DCpuDescriptorManager.cpp
+++ b/src/gpu/d3d/GrD3DCpuDescriptorManager.cpp
@@ -166,7 +166,10 @@
 
     // need to allocate more space
     std::unique_ptr<GrD3DCpuDescriptorManager::Heap> heap =
-        GrD3DCpuDescriptorManager::Heap::Make(gpu, fHeapType, fMaxAvailableDescriptors);
+            GrD3DCpuDescriptorManager::Heap::Make(gpu, fHeapType, fMaxAvailableDescriptors);
+    // TODO: handle failed heap creation and/or memory restrictions better
+    // skbug.com/11959
+    SkASSERT(heap);
 
     fDescriptorHeaps.push_back(std::move(heap));
     fMaxAvailableDescriptors *= 2;
diff --git a/src/gpu/d3d/GrD3DGpu.cpp b/src/gpu/d3d/GrD3DGpu.cpp
index 82cb622..930f309 100644
--- a/src/gpu/d3d/GrD3DGpu.cpp
+++ b/src/gpu/d3d/GrD3DGpu.cpp
@@ -148,6 +148,10 @@
     SkASSERT(fCurrentDirectCommandList);
 
     fResourceProvider.prepForSubmit();
+    for (int i = 0; i < fMipmapCPUDescriptors.count(); ++i) {
+        fResourceProvider.recycleShaderView(fMipmapCPUDescriptors[i]);
+    }
+    fMipmapCPUDescriptors.reset();
 
     GrD3DDirectCommandList::SubmitResult result = fCurrentDirectCommandList->submit(fQueue.get());
     if (result == GrD3DDirectCommandList::SubmitResult::kFailure) {
@@ -871,6 +875,157 @@
     return std::move(tgt);
 }
 
+bool GrD3DGpu::onRegenerateMipMapLevels(GrTexture * tex) {
+    auto * d3dTex = static_cast<GrD3DTexture*>(tex);
+    SkASSERT(tex->textureType() == GrTextureType::k2D);
+    int width = tex->width();
+    int height = tex->height();
+
+    // determine if we can read from and mipmap this format
+    const GrD3DCaps & caps = this->d3dCaps();
+    if (!caps.isFormatTexturable(d3dTex->dxgiFormat()) ||
+        !caps.mipmapSupport()) {
+        return false;
+    }
+
+    sk_sp<GrD3DTexture> uavTexture;
+    // if the format is unordered accessible and resource flag is set, use resource for uav
+    if (caps.isFormatUnorderedAccessible(d3dTex->dxgiFormat()) &&
+        (d3dTex->d3dResource()->GetDesc().Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS)) {
+        uavTexture = sk_ref_sp(d3dTex);
+    } else {
+        // need to make a copy and use that for our uav
+        D3D12_RESOURCE_DESC uavDesc = d3dTex->d3dResource()->GetDesc();
+        uavDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
+        // the copy has the same format as the original, so that format must itself support UAVs
+        if (!caps.isFormatUnorderedAccessible(d3dTex->dxgiFormat())) {
+            // TODO: support BGR and sRGB
+            return false;
+        }
+        // TODO: make this a scratch texture
+        GrProtected grProtected = tex->isProtected() ? GrProtected::kYes : GrProtected::kNo;
+        uavTexture = GrD3DTexture::MakeNewTexture(this, SkBudgeted::kNo, tex->dimensions(),
+                                                  uavDesc, grProtected, GrMipmapStatus::kDirty);
+        if (!uavTexture) {
+            return false;
+        }
+
+        d3dTex->setResourceState(this, D3D12_RESOURCE_STATE_COPY_SOURCE);
+        // copy top miplevel to uavTexture
+        uavTexture->setResourceState(this, D3D12_RESOURCE_STATE_COPY_DEST);
+        this->currentCommandList()->copyTextureToTexture(uavTexture.get(), d3dTex, 0);
+    }
+
+    uint32_t levelCount = d3dTex->mipLevels();
+    // SkMipmap doesn't include the base level in the level count so we have to add 1
+    SkASSERT(levelCount == SkMipmap::ComputeLevelCount(tex->width(), tex->height()) + 1);
+
+    sk_sp<GrD3DRootSignature> rootSig = fResourceProvider.findOrCreateRootSignature(1, 1);
+    this->currentCommandList()->setComputeRootSignature(rootSig);
+
+    // TODO: use linear vs. srgb shader based on texture format
+    // TODO: handle odd widths and heights with triangular filter
+    sk_sp<GrD3DPipeline> pipeline = this->resourceProvider().findOrCreateMipmapPipeline();
+    SkASSERT(pipeline);
+    this->currentCommandList()->setPipelineState(std::move(pipeline));
+
+    // set sampler
+    GrSamplerState samplerState(SkFilterMode::kLinear, SkMipmapMode::kNearest);
+    std::vector<D3D12_CPU_DESCRIPTOR_HANDLE> samplers(1);
+    samplers[0] = fResourceProvider.findOrCreateCompatibleSampler(samplerState);
+    this->currentCommandList()->addSampledTextureRef(uavTexture.get());
+    sk_sp<GrD3DDescriptorTable> samplerTable = fResourceProvider.findOrCreateSamplerTable(samplers);
+    this->currentCommandList()->setComputeRootDescriptorTable(
+            static_cast<unsigned int>(GrD3DRootSignature::ParamIndex::kSamplerDescriptorTable),
+            samplerTable->baseGpuDescriptor());
+
+    // Transition the top subresource to be readable in the compute shader
+    D3D12_RESOURCE_STATES currentResourceState = uavTexture->currentState();
+    D3D12_RESOURCE_TRANSITION_BARRIER barrier;
+    barrier.pResource = uavTexture->d3dResource();
+    barrier.Subresource = 0;
+    barrier.StateBefore = currentResourceState;
+    barrier.StateAfter = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
+    this->addResourceBarriers(uavTexture->resource(), 1, &barrier);
+
+    // Generate the miplevels
+    for (unsigned int dstMip = 1; dstMip < levelCount; ++dstMip) {
+        unsigned int srcMip = dstMip - 1;
+        // TODO: manage odd widths and heights
+        width = std::max(1, width / 2);
+        height = std::max(1, height / 2);
+
+        // set constants
+        struct {
+            uint32_t mipLevel;
+            SkSize inverseSize;
+        } constantData = { srcMip, {1.f / width, 1.f / height} };
+
+        D3D12_GPU_VIRTUAL_ADDRESS constantsAddress =
+            fResourceProvider.uploadConstantData(&constantData, sizeof(constantData));
+        this->currentCommandList()->setComputeRootConstantBufferView(
+                (unsigned int)GrD3DRootSignature::ParamIndex::kConstantBufferView,
+                constantsAddress);
+
+        std::vector<D3D12_CPU_DESCRIPTOR_HANDLE> shaderViews;
+        // create SRV
+        GrD3DDescriptorHeap::CPUHandle srvHandle =
+                fResourceProvider.createShaderResourceView(uavTexture->d3dResource(), srcMip, 1);
+        shaderViews.push_back(srvHandle.fHandle);
+        fMipmapCPUDescriptors.push_back(srvHandle);
+        // create UAV
+        GrD3DDescriptorHeap::CPUHandle uavHandle =
+                fResourceProvider.createUnorderedAccessView(uavTexture->d3dResource(), dstMip);
+        shaderViews.push_back(uavHandle.fHandle);
+        fMipmapCPUDescriptors.push_back(uavHandle);
+
+        // set up and bind shaderView descriptor table
+        sk_sp<GrD3DDescriptorTable> srvTable =
+                fResourceProvider.findOrCreateShaderViewTable(shaderViews);
+        this->currentCommandList()->setComputeRootDescriptorTable(
+                (unsigned int)GrD3DRootSignature::ParamIndex::kShaderViewDescriptorTable,
+                srvTable->baseGpuDescriptor());
+
+        // Transition resource state of dstMip subresource so we can write to it
+        barrier.Subresource = dstMip;
+        barrier.StateBefore = currentResourceState;
+        barrier.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
+        this->addResourceBarriers(uavTexture->resource(), 1, &barrier);
+
+        // Using the form (x+7)/8 ensures that the remainder is covered as well
+        this->currentCommandList()->dispatch((width+7)/8, (height+7)/8);
+
+        // guarantee UAV writes have completed
+        this->currentCommandList()->uavBarrier(uavTexture->resource(), uavTexture->d3dResource());
+
+        // Transition resource state of dstMip subresource so we can read it in the next stage
+        barrier.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
+        barrier.StateAfter = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
+        this->addResourceBarriers(uavTexture->resource(), 1, &barrier);
+    }
+
+    // copy back if necessary
+    if (uavTexture.get() != d3dTex) {
+        d3dTex->setResourceState(this, D3D12_RESOURCE_STATE_COPY_DEST);
+        barrier.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
+        barrier.StateBefore = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
+        barrier.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;
+        // TODO: support BGR and sRGB
+        this->addResourceBarriers(uavTexture->resource(), 1, &barrier);
+        this->currentCommandList()->copyTextureToTexture(d3dTex, uavTexture.get());
+    } else {
+        // For simplicity our resource state tracking considers all subresources to have the same
+        // state. However, we've changed that state one subresource at a time without going through
+        // the tracking system, so we need to patch up the resource states back to the original.
+        barrier.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
+        barrier.StateBefore = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
+        barrier.StateAfter = currentResourceState;
+        this->addResourceBarriers(d3dTex->resource(), 1, &barrier);
+    }
+
+    return true;
+}
+
 sk_sp<GrGpuBuffer> GrD3DGpu::onCreateBuffer(size_t sizeInBytes, GrGpuBufferType type,
                                              GrAccessPattern accessPattern, const void* data) {
     sk_sp<GrD3DBuffer> buffer = GrD3DBuffer::Make(this, sizeInBytes, type, accessPattern);
diff --git a/src/gpu/d3d/GrD3DGpu.h b/src/gpu/d3d/GrD3DGpu.h
index 6beb5c7..6bcc400 100644
--- a/src/gpu/d3d/GrD3DGpu.h
+++ b/src/gpu/d3d/GrD3DGpu.h
@@ -186,7 +186,7 @@
     bool onCopySurface(GrSurface* dst, GrSurface* src, const SkIRect& srcRect,
                        const SkIPoint& dstPoint) override;
 
-    bool onRegenerateMipMapLevels(GrTexture*) override { return true; }
+    bool onRegenerateMipMapLevels(GrTexture*) override;
 
     void onResolveRenderTarget(GrRenderTarget* target, const SkIRect&) override;
 
@@ -279,6 +279,9 @@
     uint64_t fCurrentFenceValue = 0;
 
     std::unique_ptr<GrD3DDirectCommandList> fCurrentDirectCommandList;
+    // One-off special-case descriptors created directly for the mipmap compute shader. They
+    // aren't tracked by the normal path, so they are recycled explicitly before each submit.
+    SkSTArray<32, GrD3DDescriptorHeap::CPUHandle> fMipmapCPUDescriptors;
 
     struct OutstandingCommandList {
         OutstandingCommandList(std::unique_ptr<GrD3DDirectCommandList> commandList,
diff --git a/src/gpu/d3d/GrD3DPipelineStateBuilder.cpp b/src/gpu/d3d/GrD3DPipelineStateBuilder.cpp
index 3eaf828..2c1c220 100644
--- a/src/gpu/d3d/GrD3DPipelineStateBuilder.cpp
+++ b/src/gpu/d3d/GrD3DPipelineStateBuilder.cpp
@@ -687,3 +687,49 @@
                                                             geomProc.vertexStride(),
                                                             geomProc.instanceStride()));
 }
+
+
+sk_sp<GrD3DPipeline> GrD3DPipelineStateBuilder::MakeComputePipeline(GrD3DGpu* gpu,
+                                                                    GrD3DRootSignature* rootSig,
+                                                                    const char* shader) {
+    D3D12_COMPUTE_PIPELINE_STATE_DESC psoDesc = {};
+    psoDesc.pRootSignature = rootSig->rootSignature();
+
+    // compile shader
+    gr_cp<ID3DBlob> shaderBlob;
+    {
+        TRACE_EVENT0("skia.shaders", "driver_compile_shader");
+        uint32_t compileFlags = 0;
+#ifdef SK_DEBUG
+        // Enable better shader debugging with the graphics debugging tools.
+        compileFlags |= D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION;
+#endif
+
+        gr_cp<ID3DBlob> errors;
+        HRESULT hr = D3DCompile(shader, strlen(shader), nullptr, nullptr, nullptr, "main",
+                                "cs_5_1", compileFlags, 0, &shaderBlob, &errors);
+        if (!SUCCEEDED(hr)) {
+            gpu->getContext()->priv().getShaderErrorHandler()->compileError(
+                shader, reinterpret_cast<char*>(errors->GetBufferPointer()));
+            return nullptr;
+        }
+        psoDesc.CS = { reinterpret_cast<UINT8*>(shaderBlob->GetBufferPointer()),
+                       shaderBlob->GetBufferSize() };
+    }
+
+    // Only used for multi-adapter systems.
+    psoDesc.NodeMask = 0;
+
+    psoDesc.CachedPSO = { nullptr, 0 };
+    psoDesc.Flags = D3D12_PIPELINE_STATE_FLAG_NONE;
+
+    gr_cp<ID3D12PipelineState> pipelineState;
+    {
+        TRACE_EVENT0("skia.shaders", "CreateComputePipelineState");
+        GR_D3D_CALL_ERRCHECK(
+            gpu->device()->CreateComputePipelineState(&psoDesc, IID_PPV_ARGS(&pipelineState)));
+    }
+
+    return GrD3DPipeline::Make(std::move(pipelineState));
+}
+
diff --git a/src/gpu/d3d/GrD3DPipelineStateBuilder.h b/src/gpu/d3d/GrD3DPipelineStateBuilder.h
index b8f93ff..d80ebd6 100644
--- a/src/gpu/d3d/GrD3DPipelineStateBuilder.h
+++ b/src/gpu/d3d/GrD3DPipelineStateBuilder.h
@@ -32,6 +32,9 @@
                                                                  const GrProgramDesc&,
                                                                  const GrProgramInfo&);
 
+    static sk_sp<GrD3DPipeline> MakeComputePipeline(GrD3DGpu*, GrD3DRootSignature*,
+                                                    const char* shader);
+
     const GrCaps* caps() const override;
 
     GrD3DGpu* gpu() const { return fGpu; }
diff --git a/src/gpu/d3d/GrD3DResourceProvider.cpp b/src/gpu/d3d/GrD3DResourceProvider.cpp
index b8e1c31..f0cc9cb 100644
--- a/src/gpu/d3d/GrD3DResourceProvider.cpp
+++ b/src/gpu/d3d/GrD3DResourceProvider.cpp
@@ -199,6 +199,35 @@
     return fPipelineStateCache->refPipelineState(rt, info);
 }
 
+sk_sp<GrD3DPipeline> GrD3DResourceProvider::findOrCreateMipmapPipeline() {
+    if (!fMipmapPipeline) {
+        const char* shader =
+            "SamplerState textureSampler : register(s0, space1);\n"
+            "Texture2D<float4> inputTexture : register(t1, space1);\n"
+            "RWTexture2D<float4> outUAV : register(u2, space1);\n"
+            "\n"
+            "cbuffer UniformBuffer : register(b0, space0) {\n"
+            "    uint mipLevel;\n"
+            "    float2 inverseDims;\n"
+            "}\n"
+            "\n"
+            "[numthreads(8, 8, 1)]\n"
+            "void main(uint groupIndex : SV_GroupIndex, uint3 threadID : SV_DispatchThreadID) {\n"
+            "    float2 uv = inverseDims * (threadID.xy + 0.5);\n"
+            "    float4 mipVal = inputTexture.SampleLevel(textureSampler, uv, mipLevel);\n"
+            "\n"
+            "    outUAV[threadID.xy] = mipVal;\n"
+            "}\n";
+
+        sk_sp<GrD3DRootSignature> rootSig = this->findOrCreateRootSignature(1, 1);
+
+        fMipmapPipeline =
+                GrD3DPipelineStateBuilder::MakeComputePipeline(fGpu, rootSig.get(), shader);
+    }
+
+    return fMipmapPipeline;
+}
+
 D3D12_GPU_VIRTUAL_ADDRESS GrD3DResourceProvider::uploadConstantData(void* data, size_t size) {
     // constant size has to be aligned to 256
     constexpr int kConstantAlignment = 256;
diff --git a/src/gpu/d3d/GrD3DResourceProvider.h b/src/gpu/d3d/GrD3DResourceProvider.h
index 0e38f61..f9ed87c 100644
--- a/src/gpu/d3d/GrD3DResourceProvider.h
+++ b/src/gpu/d3d/GrD3DResourceProvider.h
@@ -75,6 +75,8 @@
     GrD3DPipelineState* findOrCreateCompatiblePipelineState(GrD3DRenderTarget*,
                                                             const GrProgramInfo&);
 
+    sk_sp<GrD3DPipeline> findOrCreateMipmapPipeline();
+
     D3D12_GPU_VIRTUAL_ADDRESS uploadConstantData(void* data, size_t size);
     void prepForSubmit();
 
@@ -161,6 +163,7 @@
     GrD3DDescriptorTableManager fDescriptorTableManager;
 
     std::unique_ptr<PipelineStateCache> fPipelineStateCache;
+    sk_sp<GrD3DPipeline> fMipmapPipeline;
 
     SkTHashMap<uint32_t, D3D12_CPU_DESCRIPTOR_HANDLE> fSamplers;