/*
 * Copyright 2018 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "src/gpu/mtl/GrMtlResourceProvider.h"

#include "include/gpu/GrContextOptions.h"
#include "src/gpu/GrContextPriv.h"
#include "src/gpu/mtl/GrMtlCommandBuffer.h"
#include "src/gpu/mtl/GrMtlGpu.h"
#include "src/gpu/mtl/GrMtlPipelineState.h"
#include "src/gpu/mtl/GrMtlUtil.h"

#include "src/sksl/SkSLCompiler.h"

#if !__has_feature(objc_arc)
#error This file must be compiled with ARC. Use the -fobjc-arc flag.
#endif

GrMtlResourceProvider::GrMtlResourceProvider(GrMtlGpu* gpu)
        : fGpu(gpu) {
    fPipelineStateCache.reset(new PipelineStateCache(gpu));
    fBufferSuballocator.reset(new BufferSuballocator(gpu->device(), kBufferSuballocatorStartSize));
    // TODO: maxBufferLength seems like a reasonable metric to determine fBufferSuballocatorMaxSize
    // but may need tuning. Might also need a GrContextOption to let the client set this.
#ifdef SK_BUILD_FOR_MAC
    int64_t maxBufferLength = 1024*1024*1024;
#else
    int64_t maxBufferLength = 256*1024*1024;
#endif
    if (@available(iOS 12, macOS 10.14, *)) {
        maxBufferLength = gpu->device().maxBufferLength;
    }
    fBufferSuballocatorMaxSize = maxBufferLength/16;
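    // For reference: with the 1 GB fallback above this caps the suballocator at 64 MB
    // (16 MB with the 256 MB fallback).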
}

GrMtlPipelineState* GrMtlResourceProvider::findOrCreateCompatiblePipelineState(
        GrRenderTarget* renderTarget,
        const GrProgramInfo& programInfo,
        GrPrimitiveType primType) {
    return fPipelineStateCache->refPipelineState(renderTarget, programInfo, primType);
}

////////////////////////////////////////////////////////////////////////////////////////////////
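
// Depth-stencil states and samplers are small immutable objects, so we cache one instance per
// unique key and return the cached object on subsequent requests; destroyResources() unrefs the
// cached objects when the provider is torn down.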
GrMtlDepthStencil* GrMtlResourceProvider::findOrCreateCompatibleDepthStencilState(
        const GrStencilSettings& stencil, GrSurfaceOrigin origin) {
    GrMtlDepthStencil* depthStencilState;
    GrMtlDepthStencil::Key key = GrMtlDepthStencil::GenerateKey(stencil, origin);
    depthStencilState = fDepthStencilStates.find(key);
    if (!depthStencilState) {
        depthStencilState = GrMtlDepthStencil::Create(fGpu, stencil, origin);
        fDepthStencilStates.add(depthStencilState);
    }
    SkASSERT(depthStencilState);
    return depthStencilState;
}

GrMtlSampler* GrMtlResourceProvider::findOrCreateCompatibleSampler(const GrSamplerState& params,
                                                                   uint32_t maxMipLevel) {
    GrMtlSampler* sampler;
    sampler = fSamplers.find(GrMtlSampler::GenerateKey(params, maxMipLevel));
    if (!sampler) {
        sampler = GrMtlSampler::Create(fGpu, params, maxMipLevel);
        fSamplers.add(sampler);
    }
    SkASSERT(sampler);
    return sampler;
}

void GrMtlResourceProvider::destroyResources() {
    // Iterate through all stored GrMtlSamplers and unref them before resetting the hash.
    SkTDynamicHash<GrMtlSampler, GrMtlSampler::Key>::Iter samplerIter(&fSamplers);
    for (; !samplerIter.done(); ++samplerIter) {
        (*samplerIter).unref();
    }
    fSamplers.reset();

    // Iterate through all stored GrMtlDepthStencils and unref them before resetting the hash.
    SkTDynamicHash<GrMtlDepthStencil, GrMtlDepthStencil::Key>::Iter dsIter(&fDepthStencilStates);
    for (; !dsIter.done(); ++dsIter) {
        (*dsIter).unref();
    }
    fDepthStencilStates.reset();

    fPipelineStateCache->release();
}

////////////////////////////////////////////////////////////////////////////////////////////////

#ifdef GR_PIPELINE_STATE_CACHE_STATS
// Display pipeline state cache usage
static const bool c_DisplayMtlPipelineCache{false};
#endif

struct GrMtlResourceProvider::PipelineStateCache::Entry {
    Entry(GrMtlGpu* gpu, GrMtlPipelineState* pipelineState)
            : fGpu(gpu)
            , fPipelineState(pipelineState) {}

    GrMtlGpu* fGpu;
    std::unique_ptr<GrMtlPipelineState> fPipelineState;
};

GrMtlResourceProvider::PipelineStateCache::PipelineStateCache(GrMtlGpu* gpu)
        : fMap(gpu->getContext()->priv().options().fRuntimeProgramCacheSize)
        , fGpu(gpu)
#ifdef GR_PIPELINE_STATE_CACHE_STATS
        , fTotalRequests(0)
        , fCacheMisses(0)
#endif
{}

GrMtlResourceProvider::PipelineStateCache::~PipelineStateCache() {
    SkASSERT(0 == fMap.count());
    // dump stats
#ifdef GR_PIPELINE_STATE_CACHE_STATS
    if (c_DisplayMtlPipelineCache) {
        SkDebugf("--- Pipeline State Cache ---\n");
        SkDebugf("Total requests: %d\n", fTotalRequests);
        SkDebugf("Cache misses: %d\n", fCacheMisses);
        SkDebugf("Cache miss %%: %f\n", (fTotalRequests > 0) ?
                 100.f * fCacheMisses / fTotalRequests :
                 0.f);
        SkDebugf("---------------------\n");
    }
#endif
}

void GrMtlResourceProvider::PipelineStateCache::release() {
    fMap.reset();
}
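
// Looks up a pipeline state matching the given program description, building and caching a new
// one on a miss. The cache retains ownership of the returned pipeline state.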
GrMtlPipelineState* GrMtlResourceProvider::PipelineStateCache::refPipelineState(
        GrRenderTarget* renderTarget,
        const GrProgramInfo& programInfo,
        GrPrimitiveType primType) {
#ifdef GR_PIPELINE_STATE_CACHE_STATS
    ++fTotalRequests;
#endif
    // TODO: unify GL, VK and Mtl
    // Get GrMtlProgramDesc
    GrMtlPipelineStateBuilder::Desc desc;
    if (!GrMtlPipelineStateBuilder::Desc::Build(&desc, renderTarget, programInfo, primType, fGpu)) {
        GrCapsDebugf(fGpu->caps(), "Failed to build mtl program descriptor!\n");
        return nullptr;
    }

    std::unique_ptr<Entry>* entry = fMap.find(desc);
    if (!entry) {
#ifdef GR_PIPELINE_STATE_CACHE_STATS
        ++fCacheMisses;
#endif
        GrMtlPipelineState* pipelineState(GrMtlPipelineStateBuilder::CreatePipelineState(
                fGpu, renderTarget, programInfo, &desc));
        if (!pipelineState) {
            return nullptr;
        }
        entry = fMap.insert(desc, std::unique_ptr<Entry>(new Entry(fGpu, pipelineState)));
        return (*entry)->fPipelineState.get();
    }
    return (*entry)->fPipelineState.get();
}

////////////////////////////////////////////////////////////////////////////////////////////////
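
// Managed storage on macOS keeps a CPU-accessible copy whose writes must be explicitly
// synchronized to the GPU; on iOS, where CPU and GPU share memory, shared storage is used instead.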
static id<MTLBuffer> alloc_dynamic_buffer(id<MTLDevice> device, size_t size) {
    NSUInteger options = 0;
    if (@available(macOS 10.11, iOS 9.0, *)) {
#ifdef SK_BUILD_FOR_MAC
        options |= MTLResourceStorageModeManaged;
#else
        options |= MTLResourceStorageModeShared;
#endif
    }
    return [device newBufferWithLength: size
                               options: options];
}

// The idea here is that we create a ring buffer which is used for all dynamic allocations
// below a certain size. When a dynamic GrMtlBuffer is mapped, it grabs a portion of this
// buffer and uses it. On a subsequent map it will grab a different portion of the buffer.
// This prevents the buffer from overwriting itself before it's submitted to the command
// stream.
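//
// For example, with an 8-byte buffer, head == 10 and tail == 6 mean offsets 6, 7, 0 and 1 are
// still in flight on the GPU, and the four bytes at offsets 2..5 are free for the next allocation.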
GrMtlResourceProvider::BufferSuballocator::BufferSuballocator(id<MTLDevice> device, size_t size)
        : fBuffer(alloc_dynamic_buffer(device, size))
        , fTotalSize(size)
        , fHead(0)
        , fTail(0) {
    // We increment fHead and fTail without bound and let overflow handle any wrapping.
    // Because of this, size needs to be a power of two.
    SkASSERT(SkIsPow2(size));
}

id<MTLBuffer> GrMtlResourceProvider::BufferSuballocator::getAllocation(size_t size,
                                                                       size_t* offset) {
    // capture current state locally (because fTail could be overwritten by the completion handler)
    size_t head, tail;
    SkAutoSpinlock lock(fMutex);
    head = fHead;
    tail = fTail;

    // The head and tail indices increment without bound, wrapping with overflow,
    // so we need to mod them down to the actual bounds of the allocation to determine
    // which blocks are available.
    size_t modHead = head & (fTotalSize - 1);
    size_t modTail = tail & (fTotalSize - 1);
    bool full = (head != tail && modHead == modTail);
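    // "full" means head has lapped tail by a whole buffer's worth of bytes, i.e. every byte is
    // still pending on the GPU.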

    // We don't want large allocations to eat up this buffer, so we allocate them separately.
    if (full || size > fTotalSize/2) {
        return nil;
    }

    // case 1: free space lies at the beginning and/or the end of the buffer
    if (modHead >= modTail) {
        // check for room at the end
        if (fTotalSize - modHead < size) {
            // no room at the end, check the beginning
            if (modTail < size) {
                // no room at the beginning
                return nil;
            }
            // we are going to allocate from the beginning, adjust head to '0' position
            head += fTotalSize - modHead;
            modHead = 0;
        }
    // case 2: free space lies in the middle of the buffer, check for room there
    } else if (modTail - modHead < size) {
        // no room in the middle
        return nil;
    }

    *offset = modHead;
    // We're not sure what the usage of the next allocation will be --
    // to be safe we'll use 16 byte alignment.
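    // (GrSizeAlignUp rounds head + size up to the next multiple of 16, e.g. 33 -> 48.)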
    fHead = GrSizeAlignUp(head + size, 16);
    return fBuffer;
}
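
// When the given command buffer completes, everything suballocated before it was submitted can be
// reused, so the completion handler advances fTail to the head value captured here.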
void GrMtlResourceProvider::BufferSuballocator::addCompletionHandler(
        GrMtlCommandBuffer* cmdBuffer) {
    this->ref();
    SkAutoSpinlock lock(fMutex);
    size_t newTail = fHead;
    cmdBuffer->addCompletedHandler(^(id<MTLCommandBuffer> commandBuffer) {
        // Make sure SkAutoSpinlock goes out of scope before
        // the BufferSuballocator is potentially deleted.
        {
            SkAutoSpinlock lock(fMutex);
            fTail = newTail;
        }
        this->unref();
    });
}
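
// Three-tier strategy: satisfy the request from the current suballocator if possible; otherwise
// grow the suballocator (the old one ages out once its pending work completes) and retry;
// otherwise hand back a one-off buffer sized for this allocation alone.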
id<MTLBuffer> GrMtlResourceProvider::getDynamicBuffer(size_t size, size_t* offset) {
    id<MTLBuffer> buffer = fBufferSuballocator->getAllocation(size, offset);
    if (buffer) {
        return buffer;
    }

    // Try to grow allocation (old allocation will age out).
    // We grow up to a maximum size, and only grow if the requested allocation will
    // fit into half of the new buffer (to prevent very large transient buffers forcing
    // growth when they'll never fit anyway).
    if (fBufferSuballocator->size() < fBufferSuballocatorMaxSize &&
        size <= fBufferSuballocator->size()) {
        fBufferSuballocator.reset(new BufferSuballocator(fGpu->device(),
                                                         2*fBufferSuballocator->size()));
        id<MTLBuffer> buffer = fBufferSuballocator->getAllocation(size, offset);
        if (buffer) {
            return buffer;
        }
    }

    *offset = 0;
    return alloc_dynamic_buffer(fGpu->device(), size);
}

void GrMtlResourceProvider::addBufferCompletionHandler(GrMtlCommandBuffer* cmdBuffer) {
    fBufferSuballocator->addCompletionHandler(cmdBuffer);
}