Merge pull request #711 from cdavis5e/metal3-placement-heaps
Use placement heaps for VkDeviceMemory when possible.
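Where the OS and GPU support them (see the placementHeaps gates added in MVKDevice.mm below), each non-dedicated VkDeviceMemory allocation is now backed by a placement MTLHeap, and Metal buffers and textures are sub-allocated from that heap at the offsets Vulkan binds them to. A minimal sketch of the Metal flow this relies on, separate from the MoltenVK classes changed below (function and parameter names here are illustrative, not MoltenVK symbols):

```objc
#import <Metal/Metal.h>

// Sketch only: `device` and `length` stand in for whatever the caller has.
// Manual retain/release, matching MoltenVK's conventions.
static id<MTLBuffer> placeBufferInNewHeap(id<MTLDevice> device, NSUInteger length,
                                          id<MTLHeap>* pOutHeap) {
    // 1. Ask Metal for the footprint of a heap sub-allocation of this buffer;
    //    MVKBuffer::getMemoryRequirements() now reports these values to Vulkan.
    MTLSizeAndAlign sizeAlign = [device heapBufferSizeAndAlignWithLength: length
                                                                 options: MTLResourceStorageModePrivate];

    // 2. Create one placement heap for the whole allocation
    //    (MVKDeviceMemory::ensureMTLHeap() does this with the VkDeviceMemory size).
    MTLHeapDescriptor* heapDesc = [MTLHeapDescriptor new];
    heapDesc.type = MTLHeapTypePlacement;              // offsets are chosen by the caller
    heapDesc.resourceOptions = MTLResourceStorageModePrivate;
    heapDesc.hazardTrackingMode = MTLHazardTrackingModeTracked;
    heapDesc.size = sizeAlign.size;
    id<MTLHeap> mtlHeap = [device newHeapWithDescriptor: heapDesc];    // retained
    [heapDesc release];
    if ( !mtlHeap ) { return nil; }

    // 3. Place the buffer at an explicit offset inside the heap, exactly as
    //    vkBindBufferMemory specifies (offset 0 here).
    id<MTLBuffer> mtlBuff = [mtlHeap newBufferWithLength: length
                                                 options: MTLResourceStorageModePrivate
                                                  offset: 0];          // retained
    *pOutHeap = mtlHeap;    // caller owns the heap, as MVKDeviceMemory owns _mtlHeap
    return mtlBuff;
}
```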
diff --git a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
index c9edf75..7a09f83 100644
--- a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
+++ b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
@@ -542,6 +542,7 @@
VkBool32 postDepthCoverage; /**< If true, coverage masks in fragment shaders post-depth-test are supported. */
VkBool32 native3DCompressedTextures; /**< If true, 3D compressed images are supported natively, without manual decompression. */
VkBool32 nativeTextureSwizzle; /**< If true, component swizzle is supported natively, without manual swizzling in shaders. */
+ VkBool32 placementHeaps; /**< If true, MTLHeap objects support placement of resources. */
} MVKPhysicalDeviceMetalFeatures;
/**
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h
index 82815ca..67d8844 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h
@@ -69,10 +69,10 @@
#pragma mark Metal
/** Returns the Metal buffer underlying this memory allocation. */
- inline id<MTLBuffer> getMTLBuffer() { return _deviceMemory ? _deviceMemory->getMTLBuffer() : nullptr; }
+ id<MTLBuffer> getMTLBuffer();
/** Returns the offset at which the contents of this instance starts within the underlying Metal buffer. */
- inline NSUInteger getMTLBufferOffset() { return _deviceMemoryOffset; }
+ inline NSUInteger getMTLBufferOffset() { return _deviceMemory && _deviceMemory->getMTLHeap() ? 0 : _deviceMemoryOffset; }
#pragma mark Construction
@@ -90,6 +90,7 @@
VkBufferMemoryBarrier* pBufferMemoryBarrier);
VkBufferUsageFlags _usage;
+ id<MTLBuffer> _mtlBuffer = nil;
};
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm
index ee23a31..bf1be23 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm
@@ -29,21 +29,28 @@
#pragma mark MVKBuffer
void MVKBuffer::propogateDebugName() {
- if (_debugName &&
- _deviceMemory &&
+ if (!_debugName) { return; }
+ if (_deviceMemory &&
_deviceMemory->isDedicatedAllocation() &&
_deviceMemory->_debugName.length == 0) {
_deviceMemory->setDebugName(_debugName.UTF8String);
}
+ setLabelIfNotNil(_mtlBuffer, _debugName);
}
#pragma mark Resource memory
VkResult MVKBuffer::getMemoryRequirements(VkMemoryRequirements* pMemoryRequirements) {
- pMemoryRequirements->size = getByteCount();
- pMemoryRequirements->alignment = _byteAlignment;
+ if (_device->_pMetalFeatures->placementHeaps) {
+ MTLSizeAndAlign sizeAndAlign = [_device->getMTLDevice() heapBufferSizeAndAlignWithLength: getByteCount() options: MTLResourceStorageModePrivate];
+ pMemoryRequirements->size = sizeAndAlign.size;
+ pMemoryRequirements->alignment = sizeAndAlign.align;
+ } else {
+ pMemoryRequirements->size = getByteCount();
+ pMemoryRequirements->alignment = _byteAlignment;
+ }
pMemoryRequirements->memoryTypeBits = _device->getPhysicalDevice()->getAllMemoryTypes();
#if MVK_MACOS
// Textures must not use shared memory
@@ -61,21 +68,15 @@
VkResult MVKBuffer::getMemoryRequirements(const void*, VkMemoryRequirements2* pMemoryRequirements) {
pMemoryRequirements->sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2;
getMemoryRequirements(&pMemoryRequirements->memoryRequirements);
- auto* next = (VkStructureType*)pMemoryRequirements->pNext;
- while (next) {
- switch (*next) {
+ for (auto* next = (VkBaseOutStructure*)pMemoryRequirements->pNext; next; next = next->pNext) {
+ switch (next->sType) {
case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
auto* dedicatedReqs = (VkMemoryDedicatedRequirements*)next;
- // TODO: Maybe someday we could do something with MTLHeaps
- // and allocate non-dedicated memory from them. For now, we
- // always prefer dedicated allocations.
- dedicatedReqs->prefersDedicatedAllocation = VK_TRUE;
+ dedicatedReqs->prefersDedicatedAllocation = VK_FALSE;
dedicatedReqs->requiresDedicatedAllocation = VK_FALSE;
- next = (VkStructureType*)dedicatedReqs->pNext;
break;
}
default:
- next = (VkStructureType*)((VkMemoryRequirements2*)next)->pNext;
break;
}
}
@@ -134,6 +135,25 @@
}
+#pragma mark Metal
+
+id<MTLBuffer> MVKBuffer::getMTLBuffer() {
+ if (_mtlBuffer) { return _mtlBuffer; }
+ if (_deviceMemory) {
+ if (_deviceMemory->getMTLHeap()) {
+ _mtlBuffer = [_deviceMemory->getMTLHeap() newBufferWithLength: getByteCount()
+ options: _deviceMemory->getMTLResourceOptions()
+ offset: _deviceMemoryOffset]; // retained
+ propogateDebugName();
+ return _mtlBuffer;
+ } else {
+ return _deviceMemory->getMTLBuffer();
+ }
+ }
+ return nil;
+}
+
+
#pragma mark Construction
MVKBuffer::MVKBuffer(MVKDevice* device, const VkBufferCreateInfo* pCreateInfo) : MVKResource(device), _usage(pCreateInfo->usage) {
@@ -143,6 +163,7 @@
MVKBuffer::~MVKBuffer() {
if (_deviceMemory) { _deviceMemory->removeBuffer(this); }
+ if (_mtlBuffer) { [_mtlBuffer release]; }
}
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
index 1360e4e..8873d69 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@@ -840,6 +840,7 @@
if ( mvkOSVersion() >= 13.0 ) {
_metalFeatures.mslVersionEnum = MTLLanguageVersion2_2;
+ _metalFeatures.placementHeaps = true;
if ( getSupportsGPUFamily(MTLGPUFamilyApple4) ) {
_metalFeatures.nativeTextureSwizzle = true;
}
@@ -894,6 +895,7 @@
_metalFeatures.native3DCompressedTextures = true;
if ( getSupportsGPUFamily(MTLGPUFamilyMac2) ) {
_metalFeatures.nativeTextureSwizzle = true;
+ _metalFeatures.placementHeaps = true;
}
}
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.h b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.h
index c87b443..bd15e52 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.h
@@ -94,6 +94,9 @@
/** Returns the Metal buffer underlying this memory allocation. */
inline id<MTLBuffer> getMTLBuffer() { return _mtlBuffer; }
+ /** Returns the Metal heap underlying this memory allocation. */
+ inline id<MTLHeap> getMTLHeap() { return _mtlHeap; }
+
/** Returns the Metal storage mode used by this memory allocation. */
inline MTLStorageMode getMTLStorageMode() { return _mtlStorageMode; }
@@ -123,6 +126,7 @@
void removeBuffer(MVKBuffer* mvkBuff);
VkResult addImage(MVKImage* mvkImg);
void removeImage(MVKImage* mvkImg);
+ bool ensureMTLHeap();
bool ensureMTLBuffer();
bool ensureHostMemory();
void freeHostMemory();
@@ -135,6 +139,7 @@
VkDeviceSize _mapOffset = 0;
VkDeviceSize _mapSize = 0;
id<MTLBuffer> _mtlBuffer = nil;
+ id<MTLHeap> _mtlHeap = nil;
void* _pMemory = nullptr;
void* _pHostMemory = nullptr;
bool _isMapped = false;
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm
index ee4aedf..c25500c 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm
@@ -31,7 +31,10 @@
#pragma mark MVKDeviceMemory
-void MVKDeviceMemory::propogateDebugName() { setLabelIfNotNil(_mtlBuffer, _debugName); }
+void MVKDeviceMemory::propogateDebugName() {
+ setLabelIfNotNil(_mtlHeap, _debugName);
+ setLabelIfNotNil(_mtlBuffer, _debugName);
+}
VkResult MVKDeviceMemory::map(VkDeviceSize offset, VkDeviceSize size, VkMemoryMapFlags flags, void** ppData) {
@@ -85,8 +88,11 @@
}
#endif
- lock_guard<mutex> lock(_rezLock);
- for (auto& img : _images) { img->flushToDevice(offset, memSize); }
+ // If we have an MTLHeap object, there's no need to sync memory manually between images and the buffer.
+ if (!_mtlHeap) {
+ lock_guard<mutex> lock(_rezLock);
+ for (auto& img : _images) { img->flushToDevice(offset, memSize); }
+ }
}
return VK_SUCCESS;
}
@@ -94,7 +100,7 @@
VkResult MVKDeviceMemory::pullFromDevice(VkDeviceSize offset, VkDeviceSize size, bool evenIfCoherent) {
// Coherent memory is flushed on unmap(), so it is only flushed if forced
VkDeviceSize memSize = adjustMemorySize(size, offset);
- if (memSize > 0 && isMemoryHostAccessible() && (evenIfCoherent || !isMemoryHostCoherent()) ) {
+ if (memSize > 0 && isMemoryHostAccessible() && (evenIfCoherent || !isMemoryHostCoherent()) && !_mtlHeap) {
lock_guard<mutex> lock(_rezLock);
for (auto& img : _images) { img->pullFromDevice(offset, memSize); }
}
@@ -140,8 +146,7 @@
return reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "Could not bind VkImage %p to a VkDeviceMemory dedicated to resource %p. A dedicated allocation may only be used with the resource it was dedicated to.", mvkImg, getDedicatedResource() );
}
- if (!_isDedicated)
- _images.push_back(mvkImg);
+ if (!_isDedicated) { _images.push_back(mvkImg); }
return VK_SUCCESS;
}
@@ -151,6 +156,36 @@
mvkRemoveAllOccurances(_images, mvkImg);
}
+// Ensures that this instance is backed by a MTLHeap object,
+// creating the MTLHeap if needed, and returns whether it was successful.
+bool MVKDeviceMemory::ensureMTLHeap() {
+
+ if (_mtlHeap) { return true; }
+
+ // Don't bother if we don't have placement heaps.
+ if (!getDevice()->_pMetalFeatures->placementHeaps) { return true; }
+
+#if MVK_MACOS
+ // MTLHeaps on Mac must use private storage for now.
+ if (_mtlStorageMode != MTLStorageModePrivate) { return true; }
+#endif
+
+ MTLHeapDescriptor* heapDesc = [MTLHeapDescriptor new];
+ heapDesc.type = MTLHeapTypePlacement;
+ heapDesc.resourceOptions = getMTLResourceOptions();
+ // For now, use tracked resources. Later, we should probably default
+ // to untracked, since Vulkan uses explicit barriers anyway.
+ heapDesc.hazardTrackingMode = MTLHazardTrackingModeTracked;
+ heapDesc.size = _allocationSize;
+ _mtlHeap = [_device->getMTLDevice() newHeapWithDescriptor: heapDesc]; // retained
+ [heapDesc release];
+ if (!_mtlHeap) { return false; }
+
+ propogateDebugName();
+
+ return true;
+}
+
// Ensures that this instance is backed by a MTLBuffer object,
// creating the MTLBuffer if needed, and returns whether it was successful.
bool MVKDeviceMemory::ensureMTLBuffer() {
@@ -162,12 +197,20 @@
if (memLen > _device->_pMetalFeatures->maxMTLBufferSize) { return false; }
// If host memory was already allocated, it is copied into the new MTLBuffer, and then released.
- if (_pHostMemory) {
+ if (_mtlHeap) {
+ _mtlBuffer = [_mtlHeap newBufferWithLength: memLen options: getMTLResourceOptions() offset: 0]; // retained
+ if (_pHostMemory) {
+ memcpy(_mtlBuffer.contents, _pHostMemory, memLen);
+ freeHostMemory();
+ }
+ [_mtlBuffer makeAliasable];
+ } else if (_pHostMemory) {
_mtlBuffer = [getMTLDevice() newBufferWithBytes: _pHostMemory length: memLen options: getMTLResourceOptions()]; // retained
freeHostMemory();
} else {
_mtlBuffer = [getMTLDevice() newBufferWithLength: memLen options: getMTLResourceOptions()]; // retained
}
+ if (!_mtlBuffer) { return false; }
_pMemory = isMemoryHostAccessible() ? _mtlBuffer.contents : nullptr;
propogateDebugName();
@@ -254,6 +297,15 @@
return;
}
+ // If we can, create a MTLHeap. This should happen before creating the buffer,
+ // allowing us to map its contents.
+ if (!dedicatedImage && !dedicatedBuffer) {
+ if (!ensureMTLHeap()) {
+ setConfigurationResult(reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "Could not allocate VkDeviceMemory of size %llu bytes.", _allocationSize));
+ return;
+ }
+ }
+
// If memory needs to be coherent it must reside in an MTLBuffer, since an open-ended map() must work.
if (isMemoryHostCoherent() && !ensureMTLBuffer() ) {
setConfigurationResult(reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "Could not allocate a host-coherent VkDeviceMemory of size %llu bytes. The maximum memory-aligned size of a host-coherent VkDeviceMemory is %llu bytes.", _allocationSize, _device->_pMetalFeatures->maxMTLBufferSize));
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h
index 0cbf0d0..dfbde55 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h
@@ -275,6 +275,7 @@
bool _usesTexelBuffer;
bool _isLinear;
bool _is3DCompressed;
+ bool _isAliasable;
};
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
index 04a8862..bf51f65 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
@@ -192,21 +192,16 @@
VkResult MVKImage::getMemoryRequirements(const void*, VkMemoryRequirements2* pMemoryRequirements) {
pMemoryRequirements->sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2;
getMemoryRequirements(&pMemoryRequirements->memoryRequirements);
- auto* next = (VkStructureType*)pMemoryRequirements->pNext;
- while (next) {
- switch (*next) {
+ for (auto* next = (VkBaseOutStructure*)pMemoryRequirements->pNext; next; next = next->pNext) {
+ switch (next->sType) {
case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
auto* dedicatedReqs = (VkMemoryDedicatedRequirements*)next;
- // TODO: Maybe someday we could do something with MTLHeaps
- // and allocate non-dedicated memory from them. For now, we
- // always prefer dedicated allocations.
- dedicatedReqs->prefersDedicatedAllocation = VK_TRUE;
+ bool writable = mvkIsAnyFlagEnabled(_usage, VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT);
+ dedicatedReqs->prefersDedicatedAllocation = !_usesTexelBuffer && (writable || !_device->_pMetalFeatures->placementHeaps);
dedicatedReqs->requiresDedicatedAllocation = VK_FALSE;
- next = (VkStructureType*)dedicatedReqs->pNext;
break;
}
default:
- next = (VkStructureType*)((VkMemoryRequirements2*)next)->pNext;
break;
}
}
@@ -231,7 +226,7 @@
bool isUncompressed = blockExt.width == 1 && blockExt.height == 1;
bool useTexelBuffer = _device->_pMetalFeatures->texelBuffers; // Texel buffers available
- useTexelBuffer = useTexelBuffer && isMemoryHostAccessible() && _isLinear && isUncompressed; // Applicable memory layout
+ useTexelBuffer = useTexelBuffer && (isMemoryHostAccessible() || _device->_pMetalFeatures->placementHeaps) && _isLinear && isUncompressed; // Applicable memory layout
useTexelBuffer = useTexelBuffer && _deviceMemory && _deviceMemory->_mtlBuffer; // Buffer is available to overlay
#if MVK_MACOS
@@ -352,6 +347,10 @@
mtlTex = [_deviceMemory->_mtlBuffer newTextureWithDescriptor: mtlTexDesc
offset: getDeviceMemoryOffset()
bytesPerRow: _subresources[0].layout.rowPitch];
+ } else if (_deviceMemory->_mtlHeap) {
+ mtlTex = [_deviceMemory->_mtlHeap newTextureWithDescriptor: mtlTexDesc
+ offset: getDeviceMemoryOffset()];
+ if (_isAliasable) [mtlTex makeAliasable];
} else {
mtlTex = [getMTLDevice() newTextureWithDescriptor: mtlTexDesc];
}
@@ -628,11 +627,20 @@
_canSupportMTLTextureView = !_isDepthStencilAttachment || _device->_pMetalFeatures->stencilViews;
_hasExpectedTexelSize = (mvkMTLPixelFormatBytesPerBlock(_mtlPixelFormat) == mvkVkFormatBytesPerBlock(pCreateInfo->format));
- // Calc _byteCount after _byteAlignment
- _byteAlignment = _isLinear ? _device->getVkFormatTexelBufferAlignment(pCreateInfo->format, this) : mvkEnsurePowerOfTwo(mvkVkFormatBytesPerBlock(pCreateInfo->format));
- for (uint32_t mipLvl = 0; mipLvl < _mipLevels; mipLvl++) {
- _byteCount += getBytesPerLayer(mipLvl) * _extent.depth * _arrayLayers;
- }
+ if (!_isLinear && _device->_pMetalFeatures->placementHeaps) {
+ MTLTextureDescriptor *mtlTexDesc = newMTLTextureDescriptor(); // temp retain
+ MTLSizeAndAlign sizeAndAlign = [_device->getMTLDevice() heapTextureSizeAndAlignWithDescriptor: mtlTexDesc];
+ [mtlTexDesc release];
+ _byteCount = sizeAndAlign.size;
+ _byteAlignment = sizeAndAlign.align;
+ _isAliasable = mvkIsAnyFlagEnabled(pCreateInfo->flags, VK_IMAGE_CREATE_ALIAS_BIT);
+ } else {
+ // Calc _byteCount after _byteAlignment
+ _byteAlignment = _isLinear ? _device->getVkFormatTexelBufferAlignment(pCreateInfo->format, this) : mvkEnsurePowerOfTwo(mvkVkFormatBytesPerBlock(pCreateInfo->format));
+ for (uint32_t mipLvl = 0; mipLvl < _mipLevels; mipLvl++) {
+ _byteCount += getBytesPerLayer(mipLvl) * _extent.depth * _arrayLayers;
+ }
+ }
initSubresources(pCreateInfo);
}
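
For reference, a sketch of the Vulkan-side sequence the requirement/dedicated-allocation hunks above serve (placeholder handles `dev`, `buff`, and `memTypeIdx`; validation and error handling omitted):

```objc
#include <vulkan/vulkan.h>

// Sketch only: how a client exercises the new non-dedicated path.
static void bindBufferToSharedMemory(VkDevice dev, VkBuffer buff, uint32_t memTypeIdx) {
    VkMemoryDedicatedRequirements dedicatedReqs = { VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS };
    VkMemoryRequirements2 memReqs = { VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, &dedicatedReqs };
    VkBufferMemoryRequirementsInfo2 reqInfo = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2, NULL, buff };
    vkGetBufferMemoryRequirements2(dev, &reqInfo, &memReqs);

    // With placement heaps, prefersDedicatedAllocation comes back VK_FALSE, so the
    // app may sub-allocate many buffers and images from one VkDeviceMemory.
    VkMemoryAllocateInfo allocInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO };
    allocInfo.allocationSize = memReqs.memoryRequirements.size;
    allocInfo.memoryTypeIndex = memTypeIdx;   // chosen from memoryRequirements.memoryTypeBits
    VkDeviceMemory mem = VK_NULL_HANDLE;
    vkAllocateMemory(dev, &allocInfo, NULL, &mem);

    // MoltenVK can now back this binding with its own MTLBuffer placed inside the
    // memory's MTLHeap (created lazily in MVKBuffer::getMTLBuffer() above), rather
    // than overlaying the VkDeviceMemory's single MTLBuffer.
    vkBindBufferMemory(dev, buff, mem, 0);
}
```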