Merge pull request #1078 from cdavis5e/fill-buffer-round-down
MVKCmdFillBuffer: Round size down, not up.
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
index 9002d59..322b19c 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
@@ -290,6 +290,9 @@
/** If a render encoder is active, encodes store actions for all attachments to it. */
void encodeStoreActions(bool storeOverride = false);
+ /** Returns whether a render pass is currently active, i.e. whether _renderPass is non-null. */
+ bool isInRenderPass() { return _renderPass != nullptr; }
+
/** Returns the render subpass that is currently active. */
MVKRenderSubpass* getSubpass();
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
index 232a9e2..c83e14b 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@@ -113,6 +113,11 @@
shaderDrawParamsFeatures->shaderDrawParameters = true;
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES: {
+ auto* shaderSGTypesFeatures = (VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures*)next;
+ shaderSGTypesFeatures->shaderSubgroupExtendedTypes = _metalFeatures.subgroupSize != 0;
+ break;
+ }
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR: {
auto* uboLayoutFeatures = (VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR*)next;
uboLayoutFeatures->uniformBufferStandardLayout = true;
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm
index 593be4d..96c34b9 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm
@@ -154,8 +154,9 @@
lock_guard<mutex> lock(_rezLock);
// If a dedicated alloc, ensure this image is the one and only image
- // I am dedicated to.
- if (_isDedicated && (_imageMemoryBindings.empty() || _imageMemoryBindings[0] != mvkImg) ) {
+ // I am dedicated to. If my image is aliasable, though, allow other aliasable
+ // images to bind to me.
+ if (_isDedicated && (_imageMemoryBindings.empty() || !(contains(_imageMemoryBindings, mvkImg) || (_imageMemoryBindings[0]->_image->getIsAliasable() && mvkImg->_image->getIsAliasable()))) ) {
return reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "Could not bind VkImage %p to a VkDeviceMemory dedicated to resource %p. A dedicated allocation may only be used with the resource it was dedicated to.", mvkImg, getDedicatedResource() );
}
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h
index ea8c68c..a7b81b8 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h
@@ -187,6 +187,9 @@
/** Returns whether this image has a linear memory layout. */
bool getIsLinear() { return _isLinear; }
+ /** Returns the _isAliasable flag (derived from VK_IMAGE_CREATE_ALIAS_BIT), indicating whether this image is allowed to alias another image. */
+ bool getIsAliasable() { return _isAliasable; }
+
/**
* Returns the 3D extent of this image at the base mipmap level.
* For 2D or cube images, the Z component will be 1.
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
index fdf4a00..8b07945 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
@@ -62,6 +62,13 @@
newTextureWithDescriptor: mtlTexDesc
offset: memoryBinding->getDeviceMemoryOffset() + _subresources[0].layout.offset];
if (_image->_isAliasable) { [_mtlTexture makeAliasable]; }
+ } else if (_image->_isAliasable && memoryBinding->_deviceMemory->isDedicatedAllocation() &&
+ !contains(memoryBinding->_deviceMemory->_imageMemoryBindings, memoryBinding)) {
+ // This is a dedicated allocation, but it belongs to another aliasable image.
+ // In this case, use the MTLTexture from the memory's dedicated image.
+ // We know the other image must be aliasable, or I couldn't have been bound
+ // to its memory: the memory object wouldn't allow it.
+ _mtlTexture = [memoryBinding->_deviceMemory->_imageMemoryBindings[0]->_image->getMTLTexture(_planeIndex, mtlTexDesc.pixelFormat) retain];
} else {
_mtlTexture = [_image->getMTLDevice() newTextureWithDescriptor: mtlTexDesc];
}
@@ -113,6 +120,8 @@
}
#endif
+ MVKImageMemoryBinding* memoryBinding = getMemoryBinding();
+
VkExtent3D extent = _image->getExtent3D(_planeIndex, 0);
MTLTextureDescriptor* mtlTexDesc = [MTLTextureDescriptor new]; // retained
mtlTexDesc.pixelFormat = mtlPixFmt;
@@ -123,7 +132,12 @@
mtlTexDesc.mipmapLevelCount = _image->_mipLevels;
mtlTexDesc.sampleCount = mvkSampleCountFromVkSampleCountFlagBits(_image->_samples);
mtlTexDesc.arrayLength = _image->_arrayLayers;
- mtlTexDesc.usageMVK = _image->getPixelFormats()->getMTLTextureUsage(_image->_usage, mtlPixFmt, minUsage, _image->_isLinear, _image->_hasMutableFormat, _image->_hasExtendedUsage);
+ if (_image->_isAliasable && memoryBinding->_deviceMemory && memoryBinding->_deviceMemory->isDedicatedAllocation()) {
+ // Unfortunately, in this instance, we must presume the texture can be used for anything.
+ mtlTexDesc.usageMVK = MTLTextureUsageUnknown;
+ } else {
+ mtlTexDesc.usageMVK = _image->getPixelFormats()->getMTLTextureUsage(_image->_usage, mtlPixFmt, minUsage, _image->_isLinear, _image->_hasMutableFormat, _image->_hasExtendedUsage);
+ }
mtlTexDesc.storageModeMVK = _image->getMTLStorageMode();
mtlTexDesc.cpuCacheMode = _image->getMTLCPUCacheMode();
@@ -831,6 +845,7 @@
MVKPixelFormats* pixFmts = getPixelFormats();
_vkFormat = pCreateInfo->format;
_usage = pCreateInfo->usage;
+ _isAliasable = mvkIsAnyFlagEnabled(pCreateInfo->flags, VK_IMAGE_CREATE_ALIAS_BIT);
_hasMutableFormat = mvkIsAnyFlagEnabled(pCreateInfo->flags, VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT);
_hasExtendedUsage = mvkIsAnyFlagEnabled(pCreateInfo->flags, VK_IMAGE_CREATE_EXTENDED_USAGE_BIT);
@@ -876,7 +891,6 @@
[mtlTexDesc release];
memoryBinding->_byteCount += sizeAndAlign.size;
memoryBinding->_byteAlignment = std::max(memoryBinding->_byteAlignment, (VkDeviceSize)sizeAndAlign.align);
- _isAliasable = mvkIsAnyFlagEnabled(pCreateInfo->flags, VK_IMAGE_CREATE_ALIAS_BIT);
} else if (_isLinearForAtomics && _device->_pMetalFeatures->placementHeaps) {
NSUInteger bufferLength = 0;
for (uint32_t mipLvl = 0; mipLvl < _mipLevels; mipLvl++) {
@@ -886,7 +900,6 @@
MTLSizeAndAlign sizeAndAlign = [_device->getMTLDevice() heapBufferSizeAndAlignWithLength: bufferLength options: MTLResourceStorageModePrivate];
memoryBinding->_byteCount += sizeAndAlign.size;
memoryBinding->_byteAlignment = std::max(std::max(memoryBinding->_byteAlignment, _rowByteAlignment), (VkDeviceSize)sizeAndAlign.align);
- _isAliasable = mvkIsAnyFlagEnabled(pCreateInfo->flags, VK_IMAGE_CREATE_ALIAS_BIT);
} else {
for (uint32_t mipLvl = 0; mipLvl < _mipLevels; mipLvl++) {
VkExtent3D mipExtent = getExtent3D(planeIndex, mipLvl);
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm
index 68db119..4105026 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm
@@ -31,7 +31,7 @@
#pragma mark MVKQueryPool
void MVKQueryPool::endQuery(uint32_t query, MVKCommandEncoder* cmdEncoder) {
- uint32_t queryCount = cmdEncoder->getSubpass()->getViewCountInMetalPass(cmdEncoder->getMultiviewPassIndex());
+ uint32_t queryCount = cmdEncoder->isInRenderPass() ? cmdEncoder->getSubpass()->getViewCountInMetalPass(cmdEncoder->getMultiviewPassIndex()) : 1;
lock_guard<mutex> lock(_availabilityLock);
for (uint32_t i = query; i < query + queryCount; ++i) {
_availability[i] = DeviceAvailable;
diff --git a/MoltenVK/MoltenVK/Layers/MVKExtensions.def b/MoltenVK/MoltenVK/Layers/MVKExtensions.def
index cd54eb6..c770885 100644
--- a/MoltenVK/MoltenVK/Layers/MVKExtensions.def
+++ b/MoltenVK/MoltenVK/Layers/MVKExtensions.def
@@ -70,6 +70,7 @@
MVK_EXTENSION(KHR_sampler_ycbcr_conversion, KHR_SAMPLER_YCBCR_CONVERSION, DEVICE)
MVK_EXTENSION(KHR_shader_draw_parameters, KHR_SHADER_DRAW_PARAMETERS, DEVICE)
MVK_EXTENSION(KHR_shader_float16_int8, KHR_SHADER_FLOAT16_INT8, DEVICE)
+MVK_EXTENSION(KHR_shader_subgroup_extended_types, KHR_SHADER_SUBGROUP_EXTENDED_TYPES, DEVICE)
MVK_EXTENSION(KHR_storage_buffer_storage_class, KHR_STORAGE_BUFFER_STORAGE_CLASS, DEVICE)
MVK_EXTENSION(KHR_surface, KHR_SURFACE, INSTANCE)
MVK_EXTENSION(KHR_swapchain, KHR_SWAPCHAIN, DEVICE)
diff --git a/MoltenVK/MoltenVK/Layers/MVKExtensions.mm b/MoltenVK/MoltenVK/Layers/MVKExtensions.mm
index 80c221a..bc7cf71 100644
--- a/MoltenVK/MoltenVK/Layers/MVKExtensions.mm
+++ b/MoltenVK/MoltenVK/Layers/MVKExtensions.mm
@@ -52,6 +52,7 @@
if (pProperties == &kVkExtProps_EXT_POST_DEPTH_COVERAGE) { return false; }
if (pProperties == &kVkExtProps_AMD_SHADER_IMAGE_LOAD_STORE_LOD) { return false; }
+ if (pProperties == &kVkExtProps_KHR_SHADER_SUBGROUP_EXTENDED_TYPES) { return mvkOSVersionIsAtLeast(10.14); }
if (pProperties == &kVkExtProps_EXT_HDR_METADATA) { return mvkOSVersionIsAtLeast(10.15); }
if (pProperties == &kVkExtProps_EXT_FRAGMENT_SHADER_INTERLOCK) { return mvkOSVersionIsAtLeast(10.13); }
if (pProperties == &kVkExtProps_EXT_MEMORY_BUDGET) { return mvkOSVersionIsAtLeast(10.13); }
@@ -62,6 +63,7 @@
#if MVK_IOS
if (pProperties == &kVkExtProps_MVK_MACOS_SURFACE) { return false; }
if (pProperties == &kVkExtProps_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE) { return false; }
+ if (pProperties == &kVkExtProps_KHR_SHADER_SUBGROUP_EXTENDED_TYPES) { return false; }
if (pProperties == &kVkExtProps_EXT_HDR_METADATA) { return false; }
if (pProperties == &kVkExtProps_EXT_FRAGMENT_SHADER_INTERLOCK) { return mvkOSVersionIsAtLeast(11.0); }
diff --git a/MoltenVK/MoltenVK/Utility/MVKFoundation.h b/MoltenVK/MoltenVK/Utility/MVKFoundation.h
index d8d1ed2..0dbfc88 100644
--- a/MoltenVK/MoltenVK/Utility/MVKFoundation.h
+++ b/MoltenVK/MoltenVK/Utility/MVKFoundation.h
@@ -470,7 +470,7 @@
/** Returns whether the container contains an item equal to the value. */
template<class C, class T>
-bool contains(const C& container, const T& val) {
+bool contains(C& container, const T& val) {	// NOTE(review): non-const lvalue reference, so non-const temporary containers can no longer be passed — confirm this is intended.
for (const T& cVal : container) { if (cVal == val) { return true; } }
return false;
}