Merge pull request #1078 from cdavis5e/fill-buffer-round-down

MVKCmdFillBuffer: Round size down, not up.
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
index 9002d59..322b19c 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
@@ -290,6 +290,9 @@
 	/** If a render encoder is active, encodes store actions for all attachments to it. */
 	void encodeStoreActions(bool storeOverride = false);
 
+	/** Returns whether or not we are presently in a render pass. */
+	bool isInRenderPass() { return _renderPass != nullptr; }
+
 	/** Returns the render subpass that is currently active. */
 	MVKRenderSubpass* getSubpass();
 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
index 232a9e2..c83e14b 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@@ -113,6 +113,11 @@
 				shaderDrawParamsFeatures->shaderDrawParameters = true;
 				break;
 			}
+			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES: {
+				auto* shaderSGTypesFeatures = (VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures*)next;
+				shaderSGTypesFeatures->shaderSubgroupExtendedTypes = _metalFeatures.subgroupSize != 0;
+				break;
+			}
 			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR: {
 				auto* uboLayoutFeatures = (VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR*)next;
 				uboLayoutFeatures->uniformBufferStandardLayout = true;
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm
index 593be4d..96c34b9 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm
@@ -154,8 +154,9 @@
 	lock_guard<mutex> lock(_rezLock);
 
 	// If a dedicated alloc, ensure this image is the one and only image
-	// I am dedicated to.
-	if (_isDedicated && (_imageMemoryBindings.empty() || _imageMemoryBindings[0] != mvkImg) ) {
+	// I am dedicated to. If my image is aliasable, though, allow other aliasable
+	// images to bind to me.
+	if (_isDedicated && (_imageMemoryBindings.empty() || !(contains(_imageMemoryBindings, mvkImg) || (_imageMemoryBindings[0]->_image->getIsAliasable() && mvkImg->_image->getIsAliasable()))) ) {
 		return reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "Could not bind VkImage %p to a VkDeviceMemory dedicated to resource %p. A dedicated allocation may only be used with the resource it was dedicated to.", mvkImg, getDedicatedResource() );
 	}
 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h
index ea8c68c..a7b81b8 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h
@@ -187,6 +187,9 @@
 	/** Returns whether this image has a linear memory layout. */
 	bool getIsLinear() { return _isLinear; }
 
+	/** Returns whether this image is allowed to alias another image. */
+	bool getIsAliasable() { return _isAliasable; }
+
 	/** 
 	 * Returns the 3D extent of this image at the base mipmap level.
 	 * For 2D or cube images, the Z component will be 1.  
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
index fdf4a00..8b07945 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
@@ -62,6 +62,13 @@
                            newTextureWithDescriptor: mtlTexDesc
                            offset: memoryBinding->getDeviceMemoryOffset() + _subresources[0].layout.offset];
             if (_image->_isAliasable) { [_mtlTexture makeAliasable]; }
+        } else if (_image->_isAliasable && memoryBinding->_deviceMemory->isDedicatedAllocation() &&
+            !contains(memoryBinding->_deviceMemory->_imageMemoryBindings, memoryBinding)) {
+            // This is a dedicated allocation, but it belongs to another aliasable image.
+            // In this case, use the MTLTexture from the memory's dedicated image.
+            // We know the other image must be aliasable, or I couldn't have been bound
+            // to its memory: the memory object wouldn't allow it.
+            _mtlTexture = [memoryBinding->_deviceMemory->_imageMemoryBindings[0]->_image->getMTLTexture(_planeIndex, mtlTexDesc.pixelFormat) retain];
         } else {
             _mtlTexture = [_image->getMTLDevice() newTextureWithDescriptor: mtlTexDesc];
         }
@@ -113,6 +120,8 @@
     }
 #endif
 
+    MVKImageMemoryBinding* memoryBinding = getMemoryBinding();
+
     VkExtent3D extent = _image->getExtent3D(_planeIndex, 0);
     MTLTextureDescriptor* mtlTexDesc = [MTLTextureDescriptor new];    // retained
     mtlTexDesc.pixelFormat = mtlPixFmt;
@@ -123,7 +132,12 @@
     mtlTexDesc.mipmapLevelCount = _image->_mipLevels;
     mtlTexDesc.sampleCount = mvkSampleCountFromVkSampleCountFlagBits(_image->_samples);
     mtlTexDesc.arrayLength = _image->_arrayLayers;
-    mtlTexDesc.usageMVK = _image->getPixelFormats()->getMTLTextureUsage(_image->_usage, mtlPixFmt, minUsage, _image->_isLinear, _image->_hasMutableFormat, _image->_hasExtendedUsage);
+    if (_image->_isAliasable && memoryBinding->_deviceMemory && memoryBinding->_deviceMemory->isDedicatedAllocation()) {
+        // Unfortunately, in this instance, we must presume the texture can be used for anything.
+        mtlTexDesc.usageMVK = MTLTextureUsageUnknown;
+    } else {
+        mtlTexDesc.usageMVK = _image->getPixelFormats()->getMTLTextureUsage(_image->_usage, mtlPixFmt, minUsage, _image->_isLinear, _image->_hasMutableFormat, _image->_hasExtendedUsage);
+    }
     mtlTexDesc.storageModeMVK = _image->getMTLStorageMode();
     mtlTexDesc.cpuCacheMode = _image->getMTLCPUCacheMode();
 
@@ -831,6 +845,7 @@
 	MVKPixelFormats* pixFmts = getPixelFormats();
     _vkFormat = pCreateInfo->format;
 	_usage = pCreateInfo->usage;
+    _isAliasable = mvkIsAnyFlagEnabled(pCreateInfo->flags, VK_IMAGE_CREATE_ALIAS_BIT);
 	_hasMutableFormat = mvkIsAnyFlagEnabled(pCreateInfo->flags, VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT);
 	_hasExtendedUsage = mvkIsAnyFlagEnabled(pCreateInfo->flags, VK_IMAGE_CREATE_EXTENDED_USAGE_BIT);
 
@@ -876,7 +891,6 @@
             [mtlTexDesc release];
             memoryBinding->_byteCount += sizeAndAlign.size;
             memoryBinding->_byteAlignment = std::max(memoryBinding->_byteAlignment, (VkDeviceSize)sizeAndAlign.align);
-            _isAliasable = mvkIsAnyFlagEnabled(pCreateInfo->flags, VK_IMAGE_CREATE_ALIAS_BIT);
         } else if (_isLinearForAtomics && _device->_pMetalFeatures->placementHeaps) {
             NSUInteger bufferLength = 0;
             for (uint32_t mipLvl = 0; mipLvl < _mipLevels; mipLvl++) {
@@ -886,7 +900,6 @@
             MTLSizeAndAlign sizeAndAlign = [_device->getMTLDevice() heapBufferSizeAndAlignWithLength: bufferLength options: MTLResourceStorageModePrivate];
             memoryBinding->_byteCount += sizeAndAlign.size;
             memoryBinding->_byteAlignment = std::max(std::max(memoryBinding->_byteAlignment, _rowByteAlignment), (VkDeviceSize)sizeAndAlign.align);
-            _isAliasable = mvkIsAnyFlagEnabled(pCreateInfo->flags, VK_IMAGE_CREATE_ALIAS_BIT);
         } else {
             for (uint32_t mipLvl = 0; mipLvl < _mipLevels; mipLvl++) {
                 VkExtent3D mipExtent = getExtent3D(planeIndex, mipLvl);
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm
index 68db119..4105026 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm
@@ -31,7 +31,7 @@
 #pragma mark MVKQueryPool
 
 void MVKQueryPool::endQuery(uint32_t query, MVKCommandEncoder* cmdEncoder) {
-    uint32_t queryCount = cmdEncoder->getSubpass()->getViewCountInMetalPass(cmdEncoder->getMultiviewPassIndex());
+    uint32_t queryCount = cmdEncoder->isInRenderPass() ? cmdEncoder->getSubpass()->getViewCountInMetalPass(cmdEncoder->getMultiviewPassIndex()) : 1;
     lock_guard<mutex> lock(_availabilityLock);
     for (uint32_t i = query; i < query + queryCount; ++i) {
         _availability[i] = DeviceAvailable;
diff --git a/MoltenVK/MoltenVK/Layers/MVKExtensions.def b/MoltenVK/MoltenVK/Layers/MVKExtensions.def
index cd54eb6..c770885 100644
--- a/MoltenVK/MoltenVK/Layers/MVKExtensions.def
+++ b/MoltenVK/MoltenVK/Layers/MVKExtensions.def
@@ -70,6 +70,7 @@
 MVK_EXTENSION(KHR_sampler_ycbcr_conversion, KHR_SAMPLER_YCBCR_CONVERSION, DEVICE)
 MVK_EXTENSION(KHR_shader_draw_parameters, KHR_SHADER_DRAW_PARAMETERS, DEVICE)
 MVK_EXTENSION(KHR_shader_float16_int8, KHR_SHADER_FLOAT16_INT8, DEVICE)
+MVK_EXTENSION(KHR_shader_subgroup_extended_types, KHR_SHADER_SUBGROUP_EXTENDED_TYPES, DEVICE)
 MVK_EXTENSION(KHR_storage_buffer_storage_class, KHR_STORAGE_BUFFER_STORAGE_CLASS, DEVICE)
 MVK_EXTENSION(KHR_surface, KHR_SURFACE, INSTANCE)
 MVK_EXTENSION(KHR_swapchain, KHR_SWAPCHAIN, DEVICE)
diff --git a/MoltenVK/MoltenVK/Layers/MVKExtensions.mm b/MoltenVK/MoltenVK/Layers/MVKExtensions.mm
index 80c221a..bc7cf71 100644
--- a/MoltenVK/MoltenVK/Layers/MVKExtensions.mm
+++ b/MoltenVK/MoltenVK/Layers/MVKExtensions.mm
@@ -52,6 +52,7 @@
 	if (pProperties == &kVkExtProps_EXT_POST_DEPTH_COVERAGE) { return false; }
 	if (pProperties == &kVkExtProps_AMD_SHADER_IMAGE_LOAD_STORE_LOD) { return false; }
 
+	if (pProperties == &kVkExtProps_KHR_SHADER_SUBGROUP_EXTENDED_TYPES) { return mvkOSVersionIsAtLeast(10.14); }
 	if (pProperties == &kVkExtProps_EXT_HDR_METADATA) { return mvkOSVersionIsAtLeast(10.15); }
 	if (pProperties == &kVkExtProps_EXT_FRAGMENT_SHADER_INTERLOCK) { return mvkOSVersionIsAtLeast(10.13); }
 	if (pProperties == &kVkExtProps_EXT_MEMORY_BUDGET) { return mvkOSVersionIsAtLeast(10.13); }
@@ -62,6 +63,7 @@
 #if MVK_IOS
 	if (pProperties == &kVkExtProps_MVK_MACOS_SURFACE) { return false; }
 	if (pProperties == &kVkExtProps_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE) { return false; }
+	if (pProperties == &kVkExtProps_KHR_SHADER_SUBGROUP_EXTENDED_TYPES) { return false; }
 	if (pProperties == &kVkExtProps_EXT_HDR_METADATA) { return false; }
 
 	if (pProperties == &kVkExtProps_EXT_FRAGMENT_SHADER_INTERLOCK) { return mvkOSVersionIsAtLeast(11.0); }
diff --git a/MoltenVK/MoltenVK/Utility/MVKFoundation.h b/MoltenVK/MoltenVK/Utility/MVKFoundation.h
index d8d1ed2..0dbfc88 100644
--- a/MoltenVK/MoltenVK/Utility/MVKFoundation.h
+++ b/MoltenVK/MoltenVK/Utility/MVKFoundation.h
@@ -470,7 +470,7 @@
 
 /** Returns whether the container contains an item equal to the value. */
 template<class C, class T>
-bool contains(const C& container, const T& val) {
+bool contains(C& container, const T& val) {
 	for (const T& cVal : container) { if (cVal == val) { return true; } }
 	return false;
 }