Fix GPU memory defragmentation: restrict it to supported memory types

Defragmentation of GPU memory is now restricted to memory types that support creating a buffer used for transfers. #59
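For background, the restriction relies on a standard Vulkan pattern: create a dummy buffer with the usage flags the defragmentation copies need, then read VkMemoryRequirements::memoryTypeBits to learn which memory types can back such a buffer. The patch below wraps exactly this probe in VmaFillGpuDefragmentationBufferCreateInfo and CalculateGpuDefragmentationMemoryTypeBits. A minimal standalone sketch of the query, assuming a valid VkDevice; the function name and size are illustrative, not part of the patch:

    #include <vulkan/vulkan.h>

    // Hypothetical sketch: which memory types can back a transfer buffer?
    static uint32_t QueryTransferBufferMemoryTypeBits(VkDevice device)
    {
        VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
        bufCreateInfo.size = 0x10000000; // Example size; a plausible large block size.
        bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;

        uint32_t memoryTypeBits = 0;
        VkBuffer buf = VK_NULL_HANDLE;
        if(vkCreateBuffer(device, &bufCreateInfo, nullptr, &buf) == VK_SUCCESS)
        {
            // Bit i of memoryTypeBits is set iff memory type i can back this buffer.
            VkMemoryRequirements memReq;
            vkGetBufferMemoryRequirements(device, buf, &memReq);
            memoryTypeBits = memReq.memoryTypeBits;
            vkDestroyBuffer(device, buf, nullptr);
        }
        return memoryTypeBits;
    }

Only memory types whose bit is set in the returned mask are then considered for GPU-side defragmentation.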
diff --git a/src/vk_mem_alloc.h b/src/vk_mem_alloc.h
index 42d4e18..35b50a7 100644
--- a/src/vk_mem_alloc.h
+++ b/src/vk_mem_alloc.h
@@ -3206,7 +3206,6 @@
 #include <cassert> // for assert
 #include <algorithm> // for min, max
 #include <mutex>
-#include <atomic> // for std::atomic
 
 #ifndef VMA_NULL
    // Value used as null pointer. Define it to e.g.: nullptr, NULL, 0, (void*)0.
@@ -3396,7 +3395,8 @@
 - bool compare_exchange_weak(uint32_t& expected, uint32_t desired)
 */
 #ifndef VMA_ATOMIC_UINT32
-   #define VMA_ATOMIC_UINT32 std::atomic<uint32_t>
+    #include <atomic>
+    #define VMA_ATOMIC_UINT32 std::atomic<uint32_t>
 #endif
 
 #ifndef VMA_DEBUG_ALWAYS_DEDICATED_MEMORY
@@ -3744,6 +3744,18 @@
     return true;
 }
 
+/*
+Fills structure with parameters of an example buffer to be used for transfers
+during GPU memory defragmentation.
+*/
+static void VmaFillGpuDefragmentationBufferCreateInfo(VkBufferCreateInfo& outBufCreateInfo)
+{
+    memset(&outBufCreateInfo, 0, sizeof(outBufCreateInfo));
+    outBufCreateInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
+    outBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+    outBufCreateInfo.size = (VkDeviceSize)VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE; // Example size.
+}
+
 // Helper RAII class to lock a mutex in constructor and unlock it in destructor (at the end of scope).
 struct VmaMutexLock
 {
@@ -6779,11 +6791,18 @@
 
     void FillAllocation(const VmaAllocation hAllocation, uint8_t pattern);
 
+    /*
+    Returns bit mask of memory types that can support defragmentation on GPU as
+    they support creation of required buffer for copy operations.
+    */
+    uint32_t GetGpuDefragmentationMemoryTypeBits();
+
 private:
     VkDeviceSize m_PreferredLargeHeapBlockSize;
 
     VkPhysicalDevice m_PhysicalDevice;
     VMA_ATOMIC_UINT32 m_CurrentFrameIndex;
+    VMA_ATOMIC_UINT32 m_GpuDefragmentationMemoryTypeBits; // UINT32_MAX means uninitialized.
 
     VMA_RW_MUTEX m_PoolsMutex;
     // Protected by m_PoolsMutex. Sorted by pointer value.
@@ -6838,6 +6857,12 @@
 
     // Tries to free pMemory as Dedicated Memory. Returns true if found and freed.
     void FreeDedicatedMemory(VmaAllocation allocation);
+
+    /*
+    Calculates and returns bit mask of memory types that can support defragmentation
+    on GPU as they support creation of required buffer for copy operations.
+    */
+    uint32_t CalculateGpuDefragmentationMemoryTypeBits() const;
 };
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -12266,9 +12291,8 @@
 
     // Go over all blocks. Create and bind buffer for whole block if necessary.
     {
-        VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
-        bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
-            VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+        VkBufferCreateInfo bufCreateInfo;
+        VmaFillGpuDefragmentationBufferCreateInfo(bufCreateInfo);
 
         for(size_t blockIndex = 0; pDefragCtx->res == VK_SUCCESS && blockIndex < blockCount; ++blockIndex)
         {
@@ -12428,7 +12452,8 @@
     const bool canDefragmentOnCpu = maxCpuBytesToMove > 0 && maxCpuAllocationsToMove > 0 &&
         isHostVisible;
     const bool canDefragmentOnGpu = maxGpuBytesToMove > 0 && maxGpuAllocationsToMove > 0 &&
-        !IsCorruptionDetectionEnabled();
+        !IsCorruptionDetectionEnabled() &&
+        ((1u << m_MemoryTypeIndex) & m_hAllocator->GetGpuDefragmentationMemoryTypeBits()) != 0;
 
     // There are options to defragment this memory type.
     if(canDefragmentOnCpu || canDefragmentOnGpu)
@@ -14164,6 +14189,7 @@
     m_PreferredLargeHeapBlockSize(0),
     m_PhysicalDevice(pCreateInfo->physicalDevice),
     m_CurrentFrameIndex(0),
+    m_GpuDefragmentationMemoryTypeBits(UINT32_MAX),
     m_Pools(VmaStlAllocator<VmaPool>(GetAllocationCallbacks())),
     m_NextPoolId(0)
 #if VMA_RECORDING_ENABLED
@@ -15555,6 +15581,31 @@
     VMA_DEBUG_LOG("    Freed DedicatedMemory MemoryTypeIndex=%u", memTypeIndex);
 }
 
+uint32_t VmaAllocator_T::CalculateGpuDefragmentationMemoryTypeBits() const
+{
+    VkBufferCreateInfo dummyBufCreateInfo;
+    VmaFillGpuDefragmentationBufferCreateInfo(dummyBufCreateInfo);
+
+    uint32_t memoryTypeBits = 0;
+
+    // Create buffer.
+    VkBuffer buf = VMA_NULL;
+    VkResult res = (*GetVulkanFunctions().vkCreateBuffer)(
+        m_hDevice, &dummyBufCreateInfo, GetAllocationCallbacks(), &buf);
+    if(res == VK_SUCCESS)
+    {
+        // Query for supported memory types.
+        VkMemoryRequirements memReq;
+        (*GetVulkanFunctions().vkGetBufferMemoryRequirements)(m_hDevice, buf, &memReq);
+        memoryTypeBits = memReq.memoryTypeBits;
+
+        // Destroy buffer.
+        (*GetVulkanFunctions().vkDestroyBuffer)(m_hDevice, buf, GetAllocationCallbacks());
+    }
+
+    return memoryTypeBits;
+}
+
 void VmaAllocator_T::FillAllocation(const VmaAllocation hAllocation, uint8_t pattern)
 {
     if(VMA_DEBUG_INITIALIZE_ALLOCATIONS &&
@@ -15576,6 +15627,17 @@
     }
 }
 
+uint32_t VmaAllocator_T::GetGpuDefragmentationMemoryTypeBits()
+{
+    uint32_t memoryTypeBits = m_GpuDefragmentationMemoryTypeBits.load();
+    if(memoryTypeBits == UINT32_MAX)
+    {
+        memoryTypeBits = CalculateGpuDefragmentationMemoryTypeBits();
+        m_GpuDefragmentationMemoryTypeBits.store(memoryTypeBits);
+    }
+    return memoryTypeBits;
+}
+
 #if VMA_STATS_STRING_ENABLED
 
 void VmaAllocator_T::PrintDetailedMap(VmaJsonWriter& json)
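
Note on the caching in GetGpuDefragmentationMemoryTypeBits: the mask is computed lazily on first use and stored in an atomic, with UINT32_MAX as the "not yet computed" sentinel. If two threads race past the check, both normally compute the same value, so the duplicated work is harmless and no mutex is needed. A condensed sketch of this lazy-cache pattern (hypothetical names, not VMA API):

    #include <atomic>
    #include <cstdint>

    class LazyMemoryTypeBits
    {
    public:
        uint32_t Get()
        {
            uint32_t bits = m_Bits.load();
            if(bits == UINT32_MAX) // Not computed yet.
            {
                bits = Calculate();
                m_Bits.store(bits); // Racing threads store the same result.
            }
            return bits;
        }

    private:
        std::atomic<uint32_t> m_Bits{UINT32_MAX}; // UINT32_MAX = uninitialized sentinel.

        static uint32_t Calculate()
        {
            // Stand-in for the real query (e.g. the buffer probe shown earlier).
            return 0x7u;
        }
    };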