VmaBlockMetadata_Buddy: Fixed reporting of space wasted due to internal fragmentation as unused blocks. Added test for multi-block pool with buddy algorithm.
diff --git a/src/Tests.cpp b/src/Tests.cpp
index 1527657..0d0d4cb 100644
--- a/src/Tests.cpp
+++ b/src/Tests.cpp
@@ -18,8 +18,8 @@
     CONFIG_TYPE_COUNT

 };

 

-static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_SMALL;

-//static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_LARGE;

+//static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_SMALL;

+static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_LARGE;

 

 enum class FREE_ORDER { FORWARD, BACKWARD, RANDOM, COUNT };

 

@@ -29,6 +29,23 @@
     "RANDOM",

 };

 

+// Copy of internal VmaAlgorithmToStr.

+static const char* AlgorithmToStr(uint32_t algorithm)

+{

+    switch(algorithm)

+    {

+    case VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT:

+        return "Linear";

+    case VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT:

+        return "Buddy";

+    case 0:

+        return "Default";

+    default:

+        assert(0);

+        return "";

+    }

+}

+

 struct AllocationSize

 {

     uint32_t Probability;

@@ -2131,8 +2148,8 @@
     vmaDestroyPool(g_hAllocator, pool);

 }

 

-static void BenchmarkLinearAllocatorCase(FILE* file,

-    bool linear,

+static void BenchmarkAlgorithmsCase(FILE* file,

+    uint32_t algorithm,

     bool empty,

     VmaAllocationCreateFlags allocStrategy,

     FREE_ORDER freeOrder)

@@ -2156,8 +2173,7 @@
     assert(res == VK_SUCCESS);

 

     poolCreateInfo.blockSize = bufSizeMax * maxBufCapacity;

-    if(linear)

-        poolCreateInfo.flags = VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT;

+    poolCreateInfo.flags |= algorithm;

     poolCreateInfo.minBlockCount = poolCreateInfo.maxBlockCount = 1;

 

     VmaPool pool = nullptr;

@@ -2258,8 +2274,8 @@
     const float allocTotalSeconds = ToFloatSeconds(allocTotalDuration);

     const float freeTotalSeconds  = ToFloatSeconds(freeTotalDuration);

 

-    printf("    LinearAlgorithm=%u %s Allocation=%s FreeOrder=%s: allocations %g s, free %g s\n",

-        linear ? 1 : 0,

+    printf("    Algorithm=%s %s Allocation=%s FreeOrder=%s: allocations %g s, free %g s\n",

+        AlgorithmToStr(algorithm),

         empty ? "Empty" : "Not empty",

         GetAllocationStrategyName(allocStrategy),

         FREE_ORDER_NAMES[(size_t)freeOrder],

@@ -2271,9 +2287,9 @@
         std::string currTime;

         CurrentTimeToStr(currTime);

 

-        fprintf(file, "%s,%s,%u,%u,%s,%s,%g,%g\n",

+        fprintf(file, "%s,%s,%s,%u,%s,%s,%g,%g\n",

             CODE_DESCRIPTION, currTime.c_str(),

-            linear ? 1 : 0,

+            AlgorithmToStr(algorithm),

             empty ? 1 : 0,

             GetAllocationStrategyName(allocStrategy),

             FREE_ORDER_NAMES[(uint32_t)freeOrder],

@@ -2282,15 +2298,15 @@
     }

 }

 

-static void BenchmarkLinearAllocator(FILE* file)

+static void BenchmarkAlgorithms(FILE* file)

 {

-    wprintf(L"Benchmark linear allocator\n");

+    wprintf(L"Benchmark algorithms\n");

 

     if(file)

     {

         fprintf(file,

             "Code,Time,"

-            "Linear,Empty,Allocation strategy,Free order,"

+            "Algorithm,Empty,Allocation strategy,Free order,"

             "Allocation time (s),Deallocation time (s)\n");

     }

 

@@ -2316,15 +2332,28 @@
 

         for(uint32_t emptyIndex = 0; emptyIndex < emptyCount; ++emptyIndex)

         {

-            for(uint32_t linearIndex = 0; linearIndex < 2; ++linearIndex)

+            for(uint32_t algorithmIndex = 0; algorithmIndex < 3; ++algorithmIndex)

             {

-                const bool linear = linearIndex ? 1 : 0;

+                uint32_t algorithm = 0;

+                switch(algorithmIndex)

+                {

+                case 0:

+                    break;

+                case 1:

+                    algorithm = VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT;

+                    break;

+                case 2:

+                    algorithm = VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT;

+                    break;

+                default:

+                    assert(0);

+                }

 

-                uint32_t currAllocStrategyCount = linear ? 1 : allocStrategyCount;

+                uint32_t currAllocStrategyCount = algorithm != 0 ? 1 : allocStrategyCount;

                 for(uint32_t allocStrategyIndex = 0; allocStrategyIndex < currAllocStrategyCount; ++allocStrategyIndex)

                 {

                     VmaAllocatorCreateFlags strategy = 0;

-                    if(!linear)

+                    if(currAllocStrategyCount > 1)

                     {

                         switch(allocStrategyIndex)

                         {

@@ -2335,9 +2364,9 @@
                         }

                     }

 

-                    BenchmarkLinearAllocatorCase(

+                    BenchmarkAlgorithmsCase(

                         file,

-                        linear, // linear

+                        algorithm,

                         emptyIndex ? 0 : 1, // empty

                         strategy,

                         freeOrder); // freeOrder

@@ -4098,7 +4127,7 @@
 

     poolCreateInfo.blockSize = 1024 * 1024;

     poolCreateInfo.flags = VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT;

-    poolCreateInfo.minBlockCount = poolCreateInfo.maxBlockCount = 1;

+    //poolCreateInfo.minBlockCount = poolCreateInfo.maxBlockCount = 1;

 

     VmaPool pool = nullptr;

     res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);

@@ -4131,12 +4160,23 @@
     assert(res == VK_SUCCESS);

     bufInfo.push_back(newBufInfo);

 

-    SaveAllocatorStatsToFile(L"BuddyTest01.json");

 

     VmaPoolStats stats = {};

     vmaGetPoolStats(g_hAllocator, pool, &stats);

     int DBG = 0; // Set breakpoint here to inspect `stats`.

 

+    // Allocate enough new buffers to surely fall into second block.

+    for(uint32_t i = 0; i < 32; ++i)

+    {

+        bufCreateInfo.size = 1024 * (rand.Generate() % 32 + 1);

+        res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,

+            &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);

+        assert(res == VK_SUCCESS);

+        bufInfo.push_back(newBufInfo);

+    }

+

+    SaveAllocatorStatsToFile(L"BuddyTest01.json");

+

     // Destroy the buffers in random order.

     while(!bufInfo.empty())

     {

@@ -4158,6 +4198,7 @@
         // # Temporarily insert custom tests here

         // ########################################

         // ########################################

+        

         BasicTestBuddyAllocator();

         return;

     }

@@ -4186,9 +4227,7 @@
         FILE* file;

         fopen_s(&file, "LinearAllocator.csv", "w");

         assert(file != NULL);

-        

-        BenchmarkLinearAllocator(file);

-

+        BenchmarkAlgorithms(file);

         fclose(file);

     }

 

diff --git a/src/vk_mem_alloc.h b/src/vk_mem_alloc.h
index be39a50..4f98d15 100644
--- a/src/vk_mem_alloc.h
+++ b/src/vk_mem_alloc.h
@@ -5051,7 +5051,9 @@
     } m_FreeList[MAX_LEVELS];

     // Number of nodes in the tree with type == TYPE_ALLOCATION.

     size_t m_AllocationCount;

+    // Number of nodes in the tree with type == TYPE_FREE.

     size_t m_FreeCount;

+    // This includes space wasted due to internal fragmentation.

     VkDeviceSize m_SumFreeSize;

 

     void DeleteNode(Node* node);

@@ -9258,7 +9260,7 @@
     outInfo.blockCount = 1;

 

     outInfo.allocationCount = outInfo.unusedRangeCount = 0;

-    outInfo.unusedBytes = outInfo.unusedBytes = 0;

+    outInfo.usedBytes = outInfo.unusedBytes = 0;

 

     outInfo.allocationSizeMax = outInfo.unusedRangeSizeMax = 0;

     outInfo.allocationSizeMin = outInfo.unusedRangeSizeMin = UINT64_MAX;

@@ -9392,9 +9394,10 @@
         AddToFreeListFront(childrenLevel, rightChild);

         AddToFreeListFront(childrenLevel, leftChild);

 

+        ++m_FreeCount;

+        m_SumFreeSize -= LevelToNodeSize(currLevel) % 2; // Useful only when level node sizes can be non power of 2.

         ++currLevel;

         currNode = m_FreeList[currLevel].front;

-        ++m_FreeCount;

     }

 

     // Remove from free list.

@@ -9407,7 +9410,7 @@
 

     ++m_AllocationCount;

     --m_FreeCount;

-    m_SumFreeSize -= LevelToNodeSize(currLevel);

+    m_SumFreeSize -= allocSize;

 }

 

 void VmaBlockMetadata_Buddy::DeleteNode(Node* node)

@@ -9435,6 +9438,7 @@
         break;

     case Node::TYPE_ALLOCATION:

         ++ctx.calculatedAllocationCount;

+        ctx.calculatedSumFreeSize += levelNodeSize - curr->allocation.alloc->GetSize();

         VMA_VALIDATE(curr->allocation.alloc != VK_NULL_HANDLE);

         break;

     case Node::TYPE_SPLIT:

@@ -9517,7 +9521,7 @@
 

     ++m_FreeCount;

     --m_AllocationCount;

-    m_SumFreeSize += levelSize;

+    m_SumFreeSize += alloc->GetSize();

 

     node->type = Node::TYPE_FREE;

 

@@ -9533,6 +9537,7 @@
         

         node = parent;

         --level;

+        m_SumFreeSize += LevelToNodeSize(level) % 2; // Useful only when level node sizes can be non power of 2.

         --m_FreeCount;

     }

 

@@ -9550,10 +9555,22 @@
         outInfo.unusedRangeSizeMin = VMA_MAX(outInfo.unusedRangeSizeMin, levelNodeSize);

         break;

     case Node::TYPE_ALLOCATION:

-        ++outInfo.allocationCount;

-        outInfo.usedBytes += levelNodeSize;

-        outInfo.allocationSizeMax = VMA_MAX(outInfo.allocationSizeMax, levelNodeSize);

-        outInfo.allocationSizeMin = VMA_MAX(outInfo.allocationSizeMin, levelNodeSize);

+        {

+            const VkDeviceSize allocSize = node->allocation.alloc->GetSize();

+            ++outInfo.allocationCount;

+            outInfo.usedBytes += allocSize;

+            outInfo.allocationSizeMax = VMA_MAX(outInfo.allocationSizeMax, allocSize);

+            outInfo.allocationSizeMin = VMA_MAX(outInfo.allocationSizeMin, allocSize);

+

+            const VkDeviceSize unusedRangeSize = levelNodeSize - allocSize;

+            if(unusedRangeSize > 0)

+            {

+                ++outInfo.unusedRangeCount;

+                outInfo.unusedBytes += unusedRangeSize;

+                outInfo.unusedRangeSizeMax = VMA_MAX(outInfo.unusedRangeSizeMax, unusedRangeSize);

+                outInfo.unusedRangeSizeMin = VMA_MAX(outInfo.unusedRangeSizeMin, unusedRangeSize);

+            }

+        }

         break;

     case Node::TYPE_SPLIT:

         {

@@ -9631,7 +9648,14 @@
         PrintDetailedMap_UnusedRange(json, node->offset, levelNodeSize);

         break;

     case Node::TYPE_ALLOCATION:

-        PrintDetailedMap_Allocation(json, node->offset, node->allocation.alloc);

+        {   

+            PrintDetailedMap_Allocation(json, node->offset, node->allocation.alloc);

+            const VkDeviceSize allocSize = node->allocation.alloc->GetSize();

+            if(allocSize < levelNodeSize)

+            {

+                PrintDetailedMap_UnusedRange(json, node->offset + allocSize, levelNodeSize - allocSize);

+            }

+        }

         break;

     case Node::TYPE_SPLIT:

         {