Merge branch 'master' into allocation_defragmentation_strategies
diff --git a/src/Tests.cpp b/src/Tests.cpp
index 842fba2..d04045d 100644
--- a/src/Tests.cpp
+++ b/src/Tests.cpp
@@ -21,10 +21,10 @@
 

 enum class FREE_ORDER { FORWARD, BACKWARD, RANDOM, COUNT };

 

-static const wchar_t* FREE_ORDER_NAMES[] = {

-    L"FORWARD",

-    L"BACKWARD",

-    L"RANDOM",

+static const char* FREE_ORDER_NAMES[] = {

+    "FORWARD",

+    "BACKWARD",

+    "RANDOM",

 };

 

 struct AllocationSize

@@ -45,6 +45,7 @@
     uint32_t ThreadCount;

     uint32_t ThreadsUsingCommonAllocationsProbabilityPercent;

     FREE_ORDER FreeOrder;

+    VmaAllocationCreateFlags AllocationStrategy; // For VMA_ALLOCATION_CREATE_STRATEGY_*

 };

 

 struct Result

@@ -264,6 +265,7 @@
 

         VmaAllocationCreateInfo memReq = {};

         memReq.usage = (VmaMemoryUsage)(VMA_MEMORY_USAGE_GPU_ONLY + memUsageIndex);

+        memReq.flags |= config.AllocationStrategy;

 

         Allocation allocation = {};

         VmaAllocationInfo allocationInfo;

@@ -1002,7 +1004,7 @@
     for(size_t i = 0; i < allocations.size(); ++i)

         ValidateAllocationData(allocations[i]);

 

-    SaveAllocatorStatsToFile(L"Before.csv");

+    //SaveAllocatorStatsToFile(L"Before.csv");

 

     {

         std::vector<VmaAllocation> vmaAllocations(allocations.size());

@@ -1051,9 +1053,9 @@
             for(size_t i = 0; i < allocations.size(); ++i)

                 ValidateAllocationData(allocations[i]);

 

-            wchar_t fileName[MAX_PATH];

-            swprintf(fileName, MAX_PATH, L"After_%02u.csv", defragIndex);

-            SaveAllocatorStatsToFile(fileName);

+            //wchar_t fileName[MAX_PATH];

+            //swprintf(fileName, MAX_PATH, L"After_%02u.csv", defragIndex);

+            //SaveAllocatorStatsToFile(fileName);

         }

     }

 

@@ -2210,9 +2212,9 @@
 

     vmaDestroyPool(g_hAllocator, pool);

 

-    wprintf(L"    LinearAlgorithm=%u %s FreeOrder=%s: allocations %g s, free %g s\n",

+    printf("    LinearAlgorithm=%u %s FreeOrder=%s: allocations %g s, free %g s\n",

         linear ? 1 : 0,

-        empty ? L"Empty" : L"Not empty",

+        empty ? "Empty" : "Not empty",

         FREE_ORDER_NAMES[(size_t)freeOrder],

         ToFloatSeconds(allocTotalDuration),

         ToFloatSeconds(freeTotalDuration));

@@ -3351,12 +3353,12 @@
 

     fprintf(file,

         "%s,%s,%s,"

-        "BeginBytesToAllocate=%I64u MaxBytesToAllocate=%I64u AdditionalOperationCount=%u ThreadCount=%u FreeOrder=%d,"

+        "BeginBytesToAllocate=%I64u MaxBytesToAllocate=%I64u AdditionalOperationCount=%u ThreadCount=%u FreeOrder=%s,"

         "%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%I64u,%I64u,%I64u\n",

         codeDescription,

         testDescription,

         timeStr,

-        config.BeginBytesToAllocate, config.MaxBytesToAllocate, config.AdditionalOperationCount, config.ThreadCount, (uint32_t)config.FreeOrder,

+        config.BeginBytesToAllocate, config.MaxBytesToAllocate, config.AdditionalOperationCount, config.ThreadCount, FREE_ORDER_NAMES[(uint32_t)config.FreeOrder],

         totalTimeSeconds * 1e6f,

         allocationTimeMinSeconds * 1e6f,

         allocationTimeAvgSeconds * 1e6f,

@@ -3447,6 +3449,7 @@
     config.FreeOrder = FREE_ORDER::FORWARD;

     config.ThreadCount = 16;

     config.ThreadsUsingCommonAllocationsProbabilityPercent = 50;

+    config.AllocationStrategy = 0;

 

     // Buffers

     //config.AllocationSizes.push_back({4, 16, 1024});

@@ -3522,6 +3525,18 @@
     case CONFIG_TYPE_MAXIMUM: threadCountCount = 7; break;

     default: assert(0);

     }

+

+    size_t strategyCount = 0;

+    switch(ConfigType)

+    {

+    case CONFIG_TYPE_MINIMUM: strategyCount = 1; break;

+    case CONFIG_TYPE_SMALL:   strategyCount = 1; break;

+    case CONFIG_TYPE_AVERAGE: strategyCount = 2; break;

+    case CONFIG_TYPE_LARGE:   strategyCount = 2; break;

+    case CONFIG_TYPE_MAXIMUM: strategyCount = 3; break;

+    default: assert(0);

+    }

+

     for(size_t threadCountIndex = 0; threadCountIndex < threadCountCount; ++threadCountIndex)

     {

         std::string desc1;

@@ -3718,16 +3733,38 @@
                             assert(0);

                         }

 

-                        const char* testDescription = desc5.c_str();

-

-                        for(size_t repeat = 0; repeat < repeatCount; ++repeat)

+                        for(size_t strategyIndex = 0; strategyIndex < strategyCount; ++strategyIndex)

                         {

-                            printf("%s Repeat %u\n", testDescription, (uint32_t)repeat);

+                            std::string desc6 = desc5;

+                            switch(strategyIndex)

+                            {

+                            case 0:

+                                desc6 += " BestFit";

+                                config.AllocationStrategy = VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT;

+                                break;

+                            case 1:

+                                desc6 += " WorstFit";

+                                config.AllocationStrategy = VMA_ALLOCATION_CREATE_STRATEGY_WORST_FIT_BIT;

+                                break;

+                            case 2:

+                                desc6 += " FirstFit";

+                                config.AllocationStrategy = VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT;

+                                break;

+                            default:

+                                assert(0);

+                            }

 

-                            Result result{};

-                            VkResult res = MainTest(result, config);

-                            assert(res == VK_SUCCESS);

-                            WriteMainTestResult(file, CODE_DESCRIPTION, testDescription, config, result);

+                           const char* testDescription = desc6.c_str();

+

+                            for(size_t repeat = 0; repeat < repeatCount; ++repeat)

+                            {

+                                printf("%s Repeat %u\n", testDescription, (uint32_t)repeat);

+

+                                Result result{};

+                                VkResult res = MainTest(result, config);

+                                assert(res == VK_SUCCESS);

+                                WriteMainTestResult(file, CODE_DESCRIPTION, testDescription, config, result);

+                            }

                         }

                     }

                 }

@@ -3978,6 +4015,7 @@
 

     // # Simple tests

 

+#if 0

     TestBasics();

 #if VMA_DEBUG_MARGIN

     TestDebugMargin();

@@ -3996,6 +4034,7 @@
     BenchmarkLinearAllocator();

     TestDefragmentationSimple();

     TestDefragmentationFull();

+#endif

 

     // # Detailed tests

     FILE* file;

@@ -4006,8 +4045,10 @@
     PerformMainTests(file);

     //PerformCustomMainTest(file);

 

+#if 0

     WritePoolTestResultHeader(file);

     PerformPoolTests(file);

+#endif

     //PerformCustomPoolTest(file);

     

     fclose(file);

diff --git a/src/vk_mem_alloc.h b/src/vk_mem_alloc.h
index 47c78b0..758e9f6 100644
--- a/src/vk_mem_alloc.h
+++ b/src/vk_mem_alloc.h
@@ -1813,6 +1813,29 @@
     */

     VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT = 0x00000040,

 

+    /** Allocation strategy that chooses smallest possible free range for the

+    allocation.

+    */

+    VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT  = 0x00010000,

+    /** Allocation strategy that chooses biggest possible free range for the

+    allocation.

+    */

+    VMA_ALLOCATION_CREATE_STRATEGY_WORST_FIT_BIT = 0x00020000,

+    /** Allocation strategy that chooses first suitable free range for the

+    allocation.

+    */

+    VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT = 0x00040000,

+

+    /** Allocation strategy that tries to minimize memory usage.

+    */

+    VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT = VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT,

+    /** Allocation strategy that tries to minimize allocation time.

+    */

+    VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT = VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT,

+    /** Allocation strategy that tries to minimize memory fragmentation.

+    */

+    VMA_ALLOCATION_CREATE_STRATEGY_MIN_FRAGMENTATION_BIT = VMA_ALLOCATION_CREATE_STRATEGY_WORST_FIT_BIT,

+

     VMA_ALLOCATION_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF

 } VmaAllocationCreateFlagBits;

 typedef VkFlags VmaAllocationCreateFlags;

@@ -2800,22 +2823,6 @@
    #define VMA_ATOMIC_UINT32 std::atomic<uint32_t>

 #endif

 

-#ifndef VMA_BEST_FIT

-    /**

-    Main parameter for function assessing how good is a free suballocation for a new

-    allocation request.

-    

-    - Set to 1 to use Best-Fit algorithm - prefer smaller blocks, as close to the

-      size of requested allocations as possible.

-    - Set to 0 to use Worst-Fit algorithm - prefer larger blocks, as large as

-      possible.

-    

-    Experiments in special testing environment showed that Best-Fit algorithm is

-    better.

-    */

-    #define VMA_BEST_FIT (1)

-#endif

-

 #ifndef VMA_DEBUG_ALWAYS_DEDICATED_MEMORY

     /**

     Every allocation will have its own memory block.

@@ -4551,6 +4558,7 @@
         bool upperAddress,

         VmaSuballocationType allocType,

         bool canMakeOtherLost,

+        uint32_t strategy, // Always one of VMA_ALLOCATION_CREATE_STRATEGY_* flags.

         VmaAllocationRequest* pAllocationRequest) = 0;

 

     virtual bool MakeRequestedAllocationsLost(

@@ -4623,6 +4631,7 @@
         bool upperAddress,

         VmaSuballocationType allocType,

         bool canMakeOtherLost,

+        uint32_t strategy,

         VmaAllocationRequest* pAllocationRequest);

 

     virtual bool MakeRequestedAllocationsLost(

@@ -4791,6 +4800,7 @@
         bool upperAddress,

         VmaSuballocationType allocType,

         bool canMakeOtherLost,

+        uint32_t strategy,

         VmaAllocationRequest* pAllocationRequest);

 

     virtual bool MakeRequestedAllocationsLost(

@@ -5051,6 +5061,7 @@
         VmaAllocationCreateFlags allocFlags,

         void* pUserData,

         VmaSuballocationType suballocType,

+        uint32_t strategy,

         VmaAllocation* pAllocation);

 

     VkResult CreateBlock(VkDeviceSize blockSize, size_t* pNewBlockIndex);

@@ -6591,16 +6602,6 @@
 

 #endif // #if VMA_STATS_STRING_ENABLED

 

-/*

-How many suitable free suballocations to analyze before choosing best one.

-- Set to 1 to use First-Fit algorithm - first suitable free suballocation will

-  be chosen.

-- Set to UINT32_MAX to use Best-Fit/Worst-Fit algorithm - all suitable free

-  suballocations will be analized and best one will be chosen.

-- Any other value is also acceptable.

-*/

-//static const uint32_t MAX_SUITABLE_SUBALLOCATIONS_TO_CHECK = 8;

-

 bool VmaBlockMetadata_Generic::CreateAllocationRequest(

     uint32_t currentFrameIndex,

     uint32_t frameInUseCount,

@@ -6610,6 +6611,7 @@
     bool upperAddress,

     VmaSuballocationType allocType,

     bool canMakeOtherLost,

+    uint32_t strategy,

     VmaAllocationRequest* pAllocationRequest)

 {

     VMA_ASSERT(allocSize > 0);

@@ -6619,7 +6621,8 @@
     VMA_HEAVY_ASSERT(Validate());

 

     // There is not enough total free space in this block to fullfill the request: Early return.

-    if(canMakeOtherLost == false && m_SumFreeSize < allocSize + 2 * VMA_DEBUG_MARGIN)

+    if(canMakeOtherLost == false &&

+        m_SumFreeSize < allocSize + 2 * VMA_DEBUG_MARGIN)

     {

         return false;

     }

@@ -6628,7 +6631,7 @@
     const size_t freeSuballocCount = m_FreeSuballocationsBySize.size();

     if(freeSuballocCount > 0)

     {

-        if(VMA_BEST_FIT)

+        if(strategy == VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT)

         {

             // Find first free suballocation with size not less than allocSize + 2 * VMA_DEBUG_MARGIN.

             VmaSuballocationList::iterator* const it = VmaBinaryFindFirstNotLess(

@@ -6658,7 +6661,7 @@
                 }

             }

         }

-        else

+        else // WORST_FIT, FIRST_FIT

         {

             // Search staring from biggest suballocations.

             for(size_t index = freeSuballocCount; index--; )

@@ -6715,7 +6718,8 @@
                 {

                     tmpAllocRequest.item = suballocIt;

 

-                    if(tmpAllocRequest.CalcCost() < pAllocationRequest->CalcCost())

+                    if(tmpAllocRequest.CalcCost() < pAllocationRequest->CalcCost() ||

+                        strategy == VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT)

                     {

                         *pAllocationRequest = tmpAllocRequest;

                     }

@@ -8321,6 +8325,7 @@
     bool upperAddress,

     VmaSuballocationType allocType,

     bool canMakeOtherLost,

+    uint32_t strategy,

     VmaAllocationRequest* pAllocationRequest)

 {

     VMA_ASSERT(allocSize > 0);

@@ -9459,6 +9464,10 @@
     const bool canCreateNewBlock =

         ((createInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) == 0) &&

         (m_Blocks.size() < m_MaxBlockCount);

+    uint32_t strategy = createInfo.flags & (

+        VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT |

+        VMA_ALLOCATION_CREATE_STRATEGY_WORST_FIT_BIT |

+        VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT);

 

     // If linearAlgorithm is used, canMakeOtherLost is available only when used as ring buffer.

     // Which in turn is available only when maxBlockCount = 1.

@@ -9474,6 +9483,20 @@
         return VK_ERROR_FEATURE_NOT_PRESENT;

     }

 

+    // Validate strategy.

+    switch(strategy)

+    {

+    case 0:

+        strategy = VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT;

+        break;

+    case VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT:

+    case VMA_ALLOCATION_CREATE_STRATEGY_WORST_FIT_BIT:

+    case VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT:

+        break;

+    default:

+        return VK_ERROR_FEATURE_NOT_PRESENT;

+    }

+

     // Early reject: requested allocation size is larger that maximum block size for this block vector.

     if(size + 2 * VMA_DEBUG_MARGIN > m_PreferredBlockSize)

     {

@@ -9509,6 +9532,7 @@
                     allocFlagsCopy,

                     createInfo.pUserData,

                     suballocType,

+                    strategy,

                     pAllocation);

                 if(res == VK_SUCCESS)

                 {

@@ -9519,25 +9543,54 @@
         }

         else

         {

-            // Forward order in m_Blocks - prefer blocks with smallest amount of free space.

-            for(size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex )

+            if(strategy == VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT)

             {

-                VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex];

-                VMA_ASSERT(pCurrBlock);

-                VkResult res = AllocateFromBlock(

-                    pCurrBlock,

-                    hCurrentPool,

-                    currentFrameIndex,

-                    size,

-                    alignment,

-                    allocFlagsCopy,

-                    createInfo.pUserData,

-                    suballocType,

-                    pAllocation);

-                if(res == VK_SUCCESS)

+                // Forward order in m_Blocks - prefer blocks with smallest amount of free space.

+                for(size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex )

                 {

-                    VMA_DEBUG_LOG("    Returned from existing block #%u", (uint32_t)blockIndex);

-                    return VK_SUCCESS;

+                    VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex];

+                    VMA_ASSERT(pCurrBlock);

+                    VkResult res = AllocateFromBlock(

+                        pCurrBlock,

+                        hCurrentPool,

+                        currentFrameIndex,

+                        size,

+                        alignment,

+                        allocFlagsCopy,

+                        createInfo.pUserData,

+                        suballocType,

+                        strategy,

+                        pAllocation);

+                    if(res == VK_SUCCESS)

+                    {

+                        VMA_DEBUG_LOG("    Returned from existing block #%u", (uint32_t)blockIndex);

+                        return VK_SUCCESS;

+                    }

+                }

+            }

+            else // WORST_FIT, FIRST_FIT

+            {

+                // Backward order in m_Blocks - prefer blocks with largest amount of free space.

+                for(size_t blockIndex = m_Blocks.size(); blockIndex--; )

+                {

+                    VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex];

+                    VMA_ASSERT(pCurrBlock);

+                    VkResult res = AllocateFromBlock(

+                        pCurrBlock,

+                        hCurrentPool,

+                        currentFrameIndex,

+                        size,

+                        alignment,

+                        allocFlagsCopy,

+                        createInfo.pUserData,

+                        suballocType,

+                        strategy,

+                        pAllocation);

+                    if(res == VK_SUCCESS)

+                    {

+                        VMA_DEBUG_LOG("    Returned from existing block #%u", (uint32_t)blockIndex);

+                        return VK_SUCCESS;

+                    }

                 }

             }

         }

@@ -9604,6 +9657,7 @@
                     allocFlagsCopy,

                     createInfo.pUserData,

                     suballocType,

+                    strategy,

                     pAllocation);

                 if(res == VK_SUCCESS)

                 {

@@ -9630,34 +9684,76 @@
             VkDeviceSize bestRequestCost = VK_WHOLE_SIZE;

 

             // 1. Search existing allocations.

-            // Forward order in m_Blocks - prefer blocks with smallest amount of free space.

-            for(size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex )

+            if(strategy == VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT)

             {

-                VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex];

-                VMA_ASSERT(pCurrBlock);

-                VmaAllocationRequest currRequest = {};

-                if(pCurrBlock->m_pMetadata->CreateAllocationRequest(

-                    currentFrameIndex,

-                    m_FrameInUseCount,

-                    m_BufferImageGranularity,

-                    size,

-                    alignment,

-                    (createInfo.flags & VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT) != 0,

-                    suballocType,

-                    canMakeOtherLost,

-                    &currRequest))

+                // Forward order in m_Blocks - prefer blocks with smallest amount of free space.

+                for(size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex )

                 {

-                    const VkDeviceSize currRequestCost = currRequest.CalcCost();

-                    if(pBestRequestBlock == VMA_NULL ||

-                        currRequestCost < bestRequestCost)

+                    VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex];

+                    VMA_ASSERT(pCurrBlock);

+                    VmaAllocationRequest currRequest = {};

+                    if(pCurrBlock->m_pMetadata->CreateAllocationRequest(

+                        currentFrameIndex,

+                        m_FrameInUseCount,

+                        m_BufferImageGranularity,

+                        size,

+                        alignment,

+                        (createInfo.flags & VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT) != 0,

+                        suballocType,

+                        canMakeOtherLost,

+                        strategy,

+                        &currRequest))

                     {

-                        pBestRequestBlock = pCurrBlock;

-                        bestRequest = currRequest;

-                        bestRequestCost = currRequestCost;

-

-                        if(bestRequestCost == 0)

+                        const VkDeviceSize currRequestCost = currRequest.CalcCost();

+                        if(pBestRequestBlock == VMA_NULL ||

+                            currRequestCost < bestRequestCost)

                         {

-                            break;

+                            pBestRequestBlock = pCurrBlock;

+                            bestRequest = currRequest;

+                            bestRequestCost = currRequestCost;

+

+                            if(bestRequestCost == 0)

+                            {

+                                break;

+                            }

+                        }

+                    }

+                }

+            }

+            else // WORST_FIT, FIRST_FIT

+            {

+                // Backward order in m_Blocks - prefer blocks with largest amount of free space.

+                for(size_t blockIndex = m_Blocks.size(); blockIndex--; )

+                {

+                    VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex];

+                    VMA_ASSERT(pCurrBlock);

+                    VmaAllocationRequest currRequest = {};

+                    if(pCurrBlock->m_pMetadata->CreateAllocationRequest(

+                        currentFrameIndex,

+                        m_FrameInUseCount,

+                        m_BufferImageGranularity,

+                        size,

+                        alignment,

+                        (createInfo.flags & VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT) != 0,

+                        suballocType,

+                        canMakeOtherLost,

+                        strategy,

+                        &currRequest))

+                    {

+                        const VkDeviceSize currRequestCost = currRequest.CalcCost();

+                        if(pBestRequestBlock == VMA_NULL ||

+                            currRequestCost < bestRequestCost ||

+                            strategy == VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT)

+                        {

+                            pBestRequestBlock = pCurrBlock;

+                            bestRequest = currRequest;

+                            bestRequestCost = currRequestCost;

+

+                            if(bestRequestCost == 0 ||

+                                strategy == VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT)

+                            {

+                                break;

+                            }

                         }

                     }

                 }

@@ -9850,6 +9946,7 @@
     VmaAllocationCreateFlags allocFlags,

     void* pUserData,

     VmaSuballocationType suballocType,

+    uint32_t strategy,

     VmaAllocation* pAllocation)

 {

     VMA_ASSERT((allocFlags & VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT) == 0);

@@ -9867,6 +9964,7 @@
         isUpperAddress,

         suballocType,

         false, // canMakeOtherLost

+        strategy,

         &currRequest))

     {

         // Allocate from pCurrBlock.

@@ -10277,6 +10375,7 @@
                 false, // upperAddress

                 suballocType,

                 false, // canMakeOtherLost

+                VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT,

                 &dstAllocRequest) &&

             MoveMakesSense(

                 dstBlockIndex, dstAllocRequest.offset, srcBlockIndex, srcOffset))