Merge branch 'master' into allocation_defragmentation_strategies
diff --git a/src/Tests.cpp b/src/Tests.cpp
index 842fba2..d04045d 100644
--- a/src/Tests.cpp
+++ b/src/Tests.cpp
@@ -21,10 +21,10 @@
enum class FREE_ORDER { FORWARD, BACKWARD, RANDOM, COUNT };
-static const wchar_t* FREE_ORDER_NAMES[] = {
- L"FORWARD",
- L"BACKWARD",
- L"RANDOM",
+static const char* FREE_ORDER_NAMES[] = {
+ "FORWARD",
+ "BACKWARD",
+ "RANDOM",
};
struct AllocationSize
@@ -45,6 +45,7 @@
uint32_t ThreadCount;
uint32_t ThreadsUsingCommonAllocationsProbabilityPercent;
FREE_ORDER FreeOrder;
+ VmaAllocationCreateFlags AllocationStrategy; // For VMA_ALLOCATION_CREATE_STRATEGY_*
};
struct Result
@@ -264,6 +265,7 @@
VmaAllocationCreateInfo memReq = {};
memReq.usage = (VmaMemoryUsage)(VMA_MEMORY_USAGE_GPU_ONLY + memUsageIndex);
+ memReq.flags |= config.AllocationStrategy;
Allocation allocation = {};
VmaAllocationInfo allocationInfo;
@@ -1002,7 +1004,7 @@
for(size_t i = 0; i < allocations.size(); ++i)
ValidateAllocationData(allocations[i]);
- SaveAllocatorStatsToFile(L"Before.csv");
+ //SaveAllocatorStatsToFile(L"Before.csv");
{
std::vector<VmaAllocation> vmaAllocations(allocations.size());
@@ -1051,9 +1053,9 @@
for(size_t i = 0; i < allocations.size(); ++i)
ValidateAllocationData(allocations[i]);
- wchar_t fileName[MAX_PATH];
- swprintf(fileName, MAX_PATH, L"After_%02u.csv", defragIndex);
- SaveAllocatorStatsToFile(fileName);
+ //wchar_t fileName[MAX_PATH];
+ //swprintf(fileName, MAX_PATH, L"After_%02u.csv", defragIndex);
+ //SaveAllocatorStatsToFile(fileName);
}
}
@@ -2210,9 +2212,9 @@
vmaDestroyPool(g_hAllocator, pool);
- wprintf(L" LinearAlgorithm=%u %s FreeOrder=%s: allocations %g s, free %g s\n",
+ printf(" LinearAlgorithm=%u %s FreeOrder=%s: allocations %g s, free %g s\n",
linear ? 1 : 0,
- empty ? L"Empty" : L"Not empty",
+ empty ? "Empty" : "Not empty",
FREE_ORDER_NAMES[(size_t)freeOrder],
ToFloatSeconds(allocTotalDuration),
ToFloatSeconds(freeTotalDuration));
@@ -3351,12 +3353,12 @@
fprintf(file,
"%s,%s,%s,"
- "BeginBytesToAllocate=%I64u MaxBytesToAllocate=%I64u AdditionalOperationCount=%u ThreadCount=%u FreeOrder=%d,"
+ "BeginBytesToAllocate=%I64u MaxBytesToAllocate=%I64u AdditionalOperationCount=%u ThreadCount=%u FreeOrder=%s,"
"%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%I64u,%I64u,%I64u\n",
codeDescription,
testDescription,
timeStr,
- config.BeginBytesToAllocate, config.MaxBytesToAllocate, config.AdditionalOperationCount, config.ThreadCount, (uint32_t)config.FreeOrder,
+ config.BeginBytesToAllocate, config.MaxBytesToAllocate, config.AdditionalOperationCount, config.ThreadCount, FREE_ORDER_NAMES[(uint32_t)config.FreeOrder],
totalTimeSeconds * 1e6f,
allocationTimeMinSeconds * 1e6f,
allocationTimeAvgSeconds * 1e6f,
@@ -3447,6 +3449,7 @@
config.FreeOrder = FREE_ORDER::FORWARD;
config.ThreadCount = 16;
config.ThreadsUsingCommonAllocationsProbabilityPercent = 50;
+ config.AllocationStrategy = 0;
// Buffers
//config.AllocationSizes.push_back({4, 16, 1024});
@@ -3522,6 +3525,18 @@
case CONFIG_TYPE_MAXIMUM: threadCountCount = 7; break;
default: assert(0);
}
+
+ size_t strategyCount = 0;
+ switch(ConfigType)
+ {
+ case CONFIG_TYPE_MINIMUM: strategyCount = 1; break;
+ case CONFIG_TYPE_SMALL: strategyCount = 1; break;
+ case CONFIG_TYPE_AVERAGE: strategyCount = 2; break;
+ case CONFIG_TYPE_LARGE: strategyCount = 2; break;
+ case CONFIG_TYPE_MAXIMUM: strategyCount = 3; break;
+ default: assert(0);
+ }
+
for(size_t threadCountIndex = 0; threadCountIndex < threadCountCount; ++threadCountIndex)
{
std::string desc1;
@@ -3718,16 +3733,38 @@
assert(0);
}
- const char* testDescription = desc5.c_str();
-
- for(size_t repeat = 0; repeat < repeatCount; ++repeat)
+ for(size_t strategyIndex = 0; strategyIndex < strategyCount; ++strategyIndex)
{
- printf("%s Repeat %u\n", testDescription, (uint32_t)repeat);
+ std::string desc6 = desc5;
+ switch(strategyIndex)
+ {
+ case 0:
+ desc6 += " BestFit";
+ config.AllocationStrategy = VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT;
+ break;
+ case 1:
+ desc6 += " WorstFit";
+ config.AllocationStrategy = VMA_ALLOCATION_CREATE_STRATEGY_WORST_FIT_BIT;
+ break;
+ case 2:
+ desc6 += " FirstFit";
+ config.AllocationStrategy = VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT;
+ break;
+ default:
+ assert(0);
+ }
- Result result{};
- VkResult res = MainTest(result, config);
- assert(res == VK_SUCCESS);
- WriteMainTestResult(file, CODE_DESCRIPTION, testDescription, config, result);
+ const char* testDescription = desc6.c_str();
+
+ for(size_t repeat = 0; repeat < repeatCount; ++repeat)
+ {
+ printf("%s Repeat %u\n", testDescription, (uint32_t)repeat);
+
+ Result result{};
+ VkResult res = MainTest(result, config);
+ assert(res == VK_SUCCESS);
+ WriteMainTestResult(file, CODE_DESCRIPTION, testDescription, config, result);
+ }
}
}
}
@@ -3978,6 +4015,7 @@
// # Simple tests
+#if 0
TestBasics();
#if VMA_DEBUG_MARGIN
TestDebugMargin();
@@ -3996,6 +4034,7 @@
BenchmarkLinearAllocator();
TestDefragmentationSimple();
TestDefragmentationFull();
+#endif
// # Detailed tests
FILE* file;
@@ -4006,8 +4045,10 @@
PerformMainTests(file);
//PerformCustomMainTest(file);
+#if 0
WritePoolTestResultHeader(file);
PerformPoolTests(file);
+#endif
//PerformCustomPoolTest(file);
fclose(file);
diff --git a/src/vk_mem_alloc.h b/src/vk_mem_alloc.h
index 47c78b0..758e9f6 100644
--- a/src/vk_mem_alloc.h
+++ b/src/vk_mem_alloc.h
@@ -1813,6 +1813,29 @@
*/
VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT = 0x00000040,
+ /** Allocation strategy that chooses smallest possible free range for the
+ allocation.
+ */
+ VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT = 0x00010000,
+ /** Allocation strategy that chooses biggest possible free range for the
+ allocation.
+ */
+ VMA_ALLOCATION_CREATE_STRATEGY_WORST_FIT_BIT = 0x00020000,
+ /** Allocation strategy that chooses first suitable free range for the
+ allocation.
+ */
+ VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT = 0x00040000,
+
+ /** Allocation strategy that tries to minimize memory usage.
+ */
+ VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT = VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT,
+ /** Allocation strategy that tries to minimize allocation time.
+ */
+ VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT = VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT,
+ /** Allocation strategy that tries to minimize memory fragmentation.
+ */
+ VMA_ALLOCATION_CREATE_STRATEGY_MIN_FRAGMENTATION_BIT = VMA_ALLOCATION_CREATE_STRATEGY_WORST_FIT_BIT,
+
VMA_ALLOCATION_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
} VmaAllocationCreateFlagBits;
typedef VkFlags VmaAllocationCreateFlags;
@@ -2800,22 +2823,6 @@
#define VMA_ATOMIC_UINT32 std::atomic<uint32_t>
#endif
-#ifndef VMA_BEST_FIT
- /**
- Main parameter for function assessing how good is a free suballocation for a new
- allocation request.
-
- - Set to 1 to use Best-Fit algorithm - prefer smaller blocks, as close to the
- size of requested allocations as possible.
- - Set to 0 to use Worst-Fit algorithm - prefer larger blocks, as large as
- possible.
-
- Experiments in special testing environment showed that Best-Fit algorithm is
- better.
- */
- #define VMA_BEST_FIT (1)
-#endif
-
#ifndef VMA_DEBUG_ALWAYS_DEDICATED_MEMORY
/**
Every allocation will have its own memory block.
@@ -4551,6 +4558,7 @@
bool upperAddress,
VmaSuballocationType allocType,
bool canMakeOtherLost,
+ uint32_t strategy, // Always one of VMA_ALLOCATION_CREATE_STRATEGY_* flags.
VmaAllocationRequest* pAllocationRequest) = 0;
virtual bool MakeRequestedAllocationsLost(
@@ -4623,6 +4631,7 @@
bool upperAddress,
VmaSuballocationType allocType,
bool canMakeOtherLost,
+ uint32_t strategy,
VmaAllocationRequest* pAllocationRequest);
virtual bool MakeRequestedAllocationsLost(
@@ -4791,6 +4800,7 @@
bool upperAddress,
VmaSuballocationType allocType,
bool canMakeOtherLost,
+ uint32_t strategy,
VmaAllocationRequest* pAllocationRequest);
virtual bool MakeRequestedAllocationsLost(
@@ -5051,6 +5061,7 @@
VmaAllocationCreateFlags allocFlags,
void* pUserData,
VmaSuballocationType suballocType,
+ uint32_t strategy,
VmaAllocation* pAllocation);
VkResult CreateBlock(VkDeviceSize blockSize, size_t* pNewBlockIndex);
@@ -6591,16 +6602,6 @@
#endif // #if VMA_STATS_STRING_ENABLED
-/*
-How many suitable free suballocations to analyze before choosing best one.
-- Set to 1 to use First-Fit algorithm - first suitable free suballocation will
- be chosen.
-- Set to UINT32_MAX to use Best-Fit/Worst-Fit algorithm - all suitable free
- suballocations will be analized and best one will be chosen.
-- Any other value is also acceptable.
-*/
-//static const uint32_t MAX_SUITABLE_SUBALLOCATIONS_TO_CHECK = 8;
-
bool VmaBlockMetadata_Generic::CreateAllocationRequest(
uint32_t currentFrameIndex,
uint32_t frameInUseCount,
@@ -6610,6 +6611,7 @@
bool upperAddress,
VmaSuballocationType allocType,
bool canMakeOtherLost,
+ uint32_t strategy,
VmaAllocationRequest* pAllocationRequest)
{
VMA_ASSERT(allocSize > 0);
@@ -6619,7 +6621,8 @@
VMA_HEAVY_ASSERT(Validate());
// There is not enough total free space in this block to fullfill the request: Early return.
- if(canMakeOtherLost == false && m_SumFreeSize < allocSize + 2 * VMA_DEBUG_MARGIN)
+ if(canMakeOtherLost == false &&
+ m_SumFreeSize < allocSize + 2 * VMA_DEBUG_MARGIN)
{
return false;
}
@@ -6628,7 +6631,7 @@
const size_t freeSuballocCount = m_FreeSuballocationsBySize.size();
if(freeSuballocCount > 0)
{
- if(VMA_BEST_FIT)
+ if(strategy == VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT)
{
// Find first free suballocation with size not less than allocSize + 2 * VMA_DEBUG_MARGIN.
VmaSuballocationList::iterator* const it = VmaBinaryFindFirstNotLess(
@@ -6658,7 +6661,7 @@
}
}
}
- else
+ else // WORST_FIT, FIRST_FIT
{
// Search staring from biggest suballocations.
for(size_t index = freeSuballocCount; index--; )
@@ -6715,7 +6718,8 @@
{
tmpAllocRequest.item = suballocIt;
- if(tmpAllocRequest.CalcCost() < pAllocationRequest->CalcCost())
+ if(tmpAllocRequest.CalcCost() < pAllocationRequest->CalcCost() ||
+ strategy == VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT)
{
*pAllocationRequest = tmpAllocRequest;
}
@@ -8321,6 +8325,7 @@
bool upperAddress,
VmaSuballocationType allocType,
bool canMakeOtherLost,
+ uint32_t strategy,
VmaAllocationRequest* pAllocationRequest)
{
VMA_ASSERT(allocSize > 0);
@@ -9459,6 +9464,10 @@
const bool canCreateNewBlock =
((createInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) == 0) &&
(m_Blocks.size() < m_MaxBlockCount);
+ uint32_t strategy = createInfo.flags & (
+ VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT |
+ VMA_ALLOCATION_CREATE_STRATEGY_WORST_FIT_BIT |
+ VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT);
// If linearAlgorithm is used, canMakeOtherLost is available only when used as ring buffer.
// Which in turn is available only when maxBlockCount = 1.
@@ -9474,6 +9483,20 @@
return VK_ERROR_FEATURE_NOT_PRESENT;
}
+ // Validate strategy.
+ switch(strategy)
+ {
+ case 0:
+ strategy = VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT;
+ break;
+ case VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT:
+ case VMA_ALLOCATION_CREATE_STRATEGY_WORST_FIT_BIT:
+ case VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT:
+ break;
+ default:
+ return VK_ERROR_FEATURE_NOT_PRESENT;
+ }
+
// Early reject: requested allocation size is larger that maximum block size for this block vector.
if(size + 2 * VMA_DEBUG_MARGIN > m_PreferredBlockSize)
{
@@ -9509,6 +9532,7 @@
allocFlagsCopy,
createInfo.pUserData,
suballocType,
+ strategy,
pAllocation);
if(res == VK_SUCCESS)
{
@@ -9519,25 +9543,54 @@
}
else
{
- // Forward order in m_Blocks - prefer blocks with smallest amount of free space.
- for(size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex )
+ if(strategy == VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT)
{
- VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex];
- VMA_ASSERT(pCurrBlock);
- VkResult res = AllocateFromBlock(
- pCurrBlock,
- hCurrentPool,
- currentFrameIndex,
- size,
- alignment,
- allocFlagsCopy,
- createInfo.pUserData,
- suballocType,
- pAllocation);
- if(res == VK_SUCCESS)
+ // Forward order in m_Blocks - prefer blocks with smallest amount of free space.
+ for(size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex )
{
- VMA_DEBUG_LOG(" Returned from existing block #%u", (uint32_t)blockIndex);
- return VK_SUCCESS;
+ VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex];
+ VMA_ASSERT(pCurrBlock);
+ VkResult res = AllocateFromBlock(
+ pCurrBlock,
+ hCurrentPool,
+ currentFrameIndex,
+ size,
+ alignment,
+ allocFlagsCopy,
+ createInfo.pUserData,
+ suballocType,
+ strategy,
+ pAllocation);
+ if(res == VK_SUCCESS)
+ {
+ VMA_DEBUG_LOG(" Returned from existing block #%u", (uint32_t)blockIndex);
+ return VK_SUCCESS;
+ }
+ }
+ }
+ else // WORST_FIT, FIRST_FIT
+ {
+ // Backward order in m_Blocks - prefer blocks with largest amount of free space.
+ for(size_t blockIndex = m_Blocks.size(); blockIndex--; )
+ {
+ VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex];
+ VMA_ASSERT(pCurrBlock);
+ VkResult res = AllocateFromBlock(
+ pCurrBlock,
+ hCurrentPool,
+ currentFrameIndex,
+ size,
+ alignment,
+ allocFlagsCopy,
+ createInfo.pUserData,
+ suballocType,
+ strategy,
+ pAllocation);
+ if(res == VK_SUCCESS)
+ {
+ VMA_DEBUG_LOG(" Returned from existing block #%u", (uint32_t)blockIndex);
+ return VK_SUCCESS;
+ }
}
}
}
@@ -9604,6 +9657,7 @@
allocFlagsCopy,
createInfo.pUserData,
suballocType,
+ strategy,
pAllocation);
if(res == VK_SUCCESS)
{
@@ -9630,34 +9684,76 @@
VkDeviceSize bestRequestCost = VK_WHOLE_SIZE;
// 1. Search existing allocations.
- // Forward order in m_Blocks - prefer blocks with smallest amount of free space.
- for(size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex )
+ if(strategy == VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT)
{
- VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex];
- VMA_ASSERT(pCurrBlock);
- VmaAllocationRequest currRequest = {};
- if(pCurrBlock->m_pMetadata->CreateAllocationRequest(
- currentFrameIndex,
- m_FrameInUseCount,
- m_BufferImageGranularity,
- size,
- alignment,
- (createInfo.flags & VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT) != 0,
- suballocType,
- canMakeOtherLost,
- &currRequest))
+ // Forward order in m_Blocks - prefer blocks with smallest amount of free space.
+ for(size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex )
{
- const VkDeviceSize currRequestCost = currRequest.CalcCost();
- if(pBestRequestBlock == VMA_NULL ||
- currRequestCost < bestRequestCost)
+ VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex];
+ VMA_ASSERT(pCurrBlock);
+ VmaAllocationRequest currRequest = {};
+ if(pCurrBlock->m_pMetadata->CreateAllocationRequest(
+ currentFrameIndex,
+ m_FrameInUseCount,
+ m_BufferImageGranularity,
+ size,
+ alignment,
+ (createInfo.flags & VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT) != 0,
+ suballocType,
+ canMakeOtherLost,
+ strategy,
+ &currRequest))
{
- pBestRequestBlock = pCurrBlock;
- bestRequest = currRequest;
- bestRequestCost = currRequestCost;
-
- if(bestRequestCost == 0)
+ const VkDeviceSize currRequestCost = currRequest.CalcCost();
+ if(pBestRequestBlock == VMA_NULL ||
+ currRequestCost < bestRequestCost)
{
- break;
+ pBestRequestBlock = pCurrBlock;
+ bestRequest = currRequest;
+ bestRequestCost = currRequestCost;
+
+ if(bestRequestCost == 0)
+ {
+ break;
+ }
+ }
+ }
+ }
+ }
+ else // WORST_FIT, FIRST_FIT
+ {
+ // Backward order in m_Blocks - prefer blocks with largest amount of free space.
+ for(size_t blockIndex = m_Blocks.size(); blockIndex--; )
+ {
+ VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex];
+ VMA_ASSERT(pCurrBlock);
+ VmaAllocationRequest currRequest = {};
+ if(pCurrBlock->m_pMetadata->CreateAllocationRequest(
+ currentFrameIndex,
+ m_FrameInUseCount,
+ m_BufferImageGranularity,
+ size,
+ alignment,
+ (createInfo.flags & VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT) != 0,
+ suballocType,
+ canMakeOtherLost,
+ strategy,
+ &currRequest))
+ {
+ const VkDeviceSize currRequestCost = currRequest.CalcCost();
+ if(pBestRequestBlock == VMA_NULL ||
+ currRequestCost < bestRequestCost ||
+ strategy == VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT)
+ {
+ pBestRequestBlock = pCurrBlock;
+ bestRequest = currRequest;
+ bestRequestCost = currRequestCost;
+
+ if(bestRequestCost == 0 ||
+ strategy == VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT)
+ {
+ break;
+ }
}
}
}
@@ -9850,6 +9946,7 @@
VmaAllocationCreateFlags allocFlags,
void* pUserData,
VmaSuballocationType suballocType,
+ uint32_t strategy,
VmaAllocation* pAllocation)
{
VMA_ASSERT((allocFlags & VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT) == 0);
@@ -9867,6 +9964,7 @@
isUpperAddress,
suballocType,
false, // canMakeOtherLost
+ strategy,
&currRequest))
{
// Allocate from pCurrBlock.
@@ -10277,6 +10375,7 @@
false, // upperAddress
suballocType,
false, // canMakeOtherLost
+ VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT,
&dstAllocRequest) &&
MoveMakesSense(
dstBlockIndex, dstAllocRequest.offset, srcBlockIndex, srcOffset))