blob: 59356001842b9d0fb8a8750347ea2bf58565d9d4 [file] [log] [blame]
//
// Copyright (c) 2017-2020 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
#include "Tests.h"
#include "VmaUsage.h"
#include "Common.h"
#include <atomic>
#include <thread>
#include <mutex>
#include <functional>
#ifdef _WIN32
static const char* CODE_DESCRIPTION = "Foo";
extern VkCommandBuffer g_hTemporaryCommandBuffer;
extern const VkAllocationCallbacks* g_Allocs;
void BeginSingleTimeCommands();
void EndSingleTimeCommands();
#ifndef VMA_DEBUG_MARGIN
#define VMA_DEBUG_MARGIN 0
#endif
enum CONFIG_TYPE {
CONFIG_TYPE_MINIMUM,
CONFIG_TYPE_SMALL,
CONFIG_TYPE_AVERAGE,
CONFIG_TYPE_LARGE,
CONFIG_TYPE_MAXIMUM,
CONFIG_TYPE_COUNT
};
static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_SMALL;
//static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_LARGE;
enum class FREE_ORDER { FORWARD, BACKWARD, RANDOM, COUNT };
static const char* FREE_ORDER_NAMES[] = {
"FORWARD",
"BACKWARD",
"RANDOM",
};
// Copy of internal VmaAlgorithmToStr.
static const char* AlgorithmToStr(uint32_t algorithm)
{
switch(algorithm)
{
case VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT:
return "Linear";
case VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT:
return "Buddy";
case 0:
return "Default";
default:
assert(0);
return "";
}
}
struct AllocationSize
{
uint32_t Probability;
VkDeviceSize BufferSizeMin, BufferSizeMax;
uint32_t ImageSizeMin, ImageSizeMax;
};
struct Config
{
uint32_t RandSeed;
VkDeviceSize BeginBytesToAllocate;
uint32_t AdditionalOperationCount;
VkDeviceSize MaxBytesToAllocate;
uint32_t MemUsageProbability[4]; // For VMA_MEMORY_USAGE_*
std::vector<AllocationSize> AllocationSizes;
uint32_t ThreadCount;
uint32_t ThreadsUsingCommonAllocationsProbabilityPercent;
FREE_ORDER FreeOrder;
VmaAllocationCreateFlags AllocationStrategy; // For VMA_ALLOCATION_CREATE_STRATEGY_*
};
struct Result
{
duration TotalTime;
duration AllocationTimeMin, AllocationTimeAvg, AllocationTimeMax;
duration DeallocationTimeMin, DeallocationTimeAvg, DeallocationTimeMax;
VkDeviceSize TotalMemoryAllocated;
VkDeviceSize FreeRangeSizeAvg, FreeRangeSizeMax;
};
void TestDefragmentationSimple();
void TestDefragmentationFull();
struct PoolTestConfig
{
uint32_t RandSeed;
uint32_t ThreadCount;
VkDeviceSize PoolSize;
uint32_t FrameCount;
uint32_t TotalItemCount;
// Range for number of items used in each frame.
uint32_t UsedItemCountMin, UsedItemCountMax;
// Percent of items to make unused, and possibly make some others used in each frame.
uint32_t ItemsToMakeUnusedPercent;
std::vector<AllocationSize> AllocationSizes;
VkDeviceSize CalcAvgResourceSize() const
{
uint32_t probabilitySum = 0;
VkDeviceSize sizeSum = 0;
for(size_t i = 0; i < AllocationSizes.size(); ++i)
{
const AllocationSize& allocSize = AllocationSizes[i];
if(allocSize.BufferSizeMax > 0)
sizeSum += (allocSize.BufferSizeMin + allocSize.BufferSizeMax) / 2 * allocSize.Probability;
else
{
const VkDeviceSize avgDimension = (allocSize.ImageSizeMin + allocSize.ImageSizeMax) / 2;
sizeSum += avgDimension * avgDimension * 4 * allocSize.Probability;
}
probabilitySum += allocSize.Probability;
}
return sizeSum / probabilitySum;
}
bool UsesBuffers() const
{
for(size_t i = 0; i < AllocationSizes.size(); ++i)
if(AllocationSizes[i].BufferSizeMax > 0)
return true;
return false;
}
bool UsesImages() const
{
for(size_t i = 0; i < AllocationSizes.size(); ++i)
if(AllocationSizes[i].ImageSizeMax > 0)
return true;
return false;
}
};
struct PoolTestResult
{
duration TotalTime;
duration AllocationTimeMin, AllocationTimeAvg, AllocationTimeMax;
duration DeallocationTimeMin, DeallocationTimeAvg, DeallocationTimeMax;
size_t LostAllocationCount, LostAllocationTotalSize;
size_t FailedAllocationCount, FailedAllocationTotalSize;
};
static const uint32_t IMAGE_BYTES_PER_PIXEL = 1;
uint32_t g_FrameIndex = 0;
struct BufferInfo
{
VkBuffer Buffer = VK_NULL_HANDLE;
VmaAllocation Allocation = VK_NULL_HANDLE;
};
static uint32_t MemoryTypeToHeap(uint32_t memoryTypeIndex)
{
const VkPhysicalDeviceMemoryProperties* props;
vmaGetMemoryProperties(g_hAllocator, &props);
return props->memoryTypes[memoryTypeIndex].heapIndex;
}
static uint32_t GetAllocationStrategyCount()
{
uint32_t strategyCount = 0;
switch(ConfigType)
{
case CONFIG_TYPE_MINIMUM: strategyCount = 1; break;
case CONFIG_TYPE_SMALL: strategyCount = 1; break;
case CONFIG_TYPE_AVERAGE: strategyCount = 2; break;
case CONFIG_TYPE_LARGE: strategyCount = 2; break;
case CONFIG_TYPE_MAXIMUM: strategyCount = 3; break;
default: assert(0);
}
return strategyCount;
}
static const char* GetAllocationStrategyName(VmaAllocationCreateFlags allocStrategy)
{
switch(allocStrategy)
{
case VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT: return "BEST_FIT"; break;
case VMA_ALLOCATION_CREATE_STRATEGY_WORST_FIT_BIT: return "WORST_FIT"; break;
case VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT: return "FIRST_FIT"; break;
case 0: return "Default"; break;
default: assert(0); return "";
}
}
static void InitResult(Result& outResult)
{
outResult.TotalTime = duration::zero();
outResult.AllocationTimeMin = duration::max();
outResult.AllocationTimeAvg = duration::zero();
outResult.AllocationTimeMax = duration::min();
outResult.DeallocationTimeMin = duration::max();
outResult.DeallocationTimeAvg = duration::zero();
outResult.DeallocationTimeMax = duration::min();
outResult.TotalMemoryAllocated = 0;
outResult.FreeRangeSizeAvg = 0;
outResult.FreeRangeSizeMax = 0;
}
class TimeRegisterObj
{
public:
TimeRegisterObj(duration& min, duration& sum, duration& max) :
m_Min(min),
m_Sum(sum),
m_Max(max),
m_TimeBeg(std::chrono::high_resolution_clock::now())
{
}
~TimeRegisterObj()
{
duration d = std::chrono::high_resolution_clock::now() - m_TimeBeg;
m_Sum += d;
if(d < m_Min) m_Min = d;
if(d > m_Max) m_Max = d;
}
private:
duration& m_Min;
duration& m_Sum;
duration& m_Max;
time_point m_TimeBeg;
};
struct PoolTestThreadResult
{
duration AllocationTimeMin, AllocationTimeSum, AllocationTimeMax;
duration DeallocationTimeMin, DeallocationTimeSum, DeallocationTimeMax;
size_t AllocationCount, DeallocationCount;
size_t LostAllocationCount, LostAllocationTotalSize;
size_t FailedAllocationCount, FailedAllocationTotalSize;
};
class AllocationTimeRegisterObj : public TimeRegisterObj
{
public:
AllocationTimeRegisterObj(Result& result) :
TimeRegisterObj(result.AllocationTimeMin, result.AllocationTimeAvg, result.AllocationTimeMax)
{
}
};
class DeallocationTimeRegisterObj : public TimeRegisterObj
{
public:
DeallocationTimeRegisterObj(Result& result) :
TimeRegisterObj(result.DeallocationTimeMin, result.DeallocationTimeAvg, result.DeallocationTimeMax)
{
}
};
class PoolAllocationTimeRegisterObj : public TimeRegisterObj
{
public:
PoolAllocationTimeRegisterObj(PoolTestThreadResult& result) :
TimeRegisterObj(result.AllocationTimeMin, result.AllocationTimeSum, result.AllocationTimeMax)
{
}
};
class PoolDeallocationTimeRegisterObj : public TimeRegisterObj
{
public:
PoolDeallocationTimeRegisterObj(PoolTestThreadResult& result) :
TimeRegisterObj(result.DeallocationTimeMin, result.DeallocationTimeSum, result.DeallocationTimeMax)
{
}
};
static void CurrentTimeToStr(std::string& out)
{
time_t rawTime; time(&rawTime);
struct tm timeInfo; localtime_s(&timeInfo, &rawTime);
char timeStr[128];
strftime(timeStr, _countof(timeStr), "%c", &timeInfo);
out = timeStr;
}
VkResult MainTest(Result& outResult, const Config& config)
{
assert(config.ThreadCount > 0);
InitResult(outResult);
RandomNumberGenerator mainRand{config.RandSeed};
time_point timeBeg = std::chrono::high_resolution_clock::now();
std::atomic<size_t> allocationCount = 0;
VkResult res = VK_SUCCESS;
uint32_t memUsageProbabilitySum =
config.MemUsageProbability[0] + config.MemUsageProbability[1] +
config.MemUsageProbability[2] + config.MemUsageProbability[3];
assert(memUsageProbabilitySum > 0);
uint32_t allocationSizeProbabilitySum = std::accumulate(
config.AllocationSizes.begin(),
config.AllocationSizes.end(),
0u,
[](uint32_t sum, const AllocationSize& allocSize) {
return sum + allocSize.Probability;
});
struct Allocation
{
VkBuffer Buffer;
VkImage Image;
VmaAllocation Alloc;
};
std::vector<Allocation> commonAllocations;
std::mutex commonAllocationsMutex;
auto Allocate = [&](
VkDeviceSize bufferSize,
const VkExtent2D imageExtent,
RandomNumberGenerator& localRand,
VkDeviceSize& totalAllocatedBytes,
std::vector<Allocation>& allocations) -> VkResult
{
assert((bufferSize == 0) != (imageExtent.width == 0 && imageExtent.height == 0));
uint32_t memUsageIndex = 0;
uint32_t memUsageRand = localRand.Generate() % memUsageProbabilitySum;
while(memUsageRand >= config.MemUsageProbability[memUsageIndex])
memUsageRand -= config.MemUsageProbability[memUsageIndex++];
VmaAllocationCreateInfo memReq = {};
memReq.usage = (VmaMemoryUsage)(VMA_MEMORY_USAGE_GPU_ONLY + memUsageIndex);
memReq.flags |= config.AllocationStrategy;
Allocation allocation = {};
VmaAllocationInfo allocationInfo;
// Buffer
if(bufferSize > 0)
{
assert(imageExtent.width == 0);
VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
bufferInfo.size = bufferSize;
bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
{
AllocationTimeRegisterObj timeRegisterObj{outResult};
res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &memReq, &allocation.Buffer, &allocation.Alloc, &allocationInfo);
}
}
// Image
else
{
VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
imageInfo.imageType = VK_IMAGE_TYPE_2D;
imageInfo.extent.width = imageExtent.width;
imageInfo.extent.height = imageExtent.height;
imageInfo.extent.depth = 1;
imageInfo.mipLevels = 1;
imageInfo.arrayLayers = 1;
imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
imageInfo.tiling = memReq.usage == VMA_MEMORY_USAGE_GPU_ONLY ?
VK_IMAGE_TILING_OPTIMAL :
VK_IMAGE_TILING_LINEAR;
imageInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
switch(memReq.usage)
{
case VMA_MEMORY_USAGE_GPU_ONLY:
switch(localRand.Generate() % 3)
{
case 0:
imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
break;
case 1:
imageInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
break;
case 2:
imageInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
break;
}
break;
case VMA_MEMORY_USAGE_CPU_ONLY:
case VMA_MEMORY_USAGE_CPU_TO_GPU:
imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
break;
case VMA_MEMORY_USAGE_GPU_TO_CPU:
imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT;
break;
}
imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
imageInfo.flags = 0;
{
AllocationTimeRegisterObj timeRegisterObj{outResult};
res = vmaCreateImage(g_hAllocator, &imageInfo, &memReq, &allocation.Image, &allocation.Alloc, &allocationInfo);
}
}
if(res == VK_SUCCESS)
{
++allocationCount;
totalAllocatedBytes += allocationInfo.size;
bool useCommonAllocations = localRand.Generate() % 100 < config.ThreadsUsingCommonAllocationsProbabilityPercent;
if(useCommonAllocations)
{
std::unique_lock<std::mutex> lock(commonAllocationsMutex);
commonAllocations.push_back(allocation);
}
else
allocations.push_back(allocation);
}
else
{
TEST(0);
}
return res;
};
auto GetNextAllocationSize = [&](
VkDeviceSize& outBufSize,
VkExtent2D& outImageSize,
RandomNumberGenerator& localRand)
{
outBufSize = 0;
outImageSize = {0, 0};
uint32_t allocSizeIndex = 0;
uint32_t r = localRand.Generate() % allocationSizeProbabilitySum;
while(r >= config.AllocationSizes[allocSizeIndex].Probability)
r -= config.AllocationSizes[allocSizeIndex++].Probability;
const AllocationSize& allocSize = config.AllocationSizes[allocSizeIndex];
if(allocSize.BufferSizeMax > 0)
{
assert(allocSize.ImageSizeMax == 0);
if(allocSize.BufferSizeMax == allocSize.BufferSizeMin)
outBufSize = allocSize.BufferSizeMin;
else
{
outBufSize = allocSize.BufferSizeMin + localRand.Generate() % (allocSize.BufferSizeMax - allocSize.BufferSizeMin);
outBufSize = outBufSize / 16 * 16;
}
}
else
{
if(allocSize.ImageSizeMax == allocSize.ImageSizeMin)
outImageSize.width = outImageSize.height = allocSize.ImageSizeMax;
else
{
outImageSize.width = allocSize.ImageSizeMin + localRand.Generate() % (allocSize.ImageSizeMax - allocSize.ImageSizeMin);
outImageSize.height = allocSize.ImageSizeMin + localRand.Generate() % (allocSize.ImageSizeMax - allocSize.ImageSizeMin);
}
}
};
std::atomic<uint32_t> numThreadsReachedMaxAllocations = 0;
HANDLE threadsFinishEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
auto ThreadProc = [&](uint32_t randSeed) -> void
{
RandomNumberGenerator threadRand(randSeed);
VkDeviceSize threadTotalAllocatedBytes = 0;
std::vector<Allocation> threadAllocations;
VkDeviceSize threadBeginBytesToAllocate = config.BeginBytesToAllocate / config.ThreadCount;
VkDeviceSize threadMaxBytesToAllocate = config.MaxBytesToAllocate / config.ThreadCount;
uint32_t threadAdditionalOperationCount = config.AdditionalOperationCount / config.ThreadCount;
// BEGIN ALLOCATIONS
for(;;)
{
VkDeviceSize bufferSize = 0;
VkExtent2D imageExtent = {};
GetNextAllocationSize(bufferSize, imageExtent, threadRand);
if(threadTotalAllocatedBytes + bufferSize + imageExtent.width * imageExtent.height * IMAGE_BYTES_PER_PIXEL <
threadBeginBytesToAllocate)
{
if(Allocate(bufferSize, imageExtent, threadRand, threadTotalAllocatedBytes, threadAllocations) != VK_SUCCESS)
break;
}
else
break;
}
// ADDITIONAL ALLOCATIONS AND FREES
for(size_t i = 0; i < threadAdditionalOperationCount; ++i)
{
VkDeviceSize bufferSize = 0;
VkExtent2D imageExtent = {};
GetNextAllocationSize(bufferSize, imageExtent, threadRand);
// true = allocate, false = free
bool allocate = threadRand.Generate() % 2 != 0;
if(allocate)
{
if(threadTotalAllocatedBytes +
bufferSize +
imageExtent.width * imageExtent.height * IMAGE_BYTES_PER_PIXEL <
threadMaxBytesToAllocate)
{
if(Allocate(bufferSize, imageExtent, threadRand, threadTotalAllocatedBytes, threadAllocations) != VK_SUCCESS)
break;
}
}
else
{
bool useCommonAllocations = threadRand.Generate() % 100 < config.ThreadsUsingCommonAllocationsProbabilityPercent;
if(useCommonAllocations)
{
std::unique_lock<std::mutex> lock(commonAllocationsMutex);
if(!commonAllocations.empty())
{
size_t indexToFree = threadRand.Generate() % commonAllocations.size();
VmaAllocationInfo allocationInfo;
vmaGetAllocationInfo(g_hAllocator, commonAllocations[indexToFree].Alloc, &allocationInfo);
if(threadTotalAllocatedBytes >= allocationInfo.size)
{
DeallocationTimeRegisterObj timeRegisterObj{outResult};
if(commonAllocations[indexToFree].Buffer != VK_NULL_HANDLE)
vmaDestroyBuffer(g_hAllocator, commonAllocations[indexToFree].Buffer, commonAllocations[indexToFree].Alloc);
else
vmaDestroyImage(g_hAllocator, commonAllocations[indexToFree].Image, commonAllocations[indexToFree].Alloc);
threadTotalAllocatedBytes -= allocationInfo.size;
commonAllocations.erase(commonAllocations.begin() + indexToFree);
}
}
}
else
{
if(!threadAllocations.empty())
{
size_t indexToFree = threadRand.Generate() % threadAllocations.size();
VmaAllocationInfo allocationInfo;
vmaGetAllocationInfo(g_hAllocator, threadAllocations[indexToFree].Alloc, &allocationInfo);
if(threadTotalAllocatedBytes >= allocationInfo.size)
{
DeallocationTimeRegisterObj timeRegisterObj{outResult};
if(threadAllocations[indexToFree].Buffer != VK_NULL_HANDLE)
vmaDestroyBuffer(g_hAllocator, threadAllocations[indexToFree].Buffer, threadAllocations[indexToFree].Alloc);
else
vmaDestroyImage(g_hAllocator, threadAllocations[indexToFree].Image, threadAllocations[indexToFree].Alloc);
threadTotalAllocatedBytes -= allocationInfo.size;
threadAllocations.erase(threadAllocations.begin() + indexToFree);
}
}
}
}
}
++numThreadsReachedMaxAllocations;
WaitForSingleObject(threadsFinishEvent, INFINITE);
// DEALLOCATION
while(!threadAllocations.empty())
{
size_t indexToFree = 0;
switch(config.FreeOrder)
{
case FREE_ORDER::FORWARD:
indexToFree = 0;
break;
case FREE_ORDER::BACKWARD:
indexToFree = threadAllocations.size() - 1;
break;
case FREE_ORDER::RANDOM:
indexToFree = mainRand.Generate() % threadAllocations.size();
break;
}
{
DeallocationTimeRegisterObj timeRegisterObj{outResult};
if(threadAllocations[indexToFree].Buffer != VK_NULL_HANDLE)
vmaDestroyBuffer(g_hAllocator, threadAllocations[indexToFree].Buffer, threadAllocations[indexToFree].Alloc);
else
vmaDestroyImage(g_hAllocator, threadAllocations[indexToFree].Image, threadAllocations[indexToFree].Alloc);
}
threadAllocations.erase(threadAllocations.begin() + indexToFree);
}
};
uint32_t threadRandSeed = mainRand.Generate();
std::vector<std::thread> bkgThreads;
for(size_t i = 0; i < config.ThreadCount; ++i)
{
bkgThreads.emplace_back(std::bind(ThreadProc, threadRandSeed + (uint32_t)i));
}
// Wait for threads reached max allocations
while(numThreadsReachedMaxAllocations < config.ThreadCount)
Sleep(0);
// CALCULATE MEMORY STATISTICS ON FINAL USAGE
VmaStats vmaStats = {};
vmaCalculateStats(g_hAllocator, &vmaStats);
outResult.TotalMemoryAllocated = vmaStats.total.usedBytes + vmaStats.total.unusedBytes;
outResult.FreeRangeSizeMax = vmaStats.total.unusedRangeSizeMax;
outResult.FreeRangeSizeAvg = vmaStats.total.unusedRangeSizeAvg;
// Signal threads to deallocate
SetEvent(threadsFinishEvent);
// Wait for threads finished
for(size_t i = 0; i < bkgThreads.size(); ++i)
bkgThreads[i].join();
bkgThreads.clear();
CloseHandle(threadsFinishEvent);
// Deallocate remaining common resources
while(!commonAllocations.empty())
{
size_t indexToFree = 0;
switch(config.FreeOrder)
{
case FREE_ORDER::FORWARD:
indexToFree = 0;
break;
case FREE_ORDER::BACKWARD:
indexToFree = commonAllocations.size() - 1;
break;
case FREE_ORDER::RANDOM:
indexToFree = mainRand.Generate() % commonAllocations.size();
break;
}
{
DeallocationTimeRegisterObj timeRegisterObj{outResult};
if(commonAllocations[indexToFree].Buffer != VK_NULL_HANDLE)
vmaDestroyBuffer(g_hAllocator, commonAllocations[indexToFree].Buffer, commonAllocations[indexToFree].Alloc);
else
vmaDestroyImage(g_hAllocator, commonAllocations[indexToFree].Image, commonAllocations[indexToFree].Alloc);
}
commonAllocations.erase(commonAllocations.begin() + indexToFree);
}
if(allocationCount)
{
outResult.AllocationTimeAvg /= allocationCount;
outResult.DeallocationTimeAvg /= allocationCount;
}
outResult.TotalTime = std::chrono::high_resolution_clock::now() - timeBeg;
return res;
}
void SaveAllocatorStatsToFile(const wchar_t* filePath)
{
wprintf(L"Saving JSON dump to file \"%s\"\n", filePath);
char* stats;
vmaBuildStatsString(g_hAllocator, &stats, VK_TRUE);
SaveFile(filePath, stats, strlen(stats));
vmaFreeStatsString(g_hAllocator, stats);
}
struct AllocInfo
{
VmaAllocation m_Allocation = VK_NULL_HANDLE;
VkBuffer m_Buffer = VK_NULL_HANDLE;
VkImage m_Image = VK_NULL_HANDLE;
VkImageLayout m_ImageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
uint32_t m_StartValue = 0;
union
{
VkBufferCreateInfo m_BufferInfo;
VkImageCreateInfo m_ImageInfo;
};
// After defragmentation.
VkBuffer m_NewBuffer = VK_NULL_HANDLE;
VkImage m_NewImage = VK_NULL_HANDLE;
void CreateBuffer(
const VkBufferCreateInfo& bufCreateInfo,
const VmaAllocationCreateInfo& allocCreateInfo);
void CreateImage(
const VkImageCreateInfo& imageCreateInfo,
const VmaAllocationCreateInfo& allocCreateInfo,
VkImageLayout layout);
void Destroy();
};
void AllocInfo::CreateBuffer(
const VkBufferCreateInfo& bufCreateInfo,
const VmaAllocationCreateInfo& allocCreateInfo)
{
m_BufferInfo = bufCreateInfo;
VkResult res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &m_Buffer, &m_Allocation, nullptr);
TEST(res == VK_SUCCESS);
}
void AllocInfo::CreateImage(
const VkImageCreateInfo& imageCreateInfo,
const VmaAllocationCreateInfo& allocCreateInfo,
VkImageLayout layout)
{
m_ImageInfo = imageCreateInfo;
m_ImageLayout = layout;
VkResult res = vmaCreateImage(g_hAllocator, &imageCreateInfo, &allocCreateInfo, &m_Image, &m_Allocation, nullptr);
TEST(res == VK_SUCCESS);
}
void AllocInfo::Destroy()
{
if(m_Image)
{
assert(!m_Buffer);
vkDestroyImage(g_hDevice, m_Image, g_Allocs);
m_Image = VK_NULL_HANDLE;
}
if(m_Buffer)
{
assert(!m_Image);
vkDestroyBuffer(g_hDevice, m_Buffer, g_Allocs);
m_Buffer = VK_NULL_HANDLE;
}
if(m_Allocation)
{
vmaFreeMemory(g_hAllocator, m_Allocation);
m_Allocation = VK_NULL_HANDLE;
}
}
class StagingBufferCollection
{
public:
StagingBufferCollection() { }
~StagingBufferCollection();
// Returns false if maximum total size of buffers would be exceeded.
bool AcquireBuffer(VkDeviceSize size, VkBuffer& outBuffer, void*& outMappedPtr);
void ReleaseAllBuffers();
private:
static const VkDeviceSize MAX_TOTAL_SIZE = 256ull * 1024 * 1024;
struct BufInfo
{
VmaAllocation Allocation = VK_NULL_HANDLE;
VkBuffer Buffer = VK_NULL_HANDLE;
VkDeviceSize Size = VK_WHOLE_SIZE;
void* MappedPtr = nullptr;
bool Used = false;
};
std::vector<BufInfo> m_Bufs;
// Including both used and unused.
VkDeviceSize m_TotalSize = 0;
};
StagingBufferCollection::~StagingBufferCollection()
{
for(size_t i = m_Bufs.size(); i--; )
{
vmaDestroyBuffer(g_hAllocator, m_Bufs[i].Buffer, m_Bufs[i].Allocation);
}
}
bool StagingBufferCollection::AcquireBuffer(VkDeviceSize size, VkBuffer& outBuffer, void*& outMappedPtr)
{
assert(size <= MAX_TOTAL_SIZE);
// Try to find existing unused buffer with best size.
size_t bestIndex = SIZE_MAX;
for(size_t i = 0, count = m_Bufs.size(); i < count; ++i)
{
BufInfo& currBufInfo = m_Bufs[i];
if(!currBufInfo.Used && currBufInfo.Size >= size &&
(bestIndex == SIZE_MAX || currBufInfo.Size < m_Bufs[bestIndex].Size))
{
bestIndex = i;
}
}
if(bestIndex != SIZE_MAX)
{
m_Bufs[bestIndex].Used = true;
outBuffer = m_Bufs[bestIndex].Buffer;
outMappedPtr = m_Bufs[bestIndex].MappedPtr;
return true;
}
// Allocate new buffer with requested size.
if(m_TotalSize + size <= MAX_TOTAL_SIZE)
{
BufInfo bufInfo;
bufInfo.Size = size;
bufInfo.Used = true;
VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
bufCreateInfo.size = size;
bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
VmaAllocationCreateInfo allocCreateInfo = {};
allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
VmaAllocationInfo allocInfo;
VkResult res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &bufInfo.Buffer, &bufInfo.Allocation, &allocInfo);
bufInfo.MappedPtr = allocInfo.pMappedData;
TEST(res == VK_SUCCESS && bufInfo.MappedPtr);
outBuffer = bufInfo.Buffer;
outMappedPtr = bufInfo.MappedPtr;
m_Bufs.push_back(std::move(bufInfo));
m_TotalSize += size;
return true;
}
// There are some unused but smaller buffers: Free them and try again.
bool hasUnused = false;
for(size_t i = 0, count = m_Bufs.size(); i < count; ++i)
{
if(!m_Bufs[i].Used)
{
hasUnused = true;
break;
}
}
if(hasUnused)
{
for(size_t i = m_Bufs.size(); i--; )
{
if(!m_Bufs[i].Used)
{
m_TotalSize -= m_Bufs[i].Size;
vmaDestroyBuffer(g_hAllocator, m_Bufs[i].Buffer, m_Bufs[i].Allocation);
m_Bufs.erase(m_Bufs.begin() + i);
}
}
return AcquireBuffer(size, outBuffer, outMappedPtr);
}
return false;
}
void StagingBufferCollection::ReleaseAllBuffers()
{
for(size_t i = 0, count = m_Bufs.size(); i < count; ++i)
{
m_Bufs[i].Used = false;
}
}
static void UploadGpuData(const AllocInfo* allocInfo, size_t allocInfoCount)
{
StagingBufferCollection stagingBufs;
bool cmdBufferStarted = false;
for(size_t allocInfoIndex = 0; allocInfoIndex < allocInfoCount; ++allocInfoIndex)
{
const AllocInfo& currAllocInfo = allocInfo[allocInfoIndex];
if(currAllocInfo.m_Buffer)
{
const VkDeviceSize size = currAllocInfo.m_BufferInfo.size;
VkBuffer stagingBuf = VK_NULL_HANDLE;
void* stagingBufMappedPtr = nullptr;
if(!stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr))
{
TEST(cmdBufferStarted);
EndSingleTimeCommands();
stagingBufs.ReleaseAllBuffers();
cmdBufferStarted = false;
bool ok = stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr);
TEST(ok);
}
// Fill staging buffer.
{
assert(size % sizeof(uint32_t) == 0);
uint32_t* stagingValPtr = (uint32_t*)stagingBufMappedPtr;
uint32_t val = currAllocInfo.m_StartValue;
for(size_t i = 0; i < size / sizeof(uint32_t); ++i)
{
*stagingValPtr = val;
++stagingValPtr;
++val;
}
}
// Issue copy command from staging buffer to destination buffer.
if(!cmdBufferStarted)
{
cmdBufferStarted = true;
BeginSingleTimeCommands();
}
VkBufferCopy copy = {};
copy.srcOffset = 0;
copy.dstOffset = 0;
copy.size = size;
vkCmdCopyBuffer(g_hTemporaryCommandBuffer, stagingBuf, currAllocInfo.m_Buffer, 1, &copy);
}
else
{
TEST(currAllocInfo.m_ImageInfo.format == VK_FORMAT_R8G8B8A8_UNORM && "Only RGBA8 images are currently supported.");
TEST(currAllocInfo.m_ImageInfo.mipLevels == 1 && "Only single mip images are currently supported.");
const VkDeviceSize size = (VkDeviceSize)currAllocInfo.m_ImageInfo.extent.width * currAllocInfo.m_ImageInfo.extent.height * sizeof(uint32_t);
VkBuffer stagingBuf = VK_NULL_HANDLE;
void* stagingBufMappedPtr = nullptr;
if(!stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr))
{
TEST(cmdBufferStarted);
EndSingleTimeCommands();
stagingBufs.ReleaseAllBuffers();
cmdBufferStarted = false;
bool ok = stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr);
TEST(ok);
}
// Fill staging buffer.
{
assert(size % sizeof(uint32_t) == 0);
uint32_t *stagingValPtr = (uint32_t *)stagingBufMappedPtr;
uint32_t val = currAllocInfo.m_StartValue;
for(size_t i = 0; i < size / sizeof(uint32_t); ++i)
{
*stagingValPtr = val;
++stagingValPtr;
++val;
}
}
// Issue copy command from staging buffer to destination buffer.
if(!cmdBufferStarted)
{
cmdBufferStarted = true;
BeginSingleTimeCommands();
}
// Transfer to transfer dst layout
VkImageSubresourceRange subresourceRange = {
VK_IMAGE_ASPECT_COLOR_BIT,
0, VK_REMAINING_MIP_LEVELS,
0, VK_REMAINING_ARRAY_LAYERS
};
VkImageMemoryBarrier barrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER };
barrier.srcAccessMask = 0;
barrier.dstAccessMask = 0;
barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.image = currAllocInfo.m_Image;
barrier.subresourceRange = subresourceRange;
vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0,
0, nullptr,
0, nullptr,
1, &barrier);
// Copy image date
VkBufferImageCopy copy = {};
copy.bufferOffset = 0;
copy.bufferRowLength = 0;
copy.bufferImageHeight = 0;
copy.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
copy.imageSubresource.layerCount = 1;
copy.imageExtent = currAllocInfo.m_ImageInfo.extent;
vkCmdCopyBufferToImage(g_hTemporaryCommandBuffer, stagingBuf, currAllocInfo.m_Image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy);
// Transfer to desired layout
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
barrier.newLayout = currAllocInfo.m_ImageLayout;
vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0,
0, nullptr,
0, nullptr,
1, &barrier);
}
}
if(cmdBufferStarted)
{
EndSingleTimeCommands();
stagingBufs.ReleaseAllBuffers();
}
}
static void ValidateGpuData(const AllocInfo* allocInfo, size_t allocInfoCount)
{
StagingBufferCollection stagingBufs;
bool cmdBufferStarted = false;
size_t validateAllocIndexOffset = 0;
std::vector<void*> validateStagingBuffers;
for(size_t allocInfoIndex = 0; allocInfoIndex < allocInfoCount; ++allocInfoIndex)
{
const AllocInfo& currAllocInfo = allocInfo[allocInfoIndex];
if(currAllocInfo.m_Buffer)
{
const VkDeviceSize size = currAllocInfo.m_BufferInfo.size;
VkBuffer stagingBuf = VK_NULL_HANDLE;
void* stagingBufMappedPtr = nullptr;
if(!stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr))
{
TEST(cmdBufferStarted);
EndSingleTimeCommands();
cmdBufferStarted = false;
for(size_t validateIndex = 0;
validateIndex < validateStagingBuffers.size();
++validateIndex)
{
const size_t validateAllocIndex = validateIndex + validateAllocIndexOffset;
const VkDeviceSize validateSize = allocInfo[validateAllocIndex].m_BufferInfo.size;
TEST(validateSize % sizeof(uint32_t) == 0);
const uint32_t* stagingValPtr = (const uint32_t*)validateStagingBuffers[validateIndex];
uint32_t val = allocInfo[validateAllocIndex].m_StartValue;
bool valid = true;
for(size_t i = 0; i < validateSize / sizeof(uint32_t); ++i)
{
if(*stagingValPtr != val)
{
valid = false;
break;
}
++stagingValPtr;
++val;
}
TEST(valid);
}
stagingBufs.ReleaseAllBuffers();
validateAllocIndexOffset = allocInfoIndex;
validateStagingBuffers.clear();
bool ok = stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr);
TEST(ok);
}
// Issue copy command from staging buffer to destination buffer.
if(!cmdBufferStarted)
{
cmdBufferStarted = true;
BeginSingleTimeCommands();
}
VkBufferCopy copy = {};
copy.srcOffset = 0;
copy.dstOffset = 0;
copy.size = size;
vkCmdCopyBuffer(g_hTemporaryCommandBuffer, currAllocInfo.m_Buffer, stagingBuf, 1, &copy);
// Sava mapped pointer for later validation.
validateStagingBuffers.push_back(stagingBufMappedPtr);
}
else
{
TEST(0 && "Images not currently supported.");
}
}
if(cmdBufferStarted)
{
EndSingleTimeCommands();
for(size_t validateIndex = 0;
validateIndex < validateStagingBuffers.size();
++validateIndex)
{
const size_t validateAllocIndex = validateIndex + validateAllocIndexOffset;
const VkDeviceSize validateSize = allocInfo[validateAllocIndex].m_BufferInfo.size;
TEST(validateSize % sizeof(uint32_t) == 0);
const uint32_t* stagingValPtr = (const uint32_t*)validateStagingBuffers[validateIndex];
uint32_t val = allocInfo[validateAllocIndex].m_StartValue;
bool valid = true;
for(size_t i = 0; i < validateSize / sizeof(uint32_t); ++i)
{
if(*stagingValPtr != val)
{
valid = false;
break;
}
++stagingValPtr;
++val;
}
TEST(valid);
}
stagingBufs.ReleaseAllBuffers();
}
}
static void GetMemReq(VmaAllocationCreateInfo& outMemReq)
{
outMemReq = {};
outMemReq.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
//outMemReq.flags = VMA_ALLOCATION_CREATE_PERSISTENT_MAP_BIT;
}
static void CreateBuffer(
VmaPool pool,
const VkBufferCreateInfo& bufCreateInfo,
bool persistentlyMapped,
AllocInfo& outAllocInfo)
{
outAllocInfo = {};
outAllocInfo.m_BufferInfo = bufCreateInfo;
VmaAllocationCreateInfo allocCreateInfo = {};
allocCreateInfo.pool = pool;
if(persistentlyMapped)
allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
VmaAllocationInfo vmaAllocInfo = {};
ERR_GUARD_VULKAN( vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &outAllocInfo.m_Buffer, &outAllocInfo.m_Allocation, &vmaAllocInfo) );
// Setup StartValue and fill.
{
outAllocInfo.m_StartValue = (uint32_t)rand();
uint32_t* data = (uint32_t*)vmaAllocInfo.pMappedData;
TEST((data != nullptr) == persistentlyMapped);
if(!persistentlyMapped)
{
ERR_GUARD_VULKAN( vmaMapMemory(g_hAllocator, outAllocInfo.m_Allocation, (void**)&data) );
}
uint32_t value = outAllocInfo.m_StartValue;
TEST(bufCreateInfo.size % 4 == 0);
for(size_t i = 0; i < bufCreateInfo.size / sizeof(uint32_t); ++i)
data[i] = value++;
if(!persistentlyMapped)
vmaUnmapMemory(g_hAllocator, outAllocInfo.m_Allocation);
}
}
static void CreateAllocation(AllocInfo& outAllocation)
{
outAllocation.m_Allocation = nullptr;
outAllocation.m_Buffer = nullptr;
outAllocation.m_Image = nullptr;
outAllocation.m_StartValue = (uint32_t)rand();
VmaAllocationCreateInfo vmaMemReq;
GetMemReq(vmaMemReq);
VmaAllocationInfo allocInfo;
const bool isBuffer = true;//(rand() & 0x1) != 0;
const bool isLarge = (rand() % 16) == 0;
if(isBuffer)
{
const uint32_t bufferSize = isLarge ?
(rand() % 10 + 1) * (1024 * 1024) : // 1 MB ... 10 MB
(rand() % 1024 + 1) * 1024; // 1 KB ... 1 MB
VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
bufferInfo.size = bufferSize;
bufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
VkResult res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &vmaMemReq, &outAllocation.m_Buffer, &outAllocation.m_Allocation, &allocInfo);
outAllocation.m_BufferInfo = bufferInfo;
TEST(res == VK_SUCCESS);
}
else
{
const uint32_t imageSizeX = isLarge ?
1024 + rand() % (4096 - 1024) : // 1024 ... 4096
rand() % 1024 + 1; // 1 ... 1024
const uint32_t imageSizeY = isLarge ?
1024 + rand() % (4096 - 1024) : // 1024 ... 4096
rand() % 1024 + 1; // 1 ... 1024
VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
imageInfo.imageType = VK_IMAGE_TYPE_2D;
imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
imageInfo.extent.width = imageSizeX;
imageInfo.extent.height = imageSizeY;
imageInfo.extent.depth = 1;
imageInfo.mipLevels = 1;
imageInfo.arrayLayers = 1;
imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
imageInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
VkResult res = vmaCreateImage(g_hAllocator, &imageInfo, &vmaMemReq, &outAllocation.m_Image, &outAllocation.m_Allocation, &allocInfo);
outAllocation.m_ImageInfo = imageInfo;
TEST(res == VK_SUCCESS);
}
uint32_t* data = (uint32_t*)allocInfo.pMappedData;
if(allocInfo.pMappedData == nullptr)
{
VkResult res = vmaMapMemory(g_hAllocator, outAllocation.m_Allocation, (void**)&data);
TEST(res == VK_SUCCESS);
}
uint32_t value = outAllocation.m_StartValue;
TEST(allocInfo.size % 4 == 0);
for(size_t i = 0; i < allocInfo.size / sizeof(uint32_t); ++i)
data[i] = value++;
if(allocInfo.pMappedData == nullptr)
vmaUnmapMemory(g_hAllocator, outAllocation.m_Allocation);
}
static void DestroyAllocation(const AllocInfo& allocation)
{
if(allocation.m_Buffer)
vmaDestroyBuffer(g_hAllocator, allocation.m_Buffer, allocation.m_Allocation);
else
vmaDestroyImage(g_hAllocator, allocation.m_Image, allocation.m_Allocation);
}
static void DestroyAllAllocations(std::vector<AllocInfo>& allocations)
{
for(size_t i = allocations.size(); i--; )
DestroyAllocation(allocations[i]);
allocations.clear();
}
static void ValidateAllocationData(const AllocInfo& allocation)
{
VmaAllocationInfo allocInfo;
vmaGetAllocationInfo(g_hAllocator, allocation.m_Allocation, &allocInfo);
uint32_t* data = (uint32_t*)allocInfo.pMappedData;
if(allocInfo.pMappedData == nullptr)
{
VkResult res = vmaMapMemory(g_hAllocator, allocation.m_Allocation, (void**)&data);
TEST(res == VK_SUCCESS);
}
uint32_t value = allocation.m_StartValue;
bool ok = true;
size_t i;
TEST(allocInfo.size % 4 == 0);
for(i = 0; i < allocInfo.size / sizeof(uint32_t); ++i)
{
if(data[i] != value++)
{
ok = false;
break;
}
}
TEST(ok);
if(allocInfo.pMappedData == nullptr)
vmaUnmapMemory(g_hAllocator, allocation.m_Allocation);
}
static void RecreateAllocationResource(AllocInfo& allocation)
{
VmaAllocationInfo allocInfo;
vmaGetAllocationInfo(g_hAllocator, allocation.m_Allocation, &allocInfo);
if(allocation.m_Buffer)
{
vkDestroyBuffer(g_hDevice, allocation.m_Buffer, g_Allocs);
VkResult res = vkCreateBuffer(g_hDevice, &allocation.m_BufferInfo, g_Allocs, &allocation.m_Buffer);
TEST(res == VK_SUCCESS);
// Just to silence validation layer warnings.
VkMemoryRequirements vkMemReq;
vkGetBufferMemoryRequirements(g_hDevice, allocation.m_Buffer, &vkMemReq);
TEST(vkMemReq.size >= allocation.m_BufferInfo.size);
res = vmaBindBufferMemory(g_hAllocator, allocation.m_Allocation, allocation.m_Buffer);
TEST(res == VK_SUCCESS);
}
else
{
vkDestroyImage(g_hDevice, allocation.m_Image, g_Allocs);
VkResult res = vkCreateImage(g_hDevice, &allocation.m_ImageInfo, g_Allocs, &allocation.m_Image);
TEST(res == VK_SUCCESS);
// Just to silence validation layer warnings.
VkMemoryRequirements vkMemReq;
vkGetImageMemoryRequirements(g_hDevice, allocation.m_Image, &vkMemReq);
res = vmaBindImageMemory(g_hAllocator, allocation.m_Allocation, allocation.m_Image);
TEST(res == VK_SUCCESS);
}
}
static void Defragment(AllocInfo* allocs, size_t allocCount,
const VmaDefragmentationInfo* defragmentationInfo = nullptr,
VmaDefragmentationStats* defragmentationStats = nullptr)
{
std::vector<VmaAllocation> vmaAllocs(allocCount);
for(size_t i = 0; i < allocCount; ++i)
vmaAllocs[i] = allocs[i].m_Allocation;
std::vector<VkBool32> allocChanged(allocCount);
ERR_GUARD_VULKAN( vmaDefragment(g_hAllocator, vmaAllocs.data(), allocCount, allocChanged.data(),
defragmentationInfo, defragmentationStats) );
for(size_t i = 0; i < allocCount; ++i)
{
if(allocChanged[i])
{
RecreateAllocationResource(allocs[i]);
}
}
}
static void ValidateAllocationsData(const AllocInfo* allocs, size_t allocCount)
{
std::for_each(allocs, allocs + allocCount, [](const AllocInfo& allocInfo) {
ValidateAllocationData(allocInfo);
});
}
void TestDefragmentationSimple()
{
wprintf(L"Test defragmentation simple\n");
RandomNumberGenerator rand(667);
const VkDeviceSize BUF_SIZE = 0x10000;
const VkDeviceSize BLOCK_SIZE = BUF_SIZE * 8;
const VkDeviceSize MIN_BUF_SIZE = 32;
const VkDeviceSize MAX_BUF_SIZE = BUF_SIZE * 4;
auto RandomBufSize = [&]() -> VkDeviceSize {
return align_up<VkDeviceSize>(rand.Generate() % (MAX_BUF_SIZE - MIN_BUF_SIZE + 1) + MIN_BUF_SIZE, 32);
};
VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
bufCreateInfo.size = BUF_SIZE;
bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
VmaAllocationCreateInfo exampleAllocCreateInfo = {};
exampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
uint32_t memTypeIndex = UINT32_MAX;
vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &bufCreateInfo, &exampleAllocCreateInfo, &memTypeIndex);
VmaPoolCreateInfo poolCreateInfo = {};
poolCreateInfo.blockSize = BLOCK_SIZE;
poolCreateInfo.memoryTypeIndex = memTypeIndex;
VmaPool pool;
ERR_GUARD_VULKAN( vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool) );
// Defragmentation of empty pool.
{
VmaDefragmentationInfo2 defragInfo = {};
defragInfo.maxCpuBytesToMove = VK_WHOLE_SIZE;
defragInfo.maxCpuAllocationsToMove = UINT32_MAX;
defragInfo.poolCount = 1;
defragInfo.pPools = &pool;
VmaDefragmentationStats defragStats = {};
VmaDefragmentationContext defragCtx = nullptr;
VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &defragStats, &defragCtx);
TEST(res >= VK_SUCCESS);
vmaDefragmentationEnd(g_hAllocator, defragCtx);
TEST(defragStats.allocationsMoved == 0 && defragStats.bytesFreed == 0 &&
defragStats.bytesMoved == 0 && defragStats.deviceMemoryBlocksFreed == 0);
}
std::vector<AllocInfo> allocations;
// persistentlyMappedOption = 0 - not persistently mapped.
// persistentlyMappedOption = 1 - persistently mapped.
for(uint32_t persistentlyMappedOption = 0; persistentlyMappedOption < 2; ++persistentlyMappedOption)
{
wprintf(L" Persistently mapped option = %u\n", persistentlyMappedOption);
const bool persistentlyMapped = persistentlyMappedOption != 0;
// # Test 1
// Buffers of fixed size.
// Fill 2 blocks. Remove odd buffers. Defragment everything.
// Expected result: at least 1 block freed.
{
for(size_t i = 0; i < BLOCK_SIZE / BUF_SIZE * 2; ++i)
{
AllocInfo allocInfo;
CreateBuffer(pool, bufCreateInfo, persistentlyMapped, allocInfo);
allocations.push_back(allocInfo);
}
for(size_t i = 1; i < allocations.size(); ++i)
{
DestroyAllocation(allocations[i]);
allocations.erase(allocations.begin() + i);
}
VmaDefragmentationStats defragStats;
Defragment(allocations.data(), allocations.size(), nullptr, &defragStats);
TEST(defragStats.allocationsMoved > 0 && defragStats.bytesMoved > 0);
TEST(defragStats.deviceMemoryBlocksFreed >= 1);
ValidateAllocationsData(allocations.data(), allocations.size());
DestroyAllAllocations(allocations);
}
// # Test 2
// Buffers of fixed size.
// Fill 2 blocks. Remove odd buffers. Defragment one buffer at time.
// Expected result: Each of 4 interations makes some progress.
{
for(size_t i = 0; i < BLOCK_SIZE / BUF_SIZE * 2; ++i)
{
AllocInfo allocInfo;
CreateBuffer(pool, bufCreateInfo, persistentlyMapped, allocInfo);
allocations.push_back(allocInfo);
}
for(size_t i = 1; i < allocations.size(); ++i)
{
DestroyAllocation(allocations[i]);
allocations.erase(allocations.begin() + i);
}
VmaDefragmentationInfo defragInfo = {};
defragInfo.maxAllocationsToMove = 1;
defragInfo.maxBytesToMove = BUF_SIZE;
for(size_t i = 0; i < BLOCK_SIZE / BUF_SIZE / 2; ++i)
{
VmaDefragmentationStats defragStats;
Defragment(allocations.data(), allocations.size(), &defragInfo, &defragStats);
TEST(defragStats.allocationsMoved > 0 && defragStats.bytesMoved > 0);
}
ValidateAllocationsData(allocations.data(), allocations.size());
DestroyAllAllocations(allocations);
}
// # Test 3
// Buffers of variable size.
// Create a number of buffers. Remove some percent of them.
// Defragment while having some percent of them unmovable.
// Expected result: Just simple validation.
{
for(size_t i = 0; i < 100; ++i)
{
VkBufferCreateInfo localBufCreateInfo = bufCreateInfo;
localBufCreateInfo.size = RandomBufSize();
AllocInfo allocInfo;
CreateBuffer(pool, bufCreateInfo, persistentlyMapped, allocInfo);
allocations.push_back(allocInfo);
}
const uint32_t percentToDelete = 60;
const size_t numberToDelete = allocations.size() * percentToDelete / 100;
for(size_t i = 0; i < numberToDelete; ++i)
{
size_t indexToDelete = rand.Generate() % (uint32_t)allocations.size();
DestroyAllocation(allocations[indexToDelete]);
allocations.erase(allocations.begin() + indexToDelete);
}
// Non-movable allocations will be at the beginning of allocations array.
const uint32_t percentNonMovable = 20;
const size_t numberNonMovable = allocations.size() * percentNonMovable / 100;
for(size_t i = 0; i < numberNonMovable; ++i)
{
size_t indexNonMovable = i + rand.Generate() % (uint32_t)(allocations.size() - i);
if(indexNonMovable != i)
std::swap(allocations[i], allocations[indexNonMovable]);
}
VmaDefragmentationStats defragStats;
Defragment(
allocations.data() + numberNonMovable,
allocations.size() - numberNonMovable,
nullptr, &defragStats);
ValidateAllocationsData(allocations.data(), allocations.size());
DestroyAllAllocations(allocations);
}
}
/*
Allocation that must be move to an overlapping place using memmove().
Create 2 buffers, second slightly bigger than the first. Delete first. Then defragment.
*/
if(VMA_DEBUG_MARGIN == 0) // FAST algorithm works only when DEBUG_MARGIN disabled.
{
AllocInfo allocInfo[2];
bufCreateInfo.size = BUF_SIZE;
CreateBuffer(pool, bufCreateInfo, false, allocInfo[0]);
const VkDeviceSize biggerBufSize = BUF_SIZE + BUF_SIZE / 256;
bufCreateInfo.size = biggerBufSize;
CreateBuffer(pool, bufCreateInfo, false, allocInfo[1]);
DestroyAllocation(allocInfo[0]);
VmaDefragmentationStats defragStats;
Defragment(&allocInfo[1], 1, nullptr, &defragStats);
// If this fails, it means we couldn't do memmove with overlapping regions.
TEST(defragStats.allocationsMoved == 1 && defragStats.bytesMoved > 0);
ValidateAllocationsData(&allocInfo[1], 1);
DestroyAllocation(allocInfo[1]);
}
vmaDestroyPool(g_hAllocator, pool);
}
void TestDefragmentationWholePool()
{
wprintf(L"Test defragmentation whole pool\n");
RandomNumberGenerator rand(668);
const VkDeviceSize BUF_SIZE = 0x10000;
const VkDeviceSize BLOCK_SIZE = BUF_SIZE * 8;
VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
bufCreateInfo.size = BUF_SIZE;
bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
VmaAllocationCreateInfo exampleAllocCreateInfo = {};
exampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
uint32_t memTypeIndex = UINT32_MAX;
vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &bufCreateInfo, &exampleAllocCreateInfo, &memTypeIndex);
VmaPoolCreateInfo poolCreateInfo = {};
poolCreateInfo.blockSize = BLOCK_SIZE;
poolCreateInfo.memoryTypeIndex = memTypeIndex;
VmaDefragmentationStats defragStats[2];
for(size_t caseIndex = 0; caseIndex < 2; ++caseIndex)
{
VmaPool pool;
ERR_GUARD_VULKAN( vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool) );
std::vector<AllocInfo> allocations;
// Buffers of fixed size.
// Fill 2 blocks. Remove odd buffers. Defragment all of them.
for(size_t i = 0; i < BLOCK_SIZE / BUF_SIZE * 2; ++i)
{
AllocInfo allocInfo;
CreateBuffer(pool, bufCreateInfo, false, allocInfo);
allocations.push_back(allocInfo);
}
for(size_t i = 1; i < allocations.size(); ++i)
{
DestroyAllocation(allocations[i]);
allocations.erase(allocations.begin() + i);
}
VmaDefragmentationInfo2 defragInfo = {};
defragInfo.maxCpuAllocationsToMove = UINT32_MAX;
defragInfo.maxCpuBytesToMove = VK_WHOLE_SIZE;
std::vector<VmaAllocation> allocationsToDefrag;
if(caseIndex == 0)
{
defragInfo.poolCount = 1;
defragInfo.pPools = &pool;
}
else
{
const size_t allocCount = allocations.size();
allocationsToDefrag.resize(allocCount);
std::transform(
allocations.begin(), allocations.end(),
allocationsToDefrag.begin(),
[](const AllocInfo& allocInfo) { return allocInfo.m_Allocation; });
defragInfo.allocationCount = (uint32_t)allocCount;
defragInfo.pAllocations = allocationsToDefrag.data();
}
VmaDefragmentationContext defragCtx = VK_NULL_HANDLE;
VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &defragStats[caseIndex], &defragCtx);
TEST(res >= VK_SUCCESS);
vmaDefragmentationEnd(g_hAllocator, defragCtx);
TEST(defragStats[caseIndex].allocationsMoved > 0 && defragStats[caseIndex].bytesMoved > 0);
ValidateAllocationsData(allocations.data(), allocations.size());
DestroyAllAllocations(allocations);
vmaDestroyPool(g_hAllocator, pool);
}
TEST(defragStats[0].bytesMoved == defragStats[1].bytesMoved);
TEST(defragStats[0].allocationsMoved == defragStats[1].allocationsMoved);
TEST(defragStats[0].bytesFreed == defragStats[1].bytesFreed);
TEST(defragStats[0].deviceMemoryBlocksFreed == defragStats[1].deviceMemoryBlocksFreed);
}
void TestDefragmentationFull()
{
std::vector<AllocInfo> allocations;
// Create initial allocations.
for(size_t i = 0; i < 400; ++i)
{
AllocInfo allocation;
CreateAllocation(allocation);
allocations.push_back(allocation);
}
// Delete random allocations
const size_t allocationsToDeletePercent = 80;
size_t allocationsToDelete = allocations.size() * allocationsToDeletePercent / 100;
for(size_t i = 0; i < allocationsToDelete; ++i)
{
size_t index = (size_t)rand() % allocations.size();
DestroyAllocation(allocations[index]);
allocations.erase(allocations.begin() + index);
}
for(size_t i = 0; i < allocations.size(); ++i)
ValidateAllocationData(allocations[i]);
//SaveAllocatorStatsToFile(L"Before.csv");
{
std::vector<VmaAllocation> vmaAllocations(allocations.size());
for(size_t i = 0; i < allocations.size(); ++i)
vmaAllocations[i] = allocations[i].m_Allocation;
const size_t nonMovablePercent = 0;
size_t nonMovableCount = vmaAllocations.size() * nonMovablePercent / 100;
for(size_t i = 0; i < nonMovableCount; ++i)
{
size_t index = (size_t)rand() % vmaAllocations.size();
vmaAllocations.erase(vmaAllocations.begin() + index);
}
const uint32_t defragCount = 1;
for(uint32_t defragIndex = 0; defragIndex < defragCount; ++defragIndex)
{
std::vector<VkBool32> allocationsChanged(vmaAllocations.size());
VmaDefragmentationInfo defragmentationInfo;
defragmentationInfo.maxAllocationsToMove = UINT_MAX;
defragmentationInfo.maxBytesToMove = SIZE_MAX;
wprintf(L"Defragmentation #%u\n", defragIndex);
time_point begTime = std::chrono::high_resolution_clock::now();
VmaDefragmentationStats stats;
VkResult res = vmaDefragment(g_hAllocator, vmaAllocations.data(), vmaAllocations.size(), allocationsChanged.data(), &defragmentationInfo, &stats);
TEST(res >= 0);
float defragmentDuration = ToFloatSeconds(std::chrono::high_resolution_clock::now() - begTime);
wprintf(L"Moved allocations %u, bytes %llu\n", stats.allocationsMoved, stats.bytesMoved);
wprintf(L"Freed blocks %u, bytes %llu\n", stats.deviceMemoryBlocksFreed, stats.bytesFreed);
wprintf(L"Time: %.2f s\n", defragmentDuration);
for(size_t i = 0; i < vmaAllocations.size(); ++i)
{
if(allocationsChanged[i])
{
RecreateAllocationResource(allocations[i]);
}
}
for(size_t i = 0; i < allocations.size(); ++i)
ValidateAllocationData(allocations[i]);
//wchar_t fileName[MAX_PATH];
//swprintf(fileName, MAX_PATH, L"After_%02u.csv", defragIndex);
//SaveAllocatorStatsToFile(fileName);
}
}
// Destroy all remaining allocations.
DestroyAllAllocations(allocations);
}
static void TestDefragmentationGpu()
{
wprintf(L"Test defragmentation GPU\n");
g_MemoryAliasingWarningEnabled = false;
std::vector<AllocInfo> allocations;
// Create that many allocations to surely fill 3 new blocks of 256 MB.
const VkDeviceSize bufSizeMin = 5ull * 1024 * 1024;
const VkDeviceSize bufSizeMax = 10ull * 1024 * 1024;
const VkDeviceSize totalSize = 3ull * 256 * 1024 * 1024;
const size_t bufCount = (size_t)(totalSize / bufSizeMin);
const size_t percentToLeave = 30;
const size_t percentNonMovable = 3;
RandomNumberGenerator rand = { 234522 };
VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
VmaAllocationCreateInfo allocCreateInfo = {};
allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
allocCreateInfo.flags = 0;
// Create all intended buffers.
for(size_t i = 0; i < bufCount; ++i)
{
bufCreateInfo.size = align_up(rand.Generate() % (bufSizeMax - bufSizeMin) + bufSizeMin, 32ull);
if(rand.Generate() % 100 < percentNonMovable)
{
bufCreateInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
allocCreateInfo.pUserData = (void*)(uintptr_t)2;
}
else
{
// Different usage just to see different color in output from VmaDumpVis.
bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
// And in JSON dump.
allocCreateInfo.pUserData = (void*)(uintptr_t)1;
}
AllocInfo alloc;
alloc.CreateBuffer(bufCreateInfo, allocCreateInfo);
alloc.m_StartValue = rand.Generate();
allocations.push_back(alloc);
}
// Destroy some percentage of them.
{
const size_t buffersToDestroy = round_div<size_t>(bufCount * (100 - percentToLeave), 100);
for(size_t i = 0; i < buffersToDestroy; ++i)
{
const size_t index = rand.Generate() % allocations.size();
allocations[index].Destroy();
allocations.erase(allocations.begin() + index);
}
}
// Fill them with meaningful data.
UploadGpuData(allocations.data(), allocations.size());
wchar_t fileName[MAX_PATH];
swprintf_s(fileName, L"GPU_defragmentation_A_before.json");
SaveAllocatorStatsToFile(fileName);
// Defragment using GPU only.
{
const size_t allocCount = allocations.size();
std::vector<VmaAllocation> allocationPtrs;
std::vector<VkBool32> allocationChanged;
std::vector<size_t> allocationOriginalIndex;
for(size_t i = 0; i < allocCount; ++i)
{
VmaAllocationInfo allocInfo = {};
vmaGetAllocationInfo(g_hAllocator, allocations[i].m_Allocation, &allocInfo);
if((uintptr_t)allocInfo.pUserData == 1) // Movable
{
allocationPtrs.push_back(allocations[i].m_Allocation);
allocationChanged.push_back(VK_FALSE);
allocationOriginalIndex.push_back(i);
}
}
const size_t movableAllocCount = allocationPtrs.size();
BeginSingleTimeCommands();
VmaDefragmentationInfo2 defragInfo = {};
defragInfo.flags = 0;
defragInfo.allocationCount = (uint32_t)movableAllocCount;
defragInfo.pAllocations = allocationPtrs.data();
defragInfo.pAllocationsChanged = allocationChanged.data();
defragInfo.maxGpuBytesToMove = VK_WHOLE_SIZE;
defragInfo.maxGpuAllocationsToMove = UINT32_MAX;
defragInfo.commandBuffer = g_hTemporaryCommandBuffer;
VmaDefragmentationStats stats = {};
VmaDefragmentationContext ctx = VK_NULL_HANDLE;
VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &stats, &ctx);
TEST(res >= VK_SUCCESS);
EndSingleTimeCommands();
vmaDefragmentationEnd(g_hAllocator, ctx);
for(size_t i = 0; i < movableAllocCount; ++i)
{
if(allocationChanged[i])
{
const size_t origAllocIndex = allocationOriginalIndex[i];
RecreateAllocationResource(allocations[origAllocIndex]);
}
}
// If corruption detection is enabled, GPU defragmentation may not work on
// memory types that have this detection active, e.g. on Intel.
#if !defined(VMA_DEBUG_DETECT_CORRUPTION) || VMA_DEBUG_DETECT_CORRUPTION == 0
TEST(stats.allocationsMoved > 0 && stats.bytesMoved > 0);
TEST(stats.deviceMemoryBlocksFreed > 0 && stats.bytesFreed > 0);
#endif
}
ValidateGpuData(allocations.data(), allocations.size());
swprintf_s(fileName, L"GPU_defragmentation_B_after.json");
SaveAllocatorStatsToFile(fileName);
// Destroy all remaining buffers.
for(size_t i = allocations.size(); i--; )
{
allocations[i].Destroy();
}
g_MemoryAliasingWarningEnabled = true;
}
static void ProcessDefragmentationStepInfo(VmaDefragmentationPassInfo &stepInfo)
{
std::vector<VkImageMemoryBarrier> beginImageBarriers;
std::vector<VkImageMemoryBarrier> finalizeImageBarriers;
VkPipelineStageFlags beginSrcStageMask = 0;
VkPipelineStageFlags beginDstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
VkPipelineStageFlags finalizeSrcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
VkPipelineStageFlags finalizeDstStageMask = 0;
bool wantsMemoryBarrier = false;
VkMemoryBarrier beginMemoryBarrier = { VK_STRUCTURE_TYPE_MEMORY_BARRIER };
VkMemoryBarrier finalizeMemoryBarrier = { VK_STRUCTURE_TYPE_MEMORY_BARRIER };
for(uint32_t i = 0; i < stepInfo.moveCount; ++i)
{
VmaAllocationInfo info;
vmaGetAllocationInfo(g_hAllocator, stepInfo.pMoves[i].allocation, &info);
AllocInfo *allocInfo = (AllocInfo *)info.pUserData;
if(allocInfo->m_Image)
{
VkImage newImage;
const VkResult result = vkCreateImage(g_hDevice, &allocInfo->m_ImageInfo, g_Allocs, &newImage);
TEST(result >= VK_SUCCESS);
vkBindImageMemory(g_hDevice, newImage, stepInfo.pMoves[i].memory, stepInfo.pMoves[i].offset);
allocInfo->m_NewImage = newImage;
// Keep track of our pipeline stages that we need to wait/signal on
beginSrcStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
finalizeDstStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
// We need one pipeline barrier and two image layout transitions here
// First we'll have to turn our newly created image into VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
// And the second one is turning the old image into VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL
VkImageSubresourceRange subresourceRange = {
VK_IMAGE_ASPECT_COLOR_BIT,
0, VK_REMAINING_MIP_LEVELS,
0, VK_REMAINING_ARRAY_LAYERS
};
VkImageMemoryBarrier barrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER };
barrier.srcAccessMask = 0;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.image = newImage;
barrier.subresourceRange = subresourceRange;
beginImageBarriers.push_back(barrier);
// Second barrier to convert the existing image. This one actually needs a real barrier
barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
barrier.oldLayout = allocInfo->m_ImageLayout;
barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
barrier.image = allocInfo->m_Image;
beginImageBarriers.push_back(barrier);
// And lastly we need a barrier that turns our new image into the layout of the old one
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
barrier.newLayout = allocInfo->m_ImageLayout;
barrier.image = newImage;
finalizeImageBarriers.push_back(barrier);
}
else if(allocInfo->m_Buffer)
{
VkBuffer newBuffer;
const VkResult result = vkCreateBuffer(g_hDevice, &allocInfo->m_BufferInfo, g_Allocs, &newBuffer);
TEST(result >= VK_SUCCESS);
vkBindBufferMemory(g_hDevice, newBuffer, stepInfo.pMoves[i].memory, stepInfo.pMoves[i].offset);
allocInfo->m_NewBuffer = newBuffer;
// Keep track of our pipeline stages that we need to wait/signal on
beginSrcStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
finalizeDstStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
beginMemoryBarrier.srcAccessMask |= VK_ACCESS_MEMORY_WRITE_BIT;
beginMemoryBarrier.dstAccessMask |= VK_ACCESS_TRANSFER_READ_BIT;
finalizeMemoryBarrier.srcAccessMask |= VK_ACCESS_TRANSFER_WRITE_BIT;
finalizeMemoryBarrier.dstAccessMask |= VK_ACCESS_MEMORY_READ_BIT;
wantsMemoryBarrier = true;
}
}
if(!beginImageBarriers.empty() || wantsMemoryBarrier)
{
const uint32_t memoryBarrierCount = wantsMemoryBarrier ? 1 : 0;
vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, beginSrcStageMask, beginDstStageMask, 0,
memoryBarrierCount, &beginMemoryBarrier,
0, nullptr,
(uint32_t)beginImageBarriers.size(), beginImageBarriers.data());
}
for(uint32_t i = 0; i < stepInfo.moveCount; ++ i)
{
VmaAllocationInfo info;
vmaGetAllocationInfo(g_hAllocator, stepInfo.pMoves[i].allocation, &info);
AllocInfo *allocInfo = (AllocInfo *)info.pUserData;
if(allocInfo->m_Image)
{
std::vector<VkImageCopy> imageCopies;
// Copy all mips of the source image into the target image
VkOffset3D offset = { 0, 0, 0 };
VkExtent3D extent = allocInfo->m_ImageInfo.extent;
VkImageSubresourceLayers subresourceLayers = {
VK_IMAGE_ASPECT_COLOR_BIT,
0,
0, 1
};
for(uint32_t mip = 0; mip < allocInfo->m_ImageInfo.mipLevels; ++ mip)
{
subresourceLayers.mipLevel = mip;
VkImageCopy imageCopy{
subresourceLayers,
offset,
subresourceLayers,
offset,
extent
};
imageCopies.push_back(imageCopy);
extent.width = std::max(uint32_t(1), extent.width >> 1);
extent.height = std::max(uint32_t(1), extent.height >> 1);
extent.depth = std::max(uint32_t(1), extent.depth >> 1);
}
vkCmdCopyImage(
g_hTemporaryCommandBuffer,
allocInfo->m_Image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
allocInfo->m_NewImage, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
(uint32_t)imageCopies.size(), imageCopies.data());
}
else if(allocInfo->m_Buffer)
{
VkBufferCopy region = {
0,
0,
allocInfo->m_BufferInfo.size };
vkCmdCopyBuffer(g_hTemporaryCommandBuffer,
allocInfo->m_Buffer, allocInfo->m_NewBuffer,
1, &region);
}
}
if(!finalizeImageBarriers.empty() || wantsMemoryBarrier)
{
const uint32_t memoryBarrierCount = wantsMemoryBarrier ? 1 : 0;
vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, finalizeSrcStageMask, finalizeDstStageMask, 0,
memoryBarrierCount, &finalizeMemoryBarrier,
0, nullptr,
(uint32_t)finalizeImageBarriers.size(), finalizeImageBarriers.data());
}
}
static void TestDefragmentationIncrementalBasic()
{
wprintf(L"Test defragmentation incremental basic\n");
g_MemoryAliasingWarningEnabled = false;
std::vector<AllocInfo> allocations;
// Create that many allocations to surely fill 3 new blocks of 256 MB.
const std::array<uint32_t, 3> imageSizes = { 256, 512, 1024 };
const VkDeviceSize bufSizeMin = 5ull * 1024 * 1024;
const VkDeviceSize bufSizeMax = 10ull * 1024 * 1024;
const VkDeviceSize totalSize = 3ull * 256 * 1024 * 1024;
const size_t imageCount = totalSize / ((size_t)imageSizes[0] * imageSizes[0] * 4) / 2;
const size_t bufCount = (size_t)(totalSize / bufSizeMin) / 2;
const size_t percentToLeave = 30;
RandomNumberGenerator rand = { 234522 };
VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
imageInfo.imageType = VK_IMAGE_TYPE_2D;
imageInfo.extent.depth = 1;
imageInfo.mipLevels = 1;
imageInfo.arrayLayers = 1;
imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
imageInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
VmaAllocationCreateInfo allocCreateInfo = {};
allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
allocCreateInfo.flags = 0;
// Create all intended images.
for(size_t i = 0; i < imageCount; ++i)
{
const uint32_t size = imageSizes[rand.Generate() % 3];
imageInfo.extent.width = size;
imageInfo.extent.height = size;
AllocInfo alloc;
alloc.CreateImage(imageInfo, allocCreateInfo, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
alloc.m_StartValue = 0;
allocations.push_back(alloc);
}
// And all buffers
VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
for(size_t i = 0; i < bufCount; ++i)
{
bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
AllocInfo alloc;
alloc.CreateBuffer(bufCreateInfo, allocCreateInfo);
alloc.m_StartValue = 0;
allocations.push_back(alloc);
}
// Destroy some percentage of them.
{
const size_t allocationsToDestroy = round_div<size_t>((imageCount + bufCount) * (100 - percentToLeave), 100);
for(size_t i = 0; i < allocationsToDestroy; ++i)
{
const size_t index = rand.Generate() % allocations.size();
allocations[index].Destroy();
allocations.erase(allocations.begin() + index);
}
}
{
// Set our user data pointers. A real application should probably be more clever here
const size_t allocationCount = allocations.size();
for(size_t i = 0; i < allocationCount; ++i)
{
AllocInfo &alloc = allocations[i];
vmaSetAllocationUserData(g_hAllocator, alloc.m_Allocation, &alloc);
}
}
// Fill them with meaningful data.
UploadGpuData(allocations.data(), allocations.size());
wchar_t fileName[MAX_PATH];
swprintf_s(fileName, L"GPU_defragmentation_incremental_basic_A_before.json");
SaveAllocatorStatsToFile(fileName);
// Defragment using GPU only.
{
const size_t allocCount = allocations.size();
std::vector<VmaAllocation> allocationPtrs;
for(size_t i = 0; i < allocCount; ++i)
{
allocationPtrs.push_back(allocations[i].m_Allocation);
}
const size_t movableAllocCount = allocationPtrs.size();
VmaDefragmentationInfo2 defragInfo = {};
defragInfo.flags = VMA_DEFRAGMENTATION_FLAG_INCREMENTAL;
defragInfo.allocationCount = (uint32_t)movableAllocCount;
defragInfo.pAllocations = allocationPtrs.data();
defragInfo.maxGpuBytesToMove = VK_WHOLE_SIZE;
defragInfo.maxGpuAllocationsToMove = UINT32_MAX;
VmaDefragmentationStats stats = {};
VmaDefragmentationContext ctx = VK_NULL_HANDLE;
VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &stats, &ctx);
TEST(res >= VK_SUCCESS);
res = VK_NOT_READY;
std::vector<VmaDefragmentationPassMoveInfo> moveInfo;
moveInfo.resize(movableAllocCount);
while(res == VK_NOT_READY)
{
VmaDefragmentationPassInfo stepInfo = {};
stepInfo.pMoves = moveInfo.data();
stepInfo.moveCount = (uint32_t)moveInfo.size();
res = vmaBeginDefragmentationPass(g_hAllocator, ctx, &stepInfo);
TEST(res >= VK_SUCCESS);
BeginSingleTimeCommands();
std::vector<void*> newHandles;
ProcessDefragmentationStepInfo(stepInfo);
EndSingleTimeCommands();
res = vmaEndDefragmentationPass(g_hAllocator, ctx);
// Destroy old buffers/images and replace them with new handles.
for(size_t i = 0; i < stepInfo.moveCount; ++i)
{
VmaAllocation const alloc = stepInfo.pMoves[i].allocation;
VmaAllocationInfo vmaAllocInfo;
vmaGetAllocationInfo(g_hAllocator, alloc, &vmaAllocInfo);
AllocInfo* allocInfo = (AllocInfo*)vmaAllocInfo.pUserData;
if(allocInfo->m_Buffer)
{
assert(allocInfo->m_NewBuffer && !allocInfo->m_Image && !allocInfo->m_NewImage);
vkDestroyBuffer(g_hDevice, allocInfo->m_Buffer, g_Allocs);
allocInfo->m_Buffer = allocInfo->m_NewBuffer;
allocInfo->m_NewBuffer = VK_NULL_HANDLE;
}
else if(allocInfo->m_Image)
{
assert(allocInfo->m_NewImage && !allocInfo->m_Buffer && !allocInfo->m_NewBuffer);
vkDestroyImage(g_hDevice, allocInfo->m_Image, g_Allocs);
allocInfo->m_Image = allocInfo->m_NewImage;
allocInfo->m_NewImage = VK_NULL_HANDLE;
}
else
assert(0);
}
}
TEST(res >= VK_SUCCESS);
vmaDefragmentationEnd(g_hAllocator, ctx);
// If corruption detection is enabled, GPU defragmentation may not work on
// memory types that have this detection active, e.g. on Intel.
#if !defined(VMA_DEBUG_DETECT_CORRUPTION) || VMA_DEBUG_DETECT_CORRUPTION == 0
TEST(stats.allocationsMoved > 0 && stats.bytesMoved > 0);
TEST(stats.deviceMemoryBlocksFreed > 0 && stats.bytesFreed > 0);
#endif
}
//ValidateGpuData(allocations.data(), allocations.size());
swprintf_s(fileName, L"GPU_defragmentation_incremental_basic_B_after.json");
SaveAllocatorStatsToFile(fileName);
// Destroy all remaining buffers and images.
for(size_t i = allocations.size(); i--; )
{
allocations[i].Destroy();
}
g_MemoryAliasingWarningEnabled = true;
}
void TestDefragmentationIncrementalComplex()
{
wprintf(L"Test defragmentation incremental complex\n");
g_MemoryAliasingWarningEnabled = false;
std::vector<AllocInfo> allocations;
// Create that many allocations to surely fill 3 new blocks of 256 MB.
const std::array<uint32_t, 3> imageSizes = { 256, 512, 1024 };
const VkDeviceSize bufSizeMin = 5ull * 1024 * 1024;
const VkDeviceSize bufSizeMax = 10ull * 1024 * 1024;
const VkDeviceSize totalSize = 3ull * 256 * 1024 * 1024;
const size_t imageCount = (size_t)(totalSize / (imageSizes[0] * imageSizes[0] * 4)) / 2;
const size_t bufCount = (size_t)(totalSize / bufSizeMin) / 2;
const size_t percentToLeave = 30;
RandomNumberGenerator rand = { 234522 };
VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
imageInfo.imageType = VK_IMAGE_TYPE_2D;
imageInfo.extent.depth = 1;
imageInfo.mipLevels = 1;
imageInfo.arrayLayers = 1;
imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
imageInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
VmaAllocationCreateInfo allocCreateInfo = {};
allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
allocCreateInfo.flags = 0;
// Create all intended images.
for(size_t i = 0; i < imageCount; ++i)
{
const uint32_t size = imageSizes[rand.Generate() % 3];
imageInfo.extent.width = size;
imageInfo.extent.height = size;
AllocInfo alloc;
alloc.CreateImage(imageInfo, allocCreateInfo, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
alloc.m_StartValue = 0;
allocations.push_back(alloc);
}
// And all buffers
VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
for(size_t i = 0; i < bufCount; ++i)
{
bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
AllocInfo alloc;
alloc.CreateBuffer(bufCreateInfo, allocCreateInfo);
alloc.m_StartValue = 0;
allocations.push_back(alloc);
}
// Destroy some percentage of them.
{
const size_t allocationsToDestroy = round_div<size_t>((imageCount + bufCount) * (100 - percentToLeave), 100);
for(size_t i = 0; i < allocationsToDestroy; ++i)
{
const size_t index = rand.Generate() % allocations.size();
allocations[index].Destroy();
allocations.erase(allocations.begin() + index);
}
}
{
// Set our user data pointers. A real application should probably be more clever here
const size_t allocationCount = allocations.size();
for(size_t i = 0; i < allocationCount; ++i)
{
AllocInfo &alloc = allocations[i];
vmaSetAllocationUserData(g_hAllocator, alloc.m_Allocation, &alloc);
}
}
// Fill them with meaningful data.
UploadGpuData(allocations.data(), allocations.size());
wchar_t fileName[MAX_PATH];
swprintf_s(fileName, L"GPU_defragmentation_incremental_complex_A_before.json");
SaveAllocatorStatsToFile(fileName);
std::vector<AllocInfo> additionalAllocations;
#define MakeAdditionalAllocation() \
do { \
{ \
bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16); \
bufCreateInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT; \
\
AllocInfo alloc; \
alloc.CreateBuffer(bufCreateInfo, allocCreateInfo); \
\
additionalAllocations.push_back(alloc); \
} \
} while(0)
// Defragment using GPU only.
{
const size_t allocCount = allocations.size();
std::vector<VmaAllocation> allocationPtrs;
for(size_t i = 0; i < allocCount; ++i)
{
VmaAllocationInfo allocInfo = {};
vmaGetAllocationInfo(g_hAllocator, allocations[i].m_Allocation, &allocInfo);
allocationPtrs.push_back(allocations[i].m_Allocation);
}
const size_t movableAllocCount = allocationPtrs.size();
VmaDefragmentationInfo2 defragInfo = {};
defragInfo.flags = VMA_DEFRAGMENTATION_FLAG_INCREMENTAL;
defragInfo.allocationCount = (uint32_t)movableAllocCount;
defragInfo.pAllocations = allocationPtrs.data();
defragInfo.maxGpuBytesToMove = VK_WHOLE_SIZE;
defragInfo.maxGpuAllocationsToMove = UINT32_MAX;
VmaDefragmentationStats stats = {};
VmaDefragmentationContext ctx = VK_NULL_HANDLE;
VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &stats, &ctx);
TEST(res >= VK_SUCCESS);
res = VK_NOT_READY;
std::vector<VmaDefragmentationPassMoveInfo> moveInfo;
moveInfo.resize(movableAllocCount);
MakeAdditionalAllocation();
while(res == VK_NOT_READY)
{
VmaDefragmentationPassInfo stepInfo = {};
stepInfo.pMoves = moveInfo.data();
stepInfo.moveCount = (uint32_t)moveInfo.size();
res = vmaBeginDefragmentationPass(g_hAllocator, ctx, &stepInfo);
TEST(res >= VK_SUCCESS);
MakeAdditionalAllocation();
BeginSingleTimeCommands();
ProcessDefragmentationStepInfo(stepInfo);
EndSingleTimeCommands();
res = vmaEndDefragmentationPass(g_hAllocator, ctx);
// Destroy old buffers/images and replace them with new handles.
for(size_t i = 0; i < stepInfo.moveCount; ++i)
{
VmaAllocation const alloc = stepInfo.pMoves[i].allocation;
VmaAllocationInfo vmaAllocInfo;
vmaGetAllocationInfo(g_hAllocator, alloc, &vmaAllocInfo);
AllocInfo* allocInfo = (AllocInfo*)vmaAllocInfo.pUserData;
if(allocInfo->m_Buffer)
{
assert(allocInfo->m_NewBuffer && !allocInfo->m_Image && !allocInfo->m_NewImage);
vkDestroyBuffer(g_hDevice, allocInfo->m_Buffer, g_Allocs);
allocInfo->m_Buffer = allocInfo->m_NewBuffer;
allocInfo->m_NewBuffer = VK_NULL_HANDLE;
}
else if(allocInfo->m_Image)
{
assert(allocInfo->m_NewImage && !allocInfo->m_Buffer && !allocInfo->m_NewBuffer);
vkDestroyImage(g_hDevice, allocInfo->m_Image, g_Allocs);
allocInfo->m_Image = allocInfo->m_NewImage;
allocInfo->m_NewImage = VK_NULL_HANDLE;
}
else
assert(0);
}
MakeAdditionalAllocation();
}
TEST(res >= VK_SUCCESS);
vmaDefragmentationEnd(g_hAllocator, ctx);
// If corruption detection is enabled, GPU defragmentation may not work on
// memory types that have this detection active, e.g. on Intel.
#if !defined(VMA_DEBUG_DETECT_CORRUPTION) || VMA_DEBUG_DETECT_CORRUPTION == 0
TEST(stats.allocationsMoved > 0 && stats.bytesMoved > 0);
TEST(stats.deviceMemoryBlocksFreed > 0 && stats.bytesFreed > 0);
#endif
}
//ValidateGpuData(allocations.data(), allocations.size());
swprintf_s(fileName, L"GPU_defragmentation_incremental_complex_B_after.json");
SaveAllocatorStatsToFile(fileName);