//
// Copyright (c) 2017-2020 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
#include "Tests.h"
#include "VmaUsage.h"
#include "Common.h"
#include <atomic>
#include <thread>
#include <mutex>
#include <functional>
#include <algorithm> // std::for_each, std::transform
#include <numeric>   // std::accumulate
#ifdef _WIN32 | |
static const char* CODE_DESCRIPTION = "Foo"; | |
extern VkCommandBuffer g_hTemporaryCommandBuffer; | |
extern const VkAllocationCallbacks* g_Allocs; | |
extern bool g_BufferDeviceAddressEnabled; | |
extern PFN_vkGetBufferDeviceAddressEXT g_vkGetBufferDeviceAddressEXT; | |
void BeginSingleTimeCommands(); | |
void EndSingleTimeCommands(); | |
#ifndef VMA_DEBUG_MARGIN | |
#define VMA_DEBUG_MARGIN 0 | |
#endif | |
enum CONFIG_TYPE { | |
CONFIG_TYPE_MINIMUM, | |
CONFIG_TYPE_SMALL, | |
CONFIG_TYPE_AVERAGE, | |
CONFIG_TYPE_LARGE, | |
CONFIG_TYPE_MAXIMUM, | |
CONFIG_TYPE_COUNT | |
}; | |
static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_SMALL; | |
//static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_LARGE; | |
enum class FREE_ORDER { FORWARD, BACKWARD, RANDOM, COUNT }; | |
static const char* FREE_ORDER_NAMES[] = { | |
"FORWARD", | |
"BACKWARD", | |
"RANDOM", | |
}; | |
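// Added sanity check (not in the original file): keep the name table in sync
// with FREE_ORDER, so adding an enum value without a name fails to compile.
static_assert(_countof(FREE_ORDER_NAMES) == (size_t)FREE_ORDER::COUNT,
    "FREE_ORDER_NAMES must have one entry per FREE_ORDER value.");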
// Copy of internal VmaAlgorithmToStr. | |
static const char* AlgorithmToStr(uint32_t algorithm) | |
{ | |
switch(algorithm) | |
{ | |
case VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT: | |
return "Linear"; | |
case VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT: | |
return "Buddy"; | |
case 0: | |
return "Default"; | |
default: | |
assert(0); | |
return ""; | |
} | |
} | |
struct AllocationSize | |
{ | |
uint32_t Probability; | |
VkDeviceSize BufferSizeMin, BufferSizeMax; | |
uint32_t ImageSizeMin, ImageSizeMax; | |
}; | |
struct Config | |
{ | |
uint32_t RandSeed; | |
VkDeviceSize BeginBytesToAllocate; | |
uint32_t AdditionalOperationCount; | |
VkDeviceSize MaxBytesToAllocate; | |
uint32_t MemUsageProbability[4]; // For VMA_MEMORY_USAGE_* | |
std::vector<AllocationSize> AllocationSizes; | |
uint32_t ThreadCount; | |
uint32_t ThreadsUsingCommonAllocationsProbabilityPercent; | |
FREE_ORDER FreeOrder; | |
VmaAllocationCreateFlags AllocationStrategy; // For VMA_ALLOCATION_CREATE_STRATEGY_* | |
}; | |
struct Result | |
{ | |
duration TotalTime; | |
duration AllocationTimeMin, AllocationTimeAvg, AllocationTimeMax; | |
duration DeallocationTimeMin, DeallocationTimeAvg, DeallocationTimeMax; | |
VkDeviceSize TotalMemoryAllocated; | |
VkDeviceSize FreeRangeSizeAvg, FreeRangeSizeMax; | |
}; | |
void TestDefragmentationSimple(); | |
void TestDefragmentationFull(); | |
struct PoolTestConfig | |
{ | |
uint32_t RandSeed; | |
uint32_t ThreadCount; | |
VkDeviceSize PoolSize; | |
uint32_t FrameCount; | |
uint32_t TotalItemCount; | |
// Range for number of items used in each frame. | |
uint32_t UsedItemCountMin, UsedItemCountMax; | |
// Percent of items to make unused in each frame; some unused items may also become used again.
uint32_t ItemsToMakeUnusedPercent; | |
std::vector<AllocationSize> AllocationSizes; | |
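// Computes a probability-weighted average resource size. Buffers contribute the
// midpoint of their size range; images approximate bytes as avgDimension^2 * 4
// (RGBA8). Worked example with hypothetical entries {p=1, buf 64..192 B} and
// {p=3, img 256..512 px}: (128*1 + 384*384*4*3) / (1 + 3) = 442400 bytes.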
VkDeviceSize CalcAvgResourceSize() const | |
{ | |
uint32_t probabilitySum = 0; | |
VkDeviceSize sizeSum = 0; | |
for(size_t i = 0; i < AllocationSizes.size(); ++i) | |
{ | |
const AllocationSize& allocSize = AllocationSizes[i]; | |
if(allocSize.BufferSizeMax > 0) | |
sizeSum += (allocSize.BufferSizeMin + allocSize.BufferSizeMax) / 2 * allocSize.Probability; | |
else | |
{ | |
const VkDeviceSize avgDimension = (allocSize.ImageSizeMin + allocSize.ImageSizeMax) / 2; | |
sizeSum += avgDimension * avgDimension * 4 * allocSize.Probability; | |
} | |
probabilitySum += allocSize.Probability; | |
} | |
return sizeSum / probabilitySum; | |
} | |
bool UsesBuffers() const | |
{ | |
for(size_t i = 0; i < AllocationSizes.size(); ++i) | |
if(AllocationSizes[i].BufferSizeMax > 0) | |
return true; | |
return false; | |
} | |
bool UsesImages() const | |
{ | |
for(size_t i = 0; i < AllocationSizes.size(); ++i) | |
if(AllocationSizes[i].ImageSizeMax > 0) | |
return true; | |
return false; | |
} | |
}; | |
struct PoolTestResult | |
{ | |
duration TotalTime; | |
duration AllocationTimeMin, AllocationTimeAvg, AllocationTimeMax; | |
duration DeallocationTimeMin, DeallocationTimeAvg, DeallocationTimeMax; | |
size_t LostAllocationCount, LostAllocationTotalSize; | |
size_t FailedAllocationCount, FailedAllocationTotalSize; | |
}; | |
static const uint32_t IMAGE_BYTES_PER_PIXEL = 1; | |
uint32_t g_FrameIndex = 0; | |
struct BufferInfo | |
{ | |
VkBuffer Buffer = VK_NULL_HANDLE; | |
VmaAllocation Allocation = VK_NULL_HANDLE; | |
}; | |
static uint32_t MemoryTypeToHeap(uint32_t memoryTypeIndex) | |
{ | |
const VkPhysicalDeviceMemoryProperties* props; | |
vmaGetMemoryProperties(g_hAllocator, &props); | |
return props->memoryTypes[memoryTypeIndex].heapIndex; | |
} | |
static uint32_t GetAllocationStrategyCount() | |
{ | |
uint32_t strategyCount = 0; | |
switch(ConfigType) | |
{ | |
case CONFIG_TYPE_MINIMUM: strategyCount = 1; break; | |
case CONFIG_TYPE_SMALL: strategyCount = 1; break; | |
case CONFIG_TYPE_AVERAGE: strategyCount = 2; break; | |
case CONFIG_TYPE_LARGE: strategyCount = 2; break; | |
case CONFIG_TYPE_MAXIMUM: strategyCount = 3; break; | |
default: assert(0); | |
} | |
return strategyCount; | |
} | |
static const char* GetAllocationStrategyName(VmaAllocationCreateFlags allocStrategy) | |
{ | |
switch(allocStrategy) | |
{ | |
case VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT: return "BEST_FIT"; break; | |
case VMA_ALLOCATION_CREATE_STRATEGY_WORST_FIT_BIT: return "WORST_FIT"; break; | |
case VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT: return "FIRST_FIT"; break; | |
case 0: return "Default"; break; | |
default: assert(0); return ""; | |
} | |
} | |
static void InitResult(Result& outResult) | |
{ | |
outResult.TotalTime = duration::zero(); | |
outResult.AllocationTimeMin = duration::max(); | |
outResult.AllocationTimeAvg = duration::zero(); | |
outResult.AllocationTimeMax = duration::min(); | |
outResult.DeallocationTimeMin = duration::max(); | |
outResult.DeallocationTimeAvg = duration::zero(); | |
outResult.DeallocationTimeMax = duration::min(); | |
outResult.TotalMemoryAllocated = 0; | |
outResult.FreeRangeSizeAvg = 0; | |
outResult.FreeRangeSizeMax = 0; | |
} | |
class TimeRegisterObj | |
{ | |
public: | |
TimeRegisterObj(duration& min, duration& sum, duration& max) : | |
m_Min(min), | |
m_Sum(sum), | |
m_Max(max), | |
m_TimeBeg(std::chrono::high_resolution_clock::now()) | |
{ | |
} | |
~TimeRegisterObj() | |
{ | |
duration d = std::chrono::high_resolution_clock::now() - m_TimeBeg; | |
m_Sum += d; | |
if(d < m_Min) m_Min = d; | |
if(d > m_Max) m_Max = d; | |
} | |
private: | |
duration& m_Min; | |
duration& m_Sum; | |
duration& m_Max; | |
time_point m_TimeBeg; | |
}; | |
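// Usage sketch (illustrative): TimeRegisterObj is an RAII timer. Construction
// records the start time; the destructor folds the elapsed duration into the
// referenced min/sum/max accumulators:
//
//     {
//         TimeRegisterObj timer{result.AllocationTimeMin, result.AllocationTimeAvg, result.AllocationTimeMax};
//         // ... timed work ...
//     } // Elapsed time is recorded here.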
struct PoolTestThreadResult | |
{ | |
duration AllocationTimeMin, AllocationTimeSum, AllocationTimeMax; | |
duration DeallocationTimeMin, DeallocationTimeSum, DeallocationTimeMax; | |
size_t AllocationCount, DeallocationCount; | |
size_t LostAllocationCount, LostAllocationTotalSize; | |
size_t FailedAllocationCount, FailedAllocationTotalSize; | |
}; | |
class AllocationTimeRegisterObj : public TimeRegisterObj | |
{ | |
public: | |
AllocationTimeRegisterObj(Result& result) : | |
TimeRegisterObj(result.AllocationTimeMin, result.AllocationTimeAvg, result.AllocationTimeMax) | |
{ | |
} | |
}; | |
class DeallocationTimeRegisterObj : public TimeRegisterObj | |
{ | |
public: | |
DeallocationTimeRegisterObj(Result& result) : | |
TimeRegisterObj(result.DeallocationTimeMin, result.DeallocationTimeAvg, result.DeallocationTimeMax) | |
{ | |
} | |
}; | |
class PoolAllocationTimeRegisterObj : public TimeRegisterObj | |
{ | |
public: | |
PoolAllocationTimeRegisterObj(PoolTestThreadResult& result) : | |
TimeRegisterObj(result.AllocationTimeMin, result.AllocationTimeSum, result.AllocationTimeMax) | |
{ | |
} | |
}; | |
class PoolDeallocationTimeRegisterObj : public TimeRegisterObj | |
{ | |
public: | |
PoolDeallocationTimeRegisterObj(PoolTestThreadResult& result) : | |
TimeRegisterObj(result.DeallocationTimeMin, result.DeallocationTimeSum, result.DeallocationTimeMax) | |
{ | |
} | |
}; | |
static void CurrentTimeToStr(std::string& out) | |
{ | |
time_t rawTime; time(&rawTime); | |
struct tm timeInfo; localtime_s(&timeInfo, &rawTime); | |
char timeStr[128]; | |
strftime(timeStr, _countof(timeStr), "%c", &timeInfo); | |
out = timeStr; | |
} | |
VkResult MainTest(Result& outResult, const Config& config) | |
{ | |
assert(config.ThreadCount > 0); | |
InitResult(outResult); | |
RandomNumberGenerator mainRand{config.RandSeed}; | |
time_point timeBeg = std::chrono::high_resolution_clock::now(); | |
std::atomic<size_t> allocationCount = 0; | |
VkResult res = VK_SUCCESS; | |
uint32_t memUsageProbabilitySum = | |
config.MemUsageProbability[0] + config.MemUsageProbability[1] + | |
config.MemUsageProbability[2] + config.MemUsageProbability[3]; | |
assert(memUsageProbabilitySum > 0); | |
uint32_t allocationSizeProbabilitySum = std::accumulate( | |
config.AllocationSizes.begin(), | |
config.AllocationSizes.end(), | |
0u, | |
[](uint32_t sum, const AllocationSize& allocSize) { | |
return sum + allocSize.Probability; | |
}); | |
struct Allocation | |
{ | |
VkBuffer Buffer; | |
VkImage Image; | |
VmaAllocation Alloc; | |
}; | |
std::vector<Allocation> commonAllocations; | |
std::mutex commonAllocationsMutex; | |
auto Allocate = [&]( | |
VkDeviceSize bufferSize, | |
const VkExtent2D imageExtent, | |
RandomNumberGenerator& localRand, | |
VkDeviceSize& totalAllocatedBytes, | |
std::vector<Allocation>& allocations) -> VkResult | |
{ | |
assert((bufferSize == 0) != (imageExtent.width == 0 && imageExtent.height == 0)); | |
uint32_t memUsageIndex = 0; | |
uint32_t memUsageRand = localRand.Generate() % memUsageProbabilitySum; | |
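// Roulette-wheel selection over MemUsageProbability. For example (weights
// hypothetical), with probabilities {2, 1, 1, 0} and memUsageRand in [0, 4):
// 0-1 select index 0, 2 selects index 1, and 3 selects index 2.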
while(memUsageRand >= config.MemUsageProbability[memUsageIndex]) | |
memUsageRand -= config.MemUsageProbability[memUsageIndex++]; | |
VmaAllocationCreateInfo memReq = {}; | |
memReq.usage = (VmaMemoryUsage)(VMA_MEMORY_USAGE_GPU_ONLY + memUsageIndex); | |
memReq.flags |= config.AllocationStrategy; | |
Allocation allocation = {}; | |
VmaAllocationInfo allocationInfo; | |
// Buffer | |
if(bufferSize > 0) | |
{ | |
assert(imageExtent.width == 0); | |
VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; | |
bufferInfo.size = bufferSize; | |
bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; | |
{ | |
AllocationTimeRegisterObj timeRegisterObj{outResult}; | |
res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &memReq, &allocation.Buffer, &allocation.Alloc, &allocationInfo); | |
} | |
} | |
// Image | |
else | |
{ | |
VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; | |
imageInfo.imageType = VK_IMAGE_TYPE_2D; | |
imageInfo.extent.width = imageExtent.width; | |
imageInfo.extent.height = imageExtent.height; | |
imageInfo.extent.depth = 1; | |
imageInfo.mipLevels = 1; | |
imageInfo.arrayLayers = 1; | |
imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM; | |
imageInfo.tiling = memReq.usage == VMA_MEMORY_USAGE_GPU_ONLY ? | |
VK_IMAGE_TILING_OPTIMAL : | |
VK_IMAGE_TILING_LINEAR; | |
imageInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED; | |
switch(memReq.usage) | |
{ | |
case VMA_MEMORY_USAGE_GPU_ONLY: | |
switch(localRand.Generate() % 3) | |
{ | |
case 0: | |
imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; | |
break; | |
case 1: | |
imageInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; | |
break; | |
case 2: | |
imageInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT; | |
break; | |
} | |
break; | |
case VMA_MEMORY_USAGE_CPU_ONLY: | |
case VMA_MEMORY_USAGE_CPU_TO_GPU: | |
imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT; | |
break; | |
case VMA_MEMORY_USAGE_GPU_TO_CPU: | |
imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT; | |
break; | |
} | |
imageInfo.samples = VK_SAMPLE_COUNT_1_BIT; | |
imageInfo.flags = 0; | |
{ | |
AllocationTimeRegisterObj timeRegisterObj{outResult}; | |
res = vmaCreateImage(g_hAllocator, &imageInfo, &memReq, &allocation.Image, &allocation.Alloc, &allocationInfo); | |
} | |
} | |
if(res == VK_SUCCESS) | |
{ | |
++allocationCount; | |
totalAllocatedBytes += allocationInfo.size; | |
bool useCommonAllocations = localRand.Generate() % 100 < config.ThreadsUsingCommonAllocationsProbabilityPercent; | |
if(useCommonAllocations) | |
{ | |
std::unique_lock<std::mutex> lock(commonAllocationsMutex); | |
commonAllocations.push_back(allocation); | |
} | |
else | |
allocations.push_back(allocation); | |
} | |
else | |
{ | |
TEST(0); | |
} | |
return res; | |
}; | |
auto GetNextAllocationSize = [&]( | |
VkDeviceSize& outBufSize, | |
VkExtent2D& outImageSize, | |
RandomNumberGenerator& localRand) | |
{ | |
outBufSize = 0; | |
outImageSize = {0, 0}; | |
uint32_t allocSizeIndex = 0; | |
uint32_t r = localRand.Generate() % allocationSizeProbabilitySum; | |
while(r >= config.AllocationSizes[allocSizeIndex].Probability) | |
r -= config.AllocationSizes[allocSizeIndex++].Probability; | |
const AllocationSize& allocSize = config.AllocationSizes[allocSizeIndex]; | |
if(allocSize.BufferSizeMax > 0) | |
{ | |
assert(allocSize.ImageSizeMax == 0); | |
if(allocSize.BufferSizeMax == allocSize.BufferSizeMin) | |
outBufSize = allocSize.BufferSizeMin; | |
else | |
{ | |
outBufSize = allocSize.BufferSizeMin + localRand.Generate() % (allocSize.BufferSizeMax - allocSize.BufferSizeMin); | |
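// Round the size down to a multiple of 16.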
outBufSize = outBufSize / 16 * 16; | |
} | |
} | |
else | |
{ | |
if(allocSize.ImageSizeMax == allocSize.ImageSizeMin) | |
outImageSize.width = outImageSize.height = allocSize.ImageSizeMax; | |
else | |
{ | |
outImageSize.width = allocSize.ImageSizeMin + localRand.Generate() % (allocSize.ImageSizeMax - allocSize.ImageSizeMin); | |
outImageSize.height = allocSize.ImageSizeMin + localRand.Generate() % (allocSize.ImageSizeMax - allocSize.ImageSizeMin); | |
} | |
} | |
}; | |
std::atomic<uint32_t> numThreadsReachedMaxAllocations = 0; | |
HANDLE threadsFinishEvent = CreateEvent(NULL, TRUE, FALSE, NULL); | |
auto ThreadProc = [&](uint32_t randSeed) -> void | |
{ | |
RandomNumberGenerator threadRand(randSeed); | |
VkDeviceSize threadTotalAllocatedBytes = 0; | |
std::vector<Allocation> threadAllocations; | |
VkDeviceSize threadBeginBytesToAllocate = config.BeginBytesToAllocate / config.ThreadCount; | |
VkDeviceSize threadMaxBytesToAllocate = config.MaxBytesToAllocate / config.ThreadCount; | |
uint32_t threadAdditionalOperationCount = config.AdditionalOperationCount / config.ThreadCount; | |
// BEGIN ALLOCATIONS | |
for(;;) | |
{ | |
VkDeviceSize bufferSize = 0; | |
VkExtent2D imageExtent = {}; | |
GetNextAllocationSize(bufferSize, imageExtent, threadRand); | |
if(threadTotalAllocatedBytes + bufferSize + imageExtent.width * imageExtent.height * IMAGE_BYTES_PER_PIXEL < | |
threadBeginBytesToAllocate) | |
{ | |
if(Allocate(bufferSize, imageExtent, threadRand, threadTotalAllocatedBytes, threadAllocations) != VK_SUCCESS) | |
break; | |
} | |
else | |
break; | |
} | |
// ADDITIONAL ALLOCATIONS AND FREES | |
for(size_t i = 0; i < threadAdditionalOperationCount; ++i) | |
{ | |
VkDeviceSize bufferSize = 0; | |
VkExtent2D imageExtent = {}; | |
GetNextAllocationSize(bufferSize, imageExtent, threadRand); | |
// true = allocate, false = free | |
bool allocate = threadRand.Generate() % 2 != 0; | |
if(allocate) | |
{ | |
if(threadTotalAllocatedBytes + | |
bufferSize + | |
imageExtent.width * imageExtent.height * IMAGE_BYTES_PER_PIXEL < | |
threadMaxBytesToAllocate) | |
{ | |
if(Allocate(bufferSize, imageExtent, threadRand, threadTotalAllocatedBytes, threadAllocations) != VK_SUCCESS) | |
break; | |
} | |
} | |
else | |
{ | |
bool useCommonAllocations = threadRand.Generate() % 100 < config.ThreadsUsingCommonAllocationsProbabilityPercent; | |
if(useCommonAllocations) | |
{ | |
std::unique_lock<std::mutex> lock(commonAllocationsMutex); | |
if(!commonAllocations.empty()) | |
{ | |
size_t indexToFree = threadRand.Generate() % commonAllocations.size(); | |
VmaAllocationInfo allocationInfo; | |
vmaGetAllocationInfo(g_hAllocator, commonAllocations[indexToFree].Alloc, &allocationInfo); | |
if(threadTotalAllocatedBytes >= allocationInfo.size) | |
{ | |
DeallocationTimeRegisterObj timeRegisterObj{outResult}; | |
if(commonAllocations[indexToFree].Buffer != VK_NULL_HANDLE) | |
vmaDestroyBuffer(g_hAllocator, commonAllocations[indexToFree].Buffer, commonAllocations[indexToFree].Alloc); | |
else | |
vmaDestroyImage(g_hAllocator, commonAllocations[indexToFree].Image, commonAllocations[indexToFree].Alloc); | |
threadTotalAllocatedBytes -= allocationInfo.size; | |
commonAllocations.erase(commonAllocations.begin() + indexToFree); | |
} | |
} | |
} | |
else | |
{ | |
if(!threadAllocations.empty()) | |
{ | |
size_t indexToFree = threadRand.Generate() % threadAllocations.size(); | |
VmaAllocationInfo allocationInfo; | |
vmaGetAllocationInfo(g_hAllocator, threadAllocations[indexToFree].Alloc, &allocationInfo); | |
if(threadTotalAllocatedBytes >= allocationInfo.size) | |
{ | |
DeallocationTimeRegisterObj timeRegisterObj{outResult}; | |
if(threadAllocations[indexToFree].Buffer != VK_NULL_HANDLE) | |
vmaDestroyBuffer(g_hAllocator, threadAllocations[indexToFree].Buffer, threadAllocations[indexToFree].Alloc); | |
else | |
vmaDestroyImage(g_hAllocator, threadAllocations[indexToFree].Image, threadAllocations[indexToFree].Alloc); | |
threadTotalAllocatedBytes -= allocationInfo.size; | |
threadAllocations.erase(threadAllocations.begin() + indexToFree); | |
} | |
} | |
} | |
} | |
} | |
++numThreadsReachedMaxAllocations; | |
WaitForSingleObject(threadsFinishEvent, INFINITE); | |
// DEALLOCATION | |
while(!threadAllocations.empty()) | |
{ | |
size_t indexToFree = 0; | |
switch(config.FreeOrder) | |
{ | |
case FREE_ORDER::FORWARD: | |
indexToFree = 0; | |
break; | |
case FREE_ORDER::BACKWARD: | |
indexToFree = threadAllocations.size() - 1; | |
break; | |
case FREE_ORDER::RANDOM: | |
indexToFree = mainRand.Generate() % threadAllocations.size(); | |
break; | |
} | |
{ | |
DeallocationTimeRegisterObj timeRegisterObj{outResult}; | |
if(threadAllocations[indexToFree].Buffer != VK_NULL_HANDLE) | |
vmaDestroyBuffer(g_hAllocator, threadAllocations[indexToFree].Buffer, threadAllocations[indexToFree].Alloc); | |
else | |
vmaDestroyImage(g_hAllocator, threadAllocations[indexToFree].Image, threadAllocations[indexToFree].Alloc); | |
} | |
threadAllocations.erase(threadAllocations.begin() + indexToFree); | |
} | |
}; | |
uint32_t threadRandSeed = mainRand.Generate(); | |
std::vector<std::thread> bkgThreads; | |
for(size_t i = 0; i < config.ThreadCount; ++i) | |
{ | |
bkgThreads.emplace_back(std::bind(ThreadProc, threadRandSeed + (uint32_t)i)); | |
} | |
// Wait until all threads have reached their maximum allocation count
while(numThreadsReachedMaxAllocations < config.ThreadCount) | |
Sleep(0); | |
// CALCULATE MEMORY STATISTICS ON FINAL USAGE | |
VmaStats vmaStats = {}; | |
vmaCalculateStats(g_hAllocator, &vmaStats); | |
outResult.TotalMemoryAllocated = vmaStats.total.usedBytes + vmaStats.total.unusedBytes; | |
outResult.FreeRangeSizeMax = vmaStats.total.unusedRangeSizeMax; | |
outResult.FreeRangeSizeAvg = vmaStats.total.unusedRangeSizeAvg; | |
// Signal threads to deallocate | |
SetEvent(threadsFinishEvent); | |
// Wait for the threads to finish
for(size_t i = 0; i < bkgThreads.size(); ++i) | |
bkgThreads[i].join(); | |
bkgThreads.clear(); | |
CloseHandle(threadsFinishEvent); | |
// Deallocate remaining common resources | |
while(!commonAllocations.empty()) | |
{ | |
size_t indexToFree = 0; | |
switch(config.FreeOrder) | |
{ | |
case FREE_ORDER::FORWARD: | |
indexToFree = 0; | |
break; | |
case FREE_ORDER::BACKWARD: | |
indexToFree = commonAllocations.size() - 1; | |
break; | |
case FREE_ORDER::RANDOM: | |
indexToFree = mainRand.Generate() % commonAllocations.size(); | |
break; | |
} | |
{ | |
DeallocationTimeRegisterObj timeRegisterObj{outResult}; | |
if(commonAllocations[indexToFree].Buffer != VK_NULL_HANDLE) | |
vmaDestroyBuffer(g_hAllocator, commonAllocations[indexToFree].Buffer, commonAllocations[indexToFree].Alloc); | |
else | |
vmaDestroyImage(g_hAllocator, commonAllocations[indexToFree].Image, commonAllocations[indexToFree].Alloc); | |
} | |
commonAllocations.erase(commonAllocations.begin() + indexToFree); | |
} | |
if(allocationCount) | |
{ | |
outResult.AllocationTimeAvg /= allocationCount; | |
outResult.DeallocationTimeAvg /= allocationCount; | |
} | |
outResult.TotalTime = std::chrono::high_resolution_clock::now() - timeBeg; | |
return res; | |
} | |
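// Minimal usage sketch for MainTest, added for illustration - not part of the
// original test suite. All configuration values below are hypothetical.
static void ExampleRunMainTest()
{
    Config config = {};
    config.RandSeed = 123;
    config.BeginBytesToAllocate = 1ull * 1024 * 1024;
    config.MaxBytesToAllocate = 2ull * 1024 * 1024;
    config.AdditionalOperationCount = 256;
    config.MemUsageProbability[0] = 1; // VMA_MEMORY_USAGE_GPU_ONLY only.
    // One size class: buffers of 16 B to 1 KB with probability weight 1.
    config.AllocationSizes.push_back({1, 16, 1024, 0, 0});
    config.ThreadCount = 1;
    config.ThreadsUsingCommonAllocationsProbabilityPercent = 0;
    config.FreeOrder = FREE_ORDER::FORWARD;
    Result result = {};
    VkResult res = MainTest(result, config);
    TEST(res == VK_SUCCESS);
}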
void SaveAllocatorStatsToFile(const wchar_t* filePath) | |
{ | |
wprintf(L"Saving JSON dump to file \"%s\"\n", filePath); | |
char* stats; | |
vmaBuildStatsString(g_hAllocator, &stats, VK_TRUE); | |
SaveFile(filePath, stats, strlen(stats)); | |
vmaFreeStatsString(g_hAllocator, stats); | |
} | |
struct AllocInfo | |
{ | |
VmaAllocation m_Allocation = VK_NULL_HANDLE; | |
VkBuffer m_Buffer = VK_NULL_HANDLE; | |
VkImage m_Image = VK_NULL_HANDLE; | |
VkImageLayout m_ImageLayout = VK_IMAGE_LAYOUT_UNDEFINED; | |
uint32_t m_StartValue = 0; | |
union | |
{ | |
VkBufferCreateInfo m_BufferInfo; | |
VkImageCreateInfo m_ImageInfo; | |
}; | |
// After defragmentation. | |
VkBuffer m_NewBuffer = VK_NULL_HANDLE; | |
VkImage m_NewImage = VK_NULL_HANDLE; | |
void CreateBuffer( | |
const VkBufferCreateInfo& bufCreateInfo, | |
const VmaAllocationCreateInfo& allocCreateInfo); | |
void CreateImage( | |
const VkImageCreateInfo& imageCreateInfo, | |
const VmaAllocationCreateInfo& allocCreateInfo, | |
VkImageLayout layout); | |
void Destroy(); | |
}; | |
void AllocInfo::CreateBuffer( | |
const VkBufferCreateInfo& bufCreateInfo, | |
const VmaAllocationCreateInfo& allocCreateInfo) | |
{ | |
m_BufferInfo = bufCreateInfo; | |
VkResult res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &m_Buffer, &m_Allocation, nullptr); | |
TEST(res == VK_SUCCESS); | |
} | |
void AllocInfo::CreateImage( | |
const VkImageCreateInfo& imageCreateInfo, | |
const VmaAllocationCreateInfo& allocCreateInfo, | |
VkImageLayout layout) | |
{ | |
m_ImageInfo = imageCreateInfo; | |
m_ImageLayout = layout; | |
VkResult res = vmaCreateImage(g_hAllocator, &imageCreateInfo, &allocCreateInfo, &m_Image, &m_Allocation, nullptr); | |
TEST(res == VK_SUCCESS); | |
} | |
void AllocInfo::Destroy() | |
{ | |
if(m_Image) | |
{ | |
assert(!m_Buffer); | |
vkDestroyImage(g_hDevice, m_Image, g_Allocs); | |
m_Image = VK_NULL_HANDLE; | |
} | |
if(m_Buffer) | |
{ | |
assert(!m_Image); | |
vkDestroyBuffer(g_hDevice, m_Buffer, g_Allocs); | |
m_Buffer = VK_NULL_HANDLE; | |
} | |
if(m_Allocation) | |
{ | |
vmaFreeMemory(g_hAllocator, m_Allocation); | |
m_Allocation = VK_NULL_HANDLE; | |
} | |
} | |
class StagingBufferCollection | |
{ | |
public: | |
StagingBufferCollection() { } | |
~StagingBufferCollection(); | |
// Returns false if maximum total size of buffers would be exceeded. | |
bool AcquireBuffer(VkDeviceSize size, VkBuffer& outBuffer, void*& outMappedPtr); | |
void ReleaseAllBuffers(); | |
private: | |
static const VkDeviceSize MAX_TOTAL_SIZE = 256ull * 1024 * 1024; | |
struct BufInfo | |
{ | |
VmaAllocation Allocation = VK_NULL_HANDLE; | |
VkBuffer Buffer = VK_NULL_HANDLE; | |
VkDeviceSize Size = VK_WHOLE_SIZE; | |
void* MappedPtr = nullptr; | |
bool Used = false; | |
}; | |
std::vector<BufInfo> m_Bufs; | |
// Including both used and unused. | |
VkDeviceSize m_TotalSize = 0; | |
}; | |
StagingBufferCollection::~StagingBufferCollection() | |
{ | |
for(size_t i = m_Bufs.size(); i--; ) | |
{ | |
vmaDestroyBuffer(g_hAllocator, m_Bufs[i].Buffer, m_Bufs[i].Allocation); | |
} | |
} | |
bool StagingBufferCollection::AcquireBuffer(VkDeviceSize size, VkBuffer& outBuffer, void*& outMappedPtr) | |
{ | |
assert(size <= MAX_TOTAL_SIZE); | |
// Try to find an existing unused buffer with the smallest sufficient size.
size_t bestIndex = SIZE_MAX; | |
for(size_t i = 0, count = m_Bufs.size(); i < count; ++i) | |
{ | |
BufInfo& currBufInfo = m_Bufs[i]; | |
if(!currBufInfo.Used && currBufInfo.Size >= size && | |
(bestIndex == SIZE_MAX || currBufInfo.Size < m_Bufs[bestIndex].Size)) | |
{ | |
bestIndex = i; | |
} | |
} | |
if(bestIndex != SIZE_MAX) | |
{ | |
m_Bufs[bestIndex].Used = true; | |
outBuffer = m_Bufs[bestIndex].Buffer; | |
outMappedPtr = m_Bufs[bestIndex].MappedPtr; | |
return true; | |
} | |
// Allocate new buffer with requested size. | |
if(m_TotalSize + size <= MAX_TOTAL_SIZE) | |
{ | |
BufInfo bufInfo; | |
bufInfo.Size = size; | |
bufInfo.Used = true; | |
VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; | |
bufCreateInfo.size = size; | |
bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; | |
VmaAllocationCreateInfo allocCreateInfo = {}; | |
allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY; | |
allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; | |
VmaAllocationInfo allocInfo; | |
VkResult res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &bufInfo.Buffer, &bufInfo.Allocation, &allocInfo); | |
bufInfo.MappedPtr = allocInfo.pMappedData; | |
TEST(res == VK_SUCCESS && bufInfo.MappedPtr); | |
outBuffer = bufInfo.Buffer; | |
outMappedPtr = bufInfo.MappedPtr; | |
m_Bufs.push_back(std::move(bufInfo)); | |
m_TotalSize += size; | |
return true; | |
} | |
// If there are unused but smaller buffers, free them and try again.
bool hasUnused = false; | |
for(size_t i = 0, count = m_Bufs.size(); i < count; ++i) | |
{ | |
if(!m_Bufs[i].Used) | |
{ | |
hasUnused = true; | |
break; | |
} | |
} | |
if(hasUnused) | |
{ | |
for(size_t i = m_Bufs.size(); i--; ) | |
{ | |
if(!m_Bufs[i].Used) | |
{ | |
m_TotalSize -= m_Bufs[i].Size; | |
vmaDestroyBuffer(g_hAllocator, m_Bufs[i].Buffer, m_Bufs[i].Allocation); | |
m_Bufs.erase(m_Bufs.begin() + i); | |
} | |
} | |
return AcquireBuffer(size, outBuffer, outMappedPtr); | |
} | |
return false; | |
} | |
void StagingBufferCollection::ReleaseAllBuffers() | |
{ | |
for(size_t i = 0, count = m_Bufs.size(); i < count; ++i) | |
{ | |
m_Bufs[i].Used = false; | |
} | |
} | |
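// Usage sketch, illustrative only: acquire a mapped staging buffer, fill it,
// record copy commands, then release every buffer once the commands complete.
//
//     VkBuffer stagingBuf = VK_NULL_HANDLE;
//     void* mapped = nullptr;
//     if(stagingBufs.AcquireBuffer(dataSize, stagingBuf, mapped))
//     {
//         memcpy(mapped, srcData, dataSize);
//         // ... record vkCmdCopyBuffer from stagingBuf, submit, wait ...
//     }
//     stagingBufs.ReleaseAllBuffers();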
static void UploadGpuData(const AllocInfo* allocInfo, size_t allocInfoCount) | |
{ | |
StagingBufferCollection stagingBufs; | |
bool cmdBufferStarted = false; | |
for(size_t allocInfoIndex = 0; allocInfoIndex < allocInfoCount; ++allocInfoIndex) | |
{ | |
const AllocInfo& currAllocInfo = allocInfo[allocInfoIndex]; | |
if(currAllocInfo.m_Buffer) | |
{ | |
const VkDeviceSize size = currAllocInfo.m_BufferInfo.size; | |
VkBuffer stagingBuf = VK_NULL_HANDLE; | |
void* stagingBufMappedPtr = nullptr; | |
if(!stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr)) | |
{ | |
TEST(cmdBufferStarted); | |
EndSingleTimeCommands(); | |
stagingBufs.ReleaseAllBuffers(); | |
cmdBufferStarted = false; | |
bool ok = stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr); | |
TEST(ok); | |
} | |
// Fill staging buffer. | |
{ | |
assert(size % sizeof(uint32_t) == 0); | |
uint32_t* stagingValPtr = (uint32_t*)stagingBufMappedPtr; | |
uint32_t val = currAllocInfo.m_StartValue; | |
for(size_t i = 0; i < size / sizeof(uint32_t); ++i) | |
{ | |
*stagingValPtr = val; | |
++stagingValPtr; | |
++val; | |
} | |
} | |
// Issue copy command from staging buffer to destination buffer. | |
if(!cmdBufferStarted) | |
{ | |
cmdBufferStarted = true; | |
BeginSingleTimeCommands(); | |
} | |
VkBufferCopy copy = {}; | |
copy.srcOffset = 0; | |
copy.dstOffset = 0; | |
copy.size = size; | |
vkCmdCopyBuffer(g_hTemporaryCommandBuffer, stagingBuf, currAllocInfo.m_Buffer, 1, &copy);
} | |
else | |
{ | |
TEST(currAllocInfo.m_ImageInfo.format == VK_FORMAT_R8G8B8A8_UNORM && "Only RGBA8 images are currently supported."); | |
TEST(currAllocInfo.m_ImageInfo.mipLevels == 1 && "Only single mip images are currently supported."); | |
const VkDeviceSize size = (VkDeviceSize)currAllocInfo.m_ImageInfo.extent.width * currAllocInfo.m_ImageInfo.extent.height * sizeof(uint32_t); | |
VkBuffer stagingBuf = VK_NULL_HANDLE; | |
void* stagingBufMappedPtr = nullptr; | |
if(!stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr)) | |
{ | |
TEST(cmdBufferStarted); | |
EndSingleTimeCommands(); | |
stagingBufs.ReleaseAllBuffers(); | |
cmdBufferStarted = false; | |
bool ok = stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr); | |
TEST(ok); | |
} | |
// Fill staging buffer. | |
{ | |
assert(size % sizeof(uint32_t) == 0); | |
uint32_t *stagingValPtr = (uint32_t *)stagingBufMappedPtr; | |
uint32_t val = currAllocInfo.m_StartValue; | |
for(size_t i = 0; i < size / sizeof(uint32_t); ++i) | |
{ | |
*stagingValPtr = val; | |
++stagingValPtr; | |
++val; | |
} | |
} | |
// Issue copy command from staging buffer to destination buffer. | |
if(!cmdBufferStarted) | |
{ | |
cmdBufferStarted = true; | |
BeginSingleTimeCommands(); | |
} | |
// Transition to TRANSFER_DST layout.
VkImageSubresourceRange subresourceRange = {
VK_IMAGE_ASPECT_COLOR_BIT,
0, VK_REMAINING_MIP_LEVELS,
0, VK_REMAINING_ARRAY_LAYERS
};
VkImageMemoryBarrier barrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER };
barrier.srcAccessMask = 0;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; // The copy below writes to the image.
barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.image = currAllocInfo.m_Image;
barrier.subresourceRange = subresourceRange;
vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
0, nullptr,
0, nullptr,
1, &barrier);
// Copy image data.
VkBufferImageCopy copy = {};
copy.bufferOffset = 0;
copy.bufferRowLength = 0;
copy.bufferImageHeight = 0;
copy.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
copy.imageSubresource.layerCount = 1;
copy.imageExtent = currAllocInfo.m_ImageInfo.extent;
vkCmdCopyBufferToImage(g_hTemporaryCommandBuffer, stagingBuf, currAllocInfo.m_Image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy);
// Transition to the desired final layout.
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
barrier.newLayout = currAllocInfo.m_ImageLayout;
vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0,
0, nullptr,
0, nullptr,
1, &barrier);
} | |
} | |
if(cmdBufferStarted) | |
{ | |
EndSingleTimeCommands(); | |
stagingBufs.ReleaseAllBuffers(); | |
} | |
} | |
static void ValidateGpuData(const AllocInfo* allocInfo, size_t allocInfoCount) | |
{ | |
StagingBufferCollection stagingBufs; | |
bool cmdBufferStarted = false; | |
size_t validateAllocIndexOffset = 0; | |
std::vector<void*> validateStagingBuffers; | |
for(size_t allocInfoIndex = 0; allocInfoIndex < allocInfoCount; ++allocInfoIndex) | |
{ | |
const AllocInfo& currAllocInfo = allocInfo[allocInfoIndex]; | |
if(currAllocInfo.m_Buffer) | |
{ | |
const VkDeviceSize size = currAllocInfo.m_BufferInfo.size; | |
VkBuffer stagingBuf = VK_NULL_HANDLE; | |
void* stagingBufMappedPtr = nullptr; | |
if(!stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr)) | |
{ | |
TEST(cmdBufferStarted); | |
EndSingleTimeCommands(); | |
cmdBufferStarted = false; | |
for(size_t validateIndex = 0; | |
validateIndex < validateStagingBuffers.size(); | |
++validateIndex) | |
{ | |
const size_t validateAllocIndex = validateIndex + validateAllocIndexOffset; | |
const VkDeviceSize validateSize = allocInfo[validateAllocIndex].m_BufferInfo.size; | |
TEST(validateSize % sizeof(uint32_t) == 0); | |
const uint32_t* stagingValPtr = (const uint32_t*)validateStagingBuffers[validateIndex]; | |
uint32_t val = allocInfo[validateAllocIndex].m_StartValue; | |
bool valid = true; | |
for(size_t i = 0; i < validateSize / sizeof(uint32_t); ++i) | |
{ | |
if(*stagingValPtr != val) | |
{ | |
valid = false; | |
break; | |
} | |
++stagingValPtr; | |
++val; | |
} | |
TEST(valid); | |
} | |
stagingBufs.ReleaseAllBuffers(); | |
validateAllocIndexOffset = allocInfoIndex; | |
validateStagingBuffers.clear(); | |
bool ok = stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr); | |
TEST(ok); | |
} | |
// Issue copy command from destination buffer to staging buffer.
if(!cmdBufferStarted) | |
{ | |
cmdBufferStarted = true; | |
BeginSingleTimeCommands(); | |
} | |
VkBufferCopy copy = {}; | |
copy.srcOffset = 0; | |
copy.dstOffset = 0; | |
copy.size = size; | |
vkCmdCopyBuffer(g_hTemporaryCommandBuffer, currAllocInfo.m_Buffer, stagingBuf, 1, &copy);
// Save mapped pointer for later validation.
validateStagingBuffers.push_back(stagingBufMappedPtr); | |
} | |
else | |
{ | |
TEST(0 && "Images not currently supported."); | |
} | |
} | |
if(cmdBufferStarted) | |
{ | |
EndSingleTimeCommands(); | |
for(size_t validateIndex = 0; | |
validateIndex < validateStagingBuffers.size(); | |
++validateIndex) | |
{ | |
const size_t validateAllocIndex = validateIndex + validateAllocIndexOffset; | |
const VkDeviceSize validateSize = allocInfo[validateAllocIndex].m_BufferInfo.size; | |
TEST(validateSize % sizeof(uint32_t) == 0); | |
const uint32_t* stagingValPtr = (const uint32_t*)validateStagingBuffers[validateIndex]; | |
uint32_t val = allocInfo[validateAllocIndex].m_StartValue; | |
bool valid = true; | |
for(size_t i = 0; i < validateSize / sizeof(uint32_t); ++i) | |
{ | |
if(*stagingValPtr != val) | |
{ | |
valid = false; | |
break; | |
} | |
++stagingValPtr; | |
++val; | |
} | |
TEST(valid); | |
} | |
stagingBufs.ReleaseAllBuffers(); | |
} | |
} | |
static void GetMemReq(VmaAllocationCreateInfo& outMemReq) | |
{ | |
outMemReq = {}; | |
outMemReq.usage = VMA_MEMORY_USAGE_CPU_TO_GPU; | |
//outMemReq.flags = VMA_ALLOCATION_CREATE_PERSISTENT_MAP_BIT; | |
} | |
static void CreateBuffer( | |
VmaPool pool, | |
const VkBufferCreateInfo& bufCreateInfo, | |
bool persistentlyMapped, | |
AllocInfo& outAllocInfo) | |
{ | |
outAllocInfo = {}; | |
outAllocInfo.m_BufferInfo = bufCreateInfo; | |
VmaAllocationCreateInfo allocCreateInfo = {}; | |
allocCreateInfo.pool = pool; | |
if(persistentlyMapped) | |
allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; | |
VmaAllocationInfo vmaAllocInfo = {}; | |
ERR_GUARD_VULKAN( vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &outAllocInfo.m_Buffer, &outAllocInfo.m_Allocation, &vmaAllocInfo) ); | |
// Set up m_StartValue and fill the buffer with the pattern.
{ | |
outAllocInfo.m_StartValue = (uint32_t)rand(); | |
uint32_t* data = (uint32_t*)vmaAllocInfo.pMappedData; | |
TEST((data != nullptr) == persistentlyMapped); | |
if(!persistentlyMapped) | |
{ | |
ERR_GUARD_VULKAN( vmaMapMemory(g_hAllocator, outAllocInfo.m_Allocation, (void**)&data) ); | |
} | |
uint32_t value = outAllocInfo.m_StartValue; | |
TEST(bufCreateInfo.size % 4 == 0); | |
for(size_t i = 0; i < bufCreateInfo.size / sizeof(uint32_t); ++i) | |
data[i] = value++; | |
if(!persistentlyMapped) | |
vmaUnmapMemory(g_hAllocator, outAllocInfo.m_Allocation); | |
} | |
} | |
static void CreateAllocation(AllocInfo& outAllocation) | |
{ | |
outAllocation.m_Allocation = nullptr; | |
outAllocation.m_Buffer = nullptr; | |
outAllocation.m_Image = nullptr; | |
outAllocation.m_StartValue = (uint32_t)rand(); | |
VmaAllocationCreateInfo vmaMemReq; | |
GetMemReq(vmaMemReq); | |
VmaAllocationInfo allocInfo; | |
const bool isBuffer = true;//(rand() & 0x1) != 0; | |
const bool isLarge = (rand() % 16) == 0; | |
if(isBuffer) | |
{ | |
const uint32_t bufferSize = isLarge ? | |
(rand() % 10 + 1) * (1024 * 1024) : // 1 MB ... 10 MB | |
(rand() % 1024 + 1) * 1024; // 1 KB ... 1 MB | |
VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; | |
bufferInfo.size = bufferSize; | |
bufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; | |
VkResult res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &vmaMemReq, &outAllocation.m_Buffer, &outAllocation.m_Allocation, &allocInfo); | |
outAllocation.m_BufferInfo = bufferInfo; | |
TEST(res == VK_SUCCESS); | |
} | |
else | |
{ | |
const uint32_t imageSizeX = isLarge ? | |
1024 + rand() % (4096 - 1024) : // 1024 ... 4096 | |
rand() % 1024 + 1; // 1 ... 1024 | |
const uint32_t imageSizeY = isLarge ? | |
1024 + rand() % (4096 - 1024) : // 1024 ... 4096 | |
rand() % 1024 + 1; // 1 ... 1024 | |
VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; | |
imageInfo.imageType = VK_IMAGE_TYPE_2D; | |
imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM; | |
imageInfo.extent.width = imageSizeX; | |
imageInfo.extent.height = imageSizeY; | |
imageInfo.extent.depth = 1; | |
imageInfo.mipLevels = 1; | |
imageInfo.arrayLayers = 1; | |
imageInfo.samples = VK_SAMPLE_COUNT_1_BIT; | |
imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL; | |
imageInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED; | |
imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT; | |
VkResult res = vmaCreateImage(g_hAllocator, &imageInfo, &vmaMemReq, &outAllocation.m_Image, &outAllocation.m_Allocation, &allocInfo); | |
outAllocation.m_ImageInfo = imageInfo; | |
TEST(res == VK_SUCCESS); | |
} | |
uint32_t* data = (uint32_t*)allocInfo.pMappedData; | |
if(allocInfo.pMappedData == nullptr) | |
{ | |
VkResult res = vmaMapMemory(g_hAllocator, outAllocation.m_Allocation, (void**)&data); | |
TEST(res == VK_SUCCESS); | |
} | |
uint32_t value = outAllocation.m_StartValue; | |
TEST(allocInfo.size % 4 == 0); | |
for(size_t i = 0; i < allocInfo.size / sizeof(uint32_t); ++i) | |
data[i] = value++; | |
if(allocInfo.pMappedData == nullptr) | |
vmaUnmapMemory(g_hAllocator, outAllocation.m_Allocation); | |
} | |
static void DestroyAllocation(const AllocInfo& allocation) | |
{ | |
if(allocation.m_Buffer) | |
vmaDestroyBuffer(g_hAllocator, allocation.m_Buffer, allocation.m_Allocation); | |
else | |
vmaDestroyImage(g_hAllocator, allocation.m_Image, allocation.m_Allocation); | |
} | |
static void DestroyAllAllocations(std::vector<AllocInfo>& allocations) | |
{ | |
for(size_t i = allocations.size(); i--; ) | |
DestroyAllocation(allocations[i]); | |
allocations.clear(); | |
} | |
static void ValidateAllocationData(const AllocInfo& allocation) | |
{ | |
VmaAllocationInfo allocInfo; | |
vmaGetAllocationInfo(g_hAllocator, allocation.m_Allocation, &allocInfo); | |
uint32_t* data = (uint32_t*)allocInfo.pMappedData; | |
if(allocInfo.pMappedData == nullptr) | |
{ | |
VkResult res = vmaMapMemory(g_hAllocator, allocation.m_Allocation, (void**)&data); | |
TEST(res == VK_SUCCESS); | |
} | |
uint32_t value = allocation.m_StartValue; | |
bool ok = true; | |
size_t i; | |
TEST(allocInfo.size % 4 == 0); | |
for(i = 0; i < allocInfo.size / sizeof(uint32_t); ++i) | |
{ | |
if(data[i] != value++) | |
{ | |
ok = false; | |
break; | |
} | |
} | |
TEST(ok); | |
if(allocInfo.pMappedData == nullptr) | |
vmaUnmapMemory(g_hAllocator, allocation.m_Allocation); | |
} | |
static void RecreateAllocationResource(AllocInfo& allocation) | |
{ | |
VmaAllocationInfo allocInfo; | |
vmaGetAllocationInfo(g_hAllocator, allocation.m_Allocation, &allocInfo); | |
if(allocation.m_Buffer) | |
{ | |
vkDestroyBuffer(g_hDevice, allocation.m_Buffer, g_Allocs); | |
VkResult res = vkCreateBuffer(g_hDevice, &allocation.m_BufferInfo, g_Allocs, &allocation.m_Buffer); | |
TEST(res == VK_SUCCESS); | |
// Just to silence validation layer warnings. | |
VkMemoryRequirements vkMemReq; | |
vkGetBufferMemoryRequirements(g_hDevice, allocation.m_Buffer, &vkMemReq); | |
TEST(vkMemReq.size >= allocation.m_BufferInfo.size); | |
res = vmaBindBufferMemory(g_hAllocator, allocation.m_Allocation, allocation.m_Buffer); | |
TEST(res == VK_SUCCESS); | |
} | |
else | |
{ | |
vkDestroyImage(g_hDevice, allocation.m_Image, g_Allocs); | |
VkResult res = vkCreateImage(g_hDevice, &allocation.m_ImageInfo, g_Allocs, &allocation.m_Image); | |
TEST(res == VK_SUCCESS); | |
// Just to silence validation layer warnings. | |
VkMemoryRequirements vkMemReq; | |
vkGetImageMemoryRequirements(g_hDevice, allocation.m_Image, &vkMemReq); | |
res = vmaBindImageMemory(g_hAllocator, allocation.m_Allocation, allocation.m_Image); | |
TEST(res == VK_SUCCESS); | |
} | |
} | |
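// Note (added): after defragmentation moves an allocation, any VkBuffer/VkImage
// bound to it still points at the old memory, so the resource is recreated and
// re-bound via vmaBindBufferMemory/vmaBindImageMemory at the new location.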
static void Defragment(AllocInfo* allocs, size_t allocCount, | |
const VmaDefragmentationInfo* defragmentationInfo = nullptr, | |
VmaDefragmentationStats* defragmentationStats = nullptr) | |
{ | |
std::vector<VmaAllocation> vmaAllocs(allocCount); | |
for(size_t i = 0; i < allocCount; ++i) | |
vmaAllocs[i] = allocs[i].m_Allocation; | |
std::vector<VkBool32> allocChanged(allocCount); | |
ERR_GUARD_VULKAN( vmaDefragment(g_hAllocator, vmaAllocs.data(), allocCount, allocChanged.data(), | |
defragmentationInfo, defragmentationStats) ); | |
for(size_t i = 0; i < allocCount; ++i) | |
{ | |
if(allocChanged[i]) | |
{ | |
RecreateAllocationResource(allocs[i]); | |
} | |
} | |
} | |
static void ValidateAllocationsData(const AllocInfo* allocs, size_t allocCount) | |
{ | |
std::for_each(allocs, allocs + allocCount, [](const AllocInfo& allocInfo) { | |
ValidateAllocationData(allocInfo); | |
}); | |
} | |
void TestDefragmentationSimple() | |
{ | |
wprintf(L"Test defragmentation simple\n"); | |
RandomNumberGenerator rand(667); | |
const VkDeviceSize BUF_SIZE = 0x10000; | |
const VkDeviceSize BLOCK_SIZE = BUF_SIZE * 8; | |
const VkDeviceSize MIN_BUF_SIZE = 32; | |
const VkDeviceSize MAX_BUF_SIZE = BUF_SIZE * 4; | |
auto RandomBufSize = [&]() -> VkDeviceSize { | |
return align_up<VkDeviceSize>(rand.Generate() % (MAX_BUF_SIZE - MIN_BUF_SIZE + 1) + MIN_BUF_SIZE, 32); | |
}; | |
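// E.g. align_up<VkDeviceSize>(37, 32) == 64; every generated size is a nonzero
// multiple of 32 in [MIN_BUF_SIZE, MAX_BUF_SIZE].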
VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; | |
bufCreateInfo.size = BUF_SIZE; | |
bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; | |
VmaAllocationCreateInfo exampleAllocCreateInfo = {}; | |
exampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY; | |
uint32_t memTypeIndex = UINT32_MAX; | |
vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &bufCreateInfo, &exampleAllocCreateInfo, &memTypeIndex); | |
VmaPoolCreateInfo poolCreateInfo = {}; | |
poolCreateInfo.blockSize = BLOCK_SIZE; | |
poolCreateInfo.memoryTypeIndex = memTypeIndex; | |
VmaPool pool; | |
ERR_GUARD_VULKAN( vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool) ); | |
// Defragmentation of empty pool. | |
{ | |
VmaDefragmentationInfo2 defragInfo = {}; | |
defragInfo.maxCpuBytesToMove = VK_WHOLE_SIZE; | |
defragInfo.maxCpuAllocationsToMove = UINT32_MAX; | |
defragInfo.poolCount = 1; | |
defragInfo.pPools = &pool; | |
VmaDefragmentationStats defragStats = {}; | |
VmaDefragmentationContext defragCtx = nullptr; | |
VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &defragStats, &defragCtx); | |
TEST(res >= VK_SUCCESS); | |
vmaDefragmentationEnd(g_hAllocator, defragCtx); | |
TEST(defragStats.allocationsMoved == 0 && defragStats.bytesFreed == 0 && | |
defragStats.bytesMoved == 0 && defragStats.deviceMemoryBlocksFreed == 0); | |
} | |
std::vector<AllocInfo> allocations; | |
// persistentlyMappedOption = 0 - not persistently mapped. | |
// persistentlyMappedOption = 1 - persistently mapped. | |
for(uint32_t persistentlyMappedOption = 0; persistentlyMappedOption < 2; ++persistentlyMappedOption) | |
{ | |
wprintf(L" Persistently mapped option = %u\n", persistentlyMappedOption); | |
const bool persistentlyMapped = persistentlyMappedOption != 0; | |
// # Test 1 | |
// Buffers of fixed size. | |
// Fill 2 blocks. Remove odd buffers. Defragment everything. | |
// Expected result: at least 1 block freed. | |
{ | |
for(size_t i = 0; i < BLOCK_SIZE / BUF_SIZE * 2; ++i) | |
{ | |
AllocInfo allocInfo; | |
CreateBuffer(pool, bufCreateInfo, persistentlyMapped, allocInfo); | |
allocations.push_back(allocInfo); | |
} | |
for(size_t i = 1; i < allocations.size(); ++i) | |
{ | |
DestroyAllocation(allocations[i]); | |
allocations.erase(allocations.begin() + i); | |
} | |
VmaDefragmentationStats defragStats; | |
Defragment(allocations.data(), allocations.size(), nullptr, &defragStats); | |
TEST(defragStats.allocationsMoved > 0 && defragStats.bytesMoved > 0); | |
TEST(defragStats.deviceMemoryBlocksFreed >= 1); | |
ValidateAllocationsData(allocations.data(), allocations.size()); | |
DestroyAllAllocations(allocations); | |
} | |
// # Test 2
// Buffers of fixed size.
// Fill 2 blocks. Remove odd buffers. Defragment one buffer at a time.
// Expected result: Each of 4 iterations makes some progress.
{ | |
for(size_t i = 0; i < BLOCK_SIZE / BUF_SIZE * 2; ++i) | |
{ | |
AllocInfo allocInfo; | |
CreateBuffer(pool, bufCreateInfo, persistentlyMapped, allocInfo); | |
allocations.push_back(allocInfo); | |
} | |
for(size_t i = 1; i < allocations.size(); ++i) | |
{ | |
DestroyAllocation(allocations[i]); | |
allocations.erase(allocations.begin() + i); | |
} | |
VmaDefragmentationInfo defragInfo = {}; | |
defragInfo.maxAllocationsToMove = 1; | |
defragInfo.maxBytesToMove = BUF_SIZE; | |
for(size_t i = 0; i < BLOCK_SIZE / BUF_SIZE / 2; ++i) | |
{ | |
VmaDefragmentationStats defragStats; | |
Defragment(allocations.data(), allocations.size(), &defragInfo, &defragStats); | |
TEST(defragStats.allocationsMoved > 0 && defragStats.bytesMoved > 0); | |
} | |
ValidateAllocationsData(allocations.data(), allocations.size()); | |
DestroyAllAllocations(allocations); | |
} | |
// # Test 3 | |
// Buffers of variable size. | |
// Create a number of buffers. Remove some percent of them. | |
// Defragment while having some percent of them unmovable. | |
// Expected result: Just simple validation. | |
{ | |
for(size_t i = 0; i < 100; ++i) | |
{ | |
VkBufferCreateInfo localBufCreateInfo = bufCreateInfo; | |
localBufCreateInfo.size = RandomBufSize(); | |
AllocInfo allocInfo; | |
CreateBuffer(pool, localBufCreateInfo, persistentlyMapped, allocInfo);
allocations.push_back(allocInfo); | |
} | |
const uint32_t percentToDelete = 60; | |
const size_t numberToDelete = allocations.size() * percentToDelete / 100; | |
for(size_t i = 0; i < numberToDelete; ++i) | |
{ | |
size_t indexToDelete = rand.Generate() % (uint32_t)allocations.size(); | |
DestroyAllocation(allocations[indexToDelete]); | |
allocations.erase(allocations.begin() + indexToDelete); | |
} | |
// Non-movable allocations will be at the beginning of allocations array. | |
const uint32_t percentNonMovable = 20; | |
const size_t numberNonMovable = allocations.size() * percentNonMovable / 100; | |
for(size_t i = 0; i < numberNonMovable; ++i) | |
{ | |
size_t indexNonMovable = i + rand.Generate() % (uint32_t)(allocations.size() - i); | |
if(indexNonMovable != i) | |
std::swap(allocations[i], allocations[indexNonMovable]); | |
} | |
VmaDefragmentationStats defragStats; | |
Defragment( | |
allocations.data() + numberNonMovable, | |
allocations.size() - numberNonMovable, | |
nullptr, &defragStats); | |
ValidateAllocationsData(allocations.data(), allocations.size()); | |
DestroyAllAllocations(allocations); | |
} | |
} | |
/*
Allocation that must be moved to an overlapping place using memmove().
Create 2 buffers, the second slightly bigger than the first. Delete the first. Then defragment.
*/
if(VMA_DEBUG_MARGIN == 0) // FAST algorithm works only when VMA_DEBUG_MARGIN is disabled.
{ | |
AllocInfo allocInfo[2]; | |
bufCreateInfo.size = BUF_SIZE; | |
CreateBuffer(pool, bufCreateInfo, false, allocInfo[0]); | |
const VkDeviceSize biggerBufSize = BUF_SIZE + BUF_SIZE / 256; | |
bufCreateInfo.size = biggerBufSize; | |
CreateBuffer(pool, bufCreateInfo, false, allocInfo[1]); | |
DestroyAllocation(allocInfo[0]); | |
VmaDefragmentationStats defragStats; | |
Defragment(&allocInfo[1], 1, nullptr, &defragStats); | |
// If this fails, it means we couldn't do memmove with overlapping regions. | |
TEST(defragStats.allocationsMoved == 1 && defragStats.bytesMoved > 0); | |
ValidateAllocationsData(&allocInfo[1], 1); | |
DestroyAllocation(allocInfo[1]); | |
} | |
vmaDestroyPool(g_hAllocator, pool); | |
} | |
void TestDefragmentationWholePool() | |
{ | |
wprintf(L"Test defragmentation whole pool\n"); | |
RandomNumberGenerator rand(668); | |
const VkDeviceSize BUF_SIZE = 0x10000; | |
const VkDeviceSize BLOCK_SIZE = BUF_SIZE * 8; | |
VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; | |
bufCreateInfo.size = BUF_SIZE; | |
bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; | |
VmaAllocationCreateInfo exampleAllocCreateInfo = {}; | |
exampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY; | |
uint32_t memTypeIndex = UINT32_MAX; | |
vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &bufCreateInfo, &exampleAllocCreateInfo, &memTypeIndex); | |
VmaPoolCreateInfo poolCreateInfo = {}; | |
poolCreateInfo.blockSize = BLOCK_SIZE; | |
poolCreateInfo.memoryTypeIndex = memTypeIndex; | |
VmaDefragmentationStats defragStats[2]; | |
for(size_t caseIndex = 0; caseIndex < 2; ++caseIndex) | |
{ | |
VmaPool pool; | |
ERR_GUARD_VULKAN( vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool) ); | |
std::vector<AllocInfo> allocations; | |
// Buffers of fixed size. | |
// Fill 2 blocks. Remove odd buffers. Defragment all of them. | |
for(size_t i = 0; i < BLOCK_SIZE / BUF_SIZE * 2; ++i) | |
{ | |
AllocInfo allocInfo; | |
CreateBuffer(pool, bufCreateInfo, false, allocInfo); | |
allocations.push_back(allocInfo); | |
} | |
for(size_t i = 1; i < allocations.size(); ++i) | |
{ | |
DestroyAllocation(allocations[i]); | |
allocations.erase(allocations.begin() + i); | |
} | |
VmaDefragmentationInfo2 defragInfo = {}; | |
defragInfo.maxCpuAllocationsToMove = UINT32_MAX; | |
defragInfo.maxCpuBytesToMove = VK_WHOLE_SIZE; | |
std::vector<VmaAllocation> allocationsToDefrag; | |
if(caseIndex == 0) | |
{ | |
defragInfo.poolCount = 1; | |
defragInfo.pPools = &pool; | |
} | |
else | |
{ | |
const size_t allocCount = allocations.size(); | |
allocationsToDefrag.resize(allocCount); | |
std::transform( | |
allocations.begin(), allocations.end(), | |
allocationsToDefrag.begin(), | |
[](const AllocInfo& allocInfo) { return allocInfo.m_Allocation; }); | |
defragInfo.allocationCount = (uint32_t)allocCount; | |
defragInfo.pAllocations = allocationsToDefrag.data(); | |
} | |
VmaDefragmentationContext defragCtx = VK_NULL_HANDLE; | |
VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &defragStats[caseIndex], &defragCtx); | |
TEST(res >= VK_SUCCESS); | |
vmaDefragmentationEnd(g_hAllocator, defragCtx); | |
TEST(defragStats[caseIndex].allocationsMoved > 0 && defragStats[caseIndex].bytesMoved > 0); | |
ValidateAllocationsData(allocations.data(), allocations.size()); | |
DestroyAllAllocations(allocations); | |
vmaDestroyPool(g_hAllocator, pool); | |
} | |
TEST(defragStats[0].bytesMoved == defragStats[1].bytesMoved); | |
TEST(defragStats[0].allocationsMoved == defragStats[1].allocationsMoved); | |
TEST(defragStats[0].bytesFreed == defragStats[1].bytesFreed); | |
TEST(defragStats[0].deviceMemoryBlocksFreed == defragStats[1].deviceMemoryBlocksFreed); | |
} | |
void TestDefragmentationFull() | |
{ | |
std::vector<AllocInfo> allocations; | |
// Create initial allocations. | |
for(size_t i = 0; i < 400; ++i) | |
{ | |
AllocInfo allocation; | |
CreateAllocation(allocation); | |
allocations.push_back(allocation); | |
} | |
// Delete random allocations | |
const size_t allocationsToDeletePercent = 80; | |
size_t allocationsToDelete = allocations.size() * allocationsToDeletePercent / 100; | |
for(size_t i = 0; i < allocationsToDelete; ++i) | |
{ | |
size_t index = (size_t)rand() % allocations.size(); | |
DestroyAllocation(allocations[index]); | |
allocations.erase(allocations.begin() + index); | |
} | |
for(size_t i = 0; i < allocations.size(); ++i) | |
ValidateAllocationData(allocations[i]); | |
//SaveAllocatorStatsToFile(L"Before.csv"); | |
{ | |
std::vector<VmaAllocation> vmaAllocations(allocations.size()); | |
for(size_t i = 0; i < allocations.size(); ++i) | |
vmaAllocations[i] = allocations[i].m_Allocation; | |
const size_t nonMovablePercent = 0; | |
size_t nonMovableCount = vmaAllocations.size() * nonMovablePercent / 100; | |
for(size_t i = 0; i < nonMovableCount; ++i) | |
{ | |
size_t index = (size_t)rand() % vmaAllocations.size(); | |
vmaAllocations.erase(vmaAllocations.begin() + index); | |
} | |
const uint32_t defragCount = 1; | |
for(uint32_t defragIndex = 0; defragIndex < defragCount; ++defragIndex) | |
{ | |
std::vector<VkBool32> allocationsChanged(vmaAllocations.size()); | |
VmaDefragmentationInfo defragmentationInfo; | |
defragmentationInfo.maxAllocationsToMove = UINT_MAX; | |
defragmentationInfo.maxBytesToMove = SIZE_MAX; | |
wprintf(L"Defragmentation #%u\n", defragIndex); | |
time_point begTime = std::chrono::high_resolution_clock::now(); | |
VmaDefragmentationStats stats; | |
VkResult res = vmaDefragment(g_hAllocator, vmaAllocations.data(), vmaAllocations.size(), allocationsChanged.data(), &defragmentationInfo, &stats); | |
TEST(res >= 0); | |
float defragmentDuration = ToFloatSeconds(std::chrono::high_resolution_clock::now() - begTime); | |
wprintf(L"Moved allocations %u, bytes %llu\n", stats.allocationsMoved, stats.bytesMoved); | |
wprintf(L"Freed blocks %u, bytes %llu\n", stats.deviceMemoryBlocksFreed, stats.bytesFreed); | |
wprintf(L"Time: %.2f s\n", defragmentDuration); | |
for(size_t i = 0; i < vmaAllocations.size(); ++i) | |
{ | |
if(allocationsChanged[i]) | |
{ | |
RecreateAllocationResource(allocations[i]); | |
} | |
} | |
for(size_t i = 0; i < allocations.size(); ++i) | |
ValidateAllocationData(allocations[i]); | |
//wchar_t fileName[MAX_PATH]; | |
//swprintf(fileName, MAX_PATH, L"After_%02u.csv", defragIndex); | |
//SaveAllocatorStatsToFile(fileName); | |
} | |
} | |
// Destroy all remaining allocations. | |
DestroyAllAllocations(allocations); | |
} | |
static void TestDefragmentationGpu() | |
{ | |
wprintf(L"Test defragmentation GPU\n"); | |
g_MemoryAliasingWarningEnabled = false; | |
std::vector<AllocInfo> allocations; | |
// Create enough allocations to reliably fill 3 new 256 MB blocks.
const VkDeviceSize bufSizeMin = 5ull * 1024 * 1024; | |
const VkDeviceSize bufSizeMax = 10ull * 1024 * 1024; | |
const VkDeviceSize totalSize = 3ull * 256 * 1024 * 1024; | |
const size_t bufCount = (size_t)(totalSize / bufSizeMin); | |
const size_t percentToLeave = 30; | |
const size_t percentNonMovable = 3; | |
RandomNumberGenerator rand = { 234522 }; | |
VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; | |
VmaAllocationCreateInfo allocCreateInfo = {}; | |
allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; | |
allocCreateInfo.flags = 0; | |
// Create all intended buffers. | |
for(size_t i = 0; i < bufCount; ++i) | |
{ | |
bufCreateInfo.size = align_up(rand.Generate() % (bufSizeMax - bufSizeMin) + bufSizeMin, 32ull); | |
if(rand.Generate() % 100 < percentNonMovable) | |
{ | |
bufCreateInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | | |
VK_BUFFER_USAGE_TRANSFER_DST_BIT | | |
VK_BUFFER_USAGE_TRANSFER_SRC_BIT; | |
allocCreateInfo.pUserData = (void*)(uintptr_t)2; | |
} | |
else | |
{ | |
// Different usage just to see different color in output from VmaDumpVis. | |
bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | | |
VK_BUFFER_USAGE_TRANSFER_DST_BIT | | |
VK_BUFFER_USAGE_TRANSFER_SRC_BIT; | |
// And in JSON dump. | |
allocCreateInfo.pUserData = (void*)(uintptr_t)1; | |
} | |
AllocInfo alloc; | |
alloc.CreateBuffer(bufCreateInfo, allocCreateInfo); | |
alloc.m_StartValue = rand.Generate(); | |
allocations.push_back(alloc); | |
} | |
// Destroy some percentage of them. | |
{ | |
const size_t buffersToDestroy = round_div<size_t>(bufCount * (100 - percentToLeave), 100); | |
for(size_t i = 0; i < buffersToDestroy; ++i) | |
{ | |
const size_t index = rand.Generate() % allocations.size(); | |
allocations[index].Destroy(); | |
allocations.erase(allocations.begin() + index); | |
} | |
} | |
// Fill them with meaningful data. | |
UploadGpuData(allocations.data(), allocations.size()); | |
wchar_t fileName[MAX_PATH]; | |
swprintf_s(fileName, L"GPU_defragmentation_A_before.json"); | |
SaveAllocatorStatsToFile(fileName); | |
// Defragment using GPU only. | |
{ | |
const size_t allocCount = allocations.size(); | |
std::vector<VmaAllocation> allocationPtrs; | |
std::vector<VkBool32> allocationChanged; | |
std::vector<size_t> allocationOriginalIndex; | |
for(size_t i = 0; i < allocCount; ++i) | |
{ | |
VmaAllocationInfo allocInfo = {}; | |
vmaGetAllocationInfo(g_hAllocator, allocations[i].m_Allocation, &allocInfo); | |
if((uintptr_t)allocInfo.pUserData == 1) // Movable | |
{ | |
allocationPtrs.push_back(allocations[i].m_Allocation); | |
allocationChanged.push_back(VK_FALSE); | |
allocationOriginalIndex.push_back(i); | |
} | |
} | |
const size_t movableAllocCount = allocationPtrs.size(); | |
BeginSingleTimeCommands(); | |
VmaDefragmentationInfo2 defragInfo = {}; | |
defragInfo.flags = 0; | |
defragInfo.allocationCount = (uint32_t)movableAllocCount; | |
defragInfo.pAllocations = allocationPtrs.data(); | |
defragInfo.pAllocationsChanged = allocationChanged.data(); | |
defragInfo.maxGpuBytesToMove = VK_WHOLE_SIZE; | |
defragInfo.maxGpuAllocationsToMove = UINT32_MAX; | |
defragInfo.commandBuffer = g_hTemporaryCommandBuffer; | |
VmaDefragmentationStats stats = {}; | |
VmaDefragmentationContext ctx = VK_NULL_HANDLE; | |
VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &stats, &ctx); | |
TEST(res >= VK_SUCCESS); | |
EndSingleTimeCommands(); | |
vmaDefragmentationEnd(g_hAllocator, ctx); | |
for(size_t i = 0; i < movableAllocCount; ++i) | |
{ | |
if(allocationChanged[i]) | |
{ | |
const size_t origAllocIndex = allocationOriginalIndex[i]; | |
RecreateAllocationResource(allocations[origAllocIndex]); | |
} | |
} | |
// If corruption detection is enabled, GPU defragmentation may not work on | |
// memory types that have this detection active, e.g. on Intel. | |
#if !defined(VMA_DEBUG_DETECT_CORRUPTION) || VMA_DEBUG_DETECT_CORRUPTION == 0 | |
TEST(stats.allocationsMoved > 0 && stats.bytesMoved > 0); | |
TEST(stats.deviceMemoryBlocksFreed > 0 && stats.bytesFreed > 0); | |
#endif | |
} | |
ValidateGpuData(allocations.data(), allocations.size()); | |
swprintf_s(fileName, L"GPU_defragmentation_B_after.json"); | |
SaveAllocatorStatsToFile(fileName); | |
// Destroy all remaining buffers. | |
for(size_t i = allocations.size(); i--; ) | |
{ | |
allocations[i].Destroy(); | |
} | |
g_MemoryAliasingWarningEnabled = true; | |
} | |
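
// A minimal sketch, also hypothetical and unused, of the single-shot GPU path
// exercised by TestDefragmentationGpu() above: vmaDefragmentationBegin() records
// transfer commands into the provided command buffer; the caller must submit
// that buffer and wait for it to complete before calling vmaDefragmentationEnd()
// and re-creating any moved resources.
static VkResult SketchGpuDefragmentation(
    std::vector<VmaAllocation>& allocs,
    std::vector<VkBool32>& changed,
    VkCommandBuffer cmdBuf)
{
    VmaDefragmentationInfo2 defragInfo = {};
    defragInfo.allocationCount = (uint32_t)allocs.size();
    defragInfo.pAllocations = allocs.data();
    defragInfo.pAllocationsChanged = changed.data();
    defragInfo.maxGpuBytesToMove = VK_WHOLE_SIZE;
    defragInfo.maxGpuAllocationsToMove = UINT32_MAX;
    defragInfo.commandBuffer = cmdBuf;

    VmaDefragmentationContext ctx = VK_NULL_HANDLE;
    VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, nullptr, &ctx);
    // Submit cmdBuf to a queue and wait for it to finish here.
    vmaDefragmentationEnd(g_hAllocator, ctx);
    return res;
}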
static void ProcessDefragmentationStepInfo(VmaDefragmentationPassInfo &stepInfo)
{
    std::vector<VkImageMemoryBarrier> beginImageBarriers;
    std::vector<VkImageMemoryBarrier> finalizeImageBarriers;

    VkPipelineStageFlags beginSrcStageMask = 0;
    VkPipelineStageFlags beginDstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;

    VkPipelineStageFlags finalizeSrcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
    VkPipelineStageFlags finalizeDstStageMask = 0;

    bool wantsMemoryBarrier = false;

    VkMemoryBarrier beginMemoryBarrier = { VK_STRUCTURE_TYPE_MEMORY_BARRIER };
    VkMemoryBarrier finalizeMemoryBarrier = { VK_STRUCTURE_TYPE_MEMORY_BARRIER };

    for(uint32_t i = 0; i < stepInfo.moveCount; ++i)
    {
        VmaAllocationInfo info;
        vmaGetAllocationInfo(g_hAllocator, stepInfo.pMoves[i].allocation, &info);

        AllocInfo *allocInfo = (AllocInfo *)info.pUserData;

        if(allocInfo->m_Image)
        {
            VkImage newImage;

            const VkResult result = vkCreateImage(g_hDevice, &allocInfo->m_ImageInfo, g_Allocs, &newImage);
            TEST(result >= VK_SUCCESS);

            vkBindImageMemory(g_hDevice, newImage, stepInfo.pMoves[i].memory, stepInfo.pMoves[i].offset);
            allocInfo->m_NewImage = newImage;

            // Keep track of the pipeline stages that we need to wait/signal on.
            beginSrcStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
            finalizeDstStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;

            // We need one pipeline barrier and two image layout transitions here:
            // the first turns the newly created image into VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
            // the second turns the old image into VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL.
            VkImageSubresourceRange subresourceRange = {
                VK_IMAGE_ASPECT_COLOR_BIT,
                0, VK_REMAINING_MIP_LEVELS,
                0, VK_REMAINING_ARRAY_LAYERS
            };

            VkImageMemoryBarrier barrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER };
            barrier.srcAccessMask = 0;
            // The new image is the copy destination, so make it available for transfer writes.
            barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
            barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
            barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
            barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
            barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
            barrier.image = newImage;
            barrier.subresourceRange = subresourceRange;

            beginImageBarriers.push_back(barrier);

            // Second barrier to convert the existing image. This one actually needs a real barrier.
            barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
            barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
            barrier.oldLayout = allocInfo->m_ImageLayout;
            barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
            barrier.image = allocInfo->m_Image;

            beginImageBarriers.push_back(barrier);

            // And lastly we need a barrier that turns our new image into the layout of the old one.
            barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
            barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
            barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
            barrier.newLayout = allocInfo->m_ImageLayout;
            barrier.image = newImage;

            finalizeImageBarriers.push_back(barrier);
        }
        else if(allocInfo->m_Buffer)
        {
            VkBuffer newBuffer;

            const VkResult result = vkCreateBuffer(g_hDevice, &allocInfo->m_BufferInfo, g_Allocs, &newBuffer);
            TEST(result >= VK_SUCCESS);

            vkBindBufferMemory(g_hDevice, newBuffer, stepInfo.pMoves[i].memory, stepInfo.pMoves[i].offset);
            allocInfo->m_NewBuffer = newBuffer;

            // Keep track of the pipeline stages that we need to wait/signal on.
            beginSrcStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
            finalizeDstStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;

            beginMemoryBarrier.srcAccessMask |= VK_ACCESS_MEMORY_WRITE_BIT;
            beginMemoryBarrier.dstAccessMask |= VK_ACCESS_TRANSFER_READ_BIT;

            finalizeMemoryBarrier.srcAccessMask |= VK_ACCESS_TRANSFER_WRITE_BIT;
            finalizeMemoryBarrier.dstAccessMask |= VK_ACCESS_MEMORY_READ_BIT;

            wantsMemoryBarrier = true;
        }
    }

    if(!beginImageBarriers.empty() || wantsMemoryBarrier)
    {
        const uint32_t memoryBarrierCount = wantsMemoryBarrier ? 1 : 0;

        vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, beginSrcStageMask, beginDstStageMask, 0,
            memoryBarrierCount, &beginMemoryBarrier,
            0, nullptr,
            (uint32_t)beginImageBarriers.size(), beginImageBarriers.data());
    }

    for(uint32_t i = 0; i < stepInfo.moveCount; ++i)
    {
        VmaAllocationInfo info;
        vmaGetAllocationInfo(g_hAllocator, stepInfo.pMoves[i].allocation, &info);

        AllocInfo *allocInfo = (AllocInfo *)info.pUserData;

        if(allocInfo->m_Image)
        {
            std::vector<VkImageCopy> imageCopies;

            // Copy all mips of the source image into the target image.
            VkOffset3D offset = { 0, 0, 0 };
            VkExtent3D extent = allocInfo->m_ImageInfo.extent;

            VkImageSubresourceLayers subresourceLayers = {
                VK_IMAGE_ASPECT_COLOR_BIT,
                0,
                0, 1
            };

            for(uint32_t mip = 0; mip < allocInfo->m_ImageInfo.mipLevels; ++mip)
            {
                subresourceLayers.mipLevel = mip;

                VkImageCopy imageCopy{
                    subresourceLayers,
                    offset,
                    subresourceLayers,
                    offset,
                    extent
                };

                imageCopies.push_back(imageCopy);

                // Halve the extent for the next mip level, clamping at 1.
                extent.width = std::max(uint32_t(1), extent.width >> 1);
                extent.height = std::max(uint32_t(1), extent.height >> 1);
                extent.depth = std::max(uint32_t(1), extent.depth >> 1);
            }

            vkCmdCopyImage(
                g_hTemporaryCommandBuffer,
                allocInfo->m_Image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                allocInfo->m_NewImage, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                (uint32_t)imageCopies.size(), imageCopies.data());
        }
        else if(allocInfo->m_Buffer)
        {
            VkBufferCopy region = {
                0,
                0,
                allocInfo->m_BufferInfo.size };

            vkCmdCopyBuffer(g_hTemporaryCommandBuffer,
                allocInfo->m_Buffer, allocInfo->m_NewBuffer,
                1, &region);
        }
    }

    if(!finalizeImageBarriers.empty() || wantsMemoryBarrier)
    {
        const uint32_t memoryBarrierCount = wantsMemoryBarrier ? 1 : 0;

        vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, finalizeSrcStageMask, finalizeDstStageMask, 0,
            memoryBarrierCount, &finalizeMemoryBarrier,
            0, nullptr,
            (uint32_t)finalizeImageBarriers.size(), finalizeImageBarriers.data());
    }
}
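
// To summarize the barrier scheme above: the "begin" barriers prepare a pass
// (each new image goes UNDEFINED -> TRANSFER_DST_OPTIMAL, each old image goes
// from its current layout -> TRANSFER_SRC_OPTIMAL, and a single global memory
// barrier covers all buffer copies), while the "finalize" barriers run after
// the copies and return each new image to the layout the old one had, so the
// rest of the application can keep using its recorded layouts unchanged.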
static void TestDefragmentationIncrementalBasic()
{
    wprintf(L"Test defragmentation incremental basic\n");
    g_MemoryAliasingWarningEnabled = false;

    std::vector<AllocInfo> allocations;

    // Create enough allocations to surely fill 3 new blocks of 256 MB.
    const std::array<uint32_t, 3> imageSizes = { 256, 512, 1024 };
    const VkDeviceSize bufSizeMin = 5ull * 1024 * 1024;
    const VkDeviceSize bufSizeMax = 10ull * 1024 * 1024;
    const VkDeviceSize totalSize = 3ull * 256 * 1024 * 1024;
    const size_t imageCount = (size_t)(totalSize / (imageSizes[0] * imageSizes[0] * 4)) / 2;
    const size_t bufCount = (size_t)(totalSize / bufSizeMin) / 2;
    const size_t percentToLeave = 30;
    RandomNumberGenerator rand = { 234522 };

    VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
    imageInfo.imageType = VK_IMAGE_TYPE_2D;
    imageInfo.extent.depth = 1;
    imageInfo.mipLevels = 1;
    imageInfo.arrayLayers = 1;
    imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
    imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
    imageInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
    imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
    imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;

    VmaAllocationCreateInfo allocCreateInfo = {};
    allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
    allocCreateInfo.flags = 0;

    // Create all intended images.
    for(size_t i = 0; i < imageCount; ++i)
    {
        const uint32_t size = imageSizes[rand.Generate() % 3];

        imageInfo.extent.width = size;
        imageInfo.extent.height = size;

        AllocInfo alloc;
        alloc.CreateImage(imageInfo, allocCreateInfo, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
        alloc.m_StartValue = 0;

        allocations.push_back(alloc);
    }

    // And all buffers.
    VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };

    for(size_t i = 0; i < bufCount; ++i)
    {
        bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
        bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;

        AllocInfo alloc;
        alloc.CreateBuffer(bufCreateInfo, allocCreateInfo);
        alloc.m_StartValue = 0;

        allocations.push_back(alloc);
    }

    // Destroy some percentage of them.
    {
        const size_t allocationsToDestroy = round_div<size_t>((imageCount + bufCount) * (100 - percentToLeave), 100);
        for(size_t i = 0; i < allocationsToDestroy; ++i)
        {
            const size_t index = rand.Generate() % allocations.size();
            allocations[index].Destroy();
            allocations.erase(allocations.begin() + index);
        }
    }

    {
        // Set our user data pointers. A real application should probably be more clever here.
        const size_t allocationCount = allocations.size();
        for(size_t i = 0; i < allocationCount; ++i)
        {
            AllocInfo &alloc = allocations[i];
            vmaSetAllocationUserData(g_hAllocator, alloc.m_Allocation, &alloc);
        }
    }

    // Fill them with meaningful data.
    UploadGpuData(allocations.data(), allocations.size());

    wchar_t fileName[MAX_PATH];
    swprintf_s(fileName, L"GPU_defragmentation_incremental_basic_A_before.json");
    SaveAllocatorStatsToFile(fileName);

    // Defragment using GPU only.
    {
        const size_t allocCount = allocations.size();

        std::vector<VmaAllocation> allocationPtrs;

        for(size_t i = 0; i < allocCount; ++i)
        {
            allocationPtrs.push_back(allocations[i].m_Allocation);
        }

        const size_t movableAllocCount = allocationPtrs.size();

        VmaDefragmentationInfo2 defragInfo = {};
        defragInfo.flags = VMA_DEFRAGMENTATION_FLAG_INCREMENTAL;
        defragInfo.allocationCount = (uint32_t)movableAllocCount;
        defragInfo.pAllocations = allocationPtrs.data();
        defragInfo.maxGpuBytesToMove = VK_WHOLE_SIZE;
        defragInfo.maxGpuAllocationsToMove = UINT32_MAX;

        VmaDefragmentationStats stats = {};
        VmaDefragmentationContext ctx = VK_NULL_HANDLE;
        VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &stats, &ctx);
        TEST(res >= VK_SUCCESS);

        res = VK_NOT_READY;

        std::vector<VmaDefragmentationPassMoveInfo> moveInfo;
        moveInfo.resize(movableAllocCount);

        while(res == VK_NOT_READY)
        {
            VmaDefragmentationPassInfo stepInfo = {};
            stepInfo.pMoves = moveInfo.data();
            stepInfo.moveCount = (uint32_t)moveInfo.size();

            res = vmaBeginDefragmentationPass(g_hAllocator, ctx, &stepInfo);
            TEST(res >= VK_SUCCESS);

            BeginSingleTimeCommands();
            ProcessDefragmentationStepInfo(stepInfo);
            EndSingleTimeCommands();

            res = vmaEndDefragmentationPass(g_hAllocator, ctx);

            // Destroy old buffers/images and replace them with new handles.
            for(size_t i = 0; i < stepInfo.moveCount; ++i)
            {
                VmaAllocation const alloc = stepInfo.pMoves[i].allocation;
                VmaAllocationInfo vmaAllocInfo;
                vmaGetAllocationInfo(g_hAllocator, alloc, &vmaAllocInfo);
                AllocInfo* allocInfo = (AllocInfo*)vmaAllocInfo.pUserData;
                if(allocInfo->m_Buffer)
                {
                    assert(allocInfo->m_NewBuffer && !allocInfo->m_Image && !allocInfo->m_NewImage);
                    vkDestroyBuffer(g_hDevice, allocInfo->m_Buffer, g_Allocs);
                    allocInfo->m_Buffer = allocInfo->m_NewBuffer;
                    allocInfo->m_NewBuffer = VK_NULL_HANDLE;
                }
                else if(allocInfo->m_Image)
                {
                    assert(allocInfo->m_NewImage && !allocInfo->m_Buffer && !allocInfo->m_NewBuffer);
                    vkDestroyImage(g_hDevice, allocInfo->m_Image, g_Allocs);
                    allocInfo->m_Image = allocInfo->m_NewImage;
                    allocInfo->m_NewImage = VK_NULL_HANDLE;
                }
                else
                    assert(0);
            }
        }

        TEST(res >= VK_SUCCESS);
        vmaDefragmentationEnd(g_hAllocator, ctx);

        // If corruption detection is enabled, GPU defragmentation may not work on
        // memory types that have this detection active, e.g. on Intel.
#if !defined(VMA_DEBUG_DETECT_CORRUPTION) || VMA_DEBUG_DETECT_CORRUPTION == 0
        TEST(stats.allocationsMoved > 0 && stats.bytesMoved > 0);
        TEST(stats.deviceMemoryBlocksFreed > 0 && stats.bytesFreed > 0);
#endif
    }

    //ValidateGpuData(allocations.data(), allocations.size());

    swprintf_s(fileName, L"GPU_defragmentation_incremental_basic_B_after.json");
    SaveAllocatorStatsToFile(fileName);

    // Destroy all remaining buffers and images.
    for(size_t i = allocations.size(); i--; )
    {
        allocations[i].Destroy();
    }

    g_MemoryAliasingWarningEnabled = true;
}
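
// A minimal sketch, hypothetical like the helpers above, of the incremental
// driver loop used by the two tests around it: keep beginning and ending
// passes until vmaEndDefragmentationPass() stops returning VK_NOT_READY.
static void SketchIncrementalLoop(VmaDefragmentationContext ctx, std::vector<VmaDefragmentationPassMoveInfo>& moves)
{
    VkResult res = VK_NOT_READY;
    while(res == VK_NOT_READY)
    {
        VmaDefragmentationPassInfo pass = {};
        pass.moveCount = (uint32_t)moves.size();
        pass.pMoves = moves.data();

        res = vmaBeginDefragmentationPass(g_hAllocator, ctx, &pass);
        TEST(res >= VK_SUCCESS);

        // Record copies/barriers for pass.pMoves[0..pass.moveCount) and submit
        // them here (see ProcessDefragmentationStepInfo above), then:
        res = vmaEndDefragmentationPass(g_hAllocator, ctx);

        // The caller then destroys each old buffer/image and adopts the new handle.
    }
}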
void TestDefragmentationIncrementalComplex()
{
    wprintf(L"Test defragmentation incremental complex\n");
    g_MemoryAliasingWarningEnabled = false;

    std::vector<AllocInfo> allocations;

    // Create enough allocations to surely fill 3 new blocks of 256 MB.
    const std::array<uint32_t, 3> imageSizes = { 256, 512, 1024 };
    const VkDeviceSize bufSizeMin = 5ull * 1024 * 1024;
    const VkDeviceSize bufSizeMax = 10ull * 1024 * 1024;
    const VkDeviceSize totalSize = 3ull * 256 * 1024 * 1024;
    const size_t imageCount = (size_t)(totalSize / (imageSizes[0] * imageSizes[0] * 4)) / 2;
    const size_t bufCount = (size_t)(totalSize / bufSizeMin) / 2;
    const size_t percentToLeave = 30;
    RandomNumberGenerator rand = { 234522 };

    VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
    imageInfo.imageType = VK_IMAGE_TYPE_2D;
    imageInfo.extent.depth = 1;
    imageInfo.mipLevels = 1;
    imageInfo.arrayLayers = 1;
    imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
    imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
    imageInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
    imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
    imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;

    VmaAllocationCreateInfo allocCreateInfo = {};
    allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
    allocCreateInfo.flags = 0;

    // Create all intended images.
    for(size_t i = 0; i < imageCount; ++i)
    {
        const uint32_t size = imageSizes[rand.Generate() % 3];

        imageInfo.extent.width = size;
        imageInfo.extent.height = size;

        AllocInfo alloc;
        alloc.CreateImage(imageInfo, allocCreateInfo, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
        alloc.m_StartValue = 0;

        allocations.push_back(alloc);
    }

    // And all buffers.
    VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };

    for(size_t i = 0; i < bufCount; ++i)
    {
        bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
        bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;

        AllocInfo alloc;
        alloc.CreateBuffer(bufCreateInfo, allocCreateInfo);
        alloc.m_StartValue = 0;

        allocations.push_back(alloc);
    }

    // Destroy some percentage of them.
    {
        const size_t allocationsToDestroy = round_div<size_t>((imageCount + bufCount) * (100 - percentToLeave), 100);
        for(size_t i = 0; i < allocationsToDestroy; ++i)
        {
            const size_t index = rand.Generate() % allocations.size();
            allocations[index].Destroy();
            allocations.erase(allocations.begin() + index);
        }
    }

    {
        // Set our user data pointers. A real application should probably be more clever here.
        const size_t allocationCount = allocations.size();
        for(size_t i = 0; i < allocationCount; ++i)
        {
            AllocInfo &alloc = allocations[i];
            vmaSetAllocationUserData(g_hAllocator, alloc.m_Allocation, &alloc);
        }
    }

    // Fill them with meaningful data.
    UploadGpuData(allocations.data(), allocations.size());

    wchar_t fileName[MAX_PATH];
    swprintf_s(fileName, L"GPU_defragmentation_incremental_complex_A_before.json");
    SaveAllocatorStatsToFile(fileName);

    std::vector<AllocInfo> additionalAllocations;

#define MakeAdditionalAllocation() \
    do { \
        bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16); \
        bufCreateInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT; \
        \
        AllocInfo alloc; \
        alloc.CreateBuffer(bufCreateInfo, allocCreateInfo); \
        \
        additionalAllocations.push_back(alloc); \
    } while(0)
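
// The repeated MakeAdditionalAllocation() calls below simulate an application
// that keeps allocating while incremental defragmentation is in flight, which
// is exactly the situation the incremental API is meant to tolerate.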
    // Defragment using GPU only.
    {
        const size_t allocCount = allocations.size();

        std::vector<VmaAllocation> allocationPtrs;

        for(size_t i = 0; i < allocCount; ++i)
        {
            allocationPtrs.push_back(allocations[i].m_Allocation);
        }

        const size_t movableAllocCount = allocationPtrs.size();

        VmaDefragmentationInfo2 defragInfo = {};
        defragInfo.flags = VMA_DEFRAGMENTATION_FLAG_INCREMENTAL;
        defragInfo.allocationCount = (uint32_t)movableAllocCount;
        defragInfo.pAllocations = allocationPtrs.data();
        defragInfo.maxGpuBytesToMove = VK_WHOLE_SIZE;
        defragInfo.maxGpuAllocationsToMove = UINT32_MAX;

        VmaDefragmentationStats stats = {};
        VmaDefragmentationContext ctx = VK_NULL_HANDLE;
        VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &stats, &ctx);
        TEST(res >= VK_SUCCESS);

        res = VK_NOT_READY;

        std::vector<VmaDefragmentationPassMoveInfo> moveInfo;
        moveInfo.resize(movableAllocCount);

        MakeAdditionalAllocation();

        while(res == VK_NOT_READY)
        {
            VmaDefragmentationPassInfo stepInfo = {};
            stepInfo.pMoves = moveInfo.data();
            stepInfo.moveCount = (uint32_t)moveInfo.size();

            res = vmaBeginDefragmentationPass(g_hAllocator, ctx, &stepInfo);
            TEST(res >= VK_SUCCESS);

            MakeAdditionalAllocation();

            BeginSingleTimeCommands();
            ProcessDefragmentationStepInfo(stepInfo);
            EndSingleTimeCommands();

            res = vmaEndDefragmentationPass(g_hAllocator, ctx);

            // Destroy old buffers/images and replace them with new handles.
            for(size_t i = 0; i < stepInfo.moveCount; ++i)
            {
                VmaAllocation const alloc = stepInfo.pMoves[i].allocation;
                VmaAllocationInfo vmaAllocInfo;
                vmaGetAllocationInfo(g_hAllocator, alloc, &vmaAllocInfo);
                AllocInfo* allocInfo = (AllocInfo*)vmaAllocInfo.pUserData;
                if(allocInfo->m_Buffer)
                {
                    assert(allocInfo->m_NewBuffer && !allocInfo->m_Image && !allocInfo->m_NewImage);
                    vkDestroyBuffer(g_hDevice, allocInfo->m_Buffer, g_Allocs);
                    allocInfo->m_Buffer = allocInfo->m_NewBuffer;
                    allocInfo->m_NewBuffer = VK_NULL_HANDLE;
                }
                else if(allocInfo->m_Image)
                {
                    assert(allocInfo->m_NewImage && !allocInfo->m_Buffer && !allocInfo->m_NewBuffer);
                    vkDestroyImage(g_hDevice, allocInfo->m_Image, g_Allocs);
                    allocInfo->m_Image = allocInfo->m_NewImage;
                    allocInfo->m_NewImage = VK_NULL_HANDLE;
                }
                else
                    assert(0);
            }

            MakeAdditionalAllocation();
        }

        TEST(res >= VK_SUCCESS);
        vmaDefragmentationEnd(g_hAllocator, ctx);

        // If corruption detection is enabled, GPU defragmentation may not work on
        // memory types that have this detection active, e.g. on Intel.
#if !defined(VMA_DEBUG_DETECT_CORRUPTION) || VMA_DEBUG_DETECT_CORRUPTION == 0
        TEST(stats.allocationsMoved > 0 && stats.bytesMoved > 0);
        TEST(stats.deviceMemoryBlocksFreed > 0 && stats.bytesFreed > 0);
#endif
    }

    //ValidateGpuData(allocations.data(), allocations.size());

    swprintf_s(fileName, L"GPU_defragmentation_incremental_complex_B_after.json");