Prototype of a defragmentation interface that supports tiling optimal images
Merged #90. Thanks, @JustSid!
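In short, the new API lets an application drive defragmentation step by step instead of in one blocking call. Below is a minimal sketch of the intended driving loop, assuming a valid VmaAllocator `allocator` and a vector `allocs` of movable VmaAllocation handles (both names are placeholders); the per-move resource recreation is elided here, see ProcessDefragmentationStepInfo() in the test code below for the full version:

    // Sketch only: begin an incremental defragmentation.
    VmaDefragmentationInfo2 defragInfo = {};
    defragInfo.flags = VMA_DEFRAGMENTATION_FLAG_INCREMENTAL;
    defragInfo.allocationCount = (uint32_t)allocs.size();
    defragInfo.pAllocations = allocs.data();
    defragInfo.maxGpuBytesToMove = VK_WHOLE_SIZE;
    defragInfo.maxGpuAllocationsToMove = UINT32_MAX;

    VmaDefragmentationContext ctx = VK_NULL_HANDLE;
    // With the incremental flag set, this returns VK_NOT_READY immediately;
    // the actual work happens in the step calls.
    VkResult res = vmaDefragmentationBegin(allocator, &defragInfo, nullptr, &ctx);

    std::vector<VmaDefragmentationStepMoveInfo> moves(allocs.size());
    while(res == VK_NOT_READY)
    {
        VmaDefragmentationStepInfo stepInfo = {};
        stepInfo.pMoves = moves.data();
        stepInfo.moveCount = (uint32_t)moves.size();

        // Fills pMoves and shrinks moveCount to the number of moves returned.
        vmaDefragmentationStepBegin(allocator, &stepInfo, ctx);

        // The application now recreates each resource at
        // stepInfo.pMoves[i].memory/offset and copies its contents over.

        // Returns VK_NOT_READY while moves remain, VK_SUCCESS when done.
        res = vmaDefragmentationStepEnd(allocator, ctx);
    }
    vmaDefragmentationEnd(allocator, ctx);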
diff --git a/src/Tests.cpp b/src/Tests.cpp
index 5a6057f..e5b373d 100644
--- a/src/Tests.cpp
+++ b/src/Tests.cpp
@@ -688,6 +688,7 @@
VmaAllocation m_Allocation = VK_NULL_HANDLE;
VkBuffer m_Buffer = VK_NULL_HANDLE;
VkImage m_Image = VK_NULL_HANDLE;
+ VkImageLayout m_ImageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
uint32_t m_StartValue = 0;
union
{
@@ -698,6 +699,10 @@
void CreateBuffer(
const VkBufferCreateInfo& bufCreateInfo,
const VmaAllocationCreateInfo& allocCreateInfo);
+ void CreateImage(
+ const VkImageCreateInfo& imageCreateInfo,
+ const VmaAllocationCreateInfo& allocCreateInfo,
+ VkImageLayout layout);
void Destroy();
};
@@ -709,6 +714,16 @@
VkResult res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &m_Buffer, &m_Allocation, nullptr);
TEST(res == VK_SUCCESS);
}
+void AllocInfo::CreateImage(
+ const VkImageCreateInfo& imageCreateInfo,
+ const VmaAllocationCreateInfo& allocCreateInfo,
+ VkImageLayout layout)
+{
+ m_ImageInfo = imageCreateInfo;
+ m_ImageLayout = layout;
+ VkResult res = vmaCreateImage(g_hAllocator, &imageCreateInfo, &allocCreateInfo, &m_Image, &m_Allocation, nullptr);
+ TEST(res == VK_SUCCESS);
+}
void AllocInfo::Destroy()
{
@@ -904,7 +919,88 @@
}
else
{
- TEST(0 && "Images not currently supported.");
+ TEST(currAllocInfo.m_ImageInfo.format == VK_FORMAT_R8G8B8A8_UNORM && "Only RGBA8 images are currently supported.");
+ TEST(currAllocInfo.m_ImageInfo.mipLevels == 1 && "Only single mip images are currently supported.");
+
+ const VkDeviceSize size = currAllocInfo.m_ImageInfo.extent.width * currAllocInfo.m_ImageInfo.extent.height * sizeof(uint32_t);
+
+ VkBuffer stagingBuf = VK_NULL_HANDLE;
+ void* stagingBufMappedPtr = nullptr;
+ if(!stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr))
+ {
+ TEST(cmdBufferStarted);
+ EndSingleTimeCommands();
+ stagingBufs.ReleaseAllBuffers();
+ cmdBufferStarted = false;
+
+ bool ok = stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr);
+ TEST(ok);
+ }
+
+ // Fill staging buffer.
+ {
+ assert(size % sizeof(uint32_t) == 0);
+ uint32_t *stagingValPtr = (uint32_t *)stagingBufMappedPtr;
+ uint32_t val = currAllocInfo.m_StartValue;
+ for(size_t i = 0; i < size / sizeof(uint32_t); ++i)
+ {
+ *stagingValPtr = val;
+ ++stagingValPtr;
+ ++val;
+ }
+ }
+
+ // Issue copy command from staging buffer to destination image.
+ if(!cmdBufferStarted)
+ {
+ cmdBufferStarted = true;
+ BeginSingleTimeCommands();
+ }
+
+ // Transition image to TRANSFER_DST layout
+ VkImageSubresourceRange subresourceRange = {
+ VK_IMAGE_ASPECT_COLOR_BIT,
+ 0, VK_REMAINING_MIP_LEVELS,
+ 0, VK_REMAINING_ARRAY_LAYERS
+ };
+
+ VkImageMemoryBarrier barrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER };
+ barrier.srcAccessMask = 0;
+ barrier.dstAccessMask = 0;
+ barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+ barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
+ barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+ barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+ barrier.image = currAllocInfo.m_Image;
+ barrier.subresourceRange = subresourceRange;
+
+ vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0,
+ 0, nullptr,
+ 0, nullptr,
+ 1, &barrier);
+
+ // Copy image data
+ VkBufferImageCopy copy = {};
+ copy.bufferOffset = 0;
+ copy.bufferRowLength = 0;
+ copy.bufferImageHeight = 0;
+ copy.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+ copy.imageSubresource.layerCount = 1;
+ copy.imageExtent = currAllocInfo.m_ImageInfo.extent;
+
+ vkCmdCopyBufferToImage(g_hTemporaryCommandBuffer, stagingBuf, currAllocInfo.m_Image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy);
+
+ // Transition to the desired layout
+ barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+ barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
+ barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
+ barrier.newLayout = currAllocInfo.m_ImageLayout;
+
+ vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0,
+ 0, nullptr,
+ 0, nullptr,
+ 1, &barrier);
}
}
@@ -1754,6 +1850,555 @@
g_MemoryAliasingWarningEnabled = true;
}
+static void ProcessDefragmentationStepInfo(VmaDefragmentationStepInfo &stepInfo)
+{
+ std::vector<VkImageMemoryBarrier> beginImageBarriers;
+ std::vector<VkImageMemoryBarrier> finalizeImageBarriers;
+
+ VkPipelineStageFlags beginSrcStageMask = 0;
+ VkPipelineStageFlags beginDstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
+
+ VkPipelineStageFlags finalizeSrcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
+ VkPipelineStageFlags finalizeDstStageMask = 0;
+
+ bool wantsMemoryBarrier = false;
+
+ VkMemoryBarrier beginMemoryBarrier = { VK_STRUCTURE_TYPE_MEMORY_BARRIER };
+ VkMemoryBarrier finalizeMemoryBarrier = { VK_STRUCTURE_TYPE_MEMORY_BARRIER };
+
+ std::vector<void *> newHandles;
+
+ for(uint32_t i = 0; i < stepInfo.moveCount; ++ i)
+ {
+ VmaAllocationInfo info;
+ vmaGetAllocationInfo(g_hAllocator, stepInfo.pMoves[i].allocation, &info);
+
+ AllocInfo *allocInfo = (AllocInfo *)info.pUserData;
+
+ if(allocInfo->m_Image)
+ {
+ VkImage newImage;
+
+ const VkResult result = vkCreateImage(g_hDevice, &allocInfo->m_ImageInfo, g_Allocs, &newImage);
+ TEST(result >= VK_SUCCESS);
+
+ vkBindImageMemory(g_hDevice, newImage, stepInfo.pMoves[i].memory, stepInfo.pMoves[i].offset);
+ newHandles.push_back(newImage);
+
+ // Keep track of the pipeline stages that we need to wait/signal on
+ beginSrcStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+ finalizeDstStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+
+ // We need one pipeline barrier and two image layout transitions here
+ // First we transition our newly created image to VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
+ // Then we transition the old image to VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL
+
+ VkImageSubresourceRange subresourceRange = {
+ VK_IMAGE_ASPECT_COLOR_BIT,
+ 0, VK_REMAINING_MIP_LEVELS,
+ 0, VK_REMAINING_ARRAY_LAYERS
+ };
+
+ VkImageMemoryBarrier barrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER };
+ barrier.srcAccessMask = 0;
+ barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
+ barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+ barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
+ barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+ barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+ barrier.image = newImage;
+ barrier.subresourceRange = subresourceRange;
+
+ beginImageBarriers.push_back(barrier);
+
+ // Second barrier to transition the existing image. This one actually needs a real barrier
+ barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
+ barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
+ barrier.oldLayout = allocInfo->m_ImageLayout;
+ barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
+ barrier.image = allocInfo->m_Image;
+
+ beginImageBarriers.push_back(barrier);
+
+ // And lastly we need a barrier that transitions our new image to the layout of the old one
+ barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+ barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
+ barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
+ barrier.newLayout = allocInfo->m_ImageLayout;
+ barrier.image = newImage;
+
+ finalizeImageBarriers.push_back(barrier);
+ }
+ else if(allocInfo->m_Buffer)
+ {
+ VkBuffer newBuffer;
+
+ const VkResult result = vkCreateBuffer(g_hDevice, &allocInfo->m_BufferInfo, g_Allocs, &newBuffer);
+ TEST(result >= VK_SUCCESS);
+
+ vkBindBufferMemory(g_hDevice, newBuffer, stepInfo.pMoves[i].memory, stepInfo.pMoves[i].offset);
+ newHandles.push_back(newBuffer);
+
+ // Keep track of the pipeline stages that we need to wait/signal on
+ beginSrcStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+ finalizeDstStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+
+ beginMemoryBarrier.srcAccessMask |= VK_ACCESS_MEMORY_WRITE_BIT;
+ beginMemoryBarrier.dstAccessMask |= VK_ACCESS_TRANSFER_READ_BIT;
+
+ finalizeMemoryBarrier.srcAccessMask |= VK_ACCESS_TRANSFER_WRITE_BIT;
+ finalizeMemoryBarrier.dstAccessMask |= VK_ACCESS_MEMORY_READ_BIT;
+
+ wantsMemoryBarrier = true;
+ }
+ }
+
+ if(!beginImageBarriers.empty() || wantsMemoryBarrier)
+ {
+ const uint32_t memoryBarrierCount = wantsMemoryBarrier ? 1 : 0;
+
+ vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, beginSrcStageMask, beginDstStageMask, 0,
+ memoryBarrierCount, &beginMemoryBarrier,
+ 0, nullptr,
+ (uint32_t)beginImageBarriers.size(), beginImageBarriers.data());
+ }
+
+ for(uint32_t i = 0; i < stepInfo.moveCount; ++ i)
+ {
+ VmaAllocationInfo info;
+ vmaGetAllocationInfo(g_hAllocator, stepInfo.pMoves[i].allocation, &info);
+
+ AllocInfo *allocInfo = (AllocInfo *)info.pUserData;
+
+ if(allocInfo->m_Image)
+ {
+ std::vector<VkImageCopy> imageCopies;
+
+ // Copy all mips of the source image into the target image
+ VkOffset3D offset = { 0, 0, 0 };
+ VkExtent3D extent = allocInfo->m_ImageInfo.extent;
+
+ VkImageSubresourceLayers subresourceLayers = {
+ VK_IMAGE_ASPECT_COLOR_BIT,
+ 0,
+ 0, 1
+ };
+
+ for(uint32_t mip = 0; mip < allocInfo->m_ImageInfo.mipLevels; ++ mip)
+ {
+ subresourceLayers.mipLevel = mip;
+
+ VkImageCopy imageCopy{
+ subresourceLayers,
+ offset,
+ subresourceLayers,
+ offset,
+ extent
+ };
+
+ imageCopies.push_back(imageCopy);
+
+ extent.width = std::max(uint32_t(1), extent.width >> 1);
+ extent.height = std::max(uint32_t(1), extent.height >> 1);
+ extent.depth = std::max(uint32_t(1), extent.depth >> 1);
+ }
+
+ vkCmdCopyImage(
+ g_hTemporaryCommandBuffer,
+ allocInfo->m_Image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+ (VkImage)newHandles[i], VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ (uint32_t)imageCopies.size(), imageCopies.data());
+
+ imageCopies.clear();
+
+ // Update our alloc info with the new resource to be used
+ allocInfo->m_Image = (VkImage)newHandles[i];
+ }
+ else if(allocInfo->m_Buffer)
+ {
+ VkBufferCopy region = {
+ 0,
+ 0,
+ allocInfo->m_BufferInfo.size };
+
+ vkCmdCopyBuffer(g_hTemporaryCommandBuffer,
+ allocInfo->m_Buffer, (VkBuffer)newHandles[i],
+ 1, &region);
+
+ // Update our alloc info with the new resource to be used
+ allocInfo->m_Buffer = (VkBuffer)newHandles[i];
+ }
+ }
+
+ if(!finalizeImageBarriers.empty() || wantsMemoryBarrier)
+ {
+ const uint32_t memoryBarrierCount = wantsMemoryBarrier ? 1 : 0;
+
+ vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, finalizeSrcStageMask, finalizeDstStageMask, 0,
+ memoryBarrierCount, &finalizeMemoryBarrier,
+ 0, nullptr,
+ (uint32_t)finalizeImageBarriers.size(), finalizeImageBarriers.data());
+ }
+}
+
+
+static void TestDefragmentationIncrementalBasic()
+{
+ wprintf(L"Test defragmentation incremental basic\n");
+ g_MemoryAliasingWarningEnabled = false;
+
+ std::vector<AllocInfo> allocations;
+
+ // Create enough allocations to reliably fill 3 new blocks of 256 MB.
+ const std::array<uint32_t, 3> imageSizes = { 256, 512, 1024 };
+ const VkDeviceSize bufSizeMin = 5ull * 1024 * 1024;
+ const VkDeviceSize bufSizeMax = 10ull * 1024 * 1024;
+ const VkDeviceSize totalSize = 3ull * 256 * 1024 * 1024;
+ const size_t imageCount = (size_t)(totalSize / (imageSizes[0] * imageSizes[0] * 4)) / 2;
+ const size_t bufCount = (size_t)(totalSize / bufSizeMin) / 2;
+ const size_t percentToLeave = 30;
+ RandomNumberGenerator rand = { 234522 };
+
+ VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
+ imageInfo.imageType = VK_IMAGE_TYPE_2D;
+ imageInfo.extent.depth = 1;
+ imageInfo.mipLevels = 1;
+ imageInfo.arrayLayers = 1;
+ imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
+ imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
+ imageInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
+ imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
+ imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
+
+ VmaAllocationCreateInfo allocCreateInfo = {};
+ allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
+ allocCreateInfo.flags = 0;
+
+ // Create all intended images.
+ for(size_t i = 0; i < imageCount; ++i)
+ {
+ const uint32_t size = imageSizes[rand.Generate() % 3];
+
+ imageInfo.extent.width = size;
+ imageInfo.extent.height = size;
+
+ AllocInfo alloc;
+ alloc.CreateImage(imageInfo, allocCreateInfo, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
+ alloc.m_StartValue = 0;
+
+ allocations.push_back(alloc);
+ }
+
+ // And all buffers
+ VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+
+ for(size_t i = 0; i < bufCount; ++i)
+ {
+ bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
+ bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+
+ AllocInfo alloc;
+ alloc.CreateBuffer(bufCreateInfo, allocCreateInfo);
+ alloc.m_StartValue = 0;
+
+ allocations.push_back(alloc);
+ }
+
+ // Destroy some percentage of them.
+ {
+ const size_t allocationsToDestroy = round_div<size_t>((imageCount + bufCount) * (100 - percentToLeave), 100);
+ for(size_t i = 0; i < allocationsToDestroy; ++i)
+ {
+ const size_t index = rand.Generate() % allocations.size();
+ allocations[index].Destroy();
+ allocations.erase(allocations.begin() + index);
+ }
+ }
+
+ {
+ // Set our user data pointers. A real application should probably be more clever here
+ const size_t allocationCount = allocations.size();
+ for(size_t i = 0; i < allocationCount; ++i)
+ {
+ AllocInfo &alloc = allocations[i];
+ vmaSetAllocationUserData(g_hAllocator, alloc.m_Allocation, &alloc);
+ }
+ }
+
+ // Fill them with meaningful data.
+ UploadGpuData(allocations.data(), allocations.size());
+
+ wchar_t fileName[MAX_PATH];
+ swprintf_s(fileName, L"GPU_defragmentation_incremental_basic_A_before.json");
+ SaveAllocatorStatsToFile(fileName);
+
+ // Defragment using GPU only.
+ {
+ const size_t allocCount = allocations.size();
+
+ std::vector<VmaAllocation> allocationPtrs;
+
+ for(size_t i = 0; i < allocCount; ++i)
+ {
+ VmaAllocationInfo allocInfo = {};
+ vmaGetAllocationInfo(g_hAllocator, allocations[i].m_Allocation, &allocInfo);
+
+ allocationPtrs.push_back(allocations[i].m_Allocation);
+ }
+
+ const size_t movableAllocCount = allocationPtrs.size();
+
+ VmaDefragmentationInfo2 defragInfo = {};
+ defragInfo.flags = VMA_DEFRAGMENTATION_FLAG_INCREMENTAL;
+ defragInfo.allocationCount = (uint32_t)movableAllocCount;
+ defragInfo.pAllocations = allocationPtrs.data();
+ defragInfo.maxGpuBytesToMove = VK_WHOLE_SIZE;
+ defragInfo.maxGpuAllocationsToMove = UINT32_MAX;
+
+ VmaDefragmentationStats stats = {};
+ VmaDefragmentationContext ctx = VK_NULL_HANDLE;
+ VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &stats, &ctx);
+ TEST(res >= VK_SUCCESS);
+
+ res = VK_NOT_READY;
+
+ std::vector<VmaDefragmentationStepMoveInfo> moveInfo;
+ moveInfo.resize(movableAllocCount);
+
+ while(res == VK_NOT_READY)
+ {
+ VmaDefragmentationStepInfo stepInfo = {};
+ stepInfo.pMoves = moveInfo.data();
+ stepInfo.moveCount = (uint32_t)moveInfo.size();
+
+ res = vmaDefragmentationStepBegin(g_hAllocator, &stepInfo, ctx);
+ TEST(res >= VK_SUCCESS);
+
+ BeginSingleTimeCommands();
+ ProcessDefragmentationStepInfo(stepInfo);
+ EndSingleTimeCommands();
+
+ res = vmaDefragmentationStepEnd(g_hAllocator, ctx);
+ }
+
+ TEST(res >= VK_SUCCESS);
+ vmaDefragmentationEnd(g_hAllocator, ctx);
+
+ // If corruption detection is enabled, GPU defragmentation may not work on
+ // memory types that have this detection active, e.g. on Intel.
+#if !defined(VMA_DEBUG_DETECT_CORRUPTION) || VMA_DEBUG_DETECT_CORRUPTION == 0
+ TEST(stats.allocationsMoved > 0 && stats.bytesMoved > 0);
+ TEST(stats.deviceMemoryBlocksFreed > 0 && stats.bytesFreed > 0);
+#endif
+ }
+
+ //ValidateGpuData(allocations.data(), allocations.size());
+
+ swprintf_s(fileName, L"GPU_defragmentation_incremental_basic_B_after.json");
+ SaveAllocatorStatsToFile(fileName);
+
+ // Destroy all remaining allocations.
+ for(size_t i = allocations.size(); i--; )
+ {
+ allocations[i].Destroy();
+ }
+
+ g_MemoryAliasingWarningEnabled = true;
+}
+
+void TestDefragmentationIncrementalComplex()
+{
+ wprintf(L"Test defragmentation incremental complex\n");
+ g_MemoryAliasingWarningEnabled = false;
+
+ std::vector<AllocInfo> allocations;
+
+ // Create enough allocations to reliably fill 3 new blocks of 256 MB.
+ const std::array<uint32_t, 3> imageSizes = { 256, 512, 1024 };
+ const VkDeviceSize bufSizeMin = 5ull * 1024 * 1024;
+ const VkDeviceSize bufSizeMax = 10ull * 1024 * 1024;
+ const VkDeviceSize totalSize = 3ull * 256 * 1024 * 1024;
+ const size_t imageCount = (size_t)(totalSize / (imageSizes[0] * imageSizes[0] * 4)) / 2;
+ const size_t bufCount = (size_t)(totalSize / bufSizeMin) / 2;
+ const size_t percentToLeave = 30;
+ RandomNumberGenerator rand = { 234522 };
+
+ VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
+ imageInfo.imageType = VK_IMAGE_TYPE_2D;
+ imageInfo.extent.depth = 1;
+ imageInfo.mipLevels = 1;
+ imageInfo.arrayLayers = 1;
+ imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
+ imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
+ imageInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
+ imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
+ imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
+
+ VmaAllocationCreateInfo allocCreateInfo = {};
+ allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
+ allocCreateInfo.flags = 0;
+
+ // Create all intended images.
+ for(size_t i = 0; i < imageCount; ++i)
+ {
+ const uint32_t size = imageSizes[rand.Generate() % 3];
+
+ imageInfo.extent.width = size;
+ imageInfo.extent.height = size;
+
+ AllocInfo alloc;
+ alloc.CreateImage(imageInfo, allocCreateInfo, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
+ alloc.m_StartValue = 0;
+
+ allocations.push_back(alloc);
+ }
+
+ // And all buffers
+ VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+
+ for(size_t i = 0; i < bufCount; ++i)
+ {
+ bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
+ bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+
+ AllocInfo alloc;
+ alloc.CreateBuffer(bufCreateInfo, allocCreateInfo);
+ alloc.m_StartValue = 0;
+
+ allocations.push_back(alloc);
+ }
+
+ // Destroy some percentage of them.
+ {
+ const size_t allocationsToDestroy = round_div<size_t>((imageCount + bufCount) * (100 - percentToLeave), 100);
+ for(size_t i = 0; i < allocationsToDestroy; ++i)
+ {
+ const size_t index = rand.Generate() % allocations.size();
+ allocations[index].Destroy();
+ allocations.erase(allocations.begin() + index);
+ }
+ }
+
+ {
+ // Set our user data pointers. A real application should probably be more clever here
+ const size_t allocationCount = allocations.size();
+ for(size_t i = 0; i < allocationCount; ++i)
+ {
+ AllocInfo &alloc = allocations[i];
+ vmaSetAllocationUserData(g_hAllocator, alloc.m_Allocation, &alloc);
+ }
+ }
+
+ // Fill them with meaningful data.
+ UploadGpuData(allocations.data(), allocations.size());
+
+ wchar_t fileName[MAX_PATH];
+ swprintf_s(fileName, L"GPU_defragmentation_incremental_complex_A_before.json");
+ SaveAllocatorStatsToFile(fileName);
+
+ std::vector<AllocInfo> additionalAllocations;
+
+#define MakeAdditionalAllocation() \
+ do { \
+ { \
+ bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16); \
+ bufCreateInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT; \
+ \
+ AllocInfo alloc; \
+ alloc.CreateBuffer(bufCreateInfo, allocCreateInfo); \
+ \
+ additionalAllocations.push_back(alloc); \
+ } \
+ } while(0)
+
+ // Defragment using GPU only.
+ {
+ const size_t allocCount = allocations.size();
+
+ std::vector<VmaAllocation> allocationPtrs;
+
+ for(size_t i = 0; i < allocCount; ++i)
+ {
+ VmaAllocationInfo allocInfo = {};
+ vmaGetAllocationInfo(g_hAllocator, allocations[i].m_Allocation, &allocInfo);
+
+ allocationPtrs.push_back(allocations[i].m_Allocation);
+ }
+
+ const size_t movableAllocCount = allocationPtrs.size();
+
+ VmaDefragmentationInfo2 defragInfo = {};
+ defragInfo.flags = VMA_DEFRAGMENTATION_FLAG_INCREMENTAL;
+ defragInfo.allocationCount = (uint32_t)movableAllocCount;
+ defragInfo.pAllocations = allocationPtrs.data();
+ defragInfo.maxGpuBytesToMove = VK_WHOLE_SIZE;
+ defragInfo.maxGpuAllocationsToMove = UINT32_MAX;
+
+ VmaDefragmentationStats stats = {};
+ VmaDefragmentationContext ctx = VK_NULL_HANDLE;
+ VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &stats, &ctx);
+ TEST(res >= VK_SUCCESS);
+
+ res = VK_NOT_READY;
+
+ std::vector<VmaDefragmentationStepMoveInfo> moveInfo;
+ moveInfo.resize(movableAllocCount);
+
+ MakeAdditionalAllocation();
+
+ while(res == VK_NOT_READY)
+ {
+ VmaDefragmentationStepInfo stepInfo = {};
+ stepInfo.pMoves = moveInfo.data();
+ stepInfo.moveCount = (uint32_t)moveInfo.size();
+
+ res = vmaDefragmentationStepBegin(g_hAllocator, &stepInfo, ctx);
+ TEST(res >= VK_SUCCESS);
+
+ MakeAdditionalAllocation();
+
+ BeginSingleTimeCommands();
+ ProcessDefragmentationStepInfo(stepInfo);
+ EndSingleTimeCommands();
+
+ res = vmaDefragmentationStepEnd(g_hAllocator, ctx);
+
+ MakeAdditionalAllocation();
+ }
+
+ TEST(res >= VK_SUCCESS);
+ vmaDefragmentationEnd(g_hAllocator, ctx);
+
+ // If corruption detection is enabled, GPU defragmentation may not work on
+ // memory types that have this detection active, e.g. on Intel.
+#if !defined(VMA_DEBUG_DETECT_CORRUPTION) || VMA_DEBUG_DETECT_CORRUPTION == 0
+ TEST(stats.allocationsMoved > 0 && stats.bytesMoved > 0);
+ TEST(stats.deviceMemoryBlocksFreed > 0 && stats.bytesFreed > 0);
+#endif
+ }
+
+ //ValidateGpuData(allocations.data(), allocations.size());
+
+ swprintf_s(fileName, L"GPU_defragmentation_incremental_complex_B_after.json");
+ SaveAllocatorStatsToFile(fileName);
+
+ // Destroy all remaining allocations.
+ for(size_t i = allocations.size(); i--; )
+ {
+ allocations[i].Destroy();
+ }
+
+ for(size_t i = additionalAllocations.size(); i--; )
+ {
+ additionalAllocations[i].Destroy();
+ }
+
+ g_MemoryAliasingWarningEnabled = true;
+}
+
+
static void TestUserData()
{
VkResult res;
@@ -5499,6 +6144,8 @@
TestDefragmentationFull();
TestDefragmentationWholePool();
TestDefragmentationGpu();
+ TestDefragmentationIncrementalBasic();
+ TestDefragmentationIncrementalComplex();
// # Detailed tests
FILE* file;
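As a smaller companion to the image path exercised above, the buffer case of one returned move boils down to the following sketch. Here g_hDevice, g_Allocs, g_hTemporaryCommandBuffer, TEST and the AllocInfo struct are the test harness's own names, and `move` stands for a single VmaDefragmentationStepMoveInfo entry:

    // Sketch: recreate the buffer at the destination chosen by the defragmenter.
    VkBuffer newBuffer;
    VkResult result = vkCreateBuffer(g_hDevice, &allocInfo->m_BufferInfo, g_Allocs, &newBuffer);
    TEST(result >= VK_SUCCESS);
    vkBindBufferMemory(g_hDevice, newBuffer, move.memory, move.offset);

    // Copy the old contents into the new placement, then adopt the new handle.
    VkBufferCopy region = { 0, 0, allocInfo->m_BufferInfo.size };
    vkCmdCopyBuffer(g_hTemporaryCommandBuffer, allocInfo->m_Buffer, newBuffer, 1, &region);
    allocInfo->m_Buffer = newBuffer;

In a real application the old VkBuffer would also be destroyed once the copy has executed; vmaDefragmentationStepEnd() only rebinds the VmaAllocation to its new placement.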
diff --git a/src/vk_mem_alloc.h b/src/vk_mem_alloc.h
index 1f7920c..9e78812 100644
--- a/src/vk_mem_alloc.h
+++ b/src/vk_mem_alloc.h
@@ -1952,6 +1952,7 @@
PFN_vkCreateImage vkCreateImage;
PFN_vkDestroyImage vkDestroyImage;
PFN_vkCmdCopyBuffer vkCmdCopyBuffer;
+ PFN_vkCmdCopyImage vkCmdCopyImage;
#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000
PFN_vkGetBufferMemoryRequirements2KHR vkGetBufferMemoryRequirements2KHR;
PFN_vkGetImageMemoryRequirements2KHR vkGetImageMemoryRequirements2KHR;
@@ -3111,6 +3112,7 @@
/// Flags to be used in vmaDefragmentationBegin(). None at the moment. Reserved for future use.
typedef enum VmaDefragmentationFlagBits {
+ VMA_DEFRAGMENTATION_FLAG_INCREMENTAL = 0x1,
VMA_DEFRAGMENTATION_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
} VmaDefragmentationFlagBits;
typedef VkFlags VmaDefragmentationFlags;
@@ -3191,6 +3193,21 @@
VkCommandBuffer commandBuffer;
} VmaDefragmentationInfo2;
+typedef struct VmaDefragmentationStepMoveInfo {
+ VmaAllocation allocation;
+ VkDeviceMemory memory;
+ VkDeviceSize offset;
+} VmaDefragmentationStepMoveInfo;
+
+/** \brief Parameters for incremental defragmentation steps.
+
+To be used with function vmaDefragmentationStepBegin().
+*/
+typedef struct VmaDefragmentationStepInfo {
+ uint32_t moveCount;
+ VmaDefragmentationStepMoveInfo* pMoves;
+} VmaDefragmentationStepInfo;
+
/** \brief Deprecated. Optional configuration parameters to be passed to function vmaDefragment().
\deprecated This is a part of the old interface. It is recommended to use structure #VmaDefragmentationInfo2 and function vmaDefragmentationBegin() instead.
@@ -3264,6 +3281,16 @@
VmaAllocator allocator,
VmaDefragmentationContext context);
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragmentationStepBegin(
+ VmaAllocator allocator,
+ VmaDefragmentationStepInfo* pInfo,
+ VmaDefragmentationContext context
+);
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragmentationStepEnd(
+ VmaAllocator allocator,
+ VmaDefragmentationContext context
+);
+
/** \brief Deprecated. Compacts memory by moving allocations.
@param pAllocations Array of allocations that can be moved during this compaction.
@@ -3672,6 +3699,7 @@
public:
void Lock() { m_Mutex.lock(); }
void Unlock() { m_Mutex.unlock(); }
+ bool TryLock() { return m_Mutex.try_lock(); }
private:
std::mutex m_Mutex;
};
@@ -3688,8 +3716,10 @@
public:
void LockRead() { m_Mutex.lock_shared(); }
void UnlockRead() { m_Mutex.unlock_shared(); }
+ bool TryLockRead() { return m_Mutex.try_lock_shared(); }
void LockWrite() { m_Mutex.lock(); }
void UnlockWrite() { m_Mutex.unlock(); }
+ bool TryLockWrite() { return m_Mutex.try_lock(); }
private:
std::shared_mutex m_Mutex;
};
@@ -3703,8 +3733,10 @@
VmaRWMutex() { InitializeSRWLock(&m_Lock); }
void LockRead() { AcquireSRWLockShared(&m_Lock); }
void UnlockRead() { ReleaseSRWLockShared(&m_Lock); }
+ bool TryLockRead() { return TryAcquireSRWLockShared(&m_Lock); }
void LockWrite() { AcquireSRWLockExclusive(&m_Lock); }
void UnlockWrite() { ReleaseSRWLockExclusive(&m_Lock); }
+ bool TryLockWrite() { return TryAcquireSRWLockExclusive(&m_Lock); }
private:
SRWLOCK m_Lock;
};
@@ -3716,8 +3748,10 @@
public:
void LockRead() { m_Mutex.Lock(); }
void UnlockRead() { m_Mutex.Unlock(); }
+ bool TryLockRead() { return m_Mutex.TryLock(); }
void LockWrite() { m_Mutex.Lock(); }
void UnlockWrite() { m_Mutex.Unlock(); }
+ bool TryLockWrite() { return m_Mutex.TryLock(); }
private:
VMA_MUTEX m_Mutex;
};
@@ -6241,6 +6275,9 @@
VkDeviceSize srcOffset;
VkDeviceSize dstOffset;
VkDeviceSize size;
+ VmaAllocation hAllocation;
+ VmaDeviceMemoryBlock* pSrcBlock;
+ VmaDeviceMemoryBlock* pDstBlock;
};
class VmaDefragmentationAlgorithm;
@@ -6310,7 +6347,7 @@
// Saves results in pCtx->res.
void Defragment(
class VmaBlockVectorDefragmentationContext* pCtx,
- VmaDefragmentationStats* pStats,
+ VmaDefragmentationStats* pStats, VmaDefragmentationFlags flags,
VkDeviceSize& maxCpuBytesToMove, uint32_t& maxCpuAllocationsToMove,
VkDeviceSize& maxGpuBytesToMove, uint32_t& maxGpuAllocationsToMove,
VkCommandBuffer commandBuffer);
@@ -6318,6 +6355,14 @@
class VmaBlockVectorDefragmentationContext* pCtx,
VmaDefragmentationStats* pStats);
+ uint32_t ProcessDefragmentations(
+ class VmaBlockVectorDefragmentationContext *pCtx,
+ VmaDefragmentationStepMoveInfo* pMove, uint32_t maxMoves);
+
+ void CommitDefragmentations(
+ class VmaBlockVectorDefragmentationContext *pCtx,
+ VmaDefragmentationStats* pStats);
+
////////////////////////////////////////////////////////////////////////////////
// To be used only while the m_Mutex is locked. Used during defragmentation.
@@ -6350,6 +6395,8 @@
VkDeviceSize CalcMaxBlockSize() const;
+ static VkImageAspectFlags ImageAspectMaskForFormat(VkFormat format);
+
// Finds and removes given block from vector.
void Remove(VmaDeviceMemoryBlock* pBlock);
@@ -6386,7 +6433,7 @@
// Saves result to pCtx->res.
void ApplyDefragmentationMovesGpu(
class VmaBlockVectorDefragmentationContext* pDefragCtx,
- const VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves,
+ VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves,
VkCommandBuffer commandBuffer);
/*
@@ -6455,7 +6502,8 @@
virtual VkResult Defragment(
VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves,
VkDeviceSize maxBytesToMove,
- uint32_t maxAllocationsToMove) = 0;
+ uint32_t maxAllocationsToMove,
+ VmaDefragmentationFlags flags) = 0;
virtual VkDeviceSize GetBytesMoved() const = 0;
virtual uint32_t GetAllocationsMoved() const = 0;
@@ -6500,7 +6548,8 @@
virtual VkResult Defragment(
VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves,
VkDeviceSize maxBytesToMove,
- uint32_t maxAllocationsToMove);
+ uint32_t maxAllocationsToMove,
+ VmaDefragmentationFlags flags);
virtual VkDeviceSize GetBytesMoved() const { return m_BytesMoved; }
virtual uint32_t GetAllocationsMoved() const { return m_AllocationsMoved; }
@@ -6601,7 +6650,8 @@
VkResult DefragmentRound(
VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves,
VkDeviceSize maxBytesToMove,
- uint32_t maxAllocationsToMove);
+ uint32_t maxAllocationsToMove,
+ bool freeOldAllocations);
size_t CalcBlocksWithNonMovableCount() const;
@@ -6627,7 +6677,8 @@
virtual VkResult Defragment(
VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves,
VkDeviceSize maxBytesToMove,
- uint32_t maxAllocationsToMove);
+ uint32_t maxAllocationsToMove,
+ VmaDefragmentationFlags flags);
virtual VkDeviceSize GetBytesMoved() const { return m_BytesMoved; }
virtual uint32_t GetAllocationsMoved() const { return m_AllocationsMoved; }
@@ -6775,6 +6826,10 @@
VkResult res;
bool mutexLocked;
VmaVector< VmaBlockDefragmentationContext, VmaStlAllocator<VmaBlockDefragmentationContext> > blockContexts;
+ VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> > defragmentationMoves;
+ uint32_t defragmentationMovesProcessed;
+ uint32_t defragmentationMovesCommitted;
+ bool hasDefragmentationPlan;
VmaBlockVectorDefragmentationContext(
VmaAllocator hAllocator,
@@ -6790,7 +6845,7 @@
void AddAllocation(VmaAllocation hAlloc, VkBool32* pChanged);
void AddAll() { m_AllAllocations = true; }
- void Begin(bool overlappingMoveSupported);
+ void Begin(bool overlappingMoveSupported, VmaDefragmentationFlags flags);
private:
const VmaAllocator m_hAllocator;
@@ -6839,13 +6894,22 @@
VkResult Defragment(
VkDeviceSize maxCpuBytesToMove, uint32_t maxCpuAllocationsToMove,
VkDeviceSize maxGpuBytesToMove, uint32_t maxGpuAllocationsToMove,
- VkCommandBuffer commandBuffer, VmaDefragmentationStats* pStats);
+ VkCommandBuffer commandBuffer, VmaDefragmentationStats* pStats, VmaDefragmentationFlags flags);
+
+ VkResult DefragmentStepBegin(VmaDefragmentationStepInfo* pInfo);
+ VkResult DefragmentStepEnd();
private:
const VmaAllocator m_hAllocator;
const uint32_t m_CurrFrameIndex;
const uint32_t m_Flags;
VmaDefragmentationStats* const m_pStats;
+
+ VkDeviceSize m_MaxCpuBytesToMove;
+ uint32_t m_MaxCpuAllocationsToMove;
+ VkDeviceSize m_MaxGpuBytesToMove;
+ uint32_t m_MaxGpuAllocationsToMove;
+
// Owner of these objects.
VmaBlockVectorDefragmentationContext* m_DefaultPoolContexts[VK_MAX_MEMORY_TYPES];
// Owner of these objects.
@@ -7185,6 +7249,12 @@
VkResult DefragmentationEnd(
VmaDefragmentationContext context);
+ VkResult DefragmentationStepBegin(
+ VmaDefragmentationStepInfo* pInfo,
+ VmaDefragmentationContext context);
+ VkResult DefragmentationStepEnd(
+ VmaDefragmentationContext context);
+
void GetAllocationInfo(VmaAllocation hAllocation, VmaAllocationInfo* pAllocationInfo);
bool TouchAllocation(VmaAllocation hAllocation);
@@ -12618,7 +12688,7 @@
void VmaBlockVector::ApplyDefragmentationMovesGpu(
class VmaBlockVectorDefragmentationContext* pDefragCtx,
- const VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves,
+ VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves,
VkCommandBuffer commandBuffer)
{
const size_t blockCount = m_Blocks.size();
@@ -12631,8 +12701,13 @@
for(size_t moveIndex = 0; moveIndex < moveCount; ++moveIndex)
{
const VmaDefragmentationMove& move = moves[moveIndex];
- pDefragCtx->blockContexts[move.srcBlockIndex].flags |= VmaBlockDefragmentationContext::BLOCK_FLAG_USED;
- pDefragCtx->blockContexts[move.dstBlockIndex].flags |= VmaBlockDefragmentationContext::BLOCK_FLAG_USED;
+
+ //if(move.type == VMA_ALLOCATION_TYPE_UNKNOWN)
+ {
+ // Old-school moves still require us to map the whole block
+ pDefragCtx->blockContexts[move.srcBlockIndex].flags |= VmaBlockDefragmentationContext::BLOCK_FLAG_USED;
+ pDefragCtx->blockContexts[move.dstBlockIndex].flags |= VmaBlockDefragmentationContext::BLOCK_FLAG_USED;
+ }
}
VMA_ASSERT(pDefragCtx->res == VK_SUCCESS);
@@ -12806,7 +12881,7 @@
void VmaBlockVector::Defragment(
class VmaBlockVectorDefragmentationContext* pCtx,
- VmaDefragmentationStats* pStats,
+ VmaDefragmentationStats* pStats, VmaDefragmentationFlags flags,
VkDeviceSize& maxCpuBytesToMove, uint32_t& maxCpuAllocationsToMove,
VkDeviceSize& maxGpuBytesToMove, uint32_t& maxGpuAllocationsToMove,
VkCommandBuffer commandBuffer)
@@ -12843,19 +12918,28 @@
if(m_hAllocator->m_UseMutex)
{
- m_Mutex.LockWrite();
- pCtx->mutexLocked = true;
+ if(flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL)
+ {
+ if(!m_Mutex.TryLockWrite())
+ {
+ pCtx->res = VK_ERROR_INITIALIZATION_FAILED;
+ return;
+ }
+ }
+ else
+ {
+ m_Mutex.LockWrite();
+ pCtx->mutexLocked = true;
+ }
}
- pCtx->Begin(overlappingMoveSupported);
+ pCtx->Begin(overlappingMoveSupported, flags);
// Defragment.
const VkDeviceSize maxBytesToMove = defragmentOnGpu ? maxGpuBytesToMove : maxCpuBytesToMove;
const uint32_t maxAllocationsToMove = defragmentOnGpu ? maxGpuAllocationsToMove : maxCpuAllocationsToMove;
- VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> > moves =
- VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >(VmaStlAllocator<VmaDefragmentationMove>(m_hAllocator->GetAllocationCallbacks()));
- pCtx->res = pCtx->GetAlgorithm()->Defragment(moves, maxBytesToMove, maxAllocationsToMove);
+ pCtx->res = pCtx->GetAlgorithm()->Defragment(pCtx->defragmentationMoves, maxBytesToMove, maxAllocationsToMove, flags);
// Accumulate statistics.
if(pStats != VMA_NULL)
@@ -12877,16 +12961,27 @@
maxCpuAllocationsToMove -= allocationsMoved;
}
}
+
+ if(flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL)
+ {
+ if(m_hAllocator->m_UseMutex)
+ m_Mutex.UnlockWrite();
+
+ if(pCtx->res >= VK_SUCCESS && !pCtx->defragmentationMoves.empty())
+ pCtx->res = VK_NOT_READY;
+
+ return;
+ }
if(pCtx->res >= VK_SUCCESS)
{
if(defragmentOnGpu)
{
- ApplyDefragmentationMovesGpu(pCtx, moves, commandBuffer);
+ ApplyDefragmentationMovesGpu(pCtx, pCtx->defragmentationMoves, commandBuffer);
}
else
{
- ApplyDefragmentationMovesCpu(pCtx, moves);
+ ApplyDefragmentationMovesCpu(pCtx, pCtx->defragmentationMoves);
}
}
}
@@ -12919,6 +13014,48 @@
}
}
+uint32_t VmaBlockVector::ProcessDefragmentations(
+ class VmaBlockVectorDefragmentationContext *pCtx,
+ VmaDefragmentationStepMoveInfo* pMove, uint32_t maxMoves)
+{
+ VmaMutexLockWrite lock(m_Mutex, m_hAllocator->m_UseMutex);
+
+ const uint32_t moveCount = std::min(uint32_t(pCtx->defragmentationMoves.size()) - pCtx->defragmentationMovesProcessed, maxMoves);
+
+ for(uint32_t i = 0; i < moveCount; ++ i)
+ {
+ VmaDefragmentationMove& move = pCtx->defragmentationMoves[pCtx->defragmentationMovesProcessed + i];
+
+ pMove->allocation = move.hAllocation;
+ pMove->memory = move.pDstBlock->GetDeviceMemory();
+ pMove->offset = move.dstOffset;
+
+ ++ pMove;
+ }
+
+ pCtx->defragmentationMovesProcessed += moveCount;
+
+ return moveCount;
+}
+
+void VmaBlockVector::CommitDefragmentations(
+ class VmaBlockVectorDefragmentationContext *pCtx,
+ VmaDefragmentationStats* pStats)
+{
+ VmaMutexLockWrite lock(m_Mutex, m_hAllocator->m_UseMutex);
+
+ for(uint32_t i = pCtx->defragmentationMovesCommitted; i < pCtx->defragmentationMovesProcessed; ++ i)
+ {
+ const VmaDefragmentationMove &move = pCtx->defragmentationMoves[i];
+
+ move.pSrcBlock->m_pMetadata->FreeAtOffset(move.srcOffset);
+ move.hAllocation->ChangeBlockAllocation(m_hAllocator, move.pDstBlock, move.dstOffset);
+ }
+
+ pCtx->defragmentationMovesCommitted = pCtx->defragmentationMovesProcessed;
+ FreeEmptyBlocks(pStats);
+}
+
size_t VmaBlockVector::CalcAllocationCount() const
{
size_t result = 0;
@@ -13069,7 +13206,8 @@
VkResult VmaDefragmentationAlgorithm_Generic::DefragmentRound(
VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves,
VkDeviceSize maxBytesToMove,
- uint32_t maxAllocationsToMove)
+ uint32_t maxAllocationsToMove,
+ bool freeOldAllocations)
{
if(m_Blocks.empty())
{
@@ -13161,12 +13299,16 @@
return VK_SUCCESS;
}
- VmaDefragmentationMove move;
+ VmaDefragmentationMove move = {};
move.srcBlockIndex = pSrcBlockInfo->m_OriginalBlockIndex;
move.dstBlockIndex = pDstBlockInfo->m_OriginalBlockIndex;
move.srcOffset = srcOffset;
move.dstOffset = dstAllocRequest.offset;
move.size = size;
+ move.hAllocation = allocInfo.m_hAllocation;
+ move.pSrcBlock = pSrcBlockInfo->m_pBlock;
+ move.pDstBlock = pDstBlockInfo->m_pBlock;
+
moves.push_back(move);
pDstBlockInfo->m_pBlock->m_pMetadata->Alloc(
@@ -13174,10 +13316,13 @@
suballocType,
size,
allocInfo.m_hAllocation);
- pSrcBlockInfo->m_pBlock->m_pMetadata->FreeAtOffset(srcOffset);
-
- allocInfo.m_hAllocation->ChangeBlockAllocation(m_hAllocator, pDstBlockInfo->m_pBlock, dstAllocRequest.offset);
+ if(freeOldAllocations)
+ {
+ pSrcBlockInfo->m_pBlock->m_pMetadata->FreeAtOffset(srcOffset);
+ allocInfo.m_hAllocation->ChangeBlockAllocation(m_hAllocator, pDstBlockInfo->m_pBlock, dstAllocRequest.offset);
+ }
+
if(allocInfo.m_pChanged != VMA_NULL)
{
*allocInfo.m_pChanged = VK_TRUE;
@@ -13229,7 +13374,8 @@
VkResult VmaDefragmentationAlgorithm_Generic::Defragment(
VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves,
VkDeviceSize maxBytesToMove,
- uint32_t maxAllocationsToMove)
+ uint32_t maxAllocationsToMove,
+ VmaDefragmentationFlags flags)
{
if(!m_AllAllocations && m_AllocationCount == 0)
{
@@ -13275,7 +13421,7 @@
VkResult result = VK_SUCCESS;
for(uint32_t round = 0; (round < roundCount) && (result == VK_SUCCESS); ++round)
{
- result = DefragmentRound(moves, maxBytesToMove, maxAllocationsToMove);
+ result = DefragmentRound(moves, maxBytesToMove, maxAllocationsToMove, !(flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL));
}
return result;
@@ -13327,7 +13473,8 @@
VkResult VmaDefragmentationAlgorithm_Fast::Defragment(
VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves,
VkDeviceSize maxBytesToMove,
- uint32_t maxAllocationsToMove)
+ uint32_t maxAllocationsToMove,
+ VmaDefragmentationFlags flags)
{
VMA_ASSERT(m_AllAllocations || m_pBlockVector->CalcAllocationCount() == m_AllocationCount);
@@ -13383,6 +13530,7 @@
}
const VkDeviceSize srcAllocOffset = srcSuballocIt->offset;
+ VmaDefragmentationMove move = {};
// Try to place it in one of free spaces from the database.
size_t freeSpaceInfoIndex;
VkDeviceSize dstAllocOffset;
@@ -13413,10 +13561,12 @@
InsertSuballoc(pFreeSpaceMetadata, suballoc);
- VmaDefragmentationMove move = {
- srcOrigBlockIndex, freeSpaceOrigBlockIndex,
- srcAllocOffset, dstAllocOffset,
- srcAllocSize };
+ move.srcBlockIndex = srcOrigBlockIndex;
+ move.dstBlockIndex = freeSpaceOrigBlockIndex;
+ move.srcOffset = srcAllocOffset;
+ move.dstOffset = dstAllocOffset;
+ move.size = srcAllocSize;
+
moves.push_back(move);
}
// Different block
@@ -13439,10 +13589,12 @@
InsertSuballoc(pFreeSpaceMetadata, suballoc);
- VmaDefragmentationMove move = {
- srcOrigBlockIndex, freeSpaceOrigBlockIndex,
- srcAllocOffset, dstAllocOffset,
- srcAllocSize };
+ move.srcBlockIndex = srcOrigBlockIndex;
+ move.dstBlockIndex = freeSpaceOrigBlockIndex;
+ move.srcOffset = srcAllocOffset;
+ move.dstOffset = dstAllocOffset;
+ move.size = srcAllocSize;
+
moves.push_back(move);
}
}
@@ -13497,10 +13649,13 @@
m_BytesMoved += srcAllocSize;
++m_AllocationsMoved;
++srcSuballocIt;
- VmaDefragmentationMove move = {
- srcOrigBlockIndex, dstOrigBlockIndex,
- srcAllocOffset, dstAllocOffset,
- srcAllocSize };
+
+ move.srcBlockIndex = srcOrigBlockIndex;
+ move.dstBlockIndex = dstOrigBlockIndex;
+ move.srcOffset = srcAllocOffset;
+ move.dstOffset = dstAllocOffset;
+ move.size = srcAllocSize;
+
moves.push_back(move);
}
}
@@ -13526,10 +13681,12 @@
pDstMetadata->m_Suballocations.push_back(suballoc);
- VmaDefragmentationMove move = {
- srcOrigBlockIndex, dstOrigBlockIndex,
- srcAllocOffset, dstAllocOffset,
- srcAllocSize };
+ move.srcBlockIndex = srcOrigBlockIndex;
+ move.dstBlockIndex = dstOrigBlockIndex;
+ move.srcOffset = srcAllocOffset;
+ move.dstOffset = dstAllocOffset;
+ move.size = srcAllocSize;
+
moves.push_back(move);
}
}
@@ -13679,6 +13836,10 @@
res(VK_SUCCESS),
mutexLocked(false),
blockContexts(VmaStlAllocator<VmaBlockDefragmentationContext>(hAllocator->GetAllocationCallbacks())),
+ defragmentationMoves(VmaStlAllocator<VmaDefragmentationMove>(hAllocator->GetAllocationCallbacks())),
+ defragmentationMovesProcessed(0),
+ defragmentationMovesCommitted(0),
+ hasDefragmentationPlan(false),
m_hAllocator(hAllocator),
m_hCustomPool(hCustomPool),
m_pBlockVector(pBlockVector),
@@ -13700,7 +13861,7 @@
m_Allocations.push_back(info);
}
-void VmaBlockVectorDefragmentationContext::Begin(bool overlappingMoveSupported)
+void VmaBlockVectorDefragmentationContext::Begin(bool overlappingMoveSupported, VmaDefragmentationFlags flags)
{
const bool allAllocations = m_AllAllocations ||
m_Allocations.size() == m_pBlockVector->CalcAllocationCount();
@@ -13714,10 +13875,12 @@
- VMA_DEBUG_MARGIN is 0.
- All allocations in this block vector are moveable.
- There is no possibility of image/buffer granularity conflict.
+ - The defragmentation is not incremental.
*/
if(VMA_DEBUG_MARGIN == 0 &&
allAllocations &&
- !m_pBlockVector->IsBufferImageGranularityConflictPossible())
+ !m_pBlockVector->IsBufferImageGranularityConflictPossible() &&
+ !(flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL))
{
m_pAlgorithm = vma_new(m_hAllocator, VmaDefragmentationAlgorithm_Fast)(
m_hAllocator, m_pBlockVector, m_CurrFrameIndex, overlappingMoveSupported);
@@ -13884,13 +14047,30 @@
VkResult VmaDefragmentationContext_T::Defragment(
VkDeviceSize maxCpuBytesToMove, uint32_t maxCpuAllocationsToMove,
VkDeviceSize maxGpuBytesToMove, uint32_t maxGpuAllocationsToMove,
- VkCommandBuffer commandBuffer, VmaDefragmentationStats* pStats)
+ VkCommandBuffer commandBuffer, VmaDefragmentationStats* pStats, VmaDefragmentationFlags flags)
{
if(pStats)
{
memset(pStats, 0, sizeof(VmaDefragmentationStats));
}
+ if(flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL)
+ {
+ // For incremental defragmentations, we just earmark how much we can move
+ // The real meat is in the defragmentation steps
+ m_MaxCpuBytesToMove = maxCpuBytesToMove;
+ m_MaxCpuAllocationsToMove = maxCpuAllocationsToMove;
+
+ m_MaxGpuBytesToMove = maxGpuBytesToMove;
+ m_MaxGpuAllocationsToMove = maxGpuAllocationsToMove;
+
+ if(m_MaxCpuBytesToMove == 0 && m_MaxCpuAllocationsToMove == 0 &&
+ m_MaxGpuBytesToMove == 0 && m_MaxGpuAllocationsToMove == 0)
+ return VK_SUCCESS;
+
+ return VK_NOT_READY;
+ }
+
if(commandBuffer == VK_NULL_HANDLE)
{
maxGpuBytesToMove = 0;
@@ -13910,7 +14090,7 @@
VMA_ASSERT(pBlockVectorCtx->GetBlockVector());
pBlockVectorCtx->GetBlockVector()->Defragment(
pBlockVectorCtx,
- pStats,
+ pStats, flags,
maxCpuBytesToMove, maxCpuAllocationsToMove,
maxGpuBytesToMove, maxGpuAllocationsToMove,
commandBuffer);
@@ -13930,7 +14110,7 @@
VMA_ASSERT(pBlockVectorCtx && pBlockVectorCtx->GetBlockVector());
pBlockVectorCtx->GetBlockVector()->Defragment(
pBlockVectorCtx,
- pStats,
+ pStats, flags,
maxCpuBytesToMove, maxCpuAllocationsToMove,
maxGpuBytesToMove, maxGpuAllocationsToMove,
commandBuffer);
@@ -13943,6 +14123,132 @@
return res;
}
+VkResult VmaDefragmentationContext_T::DefragmentStepBegin(VmaDefragmentationStepInfo* pInfo)
+{
+ VmaDefragmentationStepMoveInfo* pCurrentMove = pInfo->pMoves;
+ uint32_t movesLeft = pInfo->moveCount;
+
+ // Process default pools.
+ for(uint32_t memTypeIndex = 0;
+ memTypeIndex < m_hAllocator->GetMemoryTypeCount();
+ ++memTypeIndex)
+ {
+ VmaBlockVectorDefragmentationContext *pBlockVectorCtx = m_DefaultPoolContexts[memTypeIndex];
+ if(pBlockVectorCtx)
+ {
+ VMA_ASSERT(pBlockVectorCtx->GetBlockVector());
+
+ if(!pBlockVectorCtx->hasDefragmentationPlan)
+ {
+ pBlockVectorCtx->GetBlockVector()->Defragment(
+ pBlockVectorCtx,
+ m_pStats, m_Flags,
+ m_MaxCpuBytesToMove, m_MaxCpuAllocationsToMove,
+ m_MaxGpuBytesToMove, m_MaxGpuAllocationsToMove,
+ VK_NULL_HANDLE);
+
+ if(pBlockVectorCtx->res < VK_SUCCESS)
+ continue;
+
+ pBlockVectorCtx->hasDefragmentationPlan = true;
+ }
+
+ const uint32_t processed = pBlockVectorCtx->GetBlockVector()->ProcessDefragmentations(
+ pBlockVectorCtx,
+ pCurrentMove, movesLeft);
+
+ movesLeft -= processed;
+ pCurrentMove += processed;
+ }
+ }
+
+ // Process custom pools.
+ for(size_t customCtxIndex = 0, customCtxCount = m_CustomPoolContexts.size();
+ customCtxIndex < customCtxCount;
+ ++customCtxIndex)
+ {
+ VmaBlockVectorDefragmentationContext *pBlockVectorCtx = m_CustomPoolContexts[customCtxIndex];
+ VMA_ASSERT(pBlockVectorCtx && pBlockVectorCtx->GetBlockVector());
+
+ if(!pBlockVectorCtx->hasDefragmentationPlan)
+ {
+ pBlockVectorCtx->GetBlockVector()->Defragment(
+ pBlockVectorCtx,
+ m_pStats, m_Flags,
+ m_MaxCpuBytesToMove, m_MaxCpuAllocationsToMove,
+ m_MaxGpuBytesToMove, m_MaxGpuAllocationsToMove,
+ VK_NULL_HANDLE);
+
+ if(pBlockVectorCtx->res < VK_SUCCESS)
+ continue;
+
+ pBlockVectorCtx->hasDefragmentationPlan = true;
+ }
+
+ const uint32_t processed = pBlockVectorCtx->GetBlockVector()->ProcessDefragmentations(
+ pBlockVectorCtx,
+ pCurrentMove, movesLeft);
+
+ movesLeft -= processed;
+ pCurrentMove += processed;
+ }
+
+ pInfo->moveCount = pInfo->moveCount - movesLeft;
+
+ return VK_SUCCESS;
+}
+VkResult VmaDefragmentationContext_T::DefragmentStepEnd()
+{
+ VkResult res = VK_SUCCESS;
+
+ // Process default pools.
+ for(uint32_t memTypeIndex = 0;
+ memTypeIndex < m_hAllocator->GetMemoryTypeCount();
+ ++memTypeIndex)
+ {
+ VmaBlockVectorDefragmentationContext *pBlockVectorCtx = m_DefaultPoolContexts[memTypeIndex];
+ if(pBlockVectorCtx)
+ {
+ VMA_ASSERT(pBlockVectorCtx->GetBlockVector());
+
+ if(!pBlockVectorCtx->hasDefragmentationPlan)
+ {
+ res = VK_NOT_READY;
+ continue;
+ }
+
+ pBlockVectorCtx->GetBlockVector()->CommitDefragmentations(
+ pBlockVectorCtx, m_pStats);
+
+ if(pBlockVectorCtx->defragmentationMoves.size() != pBlockVectorCtx->defragmentationMovesCommitted)
+ res = VK_NOT_READY;
+ }
+ }
+
+ // Process custom pools.
+ for(size_t customCtxIndex = 0, customCtxCount = m_CustomPoolContexts.size();
+ customCtxIndex < customCtxCount;
+ ++customCtxIndex)
+ {
+ VmaBlockVectorDefragmentationContext *pBlockVectorCtx = m_CustomPoolContexts[customCtxIndex];
+ VMA_ASSERT(pBlockVectorCtx && pBlockVectorCtx->GetBlockVector());
+
+ if(!pBlockVectorCtx->hasDefragmentationPlan)
+ {
+ res = VK_NOT_READY;
+ continue;
+ }
+
+ pBlockVectorCtx->GetBlockVector()->CommitDefragmentations(
+ pBlockVectorCtx, m_pStats);
+
+ if(pBlockVectorCtx->defragmentationMoves.size() != pBlockVectorCtx->defragmentationMovesCommitted)
+ res = VK_NOT_READY;
+ }
+
+ return res;
+}
+
////////////////////////////////////////////////////////////////////////////////
// VmaRecorder
@@ -14759,6 +15065,7 @@
m_VulkanFunctions.vkCreateImage = (PFN_vkCreateImage)vkCreateImage;
m_VulkanFunctions.vkDestroyImage = (PFN_vkDestroyImage)vkDestroyImage;
m_VulkanFunctions.vkCmdCopyBuffer = (PFN_vkCmdCopyBuffer)vkCmdCopyBuffer;
+ m_VulkanFunctions.vkCmdCopyImage = (PFN_vkCmdCopyImage)vkCmdCopyImage;
#if VMA_VULKAN_VERSION >= 1001000
if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0))
{
@@ -14825,6 +15132,7 @@
VMA_COPY_IF_NOT_NULL(vkCreateImage);
VMA_COPY_IF_NOT_NULL(vkDestroyImage);
VMA_COPY_IF_NOT_NULL(vkCmdCopyBuffer);
+ VMA_COPY_IF_NOT_NULL(vkCmdCopyImage);
#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000
VMA_COPY_IF_NOT_NULL(vkGetBufferMemoryRequirements2KHR);
VMA_COPY_IF_NOT_NULL(vkGetImageMemoryRequirements2KHR);
@@ -14859,6 +15167,7 @@
VMA_ASSERT(m_VulkanFunctions.vkCreateImage != VMA_NULL);
VMA_ASSERT(m_VulkanFunctions.vkDestroyImage != VMA_NULL);
VMA_ASSERT(m_VulkanFunctions.vkCmdCopyBuffer != VMA_NULL);
+ VMA_ASSERT(m_VulkanFunctions.vkCmdCopyImage != VMA_NULL);
#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000
if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0) || m_UseKhrDedicatedAllocation)
{
@@ -15578,7 +15887,7 @@
VkResult res = (*pContext)->Defragment(
info.maxCpuBytesToMove, info.maxCpuAllocationsToMove,
info.maxGpuBytesToMove, info.maxGpuAllocationsToMove,
- info.commandBuffer, pStats);
+ info.commandBuffer, pStats, info.flags);
if(res != VK_NOT_READY)
{
@@ -15596,6 +15905,19 @@
return VK_SUCCESS;
}
+VkResult VmaAllocator_T::DefragmentationStepBegin(
+ VmaDefragmentationStepInfo* pInfo,
+ VmaDefragmentationContext context)
+{
+ return context->DefragmentStepBegin(pInfo);
+}
+VkResult VmaAllocator_T::DefragmentationStepEnd(
+ VmaDefragmentationContext context)
+{
+ return context->DefragmentStepEnd();
+}
+
void VmaAllocator_T::GetAllocationInfo(VmaAllocation hAllocation, VmaAllocationInfo* pAllocationInfo)
{
if(hAllocation->CanBecomeLost())
@@ -17414,6 +17736,42 @@
}
}
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragmentationStepBegin(
+ VmaAllocator allocator,
+ VmaDefragmentationStepInfo* pInfo,
+ VmaDefragmentationContext context)
+{
+ VMA_ASSERT(allocator);
+ VMA_ASSERT(pInfo);
+ VMA_HEAVY_ASSERT(VmaValidatePointerArray(pInfo->moveCount, pInfo->pMoves));
+
+ VMA_DEBUG_LOG("vmaDefragmentationStepBegin");
+
+ VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+ if(context == VK_NULL_HANDLE)
+ {
+ pInfo->moveCount = 0;
+ return VK_SUCCESS;
+ }
+
+ return allocator->DefragmentationStepBegin(pInfo, context);
+}
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragmentationStepEnd(
+ VmaAllocator allocator,
+ VmaDefragmentationContext context)
+{
+ VMA_ASSERT(allocator);
+
+ VMA_DEBUG_LOG("vmaDefragmentationStepEnd");
+ VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+ if(context == VK_NULL_HANDLE)
+ return VK_SUCCESS;
+
+ return allocator->DefragmentationStepEnd(context);
+}
+
VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory(
VmaAllocator allocator,
VmaAllocation allocation,
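One more integration note on the vk_mem_alloc.h side: VmaVulkanFunctions gains a vkCmdCopyImage member, and vmaCreateAllocator() now asserts that it is set, so applications that fill the function table manually must extend it. A sketch, showing only the affected entries (the remaining members are filled as before):

    VmaVulkanFunctions vulkanFunctions = {};
    // ... all previously required entries ...
    vulkanFunctions.vkCmdCopyBuffer = vkCmdCopyBuffer;
    vulkanFunctions.vkCmdCopyImage  = vkCmdCopyImage; // new requirement

    VmaAllocatorCreateInfo allocatorInfo = {};
    // ... other members ...
    allocatorInfo.pVulkanFunctions = &vulkanFunctions;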