Prototype of a defragmentation interface that supports tiling optimal images. Merged #90, thanks @JustSid!
diff --git a/src/Tests.cpp b/src/Tests.cpp index 5a6057f..e5b373d 100644 --- a/src/Tests.cpp +++ b/src/Tests.cpp
@@ -688,6 +688,7 @@ VmaAllocation m_Allocation = VK_NULL_HANDLE; VkBuffer m_Buffer = VK_NULL_HANDLE; VkImage m_Image = VK_NULL_HANDLE; + VkImageLayout m_ImageLayout = VK_IMAGE_LAYOUT_UNDEFINED; uint32_t m_StartValue = 0; union { @@ -698,6 +699,10 @@ void CreateBuffer( const VkBufferCreateInfo& bufCreateInfo, const VmaAllocationCreateInfo& allocCreateInfo); + void CreateImage( + const VkImageCreateInfo& imageCreateInfo, + const VmaAllocationCreateInfo& allocCreateInfo, + VkImageLayout layout); void Destroy(); }; @@ -709,6 +714,16 @@ VkResult res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &m_Buffer, &m_Allocation, nullptr); TEST(res == VK_SUCCESS); } +void AllocInfo::CreateImage( + const VkImageCreateInfo& imageCreateInfo, + const VmaAllocationCreateInfo& allocCreateInfo, + VkImageLayout layout) +{ + m_ImageInfo = imageCreateInfo; + m_ImageLayout = layout; + VkResult res = vmaCreateImage(g_hAllocator, &imageCreateInfo, &allocCreateInfo, &m_Image, &m_Allocation, nullptr); + TEST(res == VK_SUCCESS); +} void AllocInfo::Destroy() { @@ -904,7 +919,88 @@ } else { - TEST(0 && "Images not currently supported."); + TEST(currAllocInfo.m_ImageInfo.format == VK_FORMAT_R8G8B8A8_UNORM && "Only RGBA8 images are currently supported."); + TEST(currAllocInfo.m_ImageInfo.mipLevels == 1 && "Only single mip images are currently supported."); + + const VkDeviceSize size = currAllocInfo.m_ImageInfo.extent.width * currAllocInfo.m_ImageInfo.extent.height * sizeof(uint32_t); + + VkBuffer stagingBuf = VK_NULL_HANDLE; + void* stagingBufMappedPtr = nullptr; + if(!stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr)) + { + TEST(cmdBufferStarted); + EndSingleTimeCommands(); + stagingBufs.ReleaseAllBuffers(); + cmdBufferStarted = false; + + bool ok = stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr); + TEST(ok); + } + + // Fill staging buffer. 
+ { + assert(size % sizeof(uint32_t) == 0); + uint32_t *stagingValPtr = (uint32_t *)stagingBufMappedPtr; + uint32_t val = currAllocInfo.m_StartValue; + for(size_t i = 0; i < size / sizeof(uint32_t); ++i) + { + *stagingValPtr = val; + ++stagingValPtr; + ++val; + } + } + + // Issue copy command from staging buffer to destination image. + if(!cmdBufferStarted) + { + cmdBufferStarted = true; + BeginSingleTimeCommands(); + } + + + // Transfer to transfer dst layout + VkImageSubresourceRange subresourceRange = { + VK_IMAGE_ASPECT_COLOR_BIT, + 0, VK_REMAINING_MIP_LEVELS, + 0, VK_REMAINING_ARRAY_LAYERS + }; + + VkImageMemoryBarrier barrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER }; + barrier.srcAccessMask = 0; + barrier.dstAccessMask = 0; + barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = currAllocInfo.m_Image; + barrier.subresourceRange = subresourceRange; + + vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, + 0, nullptr, + 0, nullptr, + 1, &barrier); + + // Copy image data + VkBufferImageCopy copy = {}; + copy.bufferOffset = 0; + copy.bufferRowLength = 0; + copy.bufferImageHeight = 0; + copy.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + copy.imageSubresource.layerCount = 1; + copy.imageExtent = currAllocInfo.m_ImageInfo.extent; + + vkCmdCopyBufferToImage(g_hTemporaryCommandBuffer, stagingBuf, currAllocInfo.m_Image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy); + + // Transfer to desired layout + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; + barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + barrier.newLayout = currAllocInfo.m_ImageLayout; + + vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, 
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, + 0, nullptr, + 0, nullptr, + 1, &barrier); } } @@ -1754,6 +1850,555 @@ g_MemoryAliasingWarningEnabled = true; } +static void ProcessDefragmentationStepInfo(VmaDefragmentationStepInfo &stepInfo) +{ + std::vector<VkImageMemoryBarrier> beginImageBarriers; + std::vector<VkImageMemoryBarrier> finalizeImageBarriers; + + VkPipelineStageFlags beginSrcStageMask = 0; + VkPipelineStageFlags beginDstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; + + VkPipelineStageFlags finalizeSrcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; + VkPipelineStageFlags finalizeDstStageMask = 0; + + bool wantsMemoryBarrier = false; + + VkMemoryBarrier beginMemoryBarrier = { VK_STRUCTURE_TYPE_MEMORY_BARRIER }; + VkMemoryBarrier finalizeMemoryBarrier = { VK_STRUCTURE_TYPE_MEMORY_BARRIER }; + + std::vector<void *> newHandles; + + for(uint32_t i = 0; i < stepInfo.moveCount; ++ i) + { + VmaAllocationInfo info; + vmaGetAllocationInfo(g_hAllocator, stepInfo.pMoves[i].allocation, &info); + + AllocInfo *allocInfo = (AllocInfo *)info.pUserData; + + if(allocInfo->m_Image) + { + VkImage newImage; + + const VkResult result = vkCreateImage(g_hDevice, &allocInfo->m_ImageInfo, g_Allocs, &newImage); + TEST(result >= VK_SUCCESS); + + vkBindImageMemory(g_hDevice, newImage, stepInfo.pMoves[i].memory, stepInfo.pMoves[i].offset); + newHandles.push_back(newImage); + + // Keep track of our pipeline stages that we need to wait/signal on + beginSrcStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + finalizeDstStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + + // We need one pipeline barrier and two image layout transitions here + // First we'll have to turn our newly created image into VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL + // And the second one is turning the old image into VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL + + VkImageSubresourceRange subresourceRange = { + VK_IMAGE_ASPECT_COLOR_BIT, + 0, VK_REMAINING_MIP_LEVELS, + 0, VK_REMAINING_ARRAY_LAYERS + }; + + VkImageMemoryBarrier barrier = { 
VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER }; + barrier.srcAccessMask = 0; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = newImage; + barrier.subresourceRange = subresourceRange; + + beginImageBarriers.push_back(barrier); + + // Second barrier to convert the existing image. This one actually needs a real barrier + barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + barrier.oldLayout = allocInfo->m_ImageLayout; + barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + barrier.image = allocInfo->m_Image; + + beginImageBarriers.push_back(barrier); + + // And lastly we need a barrier that turns our new image into the layout of the old one + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; + barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + barrier.newLayout = allocInfo->m_ImageLayout; + barrier.image = newImage; + + finalizeImageBarriers.push_back(barrier); + } + else if(allocInfo->m_Buffer) + { + VkBuffer newBuffer; + + const VkResult result = vkCreateBuffer(g_hDevice, &allocInfo->m_BufferInfo, g_Allocs, &newBuffer); + TEST(result >= VK_SUCCESS); + + vkBindBufferMemory(g_hDevice, newBuffer, stepInfo.pMoves[i].memory, stepInfo.pMoves[i].offset); + newHandles.push_back(newBuffer); + + // Keep track of our pipeline stages that we need to wait/signal on + beginSrcStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + finalizeDstStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + + beginMemoryBarrier.srcAccessMask |= VK_ACCESS_MEMORY_WRITE_BIT; + beginMemoryBarrier.dstAccessMask |= VK_ACCESS_TRANSFER_READ_BIT; + + finalizeMemoryBarrier.srcAccessMask |= VK_ACCESS_TRANSFER_WRITE_BIT; + 
finalizeMemoryBarrier.dstAccessMask |= VK_ACCESS_MEMORY_READ_BIT; + + wantsMemoryBarrier = true; + } + } + + if(!beginImageBarriers.empty() || wantsMemoryBarrier) + { + const uint32_t memoryBarrierCount = wantsMemoryBarrier ? 1 : 0; + + vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, beginSrcStageMask, beginDstStageMask, 0, + memoryBarrierCount, &beginMemoryBarrier, + 0, nullptr, + (uint32_t)beginImageBarriers.size(), beginImageBarriers.data()); + } + + for(uint32_t i = 0; i < stepInfo.moveCount; ++ i) + { + VmaAllocationInfo info; + vmaGetAllocationInfo(g_hAllocator, stepInfo.pMoves[i].allocation, &info); + + AllocInfo *allocInfo = (AllocInfo *)info.pUserData; + + if(allocInfo->m_Image) + { + std::vector<VkImageCopy> imageCopies; + + // Copy all mips of the source image into the target image + VkOffset3D offset = { 0, 0, 0 }; + VkExtent3D extent = allocInfo->m_ImageInfo.extent; + + VkImageSubresourceLayers subresourceLayers = { + VK_IMAGE_ASPECT_COLOR_BIT, + 0, + 0, 1 + }; + + for(uint32_t mip = 0; mip < allocInfo->m_ImageInfo.mipLevels; ++ mip) + { + subresourceLayers.mipLevel = mip; + + VkImageCopy imageCopy{ + subresourceLayers, + offset, + subresourceLayers, + offset, + extent + }; + + imageCopies.push_back(imageCopy); + + extent.width = std::max(uint32_t(1), extent.width >> 1); + extent.height = std::max(uint32_t(1), extent.height >> 1); + extent.depth = std::max(uint32_t(1), extent.depth >> 1); + } + + vkCmdCopyImage( + g_hTemporaryCommandBuffer, + allocInfo->m_Image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + (VkImage)newHandles[i], VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + (uint32_t)imageCopies.size(), imageCopies.data()); + + imageCopies.clear(); + + // Update our alloc info with the new resource to be used + allocInfo->m_Image = (VkImage)newHandles[i]; + } + else if(allocInfo->m_Buffer) + { + VkBufferCopy region = { + 0, + 0, + allocInfo->m_BufferInfo.size }; + + vkCmdCopyBuffer(g_hTemporaryCommandBuffer, + allocInfo->m_Buffer, (VkBuffer)newHandles[i], + 1, 
&region); + + + // Update our alloc info with the new resource to be used + allocInfo->m_Buffer = (VkBuffer)newHandles[i]; + } + } + + + if(!finalizeImageBarriers.empty() || wantsMemoryBarrier) + { + const uint32_t memoryBarrierCount = wantsMemoryBarrier ? 1 : 0; + + vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, finalizeSrcStageMask, finalizeDstStageMask, 0, + memoryBarrierCount, &finalizeMemoryBarrier, + 0, nullptr, + (uint32_t)finalizeImageBarriers.size(), finalizeImageBarriers.data()); + } +} + + +static void TestDefragmentationIncrementalBasic() +{ + wprintf(L"Test defragmentation incremental basic\n"); + g_MemoryAliasingWarningEnabled = false; + + std::vector<AllocInfo> allocations; + + // Create that many allocations to surely fill 3 new blocks of 256 MB. + const std::array<uint32_t, 3> imageSizes = { 256, 512, 1024 }; + const VkDeviceSize bufSizeMin = 5ull * 1024 * 1024; + const VkDeviceSize bufSizeMax = 10ull * 1024 * 1024; + const VkDeviceSize totalSize = 3ull * 256 * 1024 * 1024; + const size_t imageCount = (size_t)(totalSize / (imageSizes[0] * imageSizes[0] * 4)) / 2; + const size_t bufCount = (size_t)(totalSize / bufSizeMin) / 2; + const size_t percentToLeave = 30; + RandomNumberGenerator rand = { 234522 }; + + VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; + imageInfo.imageType = VK_IMAGE_TYPE_2D; + imageInfo.extent.depth = 1; + imageInfo.mipLevels = 1; + imageInfo.arrayLayers = 1; + imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM; + imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL; + imageInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED; + imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; + imageInfo.samples = VK_SAMPLE_COUNT_1_BIT; + + VmaAllocationCreateInfo allocCreateInfo = {}; + allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; + allocCreateInfo.flags = 0; + + // Create all intended images. 
+ for(size_t i = 0; i < imageCount; ++i) + { + const uint32_t size = imageSizes[rand.Generate() % 3]; + + imageInfo.extent.width = size; + imageInfo.extent.height = size; + + AllocInfo alloc; + alloc.CreateImage(imageInfo, allocCreateInfo, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + alloc.m_StartValue = 0; + + allocations.push_back(alloc); + } + + // And all buffers + VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; + + for(size_t i = 0; i < bufCount; ++i) + { + bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16); + bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + + AllocInfo alloc; + alloc.CreateBuffer(bufCreateInfo, allocCreateInfo); + alloc.m_StartValue = 0; + + allocations.push_back(alloc); + } + + // Destroy some percentage of them. + { + const size_t allocationsToDestroy = round_div<size_t>((imageCount + bufCount) * (100 - percentToLeave), 100); + for(size_t i = 0; i < allocationsToDestroy; ++i) + { + const size_t index = rand.Generate() % allocations.size(); + allocations[index].Destroy(); + allocations.erase(allocations.begin() + index); + } + } + + { + // Set our user data pointers. A real application should probably be more clever here + const size_t allocationCount = allocations.size(); + for(size_t i = 0; i < allocationCount; ++i) + { + AllocInfo &alloc = allocations[i]; + vmaSetAllocationUserData(g_hAllocator, alloc.m_Allocation, &alloc); + } + } + + // Fill them with meaningful data. + UploadGpuData(allocations.data(), allocations.size()); + + wchar_t fileName[MAX_PATH]; + swprintf_s(fileName, L"GPU_defragmentation_incremental_basic_A_before.json"); + SaveAllocatorStatsToFile(fileName); + + // Defragment using GPU only. 
+ { + const size_t allocCount = allocations.size(); + + std::vector<VmaAllocation> allocationPtrs; + + for(size_t i = 0; i < allocCount; ++i) + { + VmaAllocationInfo allocInfo = {}; + vmaGetAllocationInfo(g_hAllocator, allocations[i].m_Allocation, &allocInfo); + + allocationPtrs.push_back(allocations[i].m_Allocation); + } + + const size_t movableAllocCount = allocationPtrs.size(); + + VmaDefragmentationInfo2 defragInfo = {}; + defragInfo.flags = VMA_DEFRAGMENTATION_FLAG_INCREMENTAL; + defragInfo.allocationCount = (uint32_t)movableAllocCount; + defragInfo.pAllocations = allocationPtrs.data(); + defragInfo.maxGpuBytesToMove = VK_WHOLE_SIZE; + defragInfo.maxGpuAllocationsToMove = UINT32_MAX; + + VmaDefragmentationStats stats = {}; + VmaDefragmentationContext ctx = VK_NULL_HANDLE; + VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &stats, &ctx); + TEST(res >= VK_SUCCESS); + + res = VK_NOT_READY; + + std::vector<VmaDefragmentationStepMoveInfo> moveInfo; + moveInfo.resize(movableAllocCount); + + while(res == VK_NOT_READY) + { + VmaDefragmentationStepInfo stepInfo = {}; + stepInfo.pMoves = moveInfo.data(); + stepInfo.moveCount = (uint32_t)moveInfo.size(); + + res = vmaDefragmentationStepBegin(g_hAllocator, &stepInfo, ctx); + TEST(res >= VK_SUCCESS); + + BeginSingleTimeCommands(); + ProcessDefragmentationStepInfo(stepInfo); + EndSingleTimeCommands(); + + res = vmaDefragmentationStepEnd(g_hAllocator, ctx); + } + + TEST(res >= VK_SUCCESS); + vmaDefragmentationEnd(g_hAllocator, ctx); + + // If corruption detection is enabled, GPU defragmentation may not work on + // memory types that have this detection active, e.g. on Intel. 
+#if !defined(VMA_DEBUG_DETECT_CORRUPTION) || VMA_DEBUG_DETECT_CORRUPTION == 0 + TEST(stats.allocationsMoved > 0 && stats.bytesMoved > 0); + TEST(stats.deviceMemoryBlocksFreed > 0 && stats.bytesFreed > 0); +#endif + } + + //ValidateGpuData(allocations.data(), allocations.size()); + + swprintf_s(fileName, L"GPU_defragmentation_incremental_basic_B_after.json"); + SaveAllocatorStatsToFile(fileName); + + // Destroy all remaining buffers. + for(size_t i = allocations.size(); i--; ) + { + allocations[i].Destroy(); + } + + g_MemoryAliasingWarningEnabled = true; +} + +void TestDefragmentationIncrementalComplex() +{ + wprintf(L"Test defragmentation incremental complex\n"); + g_MemoryAliasingWarningEnabled = false; + + std::vector<AllocInfo> allocations; + + // Create that many allocations to surely fill 3 new blocks of 256 MB. + const std::array<uint32_t, 3> imageSizes = { 256, 512, 1024 }; + const VkDeviceSize bufSizeMin = 5ull * 1024 * 1024; + const VkDeviceSize bufSizeMax = 10ull * 1024 * 1024; + const VkDeviceSize totalSize = 3ull * 256 * 1024 * 1024; + const size_t imageCount = (size_t)(totalSize / (imageSizes[0] * imageSizes[0] * 4)) / 2; + const size_t bufCount = (size_t)(totalSize / bufSizeMin) / 2; + const size_t percentToLeave = 30; + RandomNumberGenerator rand = { 234522 }; + + VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; + imageInfo.imageType = VK_IMAGE_TYPE_2D; + imageInfo.extent.depth = 1; + imageInfo.mipLevels = 1; + imageInfo.arrayLayers = 1; + imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM; + imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL; + imageInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED; + imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; + imageInfo.samples = VK_SAMPLE_COUNT_1_BIT; + + VmaAllocationCreateInfo allocCreateInfo = {}; + allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; + allocCreateInfo.flags = 0; + + // Create all intended images. 
+ for(size_t i = 0; i < imageCount; ++i) + { + const uint32_t size = imageSizes[rand.Generate() % 3]; + + imageInfo.extent.width = size; + imageInfo.extent.height = size; + + AllocInfo alloc; + alloc.CreateImage(imageInfo, allocCreateInfo, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + alloc.m_StartValue = 0; + + allocations.push_back(alloc); + } + + // And all buffers + VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; + + for(size_t i = 0; i < bufCount; ++i) + { + bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16); + bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + + AllocInfo alloc; + alloc.CreateBuffer(bufCreateInfo, allocCreateInfo); + alloc.m_StartValue = 0; + + allocations.push_back(alloc); + } + + // Destroy some percentage of them. + { + const size_t allocationsToDestroy = round_div<size_t>((imageCount + bufCount) * (100 - percentToLeave), 100); + for(size_t i = 0; i < allocationsToDestroy; ++i) + { + const size_t index = rand.Generate() % allocations.size(); + allocations[index].Destroy(); + allocations.erase(allocations.begin() + index); + } + } + + { + // Set our user data pointers. A real application should probably be more clever here + const size_t allocationCount = allocations.size(); + for(size_t i = 0; i < allocationCount; ++i) + { + AllocInfo &alloc = allocations[i]; + vmaSetAllocationUserData(g_hAllocator, alloc.m_Allocation, &alloc); + } + } + + // Fill them with meaningful data. 
+ UploadGpuData(allocations.data(), allocations.size()); + + wchar_t fileName[MAX_PATH]; + swprintf_s(fileName, L"GPU_defragmentation_incremental_complex_A_before.json"); + SaveAllocatorStatsToFile(fileName); + + std::vector<AllocInfo> additionalAllocations; + +#define MakeAdditionalAllocation() \ + do { \ + { \ + bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16); \ + bufCreateInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT; \ + \ + AllocInfo alloc; \ + alloc.CreateBuffer(bufCreateInfo, allocCreateInfo); \ + \ + additionalAllocations.push_back(alloc); \ + } \ + } while(0) + + // Defragment using GPU only. + { + const size_t allocCount = allocations.size(); + + std::vector<VmaAllocation> allocationPtrs; + + for(size_t i = 0; i < allocCount; ++i) + { + VmaAllocationInfo allocInfo = {}; + vmaGetAllocationInfo(g_hAllocator, allocations[i].m_Allocation, &allocInfo); + + allocationPtrs.push_back(allocations[i].m_Allocation); + } + + const size_t movableAllocCount = allocationPtrs.size(); + + VmaDefragmentationInfo2 defragInfo = {}; + defragInfo.flags = VMA_DEFRAGMENTATION_FLAG_INCREMENTAL; + defragInfo.allocationCount = (uint32_t)movableAllocCount; + defragInfo.pAllocations = allocationPtrs.data(); + defragInfo.maxGpuBytesToMove = VK_WHOLE_SIZE; + defragInfo.maxGpuAllocationsToMove = UINT32_MAX; + + VmaDefragmentationStats stats = {}; + VmaDefragmentationContext ctx = VK_NULL_HANDLE; + VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &stats, &ctx); + TEST(res >= VK_SUCCESS); + + res = VK_NOT_READY; + + std::vector<VmaDefragmentationStepMoveInfo> moveInfo; + moveInfo.resize(movableAllocCount); + + MakeAdditionalAllocation(); + + while(res == VK_NOT_READY) + { + VmaDefragmentationStepInfo stepInfo = {}; + stepInfo.pMoves = moveInfo.data(); + stepInfo.moveCount = (uint32_t)moveInfo.size(); + + res = 
vmaDefragmentationStepBegin(g_hAllocator, &stepInfo, ctx); + TEST(res >= VK_SUCCESS); + + MakeAdditionalAllocation(); + + BeginSingleTimeCommands(); + ProcessDefragmentationStepInfo(stepInfo); + EndSingleTimeCommands(); + + res = vmaDefragmentationStepEnd(g_hAllocator, ctx); + + MakeAdditionalAllocation(); + } + + TEST(res >= VK_SUCCESS); + vmaDefragmentationEnd(g_hAllocator, ctx); + + // If corruption detection is enabled, GPU defragmentation may not work on + // memory types that have this detection active, e.g. on Intel. +#if !defined(VMA_DEBUG_DETECT_CORRUPTION) || VMA_DEBUG_DETECT_CORRUPTION == 0 + TEST(stats.allocationsMoved > 0 && stats.bytesMoved > 0); + TEST(stats.deviceMemoryBlocksFreed > 0 && stats.bytesFreed > 0); +#endif + } + + //ValidateGpuData(allocations.data(), allocations.size()); + + swprintf_s(fileName, L"GPU_defragmentation_incremental_complex_B_after.json"); + SaveAllocatorStatsToFile(fileName); + + // Destroy all remaining buffers. + for(size_t i = allocations.size(); i--; ) + { + allocations[i].Destroy(); + } + + for(size_t i = additionalAllocations.size(); i--; ) + { + additionalAllocations[i].Destroy(); + } + + g_MemoryAliasingWarningEnabled = true; +} + + static void TestUserData() { VkResult res; @@ -5499,6 +6144,8 @@ TestDefragmentationFull(); TestDefragmentationWholePool(); TestDefragmentationGpu(); + TestDefragmentationIncrementalBasic(); + TestDefragmentationIncrementalComplex(); // # Detailed tests FILE* file;
diff --git a/src/vk_mem_alloc.h b/src/vk_mem_alloc.h index 1f7920c..9e78812 100644 --- a/src/vk_mem_alloc.h +++ b/src/vk_mem_alloc.h
@@ -1952,6 +1952,7 @@ PFN_vkCreateImage vkCreateImage; PFN_vkDestroyImage vkDestroyImage; PFN_vkCmdCopyBuffer vkCmdCopyBuffer; + PFN_vkCmdCopyImage vkCmdCopyImage; #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 PFN_vkGetBufferMemoryRequirements2KHR vkGetBufferMemoryRequirements2KHR; PFN_vkGetImageMemoryRequirements2KHR vkGetImageMemoryRequirements2KHR; @@ -3111,6 +3112,7 @@ /// Flags to be used in vmaDefragmentationBegin(). None at the moment. Reserved for future use. typedef enum VmaDefragmentationFlagBits { + VMA_DEFRAGMENTATION_FLAG_INCREMENTAL = 0x1, VMA_DEFRAGMENTATION_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VmaDefragmentationFlagBits; typedef VkFlags VmaDefragmentationFlags; @@ -3191,6 +3193,21 @@ VkCommandBuffer commandBuffer; } VmaDefragmentationInfo2; +typedef struct VmaDefragmentationStepMoveInfo { + VmaAllocation allocation; + VkDeviceMemory memory; + VkDeviceSize offset; +} VmaDefragmentationStepMoveInfo; + +/** \brief Parameters for incremental defragmentation steps. + +To be used with function vmaDefragmentationStepBegin(). +*/ +typedef struct VmaDefragmentationStepInfo { + uint32_t moveCount; + VmaDefragmentationStepMoveInfo* pMoves; +} VmaDefragmentationStepInfo; + /** \brief Deprecated. Optional configuration parameters to be passed to function vmaDefragment(). \deprecated This is a part of the old interface. It is recommended to use structure #VmaDefragmentationInfo2 and function vmaDefragmentationBegin() instead. @@ -3264,6 +3281,16 @@ VmaAllocator allocator, VmaDefragmentationContext context); +VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragmentationStepBegin( + VmaAllocator allocator, + VmaDefragmentationStepInfo* pInfo, + VmaDefragmentationContext context +); +VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragmentationStepEnd( + VmaAllocator allocator, + VmaDefragmentationContext context +); + /** \brief Deprecated. Compacts memory by moving allocations. @param pAllocations Array of allocations that can be moved during this compation. 
@@ -3672,6 +3699,7 @@ public: void Lock() { m_Mutex.lock(); } void Unlock() { m_Mutex.unlock(); } + bool TryLock() { return m_Mutex.try_lock(); } private: std::mutex m_Mutex; }; @@ -3688,8 +3716,10 @@ public: void LockRead() { m_Mutex.lock_shared(); } void UnlockRead() { m_Mutex.unlock_shared(); } + bool TryLockRead() { return m_Mutex.try_lock_shared(); } void LockWrite() { m_Mutex.lock(); } void UnlockWrite() { m_Mutex.unlock(); } + bool TryLockWrite() { return m_Mutex.try_lock(); } private: std::shared_mutex m_Mutex; }; @@ -3703,8 +3733,10 @@ VmaRWMutex() { InitializeSRWLock(&m_Lock); } void LockRead() { AcquireSRWLockShared(&m_Lock); } void UnlockRead() { ReleaseSRWLockShared(&m_Lock); } + bool TryLockRead() { return TryAcquireSRWLockShared(&m_Lock); } void LockWrite() { AcquireSRWLockExclusive(&m_Lock); } void UnlockWrite() { ReleaseSRWLockExclusive(&m_Lock); } + bool TryLockWrite() { return TryAcquireSRWLockExclusive(&m_Lock); } private: SRWLOCK m_Lock; }; @@ -3716,8 +3748,10 @@ public: void LockRead() { m_Mutex.Lock(); } void UnlockRead() { m_Mutex.Unlock(); } + bool TryLockRead() { return m_Mutex.TryLock(); } void LockWrite() { m_Mutex.Lock(); } void UnlockWrite() { m_Mutex.Unlock(); } + bool TryLockWrite() { return m_Mutex.TryLock(); } private: VMA_MUTEX m_Mutex; }; @@ -6241,6 +6275,9 @@ VkDeviceSize srcOffset; VkDeviceSize dstOffset; VkDeviceSize size; + VmaAllocation hAllocation; + VmaDeviceMemoryBlock* pSrcBlock; + VmaDeviceMemoryBlock* pDstBlock; }; class VmaDefragmentationAlgorithm; @@ -6310,7 +6347,7 @@ // Saves results in pCtx->res. 
void Defragment( class VmaBlockVectorDefragmentationContext* pCtx, - VmaDefragmentationStats* pStats, + VmaDefragmentationStats* pStats, VmaDefragmentationFlags flags, VkDeviceSize& maxCpuBytesToMove, uint32_t& maxCpuAllocationsToMove, VkDeviceSize& maxGpuBytesToMove, uint32_t& maxGpuAllocationsToMove, VkCommandBuffer commandBuffer); @@ -6318,6 +6355,14 @@ class VmaBlockVectorDefragmentationContext* pCtx, VmaDefragmentationStats* pStats); + uint32_t ProcessDefragmentations( + class VmaBlockVectorDefragmentationContext *pCtx, + VmaDefragmentationStepMoveInfo* pMove, uint32_t maxMoves); + + void CommitDefragmentations( + class VmaBlockVectorDefragmentationContext *pCtx, + VmaDefragmentationStats* pStats); + //////////////////////////////////////////////////////////////////////////////// // To be used only while the m_Mutex is locked. Used during defragmentation. @@ -6350,6 +6395,8 @@ VkDeviceSize CalcMaxBlockSize() const; + static VkImageAspectFlags ImageAspectMaskForFormat(VkFormat format); + // Finds and removes given block from vector. void Remove(VmaDeviceMemoryBlock* pBlock); @@ -6386,7 +6433,7 @@ // Saves result to pCtx->res. 
void ApplyDefragmentationMovesGpu( class VmaBlockVectorDefragmentationContext* pDefragCtx, - const VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves, + VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves, VkCommandBuffer commandBuffer); /* @@ -6455,7 +6502,8 @@ virtual VkResult Defragment( VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves, VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove) = 0; + uint32_t maxAllocationsToMove, + VmaDefragmentationFlags flags) = 0; virtual VkDeviceSize GetBytesMoved() const = 0; virtual uint32_t GetAllocationsMoved() const = 0; @@ -6500,7 +6548,8 @@ virtual VkResult Defragment( VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves, VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove); + uint32_t maxAllocationsToMove, + VmaDefragmentationFlags flags); virtual VkDeviceSize GetBytesMoved() const { return m_BytesMoved; } virtual uint32_t GetAllocationsMoved() const { return m_AllocationsMoved; } @@ -6601,7 +6650,8 @@ VkResult DefragmentRound( VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves, VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove); + uint32_t maxAllocationsToMove, + bool freeOldAllocations); size_t CalcBlocksWithNonMovableCount() const; @@ -6627,7 +6677,8 @@ virtual VkResult Defragment( VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves, VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove); + uint32_t maxAllocationsToMove, + VmaDefragmentationFlags flags); virtual VkDeviceSize GetBytesMoved() const { return m_BytesMoved; } virtual uint32_t GetAllocationsMoved() const { return m_AllocationsMoved; } @@ -6775,6 +6826,10 @@ VkResult res; bool mutexLocked; VmaVector< VmaBlockDefragmentationContext, VmaStlAllocator<VmaBlockDefragmentationContext> > blockContexts; + VmaVector< 
VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> > defragmentationMoves; + uint32_t defragmentationMovesProcessed; + uint32_t defragmentationMovesCommitted; + bool hasDefragmentationPlan; VmaBlockVectorDefragmentationContext( VmaAllocator hAllocator, @@ -6790,7 +6845,7 @@ void AddAllocation(VmaAllocation hAlloc, VkBool32* pChanged); void AddAll() { m_AllAllocations = true; } - void Begin(bool overlappingMoveSupported); + void Begin(bool overlappingMoveSupported, VmaDefragmentationFlags flags); private: const VmaAllocator m_hAllocator; @@ -6839,13 +6894,22 @@ VkResult Defragment( VkDeviceSize maxCpuBytesToMove, uint32_t maxCpuAllocationsToMove, VkDeviceSize maxGpuBytesToMove, uint32_t maxGpuAllocationsToMove, - VkCommandBuffer commandBuffer, VmaDefragmentationStats* pStats); + VkCommandBuffer commandBuffer, VmaDefragmentationStats* pStats, VmaDefragmentationFlags flags); + + VkResult DefragmentStepBegin(VmaDefragmentationStepInfo* pInfo); + VkResult DefragmentStepEnd(); private: const VmaAllocator m_hAllocator; const uint32_t m_CurrFrameIndex; const uint32_t m_Flags; VmaDefragmentationStats* const m_pStats; + + VkDeviceSize m_MaxCpuBytesToMove; + uint32_t m_MaxCpuAllocationsToMove; + VkDeviceSize m_MaxGpuBytesToMove; + uint32_t m_MaxGpuAllocationsToMove; + // Owner of these objects. VmaBlockVectorDefragmentationContext* m_DefaultPoolContexts[VK_MAX_MEMORY_TYPES]; // Owner of these objects. 
@@ -7185,6 +7249,12 @@ VkResult DefragmentationEnd( VmaDefragmentationContext context); + VkResult DefragmentationStepBegin( + VmaDefragmentationStepInfo* pInfo, + VmaDefragmentationContext context); + VkResult DefragmentationStepEnd( + VmaDefragmentationContext context); + void GetAllocationInfo(VmaAllocation hAllocation, VmaAllocationInfo* pAllocationInfo); bool TouchAllocation(VmaAllocation hAllocation); @@ -12618,7 +12688,7 @@ void VmaBlockVector::ApplyDefragmentationMovesGpu( class VmaBlockVectorDefragmentationContext* pDefragCtx, - const VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves, + VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves, VkCommandBuffer commandBuffer) { const size_t blockCount = m_Blocks.size(); @@ -12631,8 +12701,13 @@ for(size_t moveIndex = 0; moveIndex < moveCount; ++moveIndex) { const VmaDefragmentationMove& move = moves[moveIndex]; - pDefragCtx->blockContexts[move.srcBlockIndex].flags |= VmaBlockDefragmentationContext::BLOCK_FLAG_USED; - pDefragCtx->blockContexts[move.dstBlockIndex].flags |= VmaBlockDefragmentationContext::BLOCK_FLAG_USED; + + //if(move.type == VMA_ALLOCATION_TYPE_UNKNOWN) + { + // Old school move still require us to map the whole block + pDefragCtx->blockContexts[move.srcBlockIndex].flags |= VmaBlockDefragmentationContext::BLOCK_FLAG_USED; + pDefragCtx->blockContexts[move.dstBlockIndex].flags |= VmaBlockDefragmentationContext::BLOCK_FLAG_USED; + } } VMA_ASSERT(pDefragCtx->res == VK_SUCCESS); @@ -12806,7 +12881,7 @@ void VmaBlockVector::Defragment( class VmaBlockVectorDefragmentationContext* pCtx, - VmaDefragmentationStats* pStats, + VmaDefragmentationStats* pStats, VmaDefragmentationFlags flags, VkDeviceSize& maxCpuBytesToMove, uint32_t& maxCpuAllocationsToMove, VkDeviceSize& maxGpuBytesToMove, uint32_t& maxGpuAllocationsToMove, VkCommandBuffer commandBuffer) @@ -12843,19 +12918,28 @@ if(m_hAllocator->m_UseMutex) { - m_Mutex.LockWrite(); - 
pCtx->mutexLocked = true; + if(flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL) + { + if(!m_Mutex.TryLockWrite()) + { + pCtx->res = VK_ERROR_INITIALIZATION_FAILED; + return; + } + } + else + { + m_Mutex.LockWrite(); + pCtx->mutexLocked = true; + } } - pCtx->Begin(overlappingMoveSupported); + pCtx->Begin(overlappingMoveSupported, flags); // Defragment. const VkDeviceSize maxBytesToMove = defragmentOnGpu ? maxGpuBytesToMove : maxCpuBytesToMove; const uint32_t maxAllocationsToMove = defragmentOnGpu ? maxGpuAllocationsToMove : maxCpuAllocationsToMove; - VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> > moves = - VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >(VmaStlAllocator<VmaDefragmentationMove>(m_hAllocator->GetAllocationCallbacks())); - pCtx->res = pCtx->GetAlgorithm()->Defragment(moves, maxBytesToMove, maxAllocationsToMove); + pCtx->res = pCtx->GetAlgorithm()->Defragment(pCtx->defragmentationMoves, maxBytesToMove, maxAllocationsToMove, flags); // Accumulate statistics. 
if(pStats != VMA_NULL) @@ -12877,16 +12961,27 @@ maxCpuAllocationsToMove -= allocationsMoved; } } + + if(flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL) + { + if(m_hAllocator->m_UseMutex) + m_Mutex.UnlockWrite(); + + if(pCtx->res >= VK_SUCCESS && !pCtx->defragmentationMoves.empty()) + pCtx->res = VK_NOT_READY; + + return; + } if(pCtx->res >= VK_SUCCESS) { if(defragmentOnGpu) { - ApplyDefragmentationMovesGpu(pCtx, moves, commandBuffer); + ApplyDefragmentationMovesGpu(pCtx, pCtx->defragmentationMoves, commandBuffer); } else { - ApplyDefragmentationMovesCpu(pCtx, moves); + ApplyDefragmentationMovesCpu(pCtx, pCtx->defragmentationMoves); } } } @@ -12919,6 +13014,48 @@ } } +/* Copies up to maxMoves planned moves that earlier calls have not handed out yet into the array at pMove (allocation, destination device memory, destination offset), advances pCtx->defragmentationMovesProcessed by that number and returns it. */ uint32_t VmaBlockVector::ProcessDefragmentations( + class VmaBlockVectorDefragmentationContext *pCtx, + VmaDefragmentationStepMoveInfo* pMove, uint32_t maxMoves) +{ + VmaMutexLockWrite lock(m_Mutex, m_hAllocator->m_UseMutex); + /* moveCount is the size of this batch: the moves still unprocessed, capped at maxMoves. */ + const uint32_t moveCount = std::min(uint32_t(pCtx->defragmentationMoves.size()) - pCtx->defragmentationMovesProcessed, maxMoves); + /* i counts entries written by this call, so the plan is indexed relative to defragmentationMovesProcessed. Using the processed count as the loop start with moveCount as the bound wrote too few (or no) entries on every call after the first while still reporting moveCount moves. */ + for(uint32_t i = 0; i < moveCount; ++ i) + { + VmaDefragmentationMove& move = pCtx->defragmentationMoves[pCtx->defragmentationMovesProcessed + i]; + + pMove->allocation = move.hAllocation; + pMove->memory = move.pDstBlock->GetDeviceMemory(); + pMove->offset = move.dstOffset; + + ++ pMove; + } + + pCtx->defragmentationMovesProcessed += moveCount; + + return moveCount; +} + +/* Applies the moves that were handed out but not committed yet: frees each source suballocation and points the allocation at its destination block and offset. */ void VmaBlockVector::CommitDefragmentations( + class VmaBlockVectorDefragmentationContext *pCtx, + VmaDefragmentationStats* pStats) +{ + VmaMutexLockWrite lock(m_Mutex, m_hAllocator->m_UseMutex); + + for(uint32_t i = pCtx->defragmentationMovesCommitted; i < pCtx->defragmentationMovesProcessed; ++ i) + { + const VmaDefragmentationMove &move = pCtx->defragmentationMoves[i]; + + move.pSrcBlock->m_pMetadata->FreeAtOffset(move.srcOffset); + move.hAllocation->ChangeBlockAllocation(m_hAllocator, move.pDstBlock, move.dstOffset); + } + + pCtx->defragmentationMovesCommitted = 
pCtx->defragmentationMovesProcessed; + FreeEmptyBlocks(pStats); +} + size_t VmaBlockVector::CalcAllocationCount() const { size_t result = 0; @@ -13069,7 +13206,8 @@ VkResult VmaDefragmentationAlgorithm_Generic::DefragmentRound( VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves, VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove) + uint32_t maxAllocationsToMove, + bool freeOldAllocations) { if(m_Blocks.empty()) { @@ -13161,12 +13299,16 @@ return VK_SUCCESS; } - VmaDefragmentationMove move; + VmaDefragmentationMove move = {}; move.srcBlockIndex = pSrcBlockInfo->m_OriginalBlockIndex; move.dstBlockIndex = pDstBlockInfo->m_OriginalBlockIndex; move.srcOffset = srcOffset; move.dstOffset = dstAllocRequest.offset; move.size = size; + move.hAllocation = allocInfo.m_hAllocation; + move.pSrcBlock = pSrcBlockInfo->m_pBlock; + move.pDstBlock = pDstBlockInfo->m_pBlock; + moves.push_back(move); pDstBlockInfo->m_pBlock->m_pMetadata->Alloc( @@ -13174,10 +13316,13 @@ suballocType, size, allocInfo.m_hAllocation); - pSrcBlockInfo->m_pBlock->m_pMetadata->FreeAtOffset(srcOffset); - - allocInfo.m_hAllocation->ChangeBlockAllocation(m_hAllocator, pDstBlockInfo->m_pBlock, dstAllocRequest.offset); + if(freeOldAllocations) + { + pSrcBlockInfo->m_pBlock->m_pMetadata->FreeAtOffset(srcOffset); + allocInfo.m_hAllocation->ChangeBlockAllocation(m_hAllocator, pDstBlockInfo->m_pBlock, dstAllocRequest.offset); + } + if(allocInfo.m_pChanged != VMA_NULL) { *allocInfo.m_pChanged = VK_TRUE; @@ -13229,7 +13374,8 @@ VkResult VmaDefragmentationAlgorithm_Generic::Defragment( VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves, VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove) + uint32_t maxAllocationsToMove, + VmaDefragmentationFlags flags) { if(!m_AllAllocations && m_AllocationCount == 0) { @@ -13275,7 +13421,7 @@ VkResult result = VK_SUCCESS; for(uint32_t round = 0; (round < roundCount) && (result == VK_SUCCESS); ++round) 
{ - result = DefragmentRound(moves, maxBytesToMove, maxAllocationsToMove); + result = DefragmentRound(moves, maxBytesToMove, maxAllocationsToMove, !(flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL)); } return result; @@ -13327,7 +13473,8 @@ VkResult VmaDefragmentationAlgorithm_Fast::Defragment( VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves, VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove) + uint32_t maxAllocationsToMove, + VmaDefragmentationFlags flags) { VMA_ASSERT(m_AllAllocations || m_pBlockVector->CalcAllocationCount() == m_AllocationCount); @@ -13383,6 +13530,7 @@ } const VkDeviceSize srcAllocOffset = srcSuballocIt->offset; + VmaDefragmentationMove move = {}; // Try to place it in one of free spaces from the database. size_t freeSpaceInfoIndex; VkDeviceSize dstAllocOffset; @@ -13413,10 +13561,12 @@ InsertSuballoc(pFreeSpaceMetadata, suballoc); - VmaDefragmentationMove move = { - srcOrigBlockIndex, freeSpaceOrigBlockIndex, - srcAllocOffset, dstAllocOffset, - srcAllocSize }; + move.srcBlockIndex = srcOrigBlockIndex; + move.dstBlockIndex = freeSpaceOrigBlockIndex; + move.srcOffset = srcAllocOffset; + move.dstOffset = dstAllocOffset; + move.size = srcAllocSize; + moves.push_back(move); } // Different block @@ -13439,10 +13589,12 @@ InsertSuballoc(pFreeSpaceMetadata, suballoc); - VmaDefragmentationMove move = { - srcOrigBlockIndex, freeSpaceOrigBlockIndex, - srcAllocOffset, dstAllocOffset, - srcAllocSize }; + move.srcBlockIndex = srcOrigBlockIndex; + move.dstBlockIndex = freeSpaceOrigBlockIndex; + move.srcOffset = srcAllocOffset; + move.dstOffset = dstAllocOffset; + move.size = srcAllocSize; + moves.push_back(move); } } @@ -13497,10 +13649,13 @@ m_BytesMoved += srcAllocSize; ++m_AllocationsMoved; ++srcSuballocIt; - VmaDefragmentationMove move = { - srcOrigBlockIndex, dstOrigBlockIndex, - srcAllocOffset, dstAllocOffset, - srcAllocSize }; + + move.srcBlockIndex = srcOrigBlockIndex; + move.dstBlockIndex = 
dstOrigBlockIndex; + move.srcOffset = srcAllocOffset; + move.dstOffset = dstAllocOffset; + move.size = srcAllocSize; + moves.push_back(move); } } @@ -13526,10 +13681,12 @@ pDstMetadata->m_Suballocations.push_back(suballoc); - VmaDefragmentationMove move = { - srcOrigBlockIndex, dstOrigBlockIndex, - srcAllocOffset, dstAllocOffset, - srcAllocSize }; + move.srcBlockIndex = srcOrigBlockIndex; + move.dstBlockIndex = dstOrigBlockIndex; + move.srcOffset = srcAllocOffset; + move.dstOffset = dstAllocOffset; + move.size = srcAllocSize; + moves.push_back(move); } } @@ -13679,6 +13836,10 @@ res(VK_SUCCESS), mutexLocked(false), blockContexts(VmaStlAllocator<VmaBlockDefragmentationContext>(hAllocator->GetAllocationCallbacks())), + defragmentationMoves(VmaStlAllocator<VmaDefragmentationMove>(hAllocator->GetAllocationCallbacks())), + defragmentationMovesProcessed(0), + defragmentationMovesCommitted(0), + hasDefragmentationPlan(0), m_hAllocator(hAllocator), m_hCustomPool(hCustomPool), m_pBlockVector(pBlockVector), @@ -13700,7 +13861,7 @@ m_Allocations.push_back(info); } -void VmaBlockVectorDefragmentationContext::Begin(bool overlappingMoveSupported) +void VmaBlockVectorDefragmentationContext::Begin(bool overlappingMoveSupported, VmaDefragmentationFlags flags) { const bool allAllocations = m_AllAllocations || m_Allocations.size() == m_pBlockVector->CalcAllocationCount(); @@ -13714,10 +13875,12 @@ - VMA_DEBUG_MARGIN is 0. - All allocations in this block vector are moveable. - There is no possibility of image/buffer granularity conflict. 
+ - The defragmentation is not incremental */ if(VMA_DEBUG_MARGIN == 0 && allAllocations && - !m_pBlockVector->IsBufferImageGranularityConflictPossible()) + !m_pBlockVector->IsBufferImageGranularityConflictPossible() && + !(flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL)) { m_pAlgorithm = vma_new(m_hAllocator, VmaDefragmentationAlgorithm_Fast)( m_hAllocator, m_pBlockVector, m_CurrFrameIndex, overlappingMoveSupported); @@ -13884,13 +14047,30 @@ VkResult VmaDefragmentationContext_T::Defragment( VkDeviceSize maxCpuBytesToMove, uint32_t maxCpuAllocationsToMove, VkDeviceSize maxGpuBytesToMove, uint32_t maxGpuAllocationsToMove, - VkCommandBuffer commandBuffer, VmaDefragmentationStats* pStats) + VkCommandBuffer commandBuffer, VmaDefragmentationStats* pStats, VmaDefragmentationFlags flags) { if(pStats) { memset(pStats, 0, sizeof(VmaDefragmentationStats)); } + if(flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL) + { + // For incremental defragmetnations, we just earmark how much we can move + // The real meat is in the defragmentation steps + m_MaxCpuBytesToMove = maxCpuBytesToMove; + m_MaxCpuAllocationsToMove = maxCpuAllocationsToMove; + + m_MaxGpuBytesToMove = maxGpuBytesToMove; + m_MaxGpuAllocationsToMove = maxGpuAllocationsToMove; + + if(m_MaxCpuBytesToMove == 0 && m_MaxCpuAllocationsToMove == 0 && + m_MaxGpuBytesToMove == 0 && m_MaxGpuAllocationsToMove == 0) + return VK_SUCCESS; + + return VK_NOT_READY; + } + if(commandBuffer == VK_NULL_HANDLE) { maxGpuBytesToMove = 0; @@ -13910,7 +14090,7 @@ VMA_ASSERT(pBlockVectorCtx->GetBlockVector()); pBlockVectorCtx->GetBlockVector()->Defragment( pBlockVectorCtx, - pStats, + pStats, flags, maxCpuBytesToMove, maxCpuAllocationsToMove, maxGpuBytesToMove, maxGpuAllocationsToMove, commandBuffer); @@ -13930,7 +14110,7 @@ VMA_ASSERT(pBlockVectorCtx && pBlockVectorCtx->GetBlockVector()); pBlockVectorCtx->GetBlockVector()->Defragment( pBlockVectorCtx, - pStats, + pStats, flags, maxCpuBytesToMove, maxCpuAllocationsToMove, maxGpuBytesToMove, 
maxGpuAllocationsToMove, commandBuffer); @@ -13943,6 +14123,132 @@ return res; } +/* Fills pInfo->pMoves with up to pInfo->moveCount pending moves collected from the block vector contexts (default pools first, then custom pools) and writes the number of entries actually filled back to pInfo->moveCount. Always returns VK_SUCCESS. */ VkResult VmaDefragmentationContext_T::DefragmentStepBegin(VmaDefragmentationStepInfo* pInfo) +{ + VmaDefragmentationStepMoveInfo* pCurrentMove = pInfo->pMoves; + uint32_t movesLeft = pInfo->moveCount; + + // Process default pools. + for(uint32_t memTypeIndex = 0; + memTypeIndex < m_hAllocator->GetMemoryTypeCount(); + ++memTypeIndex) + { + VmaBlockVectorDefragmentationContext *pBlockVectorCtx = m_DefaultPoolContexts[memTypeIndex]; + if(pBlockVectorCtx) + { + VMA_ASSERT(pBlockVectorCtx->GetBlockVector()); + /* Build the move plan for this block vector the first time it is visited. A negative result leaves hasDefragmentationPlan unset, so the next step begin calls Defragment for it again. */ + if(!pBlockVectorCtx->hasDefragmentationPlan) + { + pBlockVectorCtx->GetBlockVector()->Defragment( + pBlockVectorCtx, + m_pStats, m_Flags, + m_MaxCpuBytesToMove, m_MaxCpuAllocationsToMove, + m_MaxGpuBytesToMove, m_MaxGpuAllocationsToMove, + VK_NULL_HANDLE); + + if(pBlockVectorCtx->res < VK_SUCCESS) + continue; + + pBlockVectorCtx->hasDefragmentationPlan = true; + } + /* Hand out as many planned moves as still fit into the caller's array. */ + const uint32_t processed = pBlockVectorCtx->GetBlockVector()->ProcessDefragmentations( + pBlockVectorCtx, + pCurrentMove, movesLeft); + + movesLeft -= processed; + pCurrentMove += processed; + } + } + + // Process custom pools. 
+ for(size_t customCtxIndex = 0, customCtxCount = m_CustomPoolContexts.size(); + customCtxIndex < customCtxCount; + ++customCtxIndex) + { + VmaBlockVectorDefragmentationContext *pBlockVectorCtx = m_CustomPoolContexts[customCtxIndex]; + VMA_ASSERT(pBlockVectorCtx && pBlockVectorCtx->GetBlockVector()); + /* Same plan-on-first-visit handling as for the default pools. */ + if(!pBlockVectorCtx->hasDefragmentationPlan) + { + pBlockVectorCtx->GetBlockVector()->Defragment( + pBlockVectorCtx, + m_pStats, m_Flags, + m_MaxCpuBytesToMove, m_MaxCpuAllocationsToMove, + m_MaxGpuBytesToMove, m_MaxGpuAllocationsToMove, + VK_NULL_HANDLE); + + if(pBlockVectorCtx->res < VK_SUCCESS) + continue; + + pBlockVectorCtx->hasDefragmentationPlan = true; + } + + const uint32_t processed = pBlockVectorCtx->GetBlockVector()->ProcessDefragmentations( + pBlockVectorCtx, + pCurrentMove, movesLeft); + + movesLeft -= processed; + pCurrentMove += processed; + } + /* Report back how many entries of pInfo->pMoves were filled. */ + pInfo->moveCount = pInfo->moveCount - movesLeft; + + return VK_SUCCESS; +} +/* Commits the handed-out moves of every block vector context that has a plan. Returns VK_NOT_READY while any context has no plan yet or still has planned moves that are not committed, VK_SUCCESS otherwise. */ VkResult VmaDefragmentationContext_T::DefragmentStepEnd() +{ + VkResult res = VK_SUCCESS; + + // Process default pools. + for(uint32_t memTypeIndex = 0; + memTypeIndex < m_hAllocator->GetMemoryTypeCount(); + ++memTypeIndex) + { + VmaBlockVectorDefragmentationContext *pBlockVectorCtx = m_DefaultPoolContexts[memTypeIndex]; + if(pBlockVectorCtx) + { + VMA_ASSERT(pBlockVectorCtx->GetBlockVector()); + /* No plan for this block vector yet: nothing to commit, so defragmentation is not finished. */ + if(!pBlockVectorCtx->hasDefragmentationPlan) + { + res = VK_NOT_READY; + continue; + } + + pBlockVectorCtx->GetBlockVector()->CommitDefragmentations( + pBlockVectorCtx, m_pStats); + /* Planned moves that are not committed yet mean another step is required. */ + if(pBlockVectorCtx->defragmentationMoves.size() != pBlockVectorCtx->defragmentationMovesCommitted) + res = VK_NOT_READY; + } + } + + // Process custom pools. 
+ for(size_t customCtxIndex = 0, customCtxCount = m_CustomPoolContexts.size(); + customCtxIndex < customCtxCount; + ++customCtxIndex) + { + VmaBlockVectorDefragmentationContext *pBlockVectorCtx = m_CustomPoolContexts[customCtxIndex]; + VMA_ASSERT(pBlockVectorCtx && pBlockVectorCtx->GetBlockVector()); + /* No plan for this block vector yet: nothing to commit, so defragmentation is not finished. */ + if(!pBlockVectorCtx->hasDefragmentationPlan) + { + res = VK_NOT_READY; + continue; + } + + pBlockVectorCtx->GetBlockVector()->CommitDefragmentations( + pBlockVectorCtx, m_pStats); + /* Planned moves that are not committed yet mean another step is required. */ + if(pBlockVectorCtx->defragmentationMoves.size() != pBlockVectorCtx->defragmentationMovesCommitted) + res = VK_NOT_READY; + } + + return res; +} + //////////////////////////////////////////////////////////////////////////////// // VmaRecorder @@ -14759,6 +15065,7 @@ m_VulkanFunctions.vkCreateImage = (PFN_vkCreateImage)vkCreateImage; m_VulkanFunctions.vkDestroyImage = (PFN_vkDestroyImage)vkDestroyImage; m_VulkanFunctions.vkCmdCopyBuffer = (PFN_vkCmdCopyBuffer)vkCmdCopyBuffer; + m_VulkanFunctions.vkCmdCopyImage = (PFN_vkCmdCopyImage)vkCmdCopyImage; #if VMA_VULKAN_VERSION >= 1001000 if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) { @@ -14825,6 +15132,7 @@ VMA_COPY_IF_NOT_NULL(vkCreateImage); VMA_COPY_IF_NOT_NULL(vkDestroyImage); VMA_COPY_IF_NOT_NULL(vkCmdCopyBuffer); + VMA_COPY_IF_NOT_NULL(vkCmdCopyImage); #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 VMA_COPY_IF_NOT_NULL(vkGetBufferMemoryRequirements2KHR); VMA_COPY_IF_NOT_NULL(vkGetImageMemoryRequirements2KHR); @@ -14859,6 +15167,7 @@ VMA_ASSERT(m_VulkanFunctions.vkCreateImage != VMA_NULL); VMA_ASSERT(m_VulkanFunctions.vkDestroyImage != VMA_NULL); VMA_ASSERT(m_VulkanFunctions.vkCmdCopyBuffer != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkCmdCopyImage != VMA_NULL); #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0) || m_UseKhrDedicatedAllocation) { @@ -15578,7 +15887,7 @@ VkResult res = (*pContext)->Defragment( info.maxCpuBytesToMove, 
info.maxCpuAllocationsToMove, info.maxGpuBytesToMove, info.maxGpuAllocationsToMove, - info.commandBuffer, pStats); + info.commandBuffer, pStats, info.flags); if(res != VK_NOT_READY) { @@ -15596,6 +15905,19 @@ return VK_SUCCESS; } +/* Forwards the step-begin request to the given defragmentation context and returns its result. */ VkResult VmaAllocator_T::DefragmentationStepBegin( + VmaDefragmentationStepInfo* pInfo, + VmaDefragmentationContext context) +{ + return context->DefragmentStepBegin(pInfo); +} +/* Forwards the step-end request to the given defragmentation context and returns its result. */ VkResult VmaAllocator_T::DefragmentationStepEnd( + VmaDefragmentationContext context) +{ + return context->DefragmentStepEnd(); + +} + void VmaAllocator_T::GetAllocationInfo(VmaAllocation hAllocation, VmaAllocationInfo* pAllocationInfo) { if(hAllocation->CanBecomeLost()) @@ -17414,6 +17736,42 @@ } } +/* Public entry point: asserts its arguments, then asks the allocator to fill pInfo with the moves of the next defragmentation step. */ VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragmentationStepBegin( + VmaAllocator allocator, + VmaDefragmentationStepInfo* pInfo, + VmaDefragmentationContext context) +{ + VMA_ASSERT(allocator); + VMA_ASSERT(pInfo); + VMA_HEAVY_ASSERT(VmaValidatePointerArray(pInfo->moveCount, pInfo->pMoves)); + + VMA_DEBUG_LOG("vmaDefragmentationStepBegin"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + /* A null context is accepted: report zero moves and succeed without calling the allocator. */ + if(context == VK_NULL_HANDLE) + { + pInfo->moveCount = 0; + return VK_SUCCESS; + } + + return allocator->DefragmentationStepBegin(pInfo, context); +} +/* Public entry point: asserts the allocator, then asks it to end the current defragmentation step for context. */ VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragmentationStepEnd( + VmaAllocator allocator, + VmaDefragmentationContext context) +{ + VMA_ASSERT(allocator); + + VMA_DEBUG_LOG("vmaDefragmentationStepEnd"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK + /* A null context is accepted and reported as success without calling the allocator. */ + if(context == VK_NULL_HANDLE) + return VK_SUCCESS; + + return allocator->DefragmentationStepEnd(context); +} + VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory( VmaAllocator allocator, VmaAllocation allocation,