Prototype of a defragmentation interface that supports tiling optimal images

Merged #90, thanks @JustSid!
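
For reviewers, here is a minimal sketch of how the new incremental API is intended to be driven, mirroring TestDefragmentationIncrementalBasic() in the diff below. The handles `allocator` and `allocations` are placeholder names, and the per-move work (recreating the buffer/image, binding it to the returned memory/offset, recording the copy and layout transitions) is only outlined in comments; ProcessDefragmentationStepInfo() in the test shows a full version.

```cpp
#include <vector>
#include "vk_mem_alloc.h"

// Sketch only: drives one incremental, GPU-side defragmentation pass.
void DefragmentIncrementally(VmaAllocator allocator, std::vector<VmaAllocation>& allocations)
{
    VmaDefragmentationInfo2 defragInfo = {};
    defragInfo.flags = VMA_DEFRAGMENTATION_FLAG_INCREMENTAL;
    defragInfo.allocationCount = (uint32_t)allocations.size();
    defragInfo.pAllocations = allocations.data();
    defragInfo.maxGpuBytesToMove = VK_WHOLE_SIZE;
    defragInfo.maxGpuAllocationsToMove = UINT32_MAX;

    VmaDefragmentationStats stats = {};
    VmaDefragmentationContext ctx = VK_NULL_HANDLE;
    // With VMA_DEFRAGMENTATION_FLAG_INCREMENTAL this only records the budget and
    // returns VK_NOT_READY; the actual moves are consumed step by step.
    VkResult res = vmaDefragmentationBegin(allocator, &defragInfo, &stats, &ctx);
    if(res < VK_SUCCESS)
        return;

    std::vector<VmaDefragmentationStepMoveInfo> moves(allocations.size());

    while(res == VK_NOT_READY)
    {
        VmaDefragmentationStepInfo stepInfo = {};
        stepInfo.moveCount = (uint32_t)moves.size(); // capacity in, actual count out
        stepInfo.pMoves = moves.data();

        res = vmaDefragmentationStepBegin(allocator, &stepInfo, ctx);
        if(res < VK_SUCCESS)
            break;

        // For each of the stepInfo.moveCount entries now filled in stepInfo.pMoves:
        //   1. Create a new VkBuffer/VkImage with the original create info.
        //   2. Bind it to pMoves[i].memory at pMoves[i].offset.
        //   3. Record layout transitions plus vkCmdCopyBuffer/vkCmdCopyImage into a
        //      command buffer, submit it and wait for completion.

        // Returns VK_NOT_READY while more steps are required, VK_SUCCESS when done.
        res = vmaDefragmentationStepEnd(allocator, ctx);
    }

    vmaDefragmentationEnd(allocator, ctx);
}
```

The point of the incremental flag is that vmaDefragmentationBegin() only earmarks how much may be moved, while each step hands the application a batch of planned moves so it can recreate tiling-optimal images itself and keep the old resources alive until the copies have executed.
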
diff --git a/src/Tests.cpp b/src/Tests.cpp
index 5a6057f..e5b373d 100644
--- a/src/Tests.cpp
+++ b/src/Tests.cpp
@@ -688,6 +688,7 @@
     VmaAllocation m_Allocation = VK_NULL_HANDLE;

     VkBuffer m_Buffer = VK_NULL_HANDLE;

     VkImage m_Image = VK_NULL_HANDLE;

+    VkImageLayout m_ImageLayout = VK_IMAGE_LAYOUT_UNDEFINED;

     uint32_t m_StartValue = 0;

     union

     {

@@ -698,6 +699,10 @@
     void CreateBuffer(

         const VkBufferCreateInfo& bufCreateInfo,

         const VmaAllocationCreateInfo& allocCreateInfo);

+    void CreateImage(

+        const VkImageCreateInfo& imageCreateInfo,

+        const VmaAllocationCreateInfo& allocCreateInfo,

+        VkImageLayout layout);

     void Destroy();

 };

 

@@ -709,6 +714,16 @@
     VkResult res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &m_Buffer, &m_Allocation, nullptr);

     TEST(res == VK_SUCCESS);

 }

+void AllocInfo::CreateImage(

+    const VkImageCreateInfo& imageCreateInfo,

+    const VmaAllocationCreateInfo& allocCreateInfo,

+    VkImageLayout layout)

+{

+    m_ImageInfo = imageCreateInfo;

+    m_ImageLayout = layout;

+    VkResult res = vmaCreateImage(g_hAllocator, &imageCreateInfo, &allocCreateInfo, &m_Image, &m_Allocation, nullptr);

+    TEST(res == VK_SUCCESS);

+}

 

 void AllocInfo::Destroy()

 {

@@ -904,7 +919,88 @@
         }

         else

         {

-            TEST(0 && "Images not currently supported.");

+            TEST(currAllocInfo.m_ImageInfo.format == VK_FORMAT_R8G8B8A8_UNORM && "Only RGBA8 images are currently supported.");

+            TEST(currAllocInfo.m_ImageInfo.mipLevels == 1 && "Only single mip images are currently supported.");

+

+            const VkDeviceSize size = currAllocInfo.m_ImageInfo.extent.width * currAllocInfo.m_ImageInfo.extent.height * sizeof(uint32_t);

+

+            VkBuffer stagingBuf = VK_NULL_HANDLE;

+            void* stagingBufMappedPtr = nullptr;

+            if(!stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr))

+            {

+                TEST(cmdBufferStarted);

+                EndSingleTimeCommands();

+                stagingBufs.ReleaseAllBuffers();

+                cmdBufferStarted = false;

+

+                bool ok = stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr);

+                TEST(ok);

+            }

+

+            // Fill staging buffer.

+            {

+                assert(size % sizeof(uint32_t) == 0);

+                uint32_t *stagingValPtr = (uint32_t *)stagingBufMappedPtr;

+                uint32_t val = currAllocInfo.m_StartValue;

+                for(size_t i = 0; i < size / sizeof(uint32_t); ++i)

+                {

+                    *stagingValPtr = val;

+                    ++stagingValPtr;

+                    ++val;

+                }

+            }

+            

+            // Issue copy command from staging buffer to destination image.

+            if(!cmdBufferStarted)

+            {

+                cmdBufferStarted = true;

+                BeginSingleTimeCommands();

+            }

+

+            

+            // Transition to transfer dst layout

+            VkImageSubresourceRange subresourceRange = {

+                VK_IMAGE_ASPECT_COLOR_BIT,

+                0, VK_REMAINING_MIP_LEVELS,

+                0, VK_REMAINING_ARRAY_LAYERS

+            };

+            

+            VkImageMemoryBarrier barrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER };

+            barrier.srcAccessMask = 0;

+            barrier.dstAccessMask = 0;

+            barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;

+            barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;

+            barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;

+            barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;

+            barrier.image = currAllocInfo.m_Image;

+            barrier.subresourceRange = subresourceRange;

+

+            vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0,

+                0, nullptr,

+                0, nullptr,

+                1, &barrier);

+

+            // Copy image data

+            VkBufferImageCopy copy = {};

+            copy.bufferOffset = 0;

+            copy.bufferRowLength = 0;

+            copy.bufferImageHeight = 0;

+            copy.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;

+            copy.imageSubresource.layerCount = 1;

+            copy.imageExtent = currAllocInfo.m_ImageInfo.extent;

+

+            vkCmdCopyBufferToImage(g_hTemporaryCommandBuffer, stagingBuf, currAllocInfo.m_Image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy);

+

+            // Transition to the desired layout

+            barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;

+            barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;

+            barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;

+            barrier.newLayout = currAllocInfo.m_ImageLayout;

+

+            vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0,

+                0, nullptr,

+                0, nullptr,

+                1, &barrier);

         }

     }

 

@@ -1754,6 +1850,555 @@
     g_MemoryAliasingWarningEnabled = true;

 }

 

+static void ProcessDefragmentationStepInfo(VmaDefragmentationStepInfo &stepInfo)

+{

+    std::vector<VkImageMemoryBarrier> beginImageBarriers;

+    std::vector<VkImageMemoryBarrier> finalizeImageBarriers;

+

+    VkPipelineStageFlags beginSrcStageMask = 0;

+    VkPipelineStageFlags beginDstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;

+

+    VkPipelineStageFlags finalizeSrcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;

+    VkPipelineStageFlags finalizeDstStageMask = 0;

+

+    bool wantsMemoryBarrier = false;

+

+    VkMemoryBarrier beginMemoryBarrier = { VK_STRUCTURE_TYPE_MEMORY_BARRIER };

+    VkMemoryBarrier finalizeMemoryBarrier = { VK_STRUCTURE_TYPE_MEMORY_BARRIER };

+

+    std::vector<void *> newHandles;

+

+    for(uint32_t i = 0; i < stepInfo.moveCount; ++ i)

+    {

+        VmaAllocationInfo info;

+        vmaGetAllocationInfo(g_hAllocator, stepInfo.pMoves[i].allocation, &info);

+

+        AllocInfo *allocInfo = (AllocInfo *)info.pUserData;

+

+        if(allocInfo->m_Image)

+        {

+            VkImage newImage;

+

+            const VkResult result = vkCreateImage(g_hDevice, &allocInfo->m_ImageInfo, g_Allocs, &newImage);

+            TEST(result >= VK_SUCCESS);

+

+            vkBindImageMemory(g_hDevice, newImage, stepInfo.pMoves[i].memory, stepInfo.pMoves[i].offset);

+            newHandles.push_back(newImage);

+

+            // Keep track of our pipeline stages that we need to wait/signal on

+            beginSrcStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;

+            finalizeDstStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;

+

+            // We need one pipeline barrier and two image layout transitions here:

+            // First, transition the newly created image to VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL.

+            // Second, transition the old image to VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL.

+

+            VkImageSubresourceRange subresourceRange = {

+                VK_IMAGE_ASPECT_COLOR_BIT,

+                0, VK_REMAINING_MIP_LEVELS,

+                0, VK_REMAINING_ARRAY_LAYERS

+            };

+

+            VkImageMemoryBarrier barrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER };

+            barrier.srcAccessMask = 0;

+            barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;

+            barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;

+            barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;

+            barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;

+            barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;

+            barrier.image = newImage;

+            barrier.subresourceRange = subresourceRange;

+

+            beginImageBarriers.push_back(barrier);

+

+            // Second barrier to convert the existing image. This one actually needs a real barrier.

+            barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;

+            barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;

+            barrier.oldLayout = allocInfo->m_ImageLayout;

+            barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;

+            barrier.image = allocInfo->m_Image;

+

+            beginImageBarriers.push_back(barrier);

+

+            // And lastly we need a barrier that turns our new image into the layout of the old one

+            barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;

+            barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;

+            barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;

+            barrier.newLayout = allocInfo->m_ImageLayout;

+            barrier.image = newImage;

+

+            finalizeImageBarriers.push_back(barrier);

+        }

+        else if(allocInfo->m_Buffer)

+        {

+            VkBuffer newBuffer;

+

+            const VkResult result = vkCreateBuffer(g_hDevice, &allocInfo->m_BufferInfo, g_Allocs, &newBuffer);

+            TEST(result >= VK_SUCCESS);

+

+            vkBindBufferMemory(g_hDevice, newBuffer, stepInfo.pMoves[i].memory, stepInfo.pMoves[i].offset);

+            newHandles.push_back(newBuffer);

+

+            // Keep track of our pipeline stages that we need to wait/signal on

+            beginSrcStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;

+            finalizeDstStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;

+

+            beginMemoryBarrier.srcAccessMask |= VK_ACCESS_MEMORY_WRITE_BIT;

+            beginMemoryBarrier.dstAccessMask |= VK_ACCESS_TRANSFER_READ_BIT;

+

+            finalizeMemoryBarrier.srcAccessMask |= VK_ACCESS_TRANSFER_WRITE_BIT;

+            finalizeMemoryBarrier.dstAccessMask |= VK_ACCESS_MEMORY_READ_BIT;

+

+            wantsMemoryBarrier = true;

+        }

+    }

+

+    if(!beginImageBarriers.empty() || wantsMemoryBarrier)

+    {

+        const uint32_t memoryBarrierCount = wantsMemoryBarrier ? 1 : 0;

+

+        vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, beginSrcStageMask, beginDstStageMask, 0,

+            memoryBarrierCount, &beginMemoryBarrier,

+            0, nullptr,

+            (uint32_t)beginImageBarriers.size(), beginImageBarriers.data());

+    }

+

+    for(uint32_t i = 0; i < stepInfo.moveCount; ++ i)

+    {

+        VmaAllocationInfo info;

+        vmaGetAllocationInfo(g_hAllocator, stepInfo.pMoves[i].allocation, &info);

+

+        AllocInfo *allocInfo = (AllocInfo *)info.pUserData;

+

+        if(allocInfo->m_Image)

+        {

+            std::vector<VkImageCopy> imageCopies;

+

+            // Copy all mips of the source image into the target image

+            VkOffset3D offset = { 0, 0, 0 };

+            VkExtent3D extent = allocInfo->m_ImageInfo.extent;

+

+            VkImageSubresourceLayers subresourceLayers = {

+                VK_IMAGE_ASPECT_COLOR_BIT,

+                0,

+                0, 1

+            };

+

+            for(uint32_t mip = 0; mip < allocInfo->m_ImageInfo.mipLevels; ++ mip)

+            {

+                subresourceLayers.mipLevel = mip;

+

+                VkImageCopy imageCopy{

+                    subresourceLayers,

+                    offset,

+                    subresourceLayers,

+                    offset,

+                    extent

+                };

+

+                imageCopies.push_back(imageCopy);

+

+                extent.width = std::max(uint32_t(1), extent.width >> 1);

+                extent.height = std::max(uint32_t(1), extent.height >> 1);

+                extent.depth = std::max(uint32_t(1), extent.depth >> 1);

+            }

+

+            vkCmdCopyImage(

+                g_hTemporaryCommandBuffer,

+                allocInfo->m_Image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,

+                (VkImage)newHandles[i], VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,

+                (uint32_t)imageCopies.size(), imageCopies.data());

+

+            imageCopies.clear();

+

+            // Update our alloc info with the new resource to be used

+            allocInfo->m_Image = (VkImage)newHandles[i];

+        }

+        else if(allocInfo->m_Buffer)

+        {

+            VkBufferCopy region = {

+                0,

+                0,

+                allocInfo->m_BufferInfo.size };

+

+            vkCmdCopyBuffer(g_hTemporaryCommandBuffer, 

+                allocInfo->m_Buffer, (VkBuffer)newHandles[i],

+                1, &region);

+

+

+            // Update our alloc info with the new resource to be used

+            allocInfo->m_Buffer = (VkBuffer)newHandles[i];

+        }

+    }

+

+

+    if(!finalizeImageBarriers.empty() || wantsMemoryBarrier)

+    {

+        const uint32_t memoryBarrierCount = wantsMemoryBarrier ? 1 : 0;

+

+        vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, finalizeSrcStageMask, finalizeDstStageMask, 0,

+            memoryBarrierCount, &finalizeMemoryBarrier,

+            0, nullptr,

+            (uint32_t)finalizeImageBarriers.size(), finalizeImageBarriers.data());

+    }

+}

+

+

+static void TestDefragmentationIncrementalBasic()

+{

+    wprintf(L"Test defragmentation incremental basic\n");

+    g_MemoryAliasingWarningEnabled = false;

+

+    std::vector<AllocInfo> allocations;

+

+    // Create enough allocations to surely fill 3 new blocks of 256 MB.

+    const std::array<uint32_t, 3> imageSizes = { 256, 512, 1024 };

+    const VkDeviceSize bufSizeMin = 5ull * 1024 * 1024;

+    const VkDeviceSize bufSizeMax = 10ull * 1024 * 1024;

+    const VkDeviceSize totalSize = 3ull * 256 * 1024 * 1024;

+    const size_t imageCount = (size_t)(totalSize / (imageSizes[0] * imageSizes[0] * 4)) / 2;

+    const size_t bufCount = (size_t)(totalSize / bufSizeMin) / 2;

+    const size_t percentToLeave = 30;

+    RandomNumberGenerator rand = { 234522 };

+

+    VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };

+    imageInfo.imageType = VK_IMAGE_TYPE_2D;

+    imageInfo.extent.depth = 1;

+    imageInfo.mipLevels = 1;

+    imageInfo.arrayLayers = 1;

+    imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;

+    imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;

+    imageInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;

+    imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;

+    imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;

+

+    VmaAllocationCreateInfo allocCreateInfo = {};

+    allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;

+    allocCreateInfo.flags = 0;

+

+    // Create all intended images.

+    for(size_t i = 0; i < imageCount; ++i)

+    {

+        const uint32_t size = imageSizes[rand.Generate() % 3];

+

+        imageInfo.extent.width = size;

+        imageInfo.extent.height = size;

+

+        AllocInfo alloc;

+        alloc.CreateImage(imageInfo, allocCreateInfo, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);

+        alloc.m_StartValue = 0;

+

+        allocations.push_back(alloc);

+    }

+

+    // And all buffers

+    VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };

+

+    for(size_t i = 0; i < bufCount; ++i)

+    {

+        bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);

+        bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;

+

+        AllocInfo alloc;

+        alloc.CreateBuffer(bufCreateInfo, allocCreateInfo);

+        alloc.m_StartValue = 0;

+

+        allocations.push_back(alloc);

+    }

+

+    // Destroy some percentage of them.

+    {

+        const size_t allocationsToDestroy = round_div<size_t>((imageCount + bufCount) * (100 - percentToLeave), 100);

+        for(size_t i = 0; i < allocationsToDestroy; ++i)

+        {

+            const size_t index = rand.Generate() % allocations.size();

+            allocations[index].Destroy();

+            allocations.erase(allocations.begin() + index);

+        }

+    }

+

+    {

+        // Set our user data pointers. A real application should probably be more clever here

+        const size_t allocationCount = allocations.size();

+        for(size_t i = 0; i < allocationCount; ++i)

+        {

+            AllocInfo &alloc = allocations[i];

+            vmaSetAllocationUserData(g_hAllocator, alloc.m_Allocation, &alloc);

+        }

+    }

+

+    // Fill them with meaningful data.

+    UploadGpuData(allocations.data(), allocations.size());

+

+    wchar_t fileName[MAX_PATH];

+    swprintf_s(fileName, L"GPU_defragmentation_incremental_basic_A_before.json");

+    SaveAllocatorStatsToFile(fileName);

+

+    // Defragment using GPU only.

+    {

+        const size_t allocCount = allocations.size();

+

+        std::vector<VmaAllocation> allocationPtrs;

+

+        for(size_t i = 0; i < allocCount; ++i)

+        {

+            VmaAllocationInfo allocInfo = {};

+            vmaGetAllocationInfo(g_hAllocator, allocations[i].m_Allocation, &allocInfo);

+

+            allocationPtrs.push_back(allocations[i].m_Allocation);

+        }

+

+        const size_t movableAllocCount = allocationPtrs.size();

+

+        VmaDefragmentationInfo2 defragInfo = {};

+        defragInfo.flags = VMA_DEFRAGMENTATION_FLAG_INCREMENTAL;

+        defragInfo.allocationCount = (uint32_t)movableAllocCount;

+        defragInfo.pAllocations = allocationPtrs.data();

+        defragInfo.maxGpuBytesToMove = VK_WHOLE_SIZE;

+        defragInfo.maxGpuAllocationsToMove = UINT32_MAX;

+

+        VmaDefragmentationStats stats = {};

+        VmaDefragmentationContext ctx = VK_NULL_HANDLE;

+        VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &stats, &ctx);

+        TEST(res >= VK_SUCCESS);

+

+        res = VK_NOT_READY;

+

+        std::vector<VmaDefragmentationStepMoveInfo> moveInfo;

+        moveInfo.resize(movableAllocCount);

+

+        while(res == VK_NOT_READY)

+        {

+            VmaDefragmentationStepInfo stepInfo = {};

+            stepInfo.pMoves = moveInfo.data();

+            stepInfo.moveCount = (uint32_t)moveInfo.size();

+

+            res = vmaDefragmentationStepBegin(g_hAllocator, &stepInfo, ctx);

+            TEST(res >= VK_SUCCESS);

+

+            BeginSingleTimeCommands();

+            ProcessDefragmentationStepInfo(stepInfo);

+            EndSingleTimeCommands();

+

+            res = vmaDefragmentationStepEnd(g_hAllocator, ctx);

+        }

+

+        TEST(res >= VK_SUCCESS);

+        vmaDefragmentationEnd(g_hAllocator, ctx);

+

+        // If corruption detection is enabled, GPU defragmentation may not work on

+        // memory types that have this detection active, e.g. on Intel.

+#if !defined(VMA_DEBUG_DETECT_CORRUPTION) || VMA_DEBUG_DETECT_CORRUPTION == 0

+        TEST(stats.allocationsMoved > 0 && stats.bytesMoved > 0);

+        TEST(stats.deviceMemoryBlocksFreed > 0 && stats.bytesFreed > 0);

+#endif

+    }

+

+    //ValidateGpuData(allocations.data(), allocations.size());

+

+    swprintf_s(fileName, L"GPU_defragmentation_incremental_basic_B_after.json");

+    SaveAllocatorStatsToFile(fileName);

+

+    // Destroy all remaining buffers.

+    for(size_t i = allocations.size(); i--; )

+    {

+        allocations[i].Destroy();

+    }

+

+    g_MemoryAliasingWarningEnabled = true;

+}

+

+void TestDefragmentationIncrementalComplex()

+{

+    wprintf(L"Test defragmentation incremental complex\n");

+    g_MemoryAliasingWarningEnabled = false;

+

+    std::vector<AllocInfo> allocations;

+

+    // Create enough allocations to surely fill 3 new blocks of 256 MB.

+    const std::array<uint32_t, 3> imageSizes = { 256, 512, 1024 };

+    const VkDeviceSize bufSizeMin = 5ull * 1024 * 1024;

+    const VkDeviceSize bufSizeMax = 10ull * 1024 * 1024;

+    const VkDeviceSize totalSize = 3ull * 256 * 1024 * 1024;

+    const size_t imageCount = (size_t)(totalSize / (imageSizes[0] * imageSizes[0] * 4)) / 2;

+    const size_t bufCount = (size_t)(totalSize / bufSizeMin) / 2;

+    const size_t percentToLeave = 30;

+    RandomNumberGenerator rand = { 234522 };

+

+    VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };

+    imageInfo.imageType = VK_IMAGE_TYPE_2D;

+    imageInfo.extent.depth = 1;

+    imageInfo.mipLevels = 1;

+    imageInfo.arrayLayers = 1;

+    imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;

+    imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;

+    imageInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;

+    imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;

+    imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;

+

+    VmaAllocationCreateInfo allocCreateInfo = {};

+    allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;

+    allocCreateInfo.flags = 0;

+

+    // Create all intended images.

+    for(size_t i = 0; i < imageCount; ++i)

+    {

+        const uint32_t size = imageSizes[rand.Generate() % 3];

+

+        imageInfo.extent.width = size;

+        imageInfo.extent.height = size;

+

+        AllocInfo alloc;

+        alloc.CreateImage(imageInfo, allocCreateInfo, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);

+        alloc.m_StartValue = 0;

+

+        allocations.push_back(alloc);

+    }

+

+    // And all buffers

+    VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };

+

+    for(size_t i = 0; i < bufCount; ++i)

+    {

+        bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);

+        bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;

+

+        AllocInfo alloc;

+        alloc.CreateBuffer(bufCreateInfo, allocCreateInfo);

+        alloc.m_StartValue = 0;

+

+        allocations.push_back(alloc);

+    }

+

+    // Destroy some percentage of them.

+    {

+        const size_t allocationsToDestroy = round_div<size_t>((imageCount + bufCount) * (100 - percentToLeave), 100);

+        for(size_t i = 0; i < allocationsToDestroy; ++i)

+        {

+            const size_t index = rand.Generate() % allocations.size();

+            allocations[index].Destroy();

+            allocations.erase(allocations.begin() + index);

+        }

+    }

+

+    {

+        // Set our user data pointers. A real application should probably be more clever here

+        const size_t allocationCount = allocations.size();

+        for(size_t i = 0; i < allocationCount; ++i)

+        {

+            AllocInfo &alloc = allocations[i];

+            vmaSetAllocationUserData(g_hAllocator, alloc.m_Allocation, &alloc);

+        }

+    }

+

+    // Fill them with meaningful data.

+    UploadGpuData(allocations.data(), allocations.size());

+

+    wchar_t fileName[MAX_PATH];

+    swprintf_s(fileName, L"GPU_defragmentation_incremental_complex_A_before.json");

+    SaveAllocatorStatsToFile(fileName);

+

+    std::vector<AllocInfo> additionalAllocations;

+

+#define MakeAdditionalAllocation() \

+    do { \

+        { \

+            bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16); \

+            bufCreateInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT; \

+            \

+            AllocInfo alloc; \

+            alloc.CreateBuffer(bufCreateInfo, allocCreateInfo); \

+            \

+            additionalAllocations.push_back(alloc); \

+        } \

+    } while(0)

+

+    // Defragment using GPU only.

+    {

+        const size_t allocCount = allocations.size();

+

+        std::vector<VmaAllocation> allocationPtrs;

+

+        for(size_t i = 0; i < allocCount; ++i)

+        {

+            VmaAllocationInfo allocInfo = {};

+            vmaGetAllocationInfo(g_hAllocator, allocations[i].m_Allocation, &allocInfo);

+

+            allocationPtrs.push_back(allocations[i].m_Allocation);

+        }

+

+        const size_t movableAllocCount = allocationPtrs.size();

+

+        VmaDefragmentationInfo2 defragInfo = {};

+        defragInfo.flags = VMA_DEFRAGMENTATION_FLAG_INCREMENTAL;

+        defragInfo.allocationCount = (uint32_t)movableAllocCount;

+        defragInfo.pAllocations = allocationPtrs.data();

+        defragInfo.maxGpuBytesToMove = VK_WHOLE_SIZE;

+        defragInfo.maxGpuAllocationsToMove = UINT32_MAX;

+

+        VmaDefragmentationStats stats = {};

+        VmaDefragmentationContext ctx = VK_NULL_HANDLE;

+        VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &stats, &ctx);

+        TEST(res >= VK_SUCCESS);

+

+        res = VK_NOT_READY;

+

+        std::vector<VmaDefragmentationStepMoveInfo> moveInfo;

+        moveInfo.resize(movableAllocCount);

+

+        MakeAdditionalAllocation();

+

+        while(res == VK_NOT_READY)

+        {

+            VmaDefragmentationStepInfo stepInfo = {};

+            stepInfo.pMoves = moveInfo.data();

+            stepInfo.moveCount = (uint32_t)moveInfo.size();

+

+            res = vmaDefragmentationStepBegin(g_hAllocator, &stepInfo, ctx);

+            TEST(res >= VK_SUCCESS);

+

+            MakeAdditionalAllocation();

+

+            BeginSingleTimeCommands();

+            ProcessDefragmentationStepInfo(stepInfo);

+            EndSingleTimeCommands();

+

+            res = vmaDefragmentationStepEnd(g_hAllocator, ctx);

+

+            MakeAdditionalAllocation();

+        }

+

+        TEST(res >= VK_SUCCESS);

+        vmaDefragmentationEnd(g_hAllocator, ctx);

+

+        // If corruption detection is enabled, GPU defragmentation may not work on

+        // memory types that have this detection active, e.g. on Intel.

+#if !defined(VMA_DEBUG_DETECT_CORRUPTION) || VMA_DEBUG_DETECT_CORRUPTION == 0

+        TEST(stats.allocationsMoved > 0 && stats.bytesMoved > 0);

+        TEST(stats.deviceMemoryBlocksFreed > 0 && stats.bytesFreed > 0);

+#endif

+    }

+

+    //ValidateGpuData(allocations.data(), allocations.size());

+

+    swprintf_s(fileName, L"GPU_defragmentation_incremental_complex_B_after.json");

+    SaveAllocatorStatsToFile(fileName);

+

+    // Destroy all remaining buffers.

+    for(size_t i = allocations.size(); i--; )

+    {

+        allocations[i].Destroy();

+    }

+

+    for(size_t i = additionalAllocations.size(); i--; )

+    {

+        additionalAllocations[i].Destroy();

+    }

+

+    g_MemoryAliasingWarningEnabled = true;

+}

+

+

 static void TestUserData()

 {

     VkResult res;

@@ -5499,6 +6144,8 @@
     TestDefragmentationFull();

     TestDefragmentationWholePool();

     TestDefragmentationGpu();

+    TestDefragmentationIncrementalBasic();

+    TestDefragmentationIncrementalComplex();

 

     // # Detailed tests

     FILE* file;

diff --git a/src/vk_mem_alloc.h b/src/vk_mem_alloc.h
index 1f7920c..9e78812 100644
--- a/src/vk_mem_alloc.h
+++ b/src/vk_mem_alloc.h
@@ -1952,6 +1952,7 @@
     PFN_vkCreateImage vkCreateImage;

     PFN_vkDestroyImage vkDestroyImage;

     PFN_vkCmdCopyBuffer vkCmdCopyBuffer;

+    PFN_vkCmdCopyImage vkCmdCopyImage;

 #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000

     PFN_vkGetBufferMemoryRequirements2KHR vkGetBufferMemoryRequirements2KHR;

     PFN_vkGetImageMemoryRequirements2KHR vkGetImageMemoryRequirements2KHR;

@@ -3111,6 +3112,7 @@
 

 /// Flags to be used in vmaDefragmentationBegin(). None at the moment. Reserved for future use.

 typedef enum VmaDefragmentationFlagBits {

+    VMA_DEFRAGMENTATION_FLAG_INCREMENTAL = 0x1,

     VMA_DEFRAGMENTATION_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF

 } VmaDefragmentationFlagBits;

 typedef VkFlags VmaDefragmentationFlags;

@@ -3191,6 +3193,21 @@
     VkCommandBuffer commandBuffer;

 } VmaDefragmentationInfo2;

 

+typedef struct VmaDefragmentationStepMoveInfo {

+    VmaAllocation allocation;

+    VkDeviceMemory memory;

+    VkDeviceSize offset;

+} VmaDefragmentationStepMoveInfo;

+

+/** \brief Parameters for incremental defragmentation steps.

+

+To be used with function vmaDefragmentationStepBegin().

+*/

+typedef struct VmaDefragmentationStepInfo {

+    uint32_t moveCount;

+    VmaDefragmentationStepMoveInfo* pMoves;

+} VmaDefragmentationStepInfo;

+

 /** \brief Deprecated. Optional configuration parameters to be passed to function vmaDefragment().

 

 \deprecated This is a part of the old interface. It is recommended to use structure #VmaDefragmentationInfo2 and function vmaDefragmentationBegin() instead.

@@ -3264,6 +3281,16 @@
     VmaAllocator allocator,

     VmaDefragmentationContext context);

 

+VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragmentationStepBegin(

+    VmaAllocator allocator,

+    VmaDefragmentationStepInfo* pInfo,

+    VmaDefragmentationContext context

+);

+VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragmentationStepEnd(

+    VmaAllocator allocator,

+    VmaDefragmentationContext context

+);

+

 /** \brief Deprecated. Compacts memory by moving allocations.

 

 @param pAllocations Array of allocations that can be moved during this compaction.

@@ -3672,6 +3699,7 @@
     public:

         void Lock() { m_Mutex.lock(); }

         void Unlock() { m_Mutex.unlock(); }

+        bool TryLock() { return m_Mutex.try_lock(); }

     private:

         std::mutex m_Mutex;

     };

@@ -3688,8 +3716,10 @@
         public:

             void LockRead() { m_Mutex.lock_shared(); }

             void UnlockRead() { m_Mutex.unlock_shared(); }

+            bool TryLockRead() { return m_Mutex.try_lock_shared(); }

             void LockWrite() { m_Mutex.lock(); }

             void UnlockWrite() { m_Mutex.unlock(); }

+            bool TryLockWrite() { return m_Mutex.try_lock(); }

         private:

             std::shared_mutex m_Mutex;

         };

@@ -3703,8 +3733,10 @@
             VmaRWMutex() { InitializeSRWLock(&m_Lock); }

             void LockRead() { AcquireSRWLockShared(&m_Lock); }

             void UnlockRead() { ReleaseSRWLockShared(&m_Lock); }

+            bool TryLockRead() { return TryAcquireSRWLockShared(&m_Lock); }

             void LockWrite() { AcquireSRWLockExclusive(&m_Lock); }

             void UnlockWrite() { ReleaseSRWLockExclusive(&m_Lock); }

+            bool TryLockWrite() { return TryAcquireSRWLockExclusive(&m_Lock); }

         private:

             SRWLOCK m_Lock;

         };

@@ -3716,8 +3748,10 @@
         public:

             void LockRead() { m_Mutex.Lock(); }

             void UnlockRead() { m_Mutex.Unlock(); }

+            bool TryLockRead() { return m_Mutex.TryLock(); }

             void LockWrite() { m_Mutex.Lock(); }

             void UnlockWrite() { m_Mutex.Unlock(); }

+            bool TryLockWrite() { return m_Mutex.TryLock(); }

         private:

             VMA_MUTEX m_Mutex;

         };

@@ -6241,6 +6275,9 @@
     VkDeviceSize srcOffset;

     VkDeviceSize dstOffset;

     VkDeviceSize size;

+    VmaAllocation hAllocation;

+    VmaDeviceMemoryBlock* pSrcBlock;

+    VmaDeviceMemoryBlock* pDstBlock;

 };

 

 class VmaDefragmentationAlgorithm;

@@ -6310,7 +6347,7 @@
     // Saves results in pCtx->res.

     void Defragment(

         class VmaBlockVectorDefragmentationContext* pCtx,

-        VmaDefragmentationStats* pStats,

+        VmaDefragmentationStats* pStats, VmaDefragmentationFlags flags,

         VkDeviceSize& maxCpuBytesToMove, uint32_t& maxCpuAllocationsToMove,

         VkDeviceSize& maxGpuBytesToMove, uint32_t& maxGpuAllocationsToMove,

         VkCommandBuffer commandBuffer);

@@ -6318,6 +6355,14 @@
         class VmaBlockVectorDefragmentationContext* pCtx,

         VmaDefragmentationStats* pStats);

 

+    uint32_t ProcessDefragmentations(

+        class VmaBlockVectorDefragmentationContext *pCtx,

+        VmaDefragmentationStepMoveInfo* pMove, uint32_t maxMoves);

+

+    void CommitDefragmentations(

+        class VmaBlockVectorDefragmentationContext *pCtx,

+        VmaDefragmentationStats* pStats);

+

     ////////////////////////////////////////////////////////////////////////////////

     // To be used only while the m_Mutex is locked. Used during defragmentation.

 

@@ -6350,6 +6395,8 @@
 

     VkDeviceSize CalcMaxBlockSize() const;

 

+    static VkImageAspectFlags ImageAspectMaskForFormat(VkFormat format);

+    

     // Finds and removes given block from vector.

     void Remove(VmaDeviceMemoryBlock* pBlock);

 

@@ -6386,7 +6433,7 @@
     // Saves result to pCtx->res.

     void ApplyDefragmentationMovesGpu(

         class VmaBlockVectorDefragmentationContext* pDefragCtx,

-        const VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves,

+        VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves,

         VkCommandBuffer commandBuffer);

 

     /*

@@ -6455,7 +6502,8 @@
     virtual VkResult Defragment(

         VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves,

         VkDeviceSize maxBytesToMove,

-        uint32_t maxAllocationsToMove) = 0;

+        uint32_t maxAllocationsToMove,

+        VmaDefragmentationFlags flags) = 0;

 

     virtual VkDeviceSize GetBytesMoved() const = 0;

     virtual uint32_t GetAllocationsMoved() const = 0;

@@ -6500,7 +6548,8 @@
     virtual VkResult Defragment(

         VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves,

         VkDeviceSize maxBytesToMove,

-        uint32_t maxAllocationsToMove);

+        uint32_t maxAllocationsToMove,

+        VmaDefragmentationFlags flags);

 

     virtual VkDeviceSize GetBytesMoved() const { return m_BytesMoved; }

     virtual uint32_t GetAllocationsMoved() const { return m_AllocationsMoved; }

@@ -6601,7 +6650,8 @@
     VkResult DefragmentRound(

         VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves,

         VkDeviceSize maxBytesToMove,

-        uint32_t maxAllocationsToMove);

+        uint32_t maxAllocationsToMove,

+        bool freeOldAllocations);

 

     size_t CalcBlocksWithNonMovableCount() const;

 

@@ -6627,7 +6677,8 @@
     virtual VkResult Defragment(

         VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves,

         VkDeviceSize maxBytesToMove,

-        uint32_t maxAllocationsToMove);

+        uint32_t maxAllocationsToMove,

+        VmaDefragmentationFlags flags);

 

     virtual VkDeviceSize GetBytesMoved() const { return m_BytesMoved; }

     virtual uint32_t GetAllocationsMoved() const { return m_AllocationsMoved; }

@@ -6775,6 +6826,10 @@
     VkResult res;

     bool mutexLocked;

     VmaVector< VmaBlockDefragmentationContext, VmaStlAllocator<VmaBlockDefragmentationContext> > blockContexts;

+    VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> > defragmentationMoves;

+    uint32_t defragmentationMovesProcessed;

+    uint32_t defragmentationMovesCommitted;

+    bool hasDefragmentationPlan;

 

     VmaBlockVectorDefragmentationContext(

         VmaAllocator hAllocator,

@@ -6790,7 +6845,7 @@
     void AddAllocation(VmaAllocation hAlloc, VkBool32* pChanged);

     void AddAll() { m_AllAllocations = true; }

 

-    void Begin(bool overlappingMoveSupported);

+    void Begin(bool overlappingMoveSupported, VmaDefragmentationFlags flags);

 

 private:

     const VmaAllocator m_hAllocator;

@@ -6839,13 +6894,22 @@
     VkResult Defragment(

         VkDeviceSize maxCpuBytesToMove, uint32_t maxCpuAllocationsToMove,

         VkDeviceSize maxGpuBytesToMove, uint32_t maxGpuAllocationsToMove,

-        VkCommandBuffer commandBuffer, VmaDefragmentationStats* pStats);

+        VkCommandBuffer commandBuffer, VmaDefragmentationStats* pStats, VmaDefragmentationFlags flags);

+

+    VkResult DefragmentStepBegin(VmaDefragmentationStepInfo* pInfo);

+    VkResult DefragmentStepEnd();

 

 private:

     const VmaAllocator m_hAllocator;

     const uint32_t m_CurrFrameIndex;

     const uint32_t m_Flags;

     VmaDefragmentationStats* const m_pStats;

+

+    VkDeviceSize m_MaxCpuBytesToMove;

+    uint32_t m_MaxCpuAllocationsToMove;

+    VkDeviceSize m_MaxGpuBytesToMove;

+    uint32_t m_MaxGpuAllocationsToMove;

+

     // Owner of these objects.

     VmaBlockVectorDefragmentationContext* m_DefaultPoolContexts[VK_MAX_MEMORY_TYPES];

     // Owner of these objects.

@@ -7185,6 +7249,12 @@
     VkResult DefragmentationEnd(

         VmaDefragmentationContext context);

 

+    VkResult DefragmentationStepBegin(

+        VmaDefragmentationStepInfo* pInfo,

+        VmaDefragmentationContext context);

+    VkResult DefragmentationStepEnd(

+        VmaDefragmentationContext context);

+

     void GetAllocationInfo(VmaAllocation hAllocation, VmaAllocationInfo* pAllocationInfo);

     bool TouchAllocation(VmaAllocation hAllocation);

 

@@ -12618,7 +12688,7 @@
 

 void VmaBlockVector::ApplyDefragmentationMovesGpu(

     class VmaBlockVectorDefragmentationContext* pDefragCtx,

-    const VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves,

+    VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves,

     VkCommandBuffer commandBuffer)

 {

     const size_t blockCount = m_Blocks.size();

@@ -12631,8 +12701,13 @@
     for(size_t moveIndex = 0; moveIndex < moveCount; ++moveIndex)

     {

         const VmaDefragmentationMove& move = moves[moveIndex];

-        pDefragCtx->blockContexts[move.srcBlockIndex].flags |= VmaBlockDefragmentationContext::BLOCK_FLAG_USED;

-        pDefragCtx->blockContexts[move.dstBlockIndex].flags |= VmaBlockDefragmentationContext::BLOCK_FLAG_USED;

+

+        //if(move.type == VMA_ALLOCATION_TYPE_UNKNOWN)

+        {

+            // Old-school moves still require us to map the whole block

+            pDefragCtx->blockContexts[move.srcBlockIndex].flags |= VmaBlockDefragmentationContext::BLOCK_FLAG_USED;

+            pDefragCtx->blockContexts[move.dstBlockIndex].flags |= VmaBlockDefragmentationContext::BLOCK_FLAG_USED;

+        }

     }

 

     VMA_ASSERT(pDefragCtx->res == VK_SUCCESS);

@@ -12806,7 +12881,7 @@
 

 void VmaBlockVector::Defragment(

     class VmaBlockVectorDefragmentationContext* pCtx,

-    VmaDefragmentationStats* pStats,

+    VmaDefragmentationStats* pStats, VmaDefragmentationFlags flags,

     VkDeviceSize& maxCpuBytesToMove, uint32_t& maxCpuAllocationsToMove,

     VkDeviceSize& maxGpuBytesToMove, uint32_t& maxGpuAllocationsToMove,

     VkCommandBuffer commandBuffer)

@@ -12843,19 +12918,28 @@
 

         if(m_hAllocator->m_UseMutex)

         {

-            m_Mutex.LockWrite();

-            pCtx->mutexLocked = true;

+            if(flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL)

+            {

+                if(!m_Mutex.TryLockWrite())

+                {

+                    pCtx->res = VK_ERROR_INITIALIZATION_FAILED;

+                    return;

+                }

+            }

+            else

+            {

+                m_Mutex.LockWrite();

+                pCtx->mutexLocked = true;

+            }

         }

 

-        pCtx->Begin(overlappingMoveSupported);

+        pCtx->Begin(overlappingMoveSupported, flags);

 

         // Defragment.

 

         const VkDeviceSize maxBytesToMove = defragmentOnGpu ? maxGpuBytesToMove : maxCpuBytesToMove;

         const uint32_t maxAllocationsToMove = defragmentOnGpu ? maxGpuAllocationsToMove : maxCpuAllocationsToMove;

-        VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> > moves = 

-            VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >(VmaStlAllocator<VmaDefragmentationMove>(m_hAllocator->GetAllocationCallbacks()));

-        pCtx->res = pCtx->GetAlgorithm()->Defragment(moves, maxBytesToMove, maxAllocationsToMove);

+        pCtx->res = pCtx->GetAlgorithm()->Defragment(pCtx->defragmentationMoves, maxBytesToMove, maxAllocationsToMove, flags);

 

         // Accumulate statistics.

         if(pStats != VMA_NULL)

@@ -12877,16 +12961,27 @@
                 maxCpuAllocationsToMove -= allocationsMoved;

             }

         }

+

+        if(flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL)

+        {

+            if(m_hAllocator->m_UseMutex)

+                m_Mutex.UnlockWrite();

+            

+            if(pCtx->res >= VK_SUCCESS && !pCtx->defragmentationMoves.empty())

+                pCtx->res = VK_NOT_READY;

+

+            return;

+        }

     

         if(pCtx->res >= VK_SUCCESS)

         {

             if(defragmentOnGpu)

             {

-                ApplyDefragmentationMovesGpu(pCtx, moves, commandBuffer);

+                ApplyDefragmentationMovesGpu(pCtx, pCtx->defragmentationMoves, commandBuffer);

             }

             else

             {

-                ApplyDefragmentationMovesCpu(pCtx, moves);

+                ApplyDefragmentationMovesCpu(pCtx, pCtx->defragmentationMoves);

             }

         }

     }

@@ -12919,6 +13014,48 @@
     }

 }

 

+uint32_t VmaBlockVector::ProcessDefragmentations(

+    class VmaBlockVectorDefragmentationContext *pCtx,

+    VmaDefragmentationStepMoveInfo* pMove, uint32_t maxMoves)

+{

+    VmaMutexLockWrite lock(m_Mutex, m_hAllocator->m_UseMutex);

+    

+    const uint32_t moveCount = std::min(uint32_t(pCtx->defragmentationMoves.size()) - pCtx->defragmentationMovesProcessed, maxMoves);

+

+    for(uint32_t i = pCtx->defragmentationMovesProcessed; i < pCtx->defragmentationMovesProcessed + moveCount; ++ i)

+    {

+        VmaDefragmentationMove& move = pCtx->defragmentationMoves[i];

+

+        pMove->allocation = move.hAllocation;

+        pMove->memory = move.pDstBlock->GetDeviceMemory();

+        pMove->offset = move.dstOffset;

+

+        ++ pMove;

+    }

+

+    pCtx->defragmentationMovesProcessed += moveCount;

+

+    return moveCount;

+}

+

+void VmaBlockVector::CommitDefragmentations(

+    class VmaBlockVectorDefragmentationContext *pCtx,

+    VmaDefragmentationStats* pStats)

+{

+    VmaMutexLockWrite lock(m_Mutex, m_hAllocator->m_UseMutex);

+    

+    for(uint32_t i = pCtx->defragmentationMovesCommitted; i < pCtx->defragmentationMovesProcessed; ++ i)

+    {

+        const VmaDefragmentationMove &move = pCtx->defragmentationMoves[i];

+

+        move.pSrcBlock->m_pMetadata->FreeAtOffset(move.srcOffset);

+        move.hAllocation->ChangeBlockAllocation(m_hAllocator, move.pDstBlock, move.dstOffset);

+    }

+

+    pCtx->defragmentationMovesCommitted = pCtx->defragmentationMovesProcessed;

+    FreeEmptyBlocks(pStats);

+}

+

 size_t VmaBlockVector::CalcAllocationCount() const

 {

     size_t result = 0;

@@ -13069,7 +13206,8 @@
 VkResult VmaDefragmentationAlgorithm_Generic::DefragmentRound(

     VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves,

     VkDeviceSize maxBytesToMove,

-    uint32_t maxAllocationsToMove)

+    uint32_t maxAllocationsToMove,

+    bool freeOldAllocations)

 {

     if(m_Blocks.empty())

     {

@@ -13161,12 +13299,16 @@
                     return VK_SUCCESS;

                 }

 

-                VmaDefragmentationMove move;

+                VmaDefragmentationMove move = {};

                 move.srcBlockIndex = pSrcBlockInfo->m_OriginalBlockIndex;

                 move.dstBlockIndex = pDstBlockInfo->m_OriginalBlockIndex;

                 move.srcOffset = srcOffset;

                 move.dstOffset = dstAllocRequest.offset;

                 move.size = size;

+                move.hAllocation = allocInfo.m_hAllocation;

+                move.pSrcBlock = pSrcBlockInfo->m_pBlock;

+                move.pDstBlock = pDstBlockInfo->m_pBlock;

+

                 moves.push_back(move);

 

                 pDstBlockInfo->m_pBlock->m_pMetadata->Alloc(

@@ -13174,10 +13316,13 @@
                     suballocType,

                     size,

                     allocInfo.m_hAllocation);

-                pSrcBlockInfo->m_pBlock->m_pMetadata->FreeAtOffset(srcOffset);

-                

-                allocInfo.m_hAllocation->ChangeBlockAllocation(m_hAllocator, pDstBlockInfo->m_pBlock, dstAllocRequest.offset);

 

+                if(freeOldAllocations)

+                {

+                    pSrcBlockInfo->m_pBlock->m_pMetadata->FreeAtOffset(srcOffset);

+                    allocInfo.m_hAllocation->ChangeBlockAllocation(m_hAllocator, pDstBlockInfo->m_pBlock, dstAllocRequest.offset);

+                }

+                

                 if(allocInfo.m_pChanged != VMA_NULL)

                 {

                     *allocInfo.m_pChanged = VK_TRUE;

@@ -13229,7 +13374,8 @@
 VkResult VmaDefragmentationAlgorithm_Generic::Defragment(

     VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves,

     VkDeviceSize maxBytesToMove,

-    uint32_t maxAllocationsToMove)

+    uint32_t maxAllocationsToMove,

+    VmaDefragmentationFlags flags)

 {

     if(!m_AllAllocations && m_AllocationCount == 0)

     {

@@ -13275,7 +13421,7 @@
     VkResult result = VK_SUCCESS;

     for(uint32_t round = 0; (round < roundCount) && (result == VK_SUCCESS); ++round)

     {

-        result = DefragmentRound(moves, maxBytesToMove, maxAllocationsToMove);

+        result = DefragmentRound(moves, maxBytesToMove, maxAllocationsToMove, !(flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL));

     }

 

     return result;

@@ -13327,7 +13473,8 @@
 VkResult VmaDefragmentationAlgorithm_Fast::Defragment(

     VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves,

     VkDeviceSize maxBytesToMove,

-    uint32_t maxAllocationsToMove)

+    uint32_t maxAllocationsToMove,

+    VmaDefragmentationFlags flags)

 {

     VMA_ASSERT(m_AllAllocations || m_pBlockVector->CalcAllocationCount() == m_AllocationCount);

 

@@ -13383,6 +13530,7 @@
             }

             const VkDeviceSize srcAllocOffset = srcSuballocIt->offset;

 

+            VmaDefragmentationMove move = {};

             // Try to place it in one of free spaces from the database.

             size_t freeSpaceInfoIndex;

             VkDeviceSize dstAllocOffset;

@@ -13413,10 +13561,12 @@
 

                     InsertSuballoc(pFreeSpaceMetadata, suballoc);

 

-                    VmaDefragmentationMove move = {

-                        srcOrigBlockIndex, freeSpaceOrigBlockIndex,

-                        srcAllocOffset, dstAllocOffset,

-                        srcAllocSize };

+                    move.srcBlockIndex = srcOrigBlockIndex;

+                    move.dstBlockIndex = freeSpaceOrigBlockIndex;

+                    move.srcOffset = srcAllocOffset;

+                    move.dstOffset = dstAllocOffset;

+                    move.size = srcAllocSize;

+                    

                     moves.push_back(move);

                 }

                 // Different block

@@ -13439,10 +13589,12 @@
 

                     InsertSuballoc(pFreeSpaceMetadata, suballoc);

 

-                    VmaDefragmentationMove move = {

-                        srcOrigBlockIndex, freeSpaceOrigBlockIndex,

-                        srcAllocOffset, dstAllocOffset,

-                        srcAllocSize };

+                    move.srcBlockIndex = srcOrigBlockIndex;

+                    move.dstBlockIndex = freeSpaceOrigBlockIndex;

+                    move.srcOffset = srcAllocOffset;

+                    move.dstOffset = dstAllocOffset;

+                    move.size = srcAllocSize;

+                    

                     moves.push_back(move);

                 }

             }

@@ -13497,10 +13649,13 @@
                         m_BytesMoved += srcAllocSize;

                         ++m_AllocationsMoved;

                         ++srcSuballocIt;

-                        VmaDefragmentationMove move = {

-                            srcOrigBlockIndex, dstOrigBlockIndex,

-                            srcAllocOffset, dstAllocOffset,

-                            srcAllocSize };

+                        

+                        move.srcBlockIndex = srcOrigBlockIndex;

+                        move.dstBlockIndex = dstOrigBlockIndex;

+                        move.srcOffset = srcAllocOffset;

+                        move.dstOffset = dstAllocOffset;

+                        move.size = srcAllocSize;

+                        

                         moves.push_back(move);

                     }

                 }

@@ -13526,10 +13681,12 @@
 

                     pDstMetadata->m_Suballocations.push_back(suballoc);

 

-                    VmaDefragmentationMove move = {

-                        srcOrigBlockIndex, dstOrigBlockIndex,

-                        srcAllocOffset, dstAllocOffset,

-                        srcAllocSize };

+                    move.srcBlockIndex = srcOrigBlockIndex;

+                    move.dstBlockIndex = dstOrigBlockIndex;

+                    move.srcOffset = srcAllocOffset;

+                    move.dstOffset = dstAllocOffset;

+                    move.size = srcAllocSize;

+                    

                     moves.push_back(move);

                 }

             }

@@ -13679,6 +13836,10 @@
     res(VK_SUCCESS),

     mutexLocked(false),

     blockContexts(VmaStlAllocator<VmaBlockDefragmentationContext>(hAllocator->GetAllocationCallbacks())),

+    defragmentationMoves(VmaStlAllocator<VmaDefragmentationMove>(hAllocator->GetAllocationCallbacks())),

+    defragmentationMovesProcessed(0),

+    defragmentationMovesCommitted(0),

+    hasDefragmentationPlan(0),

     m_hAllocator(hAllocator),

     m_hCustomPool(hCustomPool),

     m_pBlockVector(pBlockVector),

@@ -13700,7 +13861,7 @@
     m_Allocations.push_back(info);

 }

 

-void VmaBlockVectorDefragmentationContext::Begin(bool overlappingMoveSupported)

+void VmaBlockVectorDefragmentationContext::Begin(bool overlappingMoveSupported, VmaDefragmentationFlags flags)

 {

     const bool allAllocations = m_AllAllocations ||

         m_Allocations.size() == m_pBlockVector->CalcAllocationCount();

@@ -13714,10 +13875,12 @@
     - VMA_DEBUG_MARGIN is 0.

     - All allocations in this block vector are moveable.

     - There is no possibility of image/buffer granularity conflict.

+    - The defragmentation is not incremental.

     */

     if(VMA_DEBUG_MARGIN == 0 &&

         allAllocations &&

-        !m_pBlockVector->IsBufferImageGranularityConflictPossible())

+        !m_pBlockVector->IsBufferImageGranularityConflictPossible() &&

+        !(flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL))

     {

         m_pAlgorithm = vma_new(m_hAllocator, VmaDefragmentationAlgorithm_Fast)(

             m_hAllocator, m_pBlockVector, m_CurrFrameIndex, overlappingMoveSupported);

@@ -13884,13 +14047,30 @@
 VkResult VmaDefragmentationContext_T::Defragment(

     VkDeviceSize maxCpuBytesToMove, uint32_t maxCpuAllocationsToMove,

     VkDeviceSize maxGpuBytesToMove, uint32_t maxGpuAllocationsToMove,

-    VkCommandBuffer commandBuffer, VmaDefragmentationStats* pStats)

+    VkCommandBuffer commandBuffer, VmaDefragmentationStats* pStats, VmaDefragmentationFlags flags)

 {

     if(pStats)

     {

         memset(pStats, 0, sizeof(VmaDefragmentationStats));

     }

 

+    if(flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL)

+    {

+        // For incremental defragmentations, we just earmark how much we can move

+        // The real meat is in the defragmentation steps

+        m_MaxCpuBytesToMove = maxCpuBytesToMove;

+        m_MaxCpuAllocationsToMove = maxCpuAllocationsToMove;

+

+        m_MaxGpuBytesToMove = maxGpuBytesToMove;

+        m_MaxGpuAllocationsToMove = maxGpuAllocationsToMove;

+

+        if(m_MaxCpuBytesToMove == 0 && m_MaxCpuAllocationsToMove == 0 &&

+            m_MaxGpuBytesToMove == 0 && m_MaxGpuAllocationsToMove == 0)

+            return VK_SUCCESS;

+

+        return VK_NOT_READY;

+    }

+

     if(commandBuffer == VK_NULL_HANDLE)

     {

         maxGpuBytesToMove = 0;

@@ -13910,7 +14090,7 @@
             VMA_ASSERT(pBlockVectorCtx->GetBlockVector());

             pBlockVectorCtx->GetBlockVector()->Defragment(

                 pBlockVectorCtx,

-                pStats,

+                pStats, flags,

                 maxCpuBytesToMove, maxCpuAllocationsToMove,

                 maxGpuBytesToMove, maxGpuAllocationsToMove,

                 commandBuffer);

@@ -13930,7 +14110,7 @@
         VMA_ASSERT(pBlockVectorCtx && pBlockVectorCtx->GetBlockVector());

         pBlockVectorCtx->GetBlockVector()->Defragment(

             pBlockVectorCtx,

-            pStats,

+            pStats, flags,

             maxCpuBytesToMove, maxCpuAllocationsToMove,

             maxGpuBytesToMove, maxGpuAllocationsToMove,

             commandBuffer);

@@ -13943,6 +14123,132 @@
     return res;

 }

 

+VkResult VmaDefragmentationContext_T::DefragmentStepBegin(VmaDefragmentationStepInfo* pInfo)

+{

+    VmaDefragmentationStepMoveInfo* pCurrentMove = pInfo->pMoves;

+    uint32_t movesLeft = pInfo->moveCount;

+

+    // Process default pools.

+    for(uint32_t memTypeIndex = 0;

+        memTypeIndex < m_hAllocator->GetMemoryTypeCount();

+        ++memTypeIndex)

+    {

+        VmaBlockVectorDefragmentationContext *pBlockVectorCtx = m_DefaultPoolContexts[memTypeIndex];

+        if(pBlockVectorCtx)

+        {

+            VMA_ASSERT(pBlockVectorCtx->GetBlockVector());

+

+            if(!pBlockVectorCtx->hasDefragmentationPlan)

+            {

+                pBlockVectorCtx->GetBlockVector()->Defragment(

+                    pBlockVectorCtx,

+                    m_pStats, m_Flags,

+                    m_MaxCpuBytesToMove, m_MaxCpuAllocationsToMove,

+                    m_MaxGpuBytesToMove, m_MaxGpuAllocationsToMove,

+                    VK_NULL_HANDLE);

+

+                if(pBlockVectorCtx->res < VK_SUCCESS)

+                    continue;

+

+                pBlockVectorCtx->hasDefragmentationPlan = true;

+            }

+

+            const uint32_t processed = pBlockVectorCtx->GetBlockVector()->ProcessDefragmentations(

+                pBlockVectorCtx,

+                pCurrentMove, movesLeft);

+

+            movesLeft -= processed;

+            pCurrentMove += processed;

+        }

+    }

+

+    // Process custom pools.

+    for(size_t customCtxIndex = 0, customCtxCount = m_CustomPoolContexts.size();

+        customCtxIndex < customCtxCount;

+        ++customCtxIndex)

+    {

+        VmaBlockVectorDefragmentationContext *pBlockVectorCtx = m_CustomPoolContexts[customCtxIndex];

+        VMA_ASSERT(pBlockVectorCtx && pBlockVectorCtx->GetBlockVector());

+

+        if(!pBlockVectorCtx->hasDefragmentationPlan)

+        {

+            pBlockVectorCtx->GetBlockVector()->Defragment(

+                pBlockVectorCtx,

+                m_pStats, m_Flags,

+                m_MaxCpuBytesToMove, m_MaxCpuAllocationsToMove,

+                m_MaxGpuBytesToMove, m_MaxGpuAllocationsToMove,

+                VK_NULL_HANDLE);

+

+            if(pBlockVectorCtx->res < VK_SUCCESS)

+                continue;

+

+            pBlockVectorCtx->hasDefragmentationPlan = true;

+        }

+

+        const uint32_t processed = pBlockVectorCtx->GetBlockVector()->ProcessDefragmentations(

+            pBlockVectorCtx,

+            pCurrentMove, movesLeft);

+

+        movesLeft -= processed;

+        pCurrentMove += processed;

+    }

+

+    pInfo->moveCount = pInfo->moveCount - movesLeft;

+

+    return VK_SUCCESS;

+}

+VkResult VmaDefragmentationContext_T::DefragmentStepEnd()

+{

+    VkResult res = VK_SUCCESS;

+

+    // Process default pools.

+    for(uint32_t memTypeIndex = 0;

+        memTypeIndex < m_hAllocator->GetMemoryTypeCount();

+        ++memTypeIndex)

+    {

+        VmaBlockVectorDefragmentationContext *pBlockVectorCtx = m_DefaultPoolContexts[memTypeIndex];

+        if(pBlockVectorCtx)

+        {

+            VMA_ASSERT(pBlockVectorCtx->GetBlockVector());

+

+            if(!pBlockVectorCtx->hasDefragmentationPlan)

+            {

+                res = VK_NOT_READY;

+                continue;

+            }

+

+            pBlockVectorCtx->GetBlockVector()->CommitDefragmentations(

+                pBlockVectorCtx, m_pStats);

+

+            if(pBlockVectorCtx->defragmentationMoves.size() != pBlockVectorCtx->defragmentationMovesCommitted)

+                res = VK_NOT_READY;

+        }

+    }

+

+    // Process custom pools.

+    for(size_t customCtxIndex = 0, customCtxCount = m_CustomPoolContexts.size();

+        customCtxIndex < customCtxCount;

+        ++customCtxIndex)

+    {

+        VmaBlockVectorDefragmentationContext *pBlockVectorCtx = m_CustomPoolContexts[customCtxIndex];

+        VMA_ASSERT(pBlockVectorCtx && pBlockVectorCtx->GetBlockVector());

+

+        if(!pBlockVectorCtx->hasDefragmentationPlan)

+        {

+            res = VK_NOT_READY;

+            continue;

+        }

+

+        pBlockVectorCtx->GetBlockVector()->CommitDefragmentations(

+            pBlockVectorCtx, m_pStats);

+

+        if(pBlockVectorCtx->defragmentationMoves.size() != pBlockVectorCtx->defragmentationMovesCommitted)

+            res = VK_NOT_READY;

+    }

+

+    return res;

+}

+

 ////////////////////////////////////////////////////////////////////////////////

 // VmaRecorder

 

@@ -14759,6 +15065,7 @@
     m_VulkanFunctions.vkCreateImage = (PFN_vkCreateImage)vkCreateImage;

     m_VulkanFunctions.vkDestroyImage = (PFN_vkDestroyImage)vkDestroyImage;

     m_VulkanFunctions.vkCmdCopyBuffer = (PFN_vkCmdCopyBuffer)vkCmdCopyBuffer;

+    m_VulkanFunctions.vkCmdCopyImage = (PFN_vkCmdCopyImage)vkCmdCopyImage;

 #if VMA_VULKAN_VERSION >= 1001000

     if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0))

     {

@@ -14825,6 +15132,7 @@
         VMA_COPY_IF_NOT_NULL(vkCreateImage);

         VMA_COPY_IF_NOT_NULL(vkDestroyImage);

         VMA_COPY_IF_NOT_NULL(vkCmdCopyBuffer);

+        VMA_COPY_IF_NOT_NULL(vkCmdCopyImage);

 #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000

         VMA_COPY_IF_NOT_NULL(vkGetBufferMemoryRequirements2KHR);

         VMA_COPY_IF_NOT_NULL(vkGetImageMemoryRequirements2KHR);

@@ -14859,6 +15167,7 @@
     VMA_ASSERT(m_VulkanFunctions.vkCreateImage != VMA_NULL);

     VMA_ASSERT(m_VulkanFunctions.vkDestroyImage != VMA_NULL);

     VMA_ASSERT(m_VulkanFunctions.vkCmdCopyBuffer != VMA_NULL);

+    VMA_ASSERT(m_VulkanFunctions.vkCmdCopyImage != VMA_NULL);

 #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000

     if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0) || m_UseKhrDedicatedAllocation)

     {

@@ -15578,7 +15887,7 @@
     VkResult res = (*pContext)->Defragment(

         info.maxCpuBytesToMove, info.maxCpuAllocationsToMove,

         info.maxGpuBytesToMove, info.maxGpuAllocationsToMove,

-        info.commandBuffer, pStats);

+        info.commandBuffer, pStats, info.flags);

 

     if(res != VK_NOT_READY)

     {

@@ -15596,6 +15905,19 @@
     return VK_SUCCESS;

 }

 

+VkResult VmaAllocator_T::DefragmentationStepBegin(

+    VmaDefragmentationStepInfo* pInfo,

+    VmaDefragmentationContext context)

+{

+    return context->DefragmentStepBegin(pInfo);

+}

+VkResult VmaAllocator_T::DefragmentationStepEnd(

+    VmaDefragmentationContext context)

+{

+    return context->DefragmentStepEnd();

+    

+}

+

 void VmaAllocator_T::GetAllocationInfo(VmaAllocation hAllocation, VmaAllocationInfo* pAllocationInfo)

 {

     if(hAllocation->CanBecomeLost())

@@ -17414,6 +17736,42 @@
     }

 }

 

+VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragmentationStepBegin(

+    VmaAllocator allocator,

+    VmaDefragmentationStepInfo* pInfo,

+    VmaDefragmentationContext context)

+{

+    VMA_ASSERT(allocator);

+    VMA_ASSERT(pInfo);

+    VMA_HEAVY_ASSERT(VmaValidatePointerArray(pInfo->moveCount, pInfo->pMoves));

+

+    VMA_DEBUG_LOG("vmaDefragmentationStepBegin");

+

+    VMA_DEBUG_GLOBAL_MUTEX_LOCK

+

+    if(context == VK_NULL_HANDLE)

+    {

+        pInfo->moveCount = 0;

+        return VK_SUCCESS;

+    }

+

+    return allocator->DefragmentationStepBegin(pInfo, context);

+}

+VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragmentationStepEnd(

+    VmaAllocator allocator,

+    VmaDefragmentationContext context)

+{

+    VMA_ASSERT(allocator);

+

+    VMA_DEBUG_LOG("vmaDefragmentationStepEnd");

+    VMA_DEBUG_GLOBAL_MUTEX_LOCK

+

+    if(context == VK_NULL_HANDLE)

+        return VK_SUCCESS;

+

+    return allocator->DefragmentationStepEnd(context);

+}

+

 VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory(

     VmaAllocator allocator,

     VmaAllocation allocation,