Added benchmark for linear allocator.
diff --git a/src/Common.h b/src/Common.h
index d034196..111ccde 100644
--- a/src/Common.h
+++ b/src/Common.h
@@ -206,6 +206,19 @@
     uint32_t GenerateFast() { return m_Value = (m_Value * 196314165 + 907633515); }

 };

 

+// Adapter that exposes RandomNumberGenerator through the STL UniformRandomBitGenerator interface (e.g. for std::shuffle). Stores a reference only, so the wrapped generator must outlive the adapter.

+struct MyUniformRandomNumberGenerator

+{

+    typedef uint32_t result_type;

+    MyUniformRandomNumberGenerator(RandomNumberGenerator& gen) : m_Gen(gen) { }

+    static constexpr result_type min() { return 0; } // constexpr: the URBG requirements expect min()/max() to be constant expressions.

+    static constexpr result_type max() { return UINT32_MAX; }

+    result_type operator()() { return m_Gen.Generate(); } // Forwards to the wrapped generator's Generate().

+

+private:

+    RandomNumberGenerator& m_Gen;

+};

+

 void ReadFile(std::vector<char>& out, const char* fileName);

 

 enum class CONSOLE_COLOR

diff --git a/src/Tests.cpp b/src/Tests.cpp
index bc70869..36e86bc 100644
--- a/src/Tests.cpp
+++ b/src/Tests.cpp
@@ -7,8 +7,26 @@
 

 #ifdef _WIN32

 

+enum CONFIG_TYPE { // Presets that scale how much work the tests do; declared in ascending order because code compares them with >=.

+    CONFIG_TYPE_MINIMUM,

+    CONFIG_TYPE_SMALL,

+    CONFIG_TYPE_AVERAGE,

+    CONFIG_TYPE_LARGE,

+    CONFIG_TYPE_MAXIMUM,

+    CONFIG_TYPE_COUNT // Equals the number of presets above (5); presumably a sentinel rather than a selectable preset.

+};

+

+static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_SMALL; // Preset compiled into this build; swap with the commented-out line below to switch presets.

+//static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_LARGE;

+

 enum class FREE_ORDER { FORWARD, BACKWARD, RANDOM, COUNT };

 

+static const wchar_t* FREE_ORDER_NAMES[] = { // Display names indexed by (size_t)FREE_ORDER; entries must stay in enumerator order (COUNT has no entry).

+    L"FORWARD",

+    L"BACKWARD",

+    L"RANDOM",

+};

+static_assert(sizeof(FREE_ORDER_NAMES) / sizeof(FREE_ORDER_NAMES[0]) == (size_t)FREE_ORDER::COUNT, "FREE_ORDER_NAMES needs exactly one entry per FREE_ORDER value"); // Fails the build if the enum and the table drift apart.

 struct AllocationSize

 {

     uint32_t Probability;

@@ -1948,6 +1966,169 @@
     vmaDestroyPool(g_hAllocator, pool);

 }

 

+static void BenchmarkLinearAllocatorCase(bool linear, bool empty, FREE_ORDER freeOrder) // Runs one benchmark configuration on a single-block custom pool and prints the summed allocation and free times.

+{

+    RandomNumberGenerator rand{16223}; // Fixed seed, presumably so results are comparable from run to run.

+    // linear: create the pool with VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT. empty: skip the pre-fill step below. freeOrder: order in which each iteration frees its allocations.

+    const VkDeviceSize bufSizeMin = 32;

+    const VkDeviceSize bufSizeMax = 1024; // Random request sizes below fall in [bufSizeMin, bufSizeMax).

+    const size_t maxBufCapacity = 10000; // Pool block size is bufSizeMax * maxBufCapacity.

+    const uint32_t iterationCount = 10; // Number of allocate-all / free-all rounds whose durations are summed.

+

+    VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };

+    sampleBufCreateInfo.size = bufSizeMax;

+    sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;

+

+    VmaAllocationCreateInfo sampleAllocCreateInfo = {};

+    sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;

+

+    VmaPoolCreateInfo poolCreateInfo = {};

+    VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);

+    assert(res == VK_SUCCESS);

+

+    poolCreateInfo.blockSize = bufSizeMax * maxBufCapacity;

+    if(linear)

+        poolCreateInfo.flags = VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT;

+    poolCreateInfo.minBlockCount = poolCreateInfo.maxBlockCount = 1; // Exactly one block in the pool.

+

+    VmaPool pool = nullptr;

+    res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);

+    assert(res == VK_SUCCESS);

+

+    // Buffer created just to get memory requirements. Never bound to any memory.

+    VkBuffer dummyBuffer = VK_NULL_HANDLE;

+    res = vkCreateBuffer(g_hDevice, &sampleBufCreateInfo, nullptr, &dummyBuffer);

+    assert(res == VK_SUCCESS && dummyBuffer);

+

+    VkMemoryRequirements memReq = {}; // Only memReq.size is overwritten per allocation below; the other fields keep the values queried here.

+    vkGetBufferMemoryRequirements(g_hDevice, dummyBuffer, &memReq);

+

+    vkDestroyBuffer(g_hDevice, dummyBuffer, nullptr);

+

+    VmaAllocationCreateInfo allocCreateInfo = {};

+    allocCreateInfo.pool = pool;

+

+    VmaAllocation alloc; // Scratch handle written by every vmaAllocateMemory call below.

+    std::vector<VmaAllocation> baseAllocations; // Pre-fill allocations that stay alive during the benchmark; freed at the end.

+

+    if(!empty)

+    {

+        // Allocate random-sized chunks until the requested sizes add up to at least half of the block size.

+        VkDeviceSize totalSize = 0;

+        while(totalSize < poolCreateInfo.blockSize / 2)

+        {

+            memReq.size = bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin);

+            res = vmaAllocateMemory(g_hAllocator, &memReq, &allocCreateInfo, &alloc, nullptr);

+            assert(res == VK_SUCCESS);

+            baseAllocations.push_back(alloc);

+            totalSize += memReq.size;

+        }

+

+        // Free half of them, picking each one at random among those still alive.

+        size_t allocsToDelete = baseAllocations.size() / 2;

+        for(size_t i = 0; i < allocsToDelete; ++i)

+        {

+            const size_t index = (size_t)rand.Generate() % baseAllocations.size();

+            vmaFreeMemory(g_hAllocator, baseAllocations[index]);

+            baseAllocations.erase(baseAllocations.begin() + index); // Drop the freed handle so it cannot be picked or freed again.

+        }

+    }

+

+    // BENCHMARK: only the allocation loop and the free loop are timed; reordering for freeOrder happens between the two timed regions.

+    const size_t allocCount = maxBufCapacity / 2; // Allocations made (and then freed) per iteration.

+    std::vector<VmaAllocation> testAllocations;

+    testAllocations.reserve(allocCount);

+    duration allocTotalDuration = duration::zero();

+    duration freeTotalDuration = duration::zero();

+    for(uint32_t iterationIndex = 0; iterationIndex < iterationCount; ++iterationIndex)

+    {

+        // Allocations (timed)

+        time_point allocTimeBeg = std::chrono::high_resolution_clock::now();

+        for(size_t i = 0; i < allocCount; ++i)

+        {

+            memReq.size = bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin);

+            res = vmaAllocateMemory(g_hAllocator, &memReq, &allocCreateInfo, &alloc, nullptr);

+            assert(res == VK_SUCCESS);

+            testAllocations.push_back(alloc);

+        }

+        allocTotalDuration += std::chrono::high_resolution_clock::now() - allocTimeBeg;

+

+        // Deallocations: first put testAllocations into the requested free order (untimed).

+        switch(freeOrder)

+        {

+        case FREE_ORDER::FORWARD:

+            // Leave testAllocations unchanged.

+            break;

+        case FREE_ORDER::BACKWARD:

+            std::reverse(testAllocations.begin(), testAllocations.end());

+            break;

+        case FREE_ORDER::RANDOM:

+            std::shuffle(testAllocations.begin(), testAllocations.end(), MyUniformRandomNumberGenerator(rand));

+            break;

+        default: assert(0);

+        }

+

+        time_point freeTimeBeg = std::chrono::high_resolution_clock::now();

+        for(size_t i = 0; i < allocCount; ++i)

+            vmaFreeMemory(g_hAllocator, testAllocations[i]);

+        freeTotalDuration += std::chrono::high_resolution_clock::now() - freeTimeBeg;

+

+        testAllocations.clear(); // Start the next iteration with an empty list.

+    }

+

+    // Free the pre-fill allocations (if any) before destroying the pool.

+    while(!baseAllocations.empty())

+    {

+        vmaFreeMemory(g_hAllocator, baseAllocations.back());

+        baseAllocations.pop_back();

+    }

+

+    vmaDestroyPool(g_hAllocator, pool);

+

+    wprintf(L"    LinearAlgorithm=%u %s FreeOrder=%s: allocations %g s, free %g s\n",

+        linear ? 1 : 0,

+        empty ? L"Empty" : L"Not empty",

+        FREE_ORDER_NAMES[(size_t)freeOrder],

+        ToFloatSeconds(allocTotalDuration),

+        ToFloatSeconds(freeTotalDuration));

+}

+

+static void BenchmarkLinearAllocator() // Runs BenchmarkLinearAllocatorCase for every combination of free order, pool fill state and algorithm enabled by ConfigType.

+{

+    wprintf(L"Benchmark linear allocator\n");

+

+    // How many free orders to cover: 3 from CONFIG_TYPE_LARGE up, 2 from CONFIG_TYPE_SMALL up, otherwise 1.

+    const uint32_t orderCount =

+        ConfigType >= CONFIG_TYPE::CONFIG_TYPE_LARGE ? 3 :

+        ConfigType >= CONFIG_TYPE::CONFIG_TYPE_SMALL ? 2 : 1;

+

+    // Below CONFIG_TYPE_SMALL only the empty-pool variant runs.

+    const uint32_t fillVariantCount = ConfigType >= CONFIG_TYPE::CONFIG_TYPE_SMALL ? 2 : 1;

+

+    for(uint32_t orderIdx = 0; orderIdx < orderCount; ++orderIdx)

+    {

+        // Free orders in the sequence they are benchmarked; orderCount never exceeds the table size.

+        static const FREE_ORDER orders[] = {

+            FREE_ORDER::BACKWARD, FREE_ORDER::FORWARD, FREE_ORDER::RANDOM,

+        };

+        assert(orderIdx < sizeof(orders) / sizeof(orders[0]));

+        const FREE_ORDER order = orders[orderIdx];

+

+        // Variant 0 starts from an empty pool, variant 1 from a pre-filled one.

+        for(uint32_t fillIdx = 0; fillIdx < fillVariantCount; ++fillIdx)

+        {

+            const bool startEmpty = (fillIdx == 0);

+

+            // Non-linear pool first, then the linear one.

+            for(uint32_t algoIdx = 0; algoIdx < 2; ++algoIdx)

+            {

+                const bool useLinear = (algoIdx != 0);

+                BenchmarkLinearAllocatorCase(useLinear, startEmpty, order);

+            }

+        }

+    }

+}

+

 static void TestPool_SameSize()

 {

     const VkDeviceSize BUF_SIZE = 1024 * 1024;

@@ -3194,17 +3375,6 @@
     WritePoolTestResult(file, "Code desc", "Test desc", config, result);

 }

 

-enum CONFIG_TYPE {

-    CONFIG_TYPE_MINIMUM,

-    CONFIG_TYPE_SMALL,

-    CONFIG_TYPE_AVERAGE,

-    CONFIG_TYPE_LARGE,

-    CONFIG_TYPE_MAXIMUM,

-    CONFIG_TYPE_COUNT

-};

-

-static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_SMALL;

-//static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_LARGE;

 static const char* CODE_DESCRIPTION = "Foo";

 

 static void PerformMainTests(FILE* file)

@@ -3687,6 +3857,7 @@
     TestMappingMultithreaded();

     TestLinearAllocator();

     ManuallyTestLinearAllocator();

+    BenchmarkLinearAllocator();

     TestDefragmentationSimple();

     TestDefragmentationFull();

 

diff --git a/src/VmaUsage.h b/src/VmaUsage.h
index e788a30..a85bf9e 100644
--- a/src/VmaUsage.h
+++ b/src/VmaUsage.h
@@ -16,16 +16,14 @@
 include all public interface declarations. Example:

 */

 

-//#define VMA_USE_STL_CONTAINERS 1

-

 //#define VMA_HEAVY_ASSERT(expr) assert(expr)

-

+//#define VMA_USE_STL_CONTAINERS 1

 //#define VMA_DEDICATED_ALLOCATION 0

-

 //#define VMA_DEBUG_MARGIN 16

 //#define VMA_DEBUG_DETECT_CORRUPTION 1

 //#define VMA_DEBUG_INITIALIZE_ALLOCATIONS 1

 //#define VMA_RECORDING_ENABLED 0

+//#define VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY 256

 

 #pragma warning(push, 4)

 #pragma warning(disable: 4127) // conditional expression is constant

diff --git a/src/vk_mem_alloc.h b/src/vk_mem_alloc.h
index 3907819..e92873b 100644
--- a/src/vk_mem_alloc.h
+++ b/src/vk_mem_alloc.h
@@ -641,7 +641,7 @@
 

 ![Ring buffer](../gfx/Linear_allocator_5_ring_buffer.png)

 

-Pools with linear algorithm support lost allocations when used as ring buffer.

+Pools with linear algorithm support [lost allocations](@ref lost_allocations) when used as ring buffer.

 If there is not enough free space for a new allocation, but existing allocations

 from the front of the queue can become lost, they become lost and the allocation

 succeeds.

@@ -8333,7 +8333,7 @@
             for(size_t nextSuballocIndex = suballocations2nd.size(); nextSuballocIndex--; )

             {

                 const VmaSuballocation& nextSuballoc = suballocations2nd[nextSuballocIndex];

-                if(VmaBlocksOnSamePage(nextSuballoc.offset, nextSuballoc.size, resultOffset, bufferImageGranularity))

+                if(VmaBlocksOnSamePage(resultOffset, allocSize, nextSuballoc.offset, bufferImageGranularity))

                 {

                     if(VmaIsBufferImageGranularityConflict(nextSuballoc.type, allocType))

                     {

@@ -8364,7 +8364,7 @@
                 for(size_t prevSuballocIndex = suballocations1st.size(); prevSuballocIndex--; )

                 {

                     const VmaSuballocation& prevSuballoc = suballocations1st[prevSuballocIndex];

-                    if(VmaBlocksOnSamePage(resultOffset, allocSize, prevSuballoc.offset, bufferImageGranularity))

+                    if(VmaBlocksOnSamePage(prevSuballoc.offset, prevSuballoc.size, resultOffset, bufferImageGranularity))

                     {

                         if(VmaIsBufferImageGranularityConflict(allocType, prevSuballoc.type))

                         {