Added benchmark for linear allocator.
diff --git a/src/Common.h b/src/Common.h
index d034196..111ccde 100644
--- a/src/Common.h
+++ b/src/Common.h
@@ -206,6 +206,19 @@
uint32_t GenerateFast() { return m_Value = (m_Value * 196314165 + 907633515); }
};
+// Wrapper for RandomNumberGenerator compatible with STL "UniformRandomNumberGenerator" idea.
+struct MyUniformRandomNumberGenerator
+{
+    typedef uint32_t result_type; // Type returned by operator(), min() and max().
+    MyUniformRandomNumberGenerator(RandomNumberGenerator& gen) : m_Gen(gen) { } // Keeps only a reference: gen must outlive this wrapper.
+    static constexpr uint32_t min() { return 0; } // constexpr: the standard URBG requirements make min()/max() constant expressions.
+    static constexpr uint32_t max() { return UINT32_MAX; }
+    uint32_t operator()() { return m_Gen.Generate(); } // Forwards to the wrapped generator, advancing its state.
+
+private:
+    RandomNumberGenerator& m_Gen; // Not owned.
+};
+
void ReadFile(std::vector<char>& out, const char* fileName);
enum class CONSOLE_COLOR
diff --git a/src/Tests.cpp b/src/Tests.cpp
index bc70869..36e86bc 100644
--- a/src/Tests.cpp
+++ b/src/Tests.cpp
@@ -7,8 +7,26 @@
#ifdef _WIN32
+enum CONFIG_TYPE { // Test configuration levels in ascending order; callers compare them with >= (e.g. BenchmarkLinearAllocator runs more cases for higher levels).
+ CONFIG_TYPE_MINIMUM,
+ CONFIG_TYPE_SMALL,
+ CONFIG_TYPE_AVERAGE,
+ CONFIG_TYPE_LARGE,
+ CONFIG_TYPE_MAXIMUM,
+ CONFIG_TYPE_COUNT // Not a level: its value equals the number of levels listed above.
+};
+
+static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_SMALL;
+//static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_LARGE;
+
enum class FREE_ORDER { FORWARD, BACKWARD, RANDOM, COUNT };
+static const wchar_t* FREE_ORDER_NAMES[] = { // Printable names indexed by (size_t)FREE_ORDER; entries must stay in the same order as the enum (COUNT has no entry).
+ L"FORWARD",
+ L"BACKWARD",
+ L"RANDOM",
+};
+
struct AllocationSize
{
uint32_t Probability;
@@ -1948,6 +1966,169 @@
vmaDestroyPool(g_hAllocator, pool);
}
+static void BenchmarkLinearAllocatorCase(bool linear, bool empty, FREE_ORDER freeOrder) // Times vmaAllocateMemory/vmaFreeMemory in a one-block custom pool and prints the accumulated durations.
+{ // linear: create the pool with VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT. empty: skip pre-filling the pool. freeOrder: order in which each iteration's allocations are freed.
+ RandomNumberGenerator rand{16223}; // Fixed seed.
+
+ const VkDeviceSize bufSizeMin = 32; // Allocation sizes are drawn from [bufSizeMin, bufSizeMax).
+ const VkDeviceSize bufSizeMax = 1024;
+ const size_t maxBufCapacity = 10000; // Pool block size is bufSizeMax * maxBufCapacity.
+ const uint32_t iterationCount = 10; // Number of timed allocate-all / free-all rounds.
+
+ VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; // Used to pick the memory type and to query memory requirements below.
+ sampleBufCreateInfo.size = bufSizeMax;
+ sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
+
+ VmaAllocationCreateInfo sampleAllocCreateInfo = {};
+ sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
+
+ VmaPoolCreateInfo poolCreateInfo = {};
+ VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
+ assert(res == VK_SUCCESS);
+
+ poolCreateInfo.blockSize = bufSizeMax * maxBufCapacity;
+ if(linear)
+ poolCreateInfo.flags = VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT;
+ poolCreateInfo.minBlockCount = poolCreateInfo.maxBlockCount = 1; // The pool always consists of exactly one block.
+
+ VmaPool pool = nullptr;
+ res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
+ assert(res == VK_SUCCESS);
+
+ // Buffer created just to get memory requirements. Never bound to any memory.
+ VkBuffer dummyBuffer = VK_NULL_HANDLE;
+ res = vkCreateBuffer(g_hDevice, &sampleBufCreateInfo, nullptr, &dummyBuffer);
+ assert(res == VK_SUCCESS && dummyBuffer);
+
+ VkMemoryRequirements memReq = {};
+ vkGetBufferMemoryRequirements(g_hDevice, dummyBuffer, &memReq);
+
+ vkDestroyBuffer(g_hDevice, dummyBuffer, nullptr);
+
+ VmaAllocationCreateInfo allocCreateInfo = {};
+ allocCreateInfo.pool = pool; // Every allocation below comes from the custom pool.
+
+ VmaAllocation alloc;
+ std::vector<VmaAllocation> baseAllocations; // Allocations that stay alive during the benchmark when !empty.
+
+ if(!empty)
+ {
+ // Make allocations until their requested sizes sum to at least half of the block size.
+ VkDeviceSize totalSize = 0;
+ while(totalSize < poolCreateInfo.blockSize / 2)
+ {
+ memReq.size = bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin); // Only the size is overridden; the other fields come from the dummy buffer.
+ res = vmaAllocateMemory(g_hAllocator, &memReq, &allocCreateInfo, &alloc, nullptr);
+ assert(res == VK_SUCCESS);
+ baseAllocations.push_back(alloc);
+ totalSize += memReq.size;
+ }
+
+ // Delete half of them, choose randomly.
+ size_t allocsToDelete = baseAllocations.size() / 2;
+ for(size_t i = 0; i < allocsToDelete; ++i)
+ {
+ const size_t index = (size_t)rand.Generate() % baseAllocations.size();
+ vmaFreeMemory(g_hAllocator, baseAllocations[index]);
+ baseAllocations.erase(baseAllocations.begin() + index);
+ }
+ }
+
+ // BENCHMARK
+ const size_t allocCount = maxBufCapacity / 2; // Allocations made (and then freed) per iteration.
+ std::vector<VmaAllocation> testAllocations;
+ testAllocations.reserve(allocCount); // Reserved up front; at most allocCount elements are pushed per iteration.
+ duration allocTotalDuration = duration::zero();
+ duration freeTotalDuration = duration::zero();
+ for(uint32_t iterationIndex = 0; iterationIndex < iterationCount; ++iterationIndex)
+ {
+ // Allocations. The timed region also covers size generation and push_back.
+ time_point allocTimeBeg = std::chrono::high_resolution_clock::now();
+ for(size_t i = 0; i < allocCount; ++i)
+ {
+ memReq.size = bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin);
+ res = vmaAllocateMemory(g_hAllocator, &memReq, &allocCreateInfo, &alloc, nullptr);
+ assert(res == VK_SUCCESS);
+ testAllocations.push_back(alloc);
+ }
+ allocTotalDuration += std::chrono::high_resolution_clock::now() - allocTimeBeg;
+
+ // Deallocations. Reordering happens before the free timer starts, so it is not measured.
+ switch(freeOrder)
+ {
+ case FREE_ORDER::FORWARD:
+ // Leave testAllocations unchanged.
+ break;
+ case FREE_ORDER::BACKWARD:
+ std::reverse(testAllocations.begin(), testAllocations.end());
+ break;
+ case FREE_ORDER::RANDOM:
+ std::shuffle(testAllocations.begin(), testAllocations.end(), MyUniformRandomNumberGenerator(rand)); // Shuffles using the same generator as the sizes.
+ break;
+ default: assert(0);
+ }
+
+ time_point freeTimeBeg = std::chrono::high_resolution_clock::now();
+ for(size_t i = 0; i < allocCount; ++i)
+ vmaFreeMemory(g_hAllocator, testAllocations[i]);
+ freeTotalDuration += std::chrono::high_resolution_clock::now() - freeTimeBeg;
+
+ testAllocations.clear();
+ }
+
+ // Delete baseAllocations, last element first.
+ while(!baseAllocations.empty())
+ {
+ vmaFreeMemory(g_hAllocator, baseAllocations.back());
+ baseAllocations.pop_back();
+ }
+
+ vmaDestroyPool(g_hAllocator, pool);
+
+ wprintf(L" LinearAlgorithm=%u %s FreeOrder=%s: allocations %g s, free %g s\n",
+ linear ? 1 : 0,
+ empty ? L"Empty" : L"Not empty",
+ FREE_ORDER_NAMES[(size_t)freeOrder],
+ ToFloatSeconds(allocTotalDuration),
+ ToFloatSeconds(freeTotalDuration));
+}
+
+static void BenchmarkLinearAllocator()
+{
+    // Runs BenchmarkLinearAllocatorCase for each (free order, empty, linear) combination enabled by ConfigType.
+    wprintf(L"Benchmark linear allocator\n");
+
+    // Free orders in the sequence they are benchmarked; higher configurations use a longer prefix.
+    static const FREE_ORDER benchmarkedOrders[] = {
+        FREE_ORDER::BACKWARD,
+        FREE_ORDER::FORWARD,
+        FREE_ORDER::RANDOM,
+    };
+
+    // LARGE and above: all three orders. SMALL and above: the first two. Otherwise only the first.
+    const uint32_t orderCount =
+        ConfigType >= CONFIG_TYPE::CONFIG_TYPE_LARGE ? 3 :
+        ConfigType >= CONFIG_TYPE::CONFIG_TYPE_SMALL ? 2 : 1;
+
+    // The pre-populated ("not empty") pool variant is exercised only for SMALL and above.
+    const uint32_t emptyVariantCount = ConfigType >= CONFIG_TYPE::CONFIG_TYPE_SMALL ? 2 : 1;
+
+    for(uint32_t orderIdx = 0; orderIdx < orderCount; ++orderIdx)
+    {
+        const FREE_ORDER freeOrder = benchmarkedOrders[orderIdx];
+        for(uint32_t emptyIdx = 0; emptyIdx < emptyVariantCount; ++emptyIdx)
+        {
+            const bool empty = (emptyIdx == 0); // Empty pool first, then the pre-populated one.
+
+            for(uint32_t linearIdx = 0; linearIdx < 2; ++linearIdx)
+            {
+                const bool linear = (linearIdx != 0); // Default algorithm first, then the linear one.
+                BenchmarkLinearAllocatorCase(linear, empty, freeOrder);
+            }
+        }
+    }
+}
+
static void TestPool_SameSize()
{
const VkDeviceSize BUF_SIZE = 1024 * 1024;
@@ -3194,17 +3375,6 @@
WritePoolTestResult(file, "Code desc", "Test desc", config, result);
}
-enum CONFIG_TYPE {
- CONFIG_TYPE_MINIMUM,
- CONFIG_TYPE_SMALL,
- CONFIG_TYPE_AVERAGE,
- CONFIG_TYPE_LARGE,
- CONFIG_TYPE_MAXIMUM,
- CONFIG_TYPE_COUNT
-};
-
-static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_SMALL;
-//static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_LARGE;
static const char* CODE_DESCRIPTION = "Foo";
static void PerformMainTests(FILE* file)
@@ -3687,6 +3857,7 @@
TestMappingMultithreaded();
TestLinearAllocator();
ManuallyTestLinearAllocator();
+ BenchmarkLinearAllocator();
TestDefragmentationSimple();
TestDefragmentationFull();
diff --git a/src/VmaUsage.h b/src/VmaUsage.h
index e788a30..a85bf9e 100644
--- a/src/VmaUsage.h
+++ b/src/VmaUsage.h
@@ -16,16 +16,14 @@
include all public interface declarations. Example:
*/
-//#define VMA_USE_STL_CONTAINERS 1
-
//#define VMA_HEAVY_ASSERT(expr) assert(expr)
-
+//#define VMA_USE_STL_CONTAINERS 1
//#define VMA_DEDICATED_ALLOCATION 0
-
//#define VMA_DEBUG_MARGIN 16
//#define VMA_DEBUG_DETECT_CORRUPTION 1
//#define VMA_DEBUG_INITIALIZE_ALLOCATIONS 1
//#define VMA_RECORDING_ENABLED 0
+//#define VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY 256
#pragma warning(push, 4)
#pragma warning(disable: 4127) // conditional expression is constant
diff --git a/src/vk_mem_alloc.h b/src/vk_mem_alloc.h
index 3907819..e92873b 100644
--- a/src/vk_mem_alloc.h
+++ b/src/vk_mem_alloc.h
@@ -641,7 +641,7 @@
![Ring buffer](../gfx/Linear_allocator_5_ring_buffer.png)
-Pools with linear algorithm support lost allocations when used as ring buffer.
+Pools with linear algorithm support [lost allocations](@ref lost_allocations) when used as ring buffer.
If there is not enough free space for a new allocation, but existing allocations
from the front of the queue can become lost, they become lost and the allocation
succeeds.
@@ -8333,7 +8333,7 @@
for(size_t nextSuballocIndex = suballocations2nd.size(); nextSuballocIndex--; )
{
const VmaSuballocation& nextSuballoc = suballocations2nd[nextSuballocIndex];
- if(VmaBlocksOnSamePage(nextSuballoc.offset, nextSuballoc.size, resultOffset, bufferImageGranularity))
+ if(VmaBlocksOnSamePage(resultOffset, allocSize, nextSuballoc.offset, bufferImageGranularity))
{
if(VmaIsBufferImageGranularityConflict(nextSuballoc.type, allocType))
{
@@ -8364,7 +8364,7 @@
for(size_t prevSuballocIndex = suballocations1st.size(); prevSuballocIndex--; )
{
const VmaSuballocation& prevSuballoc = suballocations1st[prevSuballocIndex];
- if(VmaBlocksOnSamePage(resultOffset, allocSize, prevSuballoc.offset, bufferImageGranularity))
+ if(VmaBlocksOnSamePage(prevSuballoc.offset, prevSuballoc.size, resultOffset, bufferImageGranularity))
{
if(VmaIsBufferImageGranularityConflict(allocType, prevSuballoc.type))
{