Major optimization: Added VmaAllocationObjectAllocator to allocate VmaAllocation objects out of free-list allocator instead of making separate CPU heap allocations.
diff --git a/src/vk_mem_alloc.h b/src/vk_mem_alloc.h
index 5ae0c29..4eb1305 100644
--- a/src/vk_mem_alloc.h
+++ b/src/vk_mem_alloc.h
@@ -3718,7 +3718,7 @@
{
VMA_CLASS_NO_COPY(VmaMutexLock)
public:
- VmaMutexLock(VMA_MUTEX& mutex, bool useMutex) :
+ VmaMutexLock(VMA_MUTEX& mutex, bool useMutex = true) :
m_pMutex(useMutex ? &mutex : VMA_NULL)
{ if(m_pMutex) { m_pMutex->Lock(); } }
~VmaMutexLock()
@@ -4907,7 +4907,6 @@
struct VmaAllocation_T
{
- VMA_CLASS_NO_COPY(VmaAllocation_T)
private:
static const uint8_t MAP_COUNT_FLAG_PERSISTENT_MAP = 0x80;
@@ -4924,23 +4923,29 @@
ALLOCATION_TYPE_DEDICATED,
};
- VmaAllocation_T(uint32_t currentFrameIndex, bool userDataString) :
- m_Alignment(1),
- m_Size(0),
- m_pUserData(VMA_NULL),
- m_LastUseFrameIndex(currentFrameIndex),
- m_Type((uint8_t)ALLOCATION_TYPE_NONE),
- m_SuballocationType((uint8_t)VMA_SUBALLOCATION_TYPE_UNKNOWN),
- m_MapCount(0),
- m_Flags(userDataString ? (uint8_t)FLAG_USER_DATA_STRING : 0)
+ /*
+ This struct cannot have constructor or destructor. It must be POD because it is
+ allocated using VmaPoolAllocator.
+ */
+
+ void Ctor(uint32_t currentFrameIndex, bool userDataString)
{
+ m_Alignment = 1;
+ m_Size = 0;
+ m_pUserData = VMA_NULL;
+ m_LastUseFrameIndex = currentFrameIndex;
+ m_Type = (uint8_t)ALLOCATION_TYPE_NONE;
+ m_SuballocationType = (uint8_t)VMA_SUBALLOCATION_TYPE_UNKNOWN;
+ m_MapCount = 0;
+ m_Flags = userDataString ? (uint8_t)FLAG_USER_DATA_STRING : 0;
+
#if VMA_STATS_STRING_ENABLED
m_CreationFrameIndex = currentFrameIndex;
m_BufferImageUsage = 0;
#endif
}
- ~VmaAllocation_T()
+ void Dtor()
{
VMA_ASSERT((m_MapCount & ~MAP_COUNT_FLAG_PERSISTENT_MAP) == 0 && "Allocation was not unmapped before destruction.");
@@ -6568,6 +6573,23 @@
#endif // #if VMA_RECORDING_ENABLED
+/*
+Thread-safe wrapper over VmaPoolAllocator free list, for allocation of VmaAllocation_T objects.
+*/
+class VmaAllocationObjectAllocator
+{
+ VMA_CLASS_NO_COPY(VmaAllocationObjectAllocator)
+public:
+ VmaAllocationObjectAllocator(const VkAllocationCallbacks* pAllocationCallbacks);
+
+ VmaAllocation Allocate();
+ void Free(VmaAllocation hAlloc);
+
+private:
+ VMA_MUTEX m_Mutex;
+ VmaPoolAllocator<VmaAllocation_T> m_Allocator;
+};
+
// Main allocator object.
struct VmaAllocator_T
{
@@ -6579,6 +6601,7 @@
bool m_AllocationCallbacksSpecified;
VkAllocationCallbacks m_AllocationCallbacks;
VmaDeviceMemoryCallbacks m_DeviceMemoryCallbacks;
+ VmaAllocationObjectAllocator m_AllocationObjectAllocator;
// Number of bytes free out of limit, or VK_WHOLE_SIZE if no limit for that heap.
VkDeviceSize m_HeapSizeLimit[VK_MAX_MEMORY_HEAPS];
@@ -11806,7 +11829,8 @@
m_HasEmptyBlock = false;
}
// Allocate from this pBlock.
- *pAllocation = vma_new(m_hAllocator, VmaAllocation_T)(currentFrameIndex, isUserDataString);
+ *pAllocation = m_hAllocator->m_AllocationObjectAllocator.Allocate();
+ (*pAllocation)->Ctor(currentFrameIndex, isUserDataString);
pBestRequestBlock->m_pMetadata->Alloc(bestRequest, suballocType, size, *pAllocation);
(*pAllocation)->InitBlockAllocation(
pBestRequestBlock,
@@ -12008,7 +12032,8 @@
m_HasEmptyBlock = false;
}
- *pAllocation = vma_new(m_hAllocator, VmaAllocation_T)(currentFrameIndex, isUserDataString);
+ *pAllocation = m_hAllocator->m_AllocationObjectAllocator.Allocate();
+ (*pAllocation)->Ctor(currentFrameIndex, isUserDataString);
pBlock->m_pMetadata->Alloc(currRequest, suballocType, size, *pAllocation);
(*pAllocation)->InitBlockAllocation(
pBlock,
@@ -14079,6 +14104,26 @@
#endif // #if VMA_RECORDING_ENABLED
////////////////////////////////////////////////////////////////////////////////
+// VmaAllocationObjectAllocator
+
+VmaAllocationObjectAllocator::VmaAllocationObjectAllocator(const VkAllocationCallbacks* pAllocationCallbacks) :
+ m_Allocator(pAllocationCallbacks, 1024)
+{
+}
+
+VmaAllocation VmaAllocationObjectAllocator::Allocate()
+{
+ VmaMutexLock mutexLock(m_Mutex);
+ return m_Allocator.Alloc();
+}
+
+void VmaAllocationObjectAllocator::Free(VmaAllocation hAlloc)
+{
+ VmaMutexLock mutexLock(m_Mutex);
+ m_Allocator.Free(hAlloc);
+}
+
+////////////////////////////////////////////////////////////////////////////////
// VmaAllocator_T
VmaAllocator_T::VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo) :
@@ -14088,6 +14133,7 @@
m_AllocationCallbacksSpecified(pCreateInfo->pAllocationCallbacks != VMA_NULL),
m_AllocationCallbacks(pCreateInfo->pAllocationCallbacks ?
*pCreateInfo->pAllocationCallbacks : VmaEmptyAllocationCallbacks),
+ m_AllocationObjectAllocator(&m_AllocationCallbacks),
m_PreferredLargeHeapBlockSize(0),
m_PhysicalDevice(pCreateInfo->physicalDevice),
m_CurrentFrameIndex(0),
@@ -14224,6 +14270,11 @@
for(size_t i = GetMemoryTypeCount(); i--; )
{
+ if(m_pDedicatedAllocations[i] != VMA_NULL && !m_pDedicatedAllocations[i]->empty())
+ {
+ VMA_ASSERT(0 && "Unfreed dedicated allocations found.");
+ }
+
vma_delete(this, m_pDedicatedAllocations[i]);
vma_delete(this, m_pBlockVectors[i]);
}
@@ -14528,7 +14579,8 @@
FreeVulkanMemory(memTypeIndex, currAlloc->GetSize(), hMemory);
currAlloc->SetUserData(this, VMA_NULL);
- vma_delete(this, currAlloc);
+ currAlloc->Dtor();
+ m_AllocationObjectAllocator.Free(currAlloc);
}
memset(pAllocations, 0, sizeof(VmaAllocation) * allocationCount);
@@ -14573,7 +14625,8 @@
}
}
- *pAllocation = vma_new(this, VmaAllocation_T)(m_CurrentFrameIndex.load(), isUserDataString);
+ *pAllocation = m_AllocationObjectAllocator.Allocate();
+ (*pAllocation)->Ctor(m_CurrentFrameIndex.load(), isUserDataString);
(*pAllocation)->InitDedicatedAllocation(memTypeIndex, hMemory, suballocType, pMappedData, size);
(*pAllocation)->SetUserData(this, pUserData);
if(VMA_DEBUG_INITIALIZE_ALLOCATIONS)
@@ -14835,7 +14888,8 @@
}
allocation->SetUserData(this, VMA_NULL);
- vma_delete(this, allocation);
+ allocation->Dtor();
+ m_AllocationObjectAllocator.Free(allocation);
}
}
}
@@ -15214,7 +15268,8 @@
void VmaAllocator_T::CreateLostAllocation(VmaAllocation* pAllocation)
{
- *pAllocation = vma_new(this, VmaAllocation_T)(VMA_FRAME_INDEX_LOST, false);
+ *pAllocation = m_AllocationObjectAllocator.Allocate();
+ (*pAllocation)->Ctor(VMA_FRAME_INDEX_LOST, false);
(*pAllocation)->InitLost();
}