Add support for small texture alignment

Added configuration macro D3D12MA_USE_SMALL_RESOURCE_PLACEMENT_ALIGNMENT.
diff --git a/src/D3D12MemAlloc.cpp b/src/D3D12MemAlloc.cpp
index 58f1fcd..d5279f7 100644
--- a/src/D3D12MemAlloc.cpp
+++ b/src/D3D12MemAlloc.cpp
@@ -260,6 +260,11 @@
 {

 	return (x + (y / (T)2)) / y;

 }

+template <typename T>

+static inline T DivideRoudingUp(T x, T y) // Integer division of x by y with the quotient rounded up (for x >= 0 and y > 0).

+{

+    return (x + y - 1) / y; // NOTE(review): x + y - 1 can overflow when x is close to the maximum value of T.

+}

 

 /*

 Returns true if given number is a power of two.

@@ -528,6 +533,193 @@
         D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT : D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;

 }

 

+static bool IsFormatCompressed(DXGI_FORMAT format) // Returns true if format is one of the BC1..BC7 formats listed below.

+{

+    switch(format)

+    {

+    case DXGI_FORMAT_BC1_TYPELESS:

+    case DXGI_FORMAT_BC1_UNORM:

+    case DXGI_FORMAT_BC1_UNORM_SRGB:

+    case DXGI_FORMAT_BC2_TYPELESS:

+    case DXGI_FORMAT_BC2_UNORM:

+    case DXGI_FORMAT_BC2_UNORM_SRGB:

+    case DXGI_FORMAT_BC3_TYPELESS:

+    case DXGI_FORMAT_BC3_UNORM:

+    case DXGI_FORMAT_BC3_UNORM_SRGB:

+    case DXGI_FORMAT_BC4_TYPELESS:

+    case DXGI_FORMAT_BC4_UNORM:

+    case DXGI_FORMAT_BC4_SNORM:

+    case DXGI_FORMAT_BC5_TYPELESS:

+    case DXGI_FORMAT_BC5_UNORM:

+    case DXGI_FORMAT_BC5_SNORM:

+    case DXGI_FORMAT_BC6H_TYPELESS:

+    case DXGI_FORMAT_BC6H_UF16:

+    case DXGI_FORMAT_BC6H_SF16:

+    case DXGI_FORMAT_BC7_TYPELESS:

+    case DXGI_FORMAT_BC7_UNORM:

+    case DXGI_FORMAT_BC7_UNORM_SRGB:

+        return true;

+    default: // Any format not listed above is reported as not compressed.

+        return false;

+    }

+}

+

+// Returns the number of bits per pixel of the given format. Only some formats are supported. For others it returns 0.

+static UINT GetBitsPerPixel(DXGI_FORMAT format)

+{

+    switch(format)

+    {

+    case DXGI_FORMAT_R32G32B32A32_TYPELESS:

+    case DXGI_FORMAT_R32G32B32A32_FLOAT:

+    case DXGI_FORMAT_R32G32B32A32_UINT:

+    case DXGI_FORMAT_R32G32B32A32_SINT:

+        return 128;

+    case DXGI_FORMAT_R32G32B32_TYPELESS:

+    case DXGI_FORMAT_R32G32B32_FLOAT:

+    case DXGI_FORMAT_R32G32B32_UINT:

+    case DXGI_FORMAT_R32G32B32_SINT:

+        return 96;

+    case DXGI_FORMAT_R16G16B16A16_TYPELESS:

+    case DXGI_FORMAT_R16G16B16A16_FLOAT:

+    case DXGI_FORMAT_R16G16B16A16_UNORM:

+    case DXGI_FORMAT_R16G16B16A16_UINT:

+    case DXGI_FORMAT_R16G16B16A16_SNORM:

+    case DXGI_FORMAT_R16G16B16A16_SINT:

+        return 64;

+    case DXGI_FORMAT_R32G32_TYPELESS:

+    case DXGI_FORMAT_R32G32_FLOAT:

+    case DXGI_FORMAT_R32G32_UINT:

+    case DXGI_FORMAT_R32G32_SINT:

+        return 64;

+    case DXGI_FORMAT_R32G8X24_TYPELESS:

+    case DXGI_FORMAT_D32_FLOAT_S8X24_UINT:

+    case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS:

+    case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT:

+        return 64;

+    case DXGI_FORMAT_R10G10B10A2_TYPELESS:

+    case DXGI_FORMAT_R10G10B10A2_UNORM:

+    case DXGI_FORMAT_R10G10B10A2_UINT:

+    case DXGI_FORMAT_R11G11B10_FLOAT:

+        return 32;

+    case DXGI_FORMAT_R8G8B8A8_TYPELESS:

+    case DXGI_FORMAT_R8G8B8A8_UNORM:

+    case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:

+    case DXGI_FORMAT_R8G8B8A8_UINT:

+    case DXGI_FORMAT_R8G8B8A8_SNORM:

+    case DXGI_FORMAT_R8G8B8A8_SINT:

+        return 32;

+    case DXGI_FORMAT_R16G16_TYPELESS:

+    case DXGI_FORMAT_R16G16_FLOAT:

+    case DXGI_FORMAT_R16G16_UNORM:

+    case DXGI_FORMAT_R16G16_UINT:

+    case DXGI_FORMAT_R16G16_SNORM:

+    case DXGI_FORMAT_R16G16_SINT:

+        return 32;

+    case DXGI_FORMAT_R32_TYPELESS:

+    case DXGI_FORMAT_D32_FLOAT:

+    case DXGI_FORMAT_R32_FLOAT:

+    case DXGI_FORMAT_R32_UINT:

+    case DXGI_FORMAT_R32_SINT:

+        return 32;

+    case DXGI_FORMAT_R24G8_TYPELESS:

+    case DXGI_FORMAT_D24_UNORM_S8_UINT:

+    case DXGI_FORMAT_R24_UNORM_X8_TYPELESS:

+    case DXGI_FORMAT_X24_TYPELESS_G8_UINT:

+        return 32;

+    case DXGI_FORMAT_R8G8_TYPELESS:

+    case DXGI_FORMAT_R8G8_UNORM:

+    case DXGI_FORMAT_R8G8_UINT:

+    case DXGI_FORMAT_R8G8_SNORM:

+    case DXGI_FORMAT_R8G8_SINT:

+        return 16;

+    case DXGI_FORMAT_R16_TYPELESS:

+    case DXGI_FORMAT_R16_FLOAT:

+    case DXGI_FORMAT_D16_UNORM:

+    case DXGI_FORMAT_R16_UNORM:

+    case DXGI_FORMAT_R16_UINT:

+    case DXGI_FORMAT_R16_SNORM:

+    case DXGI_FORMAT_R16_SINT:

+        return 16;

+    case DXGI_FORMAT_R8_TYPELESS:

+    case DXGI_FORMAT_R8_UNORM:

+    case DXGI_FORMAT_R8_UINT:

+    case DXGI_FORMAT_R8_SNORM:

+    case DXGI_FORMAT_R8_SINT:

+    case DXGI_FORMAT_A8_UNORM:

+        return 8;

+    case DXGI_FORMAT_BC1_TYPELESS:

+    case DXGI_FORMAT_BC1_UNORM:

+    case DXGI_FORMAT_BC1_UNORM_SRGB:

+        return 4;

+    case DXGI_FORMAT_BC2_TYPELESS:

+    case DXGI_FORMAT_BC2_UNORM:

+    case DXGI_FORMAT_BC2_UNORM_SRGB:

+        return 8;

+    case DXGI_FORMAT_BC3_TYPELESS:

+    case DXGI_FORMAT_BC3_UNORM:

+    case DXGI_FORMAT_BC3_UNORM_SRGB:

+        return 8;

+    case DXGI_FORMAT_BC4_TYPELESS:

+    case DXGI_FORMAT_BC4_UNORM:

+    case DXGI_FORMAT_BC4_SNORM:

+        return 4;

+    case DXGI_FORMAT_BC5_TYPELESS:

+    case DXGI_FORMAT_BC5_UNORM:

+    case DXGI_FORMAT_BC5_SNORM:

+        return 8;

+    case DXGI_FORMAT_BC6H_TYPELESS:

+    case DXGI_FORMAT_BC6H_UF16:

+    case DXGI_FORMAT_BC6H_SF16:

+        return 8;

+    case DXGI_FORMAT_BC7_TYPELESS:

+    case DXGI_FORMAT_BC7_UNORM:

+    case DXGI_FORMAT_BC7_UNORM_SRGB:

+        return 8;

+    default: // Format not handled above: 0 tells the caller it is unsupported.

+        return 0;

+    }

+}

+

+// Predicts whether a texture can use small placement alignment. This algorithm is overly conservative: it returns true only for single-sample, non-array 2D textures without render-target/depth-stencil flags, in a format known to GetBitsPerPixel, that cover at most 16 tiles.

+static bool CanUseSmallAlignment(const D3D12_RESOURCE_DESC& resourceDesc)

+{

+    if(resourceDesc.Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE2D)

+        return false;

+    if((resourceDesc.Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) != 0)

+        return false;

+    if(resourceDesc.SampleDesc.Count > 1)

+        return false;

+    if(resourceDesc.DepthOrArraySize != 1)

+        return false;

+

+    UINT sizeX = (UINT)resourceDesc.Width;

+    UINT sizeY = resourceDesc.Height;

+    UINT bitsPerPixel = GetBitsPerPixel(resourceDesc.Format);

+    if(bitsPerPixel == 0) // Unsupported format. Test the returned value, not the function name (which is never 0).

+        return false;

+

+    if(IsFormatCompressed(resourceDesc.Format))

+    {

+        sizeX = DivideRoudingUp(sizeX, 4u); // Width in 4x4 blocks, rounded up so a partial block counts as a whole one.

+        sizeY = DivideRoudingUp(sizeY, 4u); // Height in 4x4 blocks, rounded up likewise.

+        bitsPerPixel *= 16; // From here on this is bits per 4x4 block (16 pixels).

+    }

+

+    UINT tileSizeX = 0, tileSizeY = 0; // Tile extent in the same units as sizeX/sizeY; every entry below amounts to 4 KB.

+    switch(bitsPerPixel)

+    {

+    case   8: tileSizeX = 64; tileSizeY = 64; break;

+    case  16: tileSizeX = 64; tileSizeY = 32; break;

+    case  32: tileSizeX = 32; tileSizeY = 32; break;

+    case  64: tileSizeX = 32; tileSizeY = 16; break;

+    case 128: tileSizeX = 16; tileSizeY = 16; break;

+    default: return false; // Element sizes without a tile shape in the table (e.g. 96 bits) are rejected.

+    }

+

+    const UINT tileCount = DivideRoudingUp(sizeX, tileSizeX) * DivideRoudingUp(sizeY, tileSizeY);

+    return tileCount <= 16; // 16 tiles of 4 KB = 64 KB.

+}

+

 ////////////////////////////////////////////////////////////////////////////////

 // Private class Vector

 

@@ -2285,6 +2477,8 @@
     void UnregisterCommittedAllocation(Allocation* alloc, D3D12_HEAP_TYPE heapType);

 

     HRESULT UpdateD3D12Budget();

+    

+    D3D12_RESOURCE_ALLOCATION_INFO GetResourceAllocationInfo(D3D12_RESOURCE_DESC& inOutResourceDesc) const;

 

     // Writes object { } with data of given budget.

     static void WriteBudgetToJson(JsonWriter& json, const Budget& budget);

@@ -3476,19 +3670,20 @@
         *ppvResource = NULL;

     }

 

-    D3D12_RESOURCE_ALLOCATION_INFO resAllocInfo = m_Device->GetResourceAllocationInfo(0, 1, pResourceDesc);

+    D3D12_RESOURCE_DESC resourceDesc2 = *pResourceDesc;

+    D3D12_RESOURCE_ALLOCATION_INFO resAllocInfo = GetResourceAllocationInfo(resourceDesc2);

     resAllocInfo.Alignment = D3D12MA_MAX<UINT64>(resAllocInfo.Alignment, D3D12MA_DEBUG_ALIGNMENT);

     D3D12MA_ASSERT(IsPow2(resAllocInfo.Alignment));

     D3D12MA_ASSERT(resAllocInfo.SizeInBytes > 0);

 

-    const UINT defaultPoolIndex = CalcDefaultPoolIndex(*pAllocDesc, *pResourceDesc);

+    const UINT defaultPoolIndex = CalcDefaultPoolIndex(*pAllocDesc, resourceDesc2);

     BlockVector* blockVector = m_BlockVectors[defaultPoolIndex];

     D3D12MA_ASSERT(blockVector);

 

     const UINT64 preferredBlockSize = blockVector->GetPreferredBlockSize();

     bool preferCommittedMemory =

         m_AlwaysCommitted ||

-        PrefersCommittedAllocation(*pResourceDesc) ||

+        PrefersCommittedAllocation(resourceDesc2) ||

         // Heuristics: Allocate committed memory if requested size if greater than half of preferred block size.

         resAllocInfo.SizeInBytes > preferredBlockSize / 2;

     if(preferCommittedMemory &&

@@ -3501,7 +3696,7 @@
     {

         return AllocateCommittedResource(

             &finalAllocDesc,

-            pResourceDesc,

+            &resourceDesc2,

             resAllocInfo,

             InitialResourceState,

             pOptimizedClearValue,

@@ -3523,14 +3718,14 @@
             hr = m_Device->CreatePlacedResource(

                 (*ppAllocation)->m_Placed.block->GetHeap(),

                 (*ppAllocation)->GetOffset(),

-                pResourceDesc,

+                &resourceDesc2,

                 InitialResourceState,

                 pOptimizedClearValue,

                 riidResource,

                 (void**)&res);

             if(SUCCEEDED(hr))

             {

-                (*ppAllocation)->SetResource(res, pResourceDesc);

+                (*ppAllocation)->SetResource(res, &resourceDesc2);

                 if(ppvResource != NULL)

                 {

                     res->AddRef();

@@ -3547,7 +3742,7 @@
 

         return AllocateCommittedResource(

             &finalAllocDesc,

-            pResourceDesc,

+            &resourceDesc2,

             resAllocInfo,

             InitialResourceState,

             pOptimizedClearValue,

@@ -4245,6 +4440,38 @@
 #endif

 }

 

+D3D12_RESOURCE_ALLOCATION_INFO AllocatorPimpl::GetResourceAllocationInfo(D3D12_RESOURCE_DESC& inOutResourceDesc) const

+{ // Returns allocation info queried from m_Device. May leave the small alignment written into inOutResourceDesc.Alignment when it was granted.

+#if D3D12MA_USE_SMALL_RESOURCE_PLACEMENT_ALIGNMENT

+    if(inOutResourceDesc.Alignment == 0 && // Only attempted when the caller passed Alignment == 0.

+        inOutResourceDesc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE2D &&

+        (inOutResourceDesc.Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) == 0

+#if D3D12MA_USE_SMALL_RESOURCE_PLACEMENT_ALIGNMENT == 1

+        && CanUseSmallAlignment(inOutResourceDesc) // Mode 1 only: additionally require the conservative pre-check to pass.

+#endif

+        )

+    {

+        /*

+        The algorithm here is based on Microsoft sample: "Small Resources Sample"

+        https://github.com/microsoft/DirectX-Graphics-Samples/tree/master/Samples/Desktop/D3D12SmallResources

+        */

+        const UINT64 smallAlignmentToTry = inOutResourceDesc.SampleDesc.Count > 1 ?

+            D3D12_SMALL_MSAA_RESOURCE_PLACEMENT_ALIGNMENT :

+            D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT;

+        inOutResourceDesc.Alignment = smallAlignmentToTry; // Request the small alignment through the description itself.

+        const D3D12_RESOURCE_ALLOCATION_INFO smallAllocInfo = m_Device->GetResourceAllocationInfo(0, 1, &inOutResourceDesc);

+        // Check if alignment requested has been granted.

+        if(smallAllocInfo.Alignment == smallAlignmentToTry)

+        {

+            return smallAllocInfo; // inOutResourceDesc.Alignment stays set to the small alignment for the caller.

+        }

+        inOutResourceDesc.Alignment = 0; // Not granted: restore the original value (0, as required by the check above).

+    }

+#endif // #if D3D12MA_USE_SMALL_RESOURCE_PLACEMENT_ALIGNMENT

+

+    return m_Device->GetResourceAllocationInfo(0, 1, &inOutResourceDesc); // Regular path: query with the description as passed in.

+}

+

 void AllocatorPimpl::WriteBudgetToJson(JsonWriter& json, const Budget& budget)

 {

     json.BeginObject();

diff --git a/src/D3D12MemAlloc.h b/src/D3D12MemAlloc.h
index d7b34cd..026fdb0 100644
--- a/src/D3D12MemAlloc.h
+++ b/src/D3D12MemAlloc.h
@@ -24,7 +24,7 @@
 

 /** \mainpage D3D12 Memory Allocator

 

-<b>Version 2.0.0-development</b> (2020-01-27)

+<b>Version 2.0.0-development</b> (2020-03-11)

 

 Copyright (c) 2019-2020 Advanced Micro Devices, Inc. All rights reserved. \n

 License: MIT

@@ -335,6 +335,27 @@
     #include <dxgi.h>

 #endif

 

+/*

+When defined to a value other than 0, the library will try to use

+D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT or D3D12_SMALL_MSAA_RESOURCE_PLACEMENT_ALIGNMENT

+for created textures when possible, which can save memory because some small textures

+may get an alignment of 4K and a size that is a multiple of 4K instead of 64K.

+

+#define D3D12MA_USE_SMALL_RESOURCE_PLACEMENT_ALIGNMENT 0

+    Disables small texture alignment.

+#define D3D12MA_USE_SMALL_RESOURCE_PLACEMENT_ALIGNMENT 1

+    Enables conservative algorithm that will use small alignment only for some textures

+    that are surely known to support it.

+#define D3D12MA_USE_SMALL_RESOURCE_PLACEMENT_ALIGNMENT 2

+    Enables query for small alignment to D3D12 (based on Microsoft sample) which will

+    enable small alignment for more textures, but will also generate D3D Debug Layer

+    error #721 on call to ID3D12Device::GetResourceAllocationInfo, which you should just

+    ignore.

+*/

+#ifndef D3D12MA_USE_SMALL_RESOURCE_PLACEMENT_ALIGNMENT

+    #define D3D12MA_USE_SMALL_RESOURCE_PLACEMENT_ALIGNMENT 1

+#endif

+

 /// \cond INTERNAL

 

 #define D3D12MA_CLASS_NO_COPY(className) \