From 78643623c318788b558b5d382e812716495a2928 Mon Sep 17 00:00:00 2001 From: JordanTheToaster Date: Sat, 7 Jun 2025 02:23:52 +0100 Subject: [PATCH] 3rdparty: Update d3d12memalloc to 3.0.1 --- 3rdparty/d3d12memalloc/CHANGELOG.md | 31 + 3rdparty/d3d12memalloc/LICENSE.txt | 2 +- 3rdparty/d3d12memalloc/README.md | 9 +- .../d3d12memalloc/include/D3D12MemAlloc.h | 1062 ++++++++++++++++- 3rdparty/d3d12memalloc/src/D3D12MemAlloc.cpp | 467 +++++--- 5 files changed, 1348 insertions(+), 223 deletions(-) diff --git a/3rdparty/d3d12memalloc/CHANGELOG.md b/3rdparty/d3d12memalloc/CHANGELOG.md index bc027d3bf8..95b68c26f4 100644 --- a/3rdparty/d3d12memalloc/CHANGELOG.md +++ b/3rdparty/d3d12memalloc/CHANGELOG.md @@ -1,3 +1,34 @@ +# 3.0.1 (2025-05-08) + +- Fixed macros `D3D12MA_RECOMMENDED_ALLOCATOR_FLAGS`, `D3D12MA_RECOMMENDED_POOL_FLAGS` (#73). + +# 3.0.0 (2025-05-05) + +It has been a long time since the previous official release, so hopefully everyone has been using the latest code from "master" branch, which is always maintained in a good state, not the old version. For completeness, here is the list of changes since v2.0.1. The major version number has changed, so there are some compatibility-breaking changes, but the basic API stays the same and is mostly backward-compatible. + +- Added helper structs: `CALLOCATION_DESC`, `CPOOL_DESC`, `CVIRTUAL_BLOCK_DESC`, `CVIRTUAL_ALLOCATION_DESC`. +- Added macros: `D3D12MA_RECOMMENDED_ALLOCATOR_FLAGS`, `D3D12MA_RECOMMENDED_HEAP_FLAGS`, `D3D12MA_RECOMMENDED_POOL_FLAGS`. +- Added functions: `Allocator::CreateResource3`, `CreateAliasingResource2`. + - They support parameters: `D3D12_BARRIER_LAYOUT InitialLayout`, `const DXGI_FORMAT* pCastableFormats`. + - They require recent DirectX 12 Agility SDK. To use them, `ID3D12Device10` must be available. + To use non-empty list of castable formats, `ID3D12Device12` must be available. +- Added support for GPU Upload Heaps (`D3D12_HEAP_TYPE_GPU_UPLOAD`). + - Requires recent DirectX 12 Agility SDK. 
Support on the user's machine is available only when supported by the motherboard, GPU, drivers, and enabled as "Resizable BAR" in UEFI settings. It can be queried using new `Allocator::IsGPUUploadHeapSupported` function. + - `TotalStatistics::HeapType` array was extended from 4 to 5 elements. +- Added missing function `Allocator::CreateAliasingResource1`. +- Added `POOL_DESC::ResidencyPriority` member. +- Removed `Allocation::WasZeroInitialized` function. It wasn't fully implemented anyway. +- Added `POOL_FLAG_ALWAYS_COMMITTED`. +- Added a heuristic that prefers creating small buffers as committed to save memory. + - It is enabled by default. It can be disabled by new flag `ALLOCATOR_FLAG_DONT_PREFER_SMALL_BUFFERS_COMMITTED`. +- Macro `D3D12MA_OPTIONS16_SUPPORTED` is no longer exposed in the header or Cmake script. + It is defined automatically based on the Agility SDK version. +- Added macro `D3D12MA_DEBUG_LOG`, which can be used to log unfreed allocations. +- Many improvements in the documentation, including new chapters: "Frequently asked questions", "Optimal resource allocation". +- Countless fixes and improvements, including performance optimizations, compatibility with various compilers, tests. +- Major changes in the Cmake script. +- Fixes in "GpuMemDumpVis.py" script. + # 2.0.1 (2022-04-05) A maintenance release with some bug fixes and improvements. There are no changes in the library API. diff --git a/3rdparty/d3d12memalloc/LICENSE.txt b/3rdparty/d3d12memalloc/LICENSE.txt index 0761191395..eb08d9e879 100644 --- a/3rdparty/d3d12memalloc/LICENSE.txt +++ b/3rdparty/d3d12memalloc/LICENSE.txt @@ -1,4 +1,4 @@ -Copyright (c) 2019-2024 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019-2025 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/3rdparty/d3d12memalloc/README.md b/3rdparty/d3d12memalloc/README.md index 98d61a2ab8..053d0a726d 100644 --- a/3rdparty/d3d12memalloc/README.md +++ b/3rdparty/d3d12memalloc/README.md @@ -10,10 +10,6 @@ Easy to integrate memory allocation library for Direct3D 12. **Product page:** [D3D12 Memory Allocator on GPUOpen](https://gpuopen.com/gaming-product/d3d12-memory-allocator/) -**Build status:** - -Windows: [![Build status](https://ci.appveyor.com/api/projects/status/860i07bxv55ydgvg?svg=true)](https://ci.appveyor.com/project/adam-sawicki-amd/d3d12memoryallocator) - [![Average time to resolve an issue](http://isitmaintained.com/badge/resolution/GPUOpen-LibrariesAndSDKs/D3D12MemoryAllocator.svg)](http://isitmaintained.com/project/GPUOpen-LibrariesAndSDKs/D3D12MemoryAllocator "Average time to resolve an issue") # Problem @@ -91,7 +87,7 @@ With this one function call: # Binaries -The release comes with precompiled binary executable for "D3D12Sample" application which contains test suite. It is compiled using Visual Studio 2019, so it requires appropriate libraries to work, including "MSVCP140.dll", "VCRUNTIME140.dll", "VCRUNTIME140_1.dll". If its launch fails with error message telling about those files missing, please download and install [Microsoft Visual C++ Redistributable for Visual Studio 2015, 2017 and 2019](https://support.microsoft.com/en-us/help/2977003/the-latest-supported-visual-c-downloads), "x64" version. +The release comes with precompiled binary executable for "D3D12Sample" application which contains test suite. It is compiled using Visual Studio 2022, so it requires appropriate libraries to work, including "MSVCP140.dll", "VCRUNTIME140.dll", "VCRUNTIME140_1.dll". 
If its launch fails with error message telling about those files missing, please download and install [Microsoft Visual C++ Redistributable](https://learn.microsoft.com/en-us/cpp/windows/latest-supported-vc-redist?view=msvc-170), "X64" version. # Copyright notice @@ -113,7 +109,8 @@ For more information see [NOTICES.txt](NOTICES.txt). - **[Qt Project](https://github.com/qt)** - **[Ghost of Tsushima: Director's Cut PC](https://www.youtube.com/watch?v=cPKBDbCYctc&t=698s)** - Information avaliable in 11:38 of credits +- **[Godot Engine](https://github.com/godotengine/godot/)** - multi-platform 2D and 3D game engine. License: MIT. - **[The Forge](https://github.com/ConfettiFX/The-Forge)** - cross-platform rendering framework. Apache License 2.0. -- **[Wicked Engine](https://github.com/turanszkij/WickedEngine)** - 3D engine with modern graphics +- **[Wicked Engine](https://github.com/turanszkij/WickedEngine)** - 3D engine with modern graphics [Some other projects on GitHub](https://github.com/search?q=D3D12MemAlloc.h&type=Code) and some game development studios that use DX12 in their games. diff --git a/3rdparty/d3d12memalloc/include/D3D12MemAlloc.h b/3rdparty/d3d12memalloc/include/D3D12MemAlloc.h index 84306054ce..de809103c3 100644 --- a/3rdparty/d3d12memalloc/include/D3D12MemAlloc.h +++ b/3rdparty/d3d12memalloc/include/D3D12MemAlloc.h @@ -1,5 +1,5 @@ // -// Copyright (c) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2019-2025 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -24,21 +24,31 @@ /** \mainpage D3D12 Memory Allocator -Version 2.1.0-development (2024-07-05) +Version 3.0.1 (2025-05-08) -Copyright (c) 2019-2024 Advanced Micro Devices, Inc. All rights reserved. \n +Copyright (c) 2019-2025 Advanced Micro Devices, Inc. All rights reserved. 
\n License: MIT Documentation of all members: D3D12MemAlloc.h \section main_table_of_contents Table of contents +- \subpage faq - \subpage quick_start - [Project setup](@ref quick_start_project_setup) - [Creating resources](@ref quick_start_creating_resources) - [Resource reference counting](@ref quick_start_resource_reference_counting) - [Mapping memory](@ref quick_start_mapping_memory) + - [Helper structures](@ref quick_start_helper_structures) - \subpage custom_pools +- \subpage optimal_allocation + - [Avoiding running out of memory](@ref optimal_allocation_avoiding_running_out_of_memory) + - [Allocation performance](@ref optimal_allocation_allocation_Performance) + - [Sub-allocating buffers](@ref optimal_allocation_suballocating_buffers) + - [Residency priority](@ref optimal_allocation_residency_priority) + - [GPU upload heap](@ref optimal_allocation_gpu_upload_heap) + - [Committed versus placed resources](@ref optimal_allocation_committed_vs_placed) + - [Resource alignment](@ref optimal_allocation_resource_alignment) - \subpage defragmentation - \subpage statistics - \subpage resource_aliasing @@ -52,10 +62,10 @@ Documentation of all members: D3D12MemAlloc.h - [Versioning and compatibility](@ref general_considerations_versioning_and_compatibility) - [Features not supported](@ref general_considerations_features_not_supported) -\section main_see_also See also +\section main_see_also Web links -- [Product page on GPUOpen](https://gpuopen.com/gaming-product/d3d12-memory-allocator/) -- [Source repository on GitHub](https://github.com/GPUOpen-LibrariesAndSDKs/D3D12MemoryAllocator) +- [Direct3D 12 Memory Allocator at GPUOpen.com](https://gpuopen.com/gaming-product/d3d12-memory-allocator/) - product page +- [GPUOpen-LibrariesAndSDKs/D3D12MemoryAllocator at GitHub.com](https://github.com/GPUOpen-LibrariesAndSDKs/D3D12MemoryAllocator) - source code repository */ // If using this library on a platform different than Windows PC or want to use different version of DXGI, @@ 
-74,36 +84,65 @@ Documentation of all members: D3D12MemAlloc.h #include #endif -// Define this macro to 0 to disable usage of DXGI 1.4 (needed for IDXGIAdapter3 and query for memory budget). #ifndef D3D12MA_DXGI_1_4 #ifdef __IDXGIAdapter3_INTERFACE_DEFINED__ + /// Define this macro to 0 to disable usage of DXGI 1.4 (which is used for `IDXGIAdapter3` and query for memory budget). #define D3D12MA_DXGI_1_4 1 #else #define D3D12MA_DXGI_1_4 0 #endif #endif -/* -When defined to value other than 0, the library will try to use -D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT or D3D12_SMALL_MSAA_RESOURCE_PLACEMENT_ALIGNMENT -for created textures when possible, which can save memory because some small textures -may get their alignment 4K and their size a multiply of 4K instead of 64K. +#ifndef D3D12MA_CREATE_NOT_ZEROED_AVAILABLE + #ifdef __ID3D12Device8_INTERFACE_DEFINED__ + /// This macro is defined to 0 or 1 automatically. Define it to 0 to disable support for `D3D12_HEAP_FLAG_CREATE_NOT_ZEROED`. + #define D3D12MA_CREATE_NOT_ZEROED_AVAILABLE 1 + #else + #define D3D12MA_CREATE_NOT_ZEROED_AVAILABLE 0 + #endif +#endif -#define D3D12MA_USE_SMALL_RESOURCE_PLACEMENT_ALIGNMENT 0 - Disables small texture alignment. -#define D3D12MA_USE_SMALL_RESOURCE_PLACEMENT_ALIGNMENT 1 - Enables conservative algorithm that will use small alignment only for some textures - that are surely known to support it. -#define D3D12MA_USE_SMALL_RESOURCE_PLACEMENT_ALIGNMENT 2 - Enables query for small alignment to D3D12 (based on Microsoft sample) which will - enable small alignment for more textures, but will also generate D3D Debug Layer - error #721 on call to ID3D12Device::GetResourceAllocationInfo, which you should just - ignore. 
-*/ #ifndef D3D12MA_USE_SMALL_RESOURCE_PLACEMENT_ALIGNMENT + /** \brief + When defined to value other than 0, the library will try to use + `D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT` or `D3D12_SMALL_MSAA_RESOURCE_PLACEMENT_ALIGNMENT` + for created textures when possible, which can save memory because some small textures + may get their alignment 4 KB and their size a multiple of 4 KB instead of 64 KB. + + - `#define D3D12MA_USE_SMALL_RESOURCE_PLACEMENT_ALIGNMENT 0` - + Disables small texture alignment. + - `#define D3D12MA_USE_SMALL_RESOURCE_PLACEMENT_ALIGNMENT 1` (the default) - + Enables conservative algorithm that will use small alignment only for some textures + that are surely known to support it. + - `#define D3D12MA_USE_SMALL_RESOURCE_PLACEMENT_ALIGNMENT 2` - + Enables query for small alignment to D3D12 (based on Microsoft sample) which will + enable small alignment for more textures, but will also generate D3D Debug Layer + error #721 on call to `ID3D12Device::GetResourceAllocationInfo`, which you should just + ignore. + */ #define D3D12MA_USE_SMALL_RESOURCE_PLACEMENT_ALIGNMENT 1 #endif +#ifndef D3D12MA_RECOMMENDED_ALLOCATOR_FLAGS + /// Set of flags recommended for use in D3D12MA::ALLOCATOR_DESC::Flags for optimal performance. + #define D3D12MA_RECOMMENDED_ALLOCATOR_FLAGS (D3D12MA::ALLOCATOR_FLAG_DEFAULT_POOLS_NOT_ZEROED | D3D12MA::ALLOCATOR_FLAG_MSAA_TEXTURES_ALWAYS_COMMITTED) +#endif + +#ifndef D3D12MA_RECOMMENDED_HEAP_FLAGS + #if D3D12MA_CREATE_NOT_ZEROED_AVAILABLE + #define D3D12MA_RECOMMENDED_HEAP_FLAGS (D3D12_HEAP_FLAG_CREATE_NOT_ZEROED) + #else + /// Set of flags recommended for use in D3D12MA::POOL_DESC::HeapFlags for optimal performance. + #define D3D12MA_RECOMMENDED_HEAP_FLAGS (D3D12_HEAP_FLAG_NONE) + #endif +#endif + +#ifndef D3D12MA_RECOMMENDED_POOL_FLAGS + /// Set of flags recommended for use in D3D12MA::POOL_DESC::Flags for optimal performance.
+ #define D3D12MA_RECOMMENDED_POOL_FLAGS (D3D12MA::POOL_FLAG_MSAA_TEXTURES_ALWAYS_COMMITTED) +#endif + + /// \cond INTERNAL #define D3D12MA_CLASS_NO_COPY(className) \ @@ -297,7 +336,7 @@ enum ALLOCATION_FLAGS /// \brief Parameters of created D3D12MA::Allocation object. To be used with Allocator::CreateResource. struct ALLOCATION_DESC { - /// Flags. + /// Flags for the allocation. ALLOCATION_FLAGS Flags; /** \brief The type of memory heap where the new allocation should be placed. @@ -324,7 +363,8 @@ struct ALLOCATION_DESC D3D12_HEAP_FLAGS ExtraHeapFlags; /** \brief Custom pool to place the new resource in. Optional. - When not NULL, the resource will be created inside specified custom pool. + When not null, the resource will be created inside specified custom pool. + Members `HeapType`, `ExtraHeapFlags` are then ignored. */ Pool* CustomPool; /// Custom general-purpose pointer that will be stored in D3D12MA::Allocation. @@ -511,7 +551,13 @@ public: */ ID3D12Resource* GetResource() const { return m_Resource; } - /// Releases the resource currently pointed by the allocation (if any), sets it to new one, incrementing its reference counter (if not null). + /** \brief Releases the resource currently pointed by the allocation (if not null), sets it to new one, incrementing its reference counter (if not null). + + \warning + This is an advanced feature that should be used only in special cases, e.g. during \subpage defragmentation. + Typically, an allocation object should reference the resource that was created together with it. + If you swap it to another resource of different size, \subpage statistics and budgets can be calculated incorrectly. + */ void SetResource(ID3D12Resource* pResource); /** \brief Returns memory heap that the resource is created in. @@ -820,7 +866,7 @@ enum POOL_FLAGS /// Zero POOL_FLAG_NONE = 0, - /** \brief Enables alternative, linear allocation algorithm in this pool. + /** Enables alternative, linear allocation algorithm in this pool. 
Specify this flag to enable linear allocation algorithm, which always creates new allocations after last one and doesn't reuse space from allocations freed in @@ -833,13 +879,20 @@ enum POOL_FLAGS */ POOL_FLAG_ALGORITHM_LINEAR = 0x1, - /** \brief Optimization, allocate MSAA textures as committed resources always. + /** Optimization, allocate MSAA textures as committed resources always. Specify this flag to create MSAA textures with implicit heaps, as if they were created with flag D3D12MA::ALLOCATION_FLAG_COMMITTED. Usage of this flags enables pool to create its heaps on smaller alignment not suitable for MSAA textures. + + You should always use this flag unless you really need to create some MSAA textures in this pool as placed. */ POOL_FLAG_MSAA_TEXTURES_ALWAYS_COMMITTED = 0x2, + /** Every allocation made in this pool will be created as a committed resource - will have its own memory block. + + There is also an equivalent flag for the entire allocator: D3D12MA::ALLOCATOR_FLAG_ALWAYS_COMMITTED. + */ + POOL_FLAG_ALWAYS_COMMITTED = 0x4, // Bit mask to extract only `ALGORITHM` bits from entire set of flags. POOL_FLAG_ALGORITHM_MASK = POOL_FLAG_ALGORITHM_LINEAR @@ -848,7 +901,10 @@ enum POOL_FLAGS /// \brief Parameters of created D3D12MA::Pool object. To be used with D3D12MA::Allocator::CreatePool. struct POOL_DESC { - /// Flags. + /** \brief Flags for the pool. + + It is recommended to use #D3D12MA_RECOMMENDED_POOL_FLAGS. + */ POOL_FLAGS Flags; /** \brief The parameters of memory heap where allocations of this pool should be placed. @@ -863,7 +919,8 @@ struct POOL_DESC `D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES`, `D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES`. Except if ResourceHeapTier = 2, then it may be `D3D12_HEAP_FLAG_ALLOW_ALL_BUFFERS_AND_TEXTURES` = 0. - + + It is recommended to also add #D3D12MA_RECOMMENDED_HEAP_FLAGS. You can specify additional flags if needed.
*/ D3D12_HEAP_FLAGS HeapFlags; @@ -1014,9 +1071,11 @@ enum ALLOCATOR_FLAGS */ ALLOCATOR_FLAG_SINGLETHREADED = 0x1, - /** - Every allocation will have its own memory block. - To be used for debugging purposes. + /** Every allocation will be created as a committed resource - will have its own memory block. + + Affects both default pools and custom pools. + To be used for debugging purposes only. + There is also an equivalent flag for custom pools: D3D12MA::POOL_FLAG_ALWAYS_COMMITTED. */ ALLOCATOR_FLAG_ALWAYS_COMMITTED = 0x2, @@ -1036,14 +1095,16 @@ enum ALLOCATOR_FLAGS */ ALLOCATOR_FLAG_DEFAULT_POOLS_NOT_ZEROED = 0x4, - /** \brief Optimization, allocate MSAA textures as committed resources always. + /** Optimization, allocate MSAA textures as committed resources always. Specify this flag to create MSAA textures with implicit heaps, as if they were created with flag D3D12MA::ALLOCATION_FLAG_COMMITTED. Usage of this flags enables all default pools to create its heaps on smaller alignment not suitable for MSAA textures. + + You should always use this flag unless you really need to create some MSAA textures as placed. */ ALLOCATOR_FLAG_MSAA_TEXTURES_ALWAYS_COMMITTED = 0x8, - /** \brief Disable optimization that prefers creating small buffers as committed to avoid 64 KB alignment. + /** Disable optimization that prefers creating small buffers as committed to avoid 64 KB alignment. By default, the library prefers creating small buffers <= 32 KB as committed, because drivers tend to pack them better, while placed buffers require 64 KB alignment. @@ -1058,7 +1119,10 @@ enum ALLOCATOR_FLAGS /// \brief Parameters of created Allocator object. To be used with CreateAllocator(). struct ALLOCATOR_DESC { - /// Flags. + /** \brief Flags for the entire allocator. + + It is recommended to use #D3D12MA_RECOMMENDED_ALLOCATOR_FLAGS. + */ ALLOCATOR_FLAGS Flags; /** Direct3D device object that the allocator should be attached to. 
@@ -1123,13 +1187,11 @@ public: When true, you can use `D3D12_HEAP_TYPE_GPU_UPLOAD`. This flag is fetched from `D3D12_FEATURE_D3D12_OPTIONS16::GPUUploadHeapSupported`. - - `#define D3D12MA_OPTIONS16_SUPPORTED 1` is needed for the compilation of this library. Otherwise the flag is always false. */ BOOL IsGPUUploadHeapSupported() const; /** \brief Returns total amount of memory of specific segment group, in bytes. - \param memorySegmentGroup use `DXGI_MEMORY_SEGMENT_GROUP_LOCAL` or DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL`. + \param memorySegmentGroup use `DXGI_MEMORY_SEGMENT_GROUP_LOCAL` or `DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL`. This information is taken from `DXGI_ADAPTER_DESC`. It is not recommended to use this number. @@ -1206,13 +1268,14 @@ public: It internally uses `ID3D12Device10::CreateCommittedResource3` or `ID3D12Device10::CreatePlacedResource2`. To work correctly, `ID3D12Device10` interface must be available in the current system. Otherwise, `E_NOINTERFACE` is returned. + If you use `pCastableFormats`, `ID3D12Device12` must also be available. */ HRESULT CreateResource3(const ALLOCATION_DESC* pAllocDesc, const D3D12_RESOURCE_DESC1* pResourceDesc, D3D12_BARRIER_LAYOUT InitialLayout, const D3D12_CLEAR_VALUE* pOptimizedClearValue, UINT32 NumCastableFormats, - DXGI_FORMAT* pCastableFormats, + const DXGI_FORMAT* pCastableFormats, Allocation** ppAllocation, REFIID riidResource, void** ppvResource); @@ -1291,11 +1354,12 @@ public: #ifdef __ID3D12Device10_INTERFACE_DEFINED__ /** \brief Similar to Allocator::CreateAliasingResource1, but there are initial layout instead of state and - castable formats list + castable formats list. It internally uses `ID3D12Device10::CreatePlacedResource2`. To work correctly, `ID3D12Device10` interface must be available in the current system. Otherwise, `E_NOINTERFACE` is returned. + If you use `pCastableFormats`, `ID3D12Device12` must also be available.
*/ HRESULT CreateAliasingResource2(Allocation* pAllocation, UINT64 AllocationLocalOffset, @@ -1303,7 +1367,7 @@ public: D3D12_BARRIER_LAYOUT InitialLayout, const D3D12_CLEAR_VALUE* pOptimizedClearValue, UINT32 NumCastableFormats, - DXGI_FORMAT* pCastableFormats, + const DXGI_FORMAT* pCastableFormats, REFIID riidResource, void** ppvResource); #endif // #ifdef __ID3D12Device10_INTERFACE_DEFINED__ @@ -1462,7 +1526,7 @@ enum VIRTUAL_ALLOCATION_FLAGS /// Parameters of created virtual allocation to be passed to VirtualBlock::Allocate(). struct VIRTUAL_ALLOCATION_DESC { - /// Flags. + /// Flags for the virtual allocation. VIRTUAL_ALLOCATION_FLAGS Flags; /** \brief Size of the allocation. @@ -1587,6 +1651,143 @@ Note you don't need to create D3D12MA::Allocator to use virtual blocks. */ D3D12MA_API HRESULT CreateVirtualBlock(const VIRTUAL_BLOCK_DESC* pDesc, VirtualBlock** ppVirtualBlock); +#ifndef D3D12MA_NO_HELPERS + +/** \brief Helper structure that helps with complete and concise initialization of the D3D12MA::ALLOCATION_DESC structure. + */ +struct CALLOCATION_DESC : public ALLOCATION_DESC +{ + /// Default constructor. Leaves the structure uninitialized. + CALLOCATION_DESC() = default; + /// Constructor initializing from the base D3D12MA::ALLOCATION_DESC structure. + explicit CALLOCATION_DESC(const ALLOCATION_DESC& o) noexcept + : ALLOCATION_DESC(o) + { + } + /// Constructor initializing description of an allocation to be created in a specific custom pool. + explicit CALLOCATION_DESC(Pool* customPool, + ALLOCATION_FLAGS flags = ALLOCATION_FLAG_NONE, + void* privateData = NULL) noexcept + { + Flags = flags; + HeapType = (D3D12_HEAP_TYPE)0; + ExtraHeapFlags = D3D12_HEAP_FLAG_NONE; + CustomPool = customPool; + pPrivateData = privateData; + } + /// Constructor initializing description of an allocation to be created in a default pool of a specific `D3D12_HEAP_TYPE`.
+ explicit CALLOCATION_DESC(D3D12_HEAP_TYPE heapType, + ALLOCATION_FLAGS flags = ALLOCATION_FLAG_NONE, + void* privateData = NULL, + D3D12_HEAP_FLAGS extraHeapFlags = D3D12MA_RECOMMENDED_HEAP_FLAGS) noexcept + { + Flags = flags; + HeapType = heapType; + ExtraHeapFlags = extraHeapFlags; + CustomPool = NULL; + pPrivateData = privateData; + } +}; + +/** \brief Helper structure that helps with complete and concise initialization of the D3D12MA::POOL_DESC structure. + */ +struct CPOOL_DESC : public POOL_DESC +{ + /// Default constructor. Leaves the structure uninitialized. + CPOOL_DESC() = default; + /// Constructor initializing from the base D3D12MA::POOL_DESC structure. + explicit CPOOL_DESC(const POOL_DESC& o) noexcept + : POOL_DESC(o) + { + } + /// Constructor initializing description of a custom pool created in one of the standard `D3D12_HEAP_TYPE`. + explicit CPOOL_DESC(D3D12_HEAP_TYPE heapType, + D3D12_HEAP_FLAGS heapFlags, + POOL_FLAGS flags = D3D12MA_RECOMMENDED_POOL_FLAGS, + UINT64 blockSize = 0, + UINT minBlockCount = 0, + UINT maxBlockCount = UINT_MAX, + D3D12_RESIDENCY_PRIORITY residencyPriority = D3D12_RESIDENCY_PRIORITY_NORMAL) noexcept + { + Flags = flags; + HeapProperties = {}; + HeapProperties.Type = heapType; + HeapFlags = heapFlags; + BlockSize = blockSize; + MinBlockCount = minBlockCount; + MaxBlockCount = maxBlockCount; + MinAllocationAlignment = 0; + pProtectedSession = NULL; + ResidencyPriority = residencyPriority; + } + /// Constructor initializing description of a custom pool created with custom `D3D12_HEAP_PROPERTIES`.
+ explicit CPOOL_DESC(const D3D12_HEAP_PROPERTIES heapProperties, + D3D12_HEAP_FLAGS heapFlags, + POOL_FLAGS flags = D3D12MA_RECOMMENDED_POOL_FLAGS, + UINT64 blockSize = 0, + UINT minBlockCount = 0, + UINT maxBlockCount = UINT_MAX, + D3D12_RESIDENCY_PRIORITY residencyPriority = D3D12_RESIDENCY_PRIORITY_NORMAL) noexcept + { + Flags = flags; + HeapProperties = heapProperties; + HeapFlags = heapFlags; + BlockSize = blockSize; + MinBlockCount = minBlockCount; + MaxBlockCount = maxBlockCount; + MinAllocationAlignment = 0; + pProtectedSession = NULL; + ResidencyPriority = residencyPriority; + } +}; + +/** \brief Helper structure that helps with complete and concise initialization of the D3D12MA::VIRTUAL_BLOCK_DESC structure. + */ +struct CVIRTUAL_BLOCK_DESC : public VIRTUAL_BLOCK_DESC +{ + /// Default constructor. Leaves the structure uninitialized. + CVIRTUAL_BLOCK_DESC() = default; + /// Constructor initializing from the base D3D12MA::VIRTUAL_BLOCK_DESC structure. + explicit CVIRTUAL_BLOCK_DESC(const VIRTUAL_BLOCK_DESC& o) noexcept + : VIRTUAL_BLOCK_DESC(o) + { + } + /// Constructor initializing description of a virtual block with given parameters. + explicit CVIRTUAL_BLOCK_DESC(UINT64 size, + VIRTUAL_BLOCK_FLAGS flags = VIRTUAL_BLOCK_FLAG_NONE, + const ALLOCATION_CALLBACKS* allocationCallbacks = NULL) noexcept + { + Flags = flags; + Size = size; + pAllocationCallbacks = allocationCallbacks; + } +}; + +/** \brief Helper structure that helps with complete and concise initialization of the D3D12MA::VIRTUAL_ALLOCATION_DESC structure. + */ +struct CVIRTUAL_ALLOCATION_DESC : public VIRTUAL_ALLOCATION_DESC +{ + /// Default constructor. Leaves the structure uninitialized. + CVIRTUAL_ALLOCATION_DESC() = default; + /// Constructor initializing from the base D3D12MA::VIRTUAL_ALLOCATION_DESC structure.
+ explicit CVIRTUAL_ALLOCATION_DESC(const VIRTUAL_ALLOCATION_DESC& o) noexcept + : VIRTUAL_ALLOCATION_DESC(o) + { + } + /// Constructor initializing description of a virtual allocation with given parameters. + explicit CVIRTUAL_ALLOCATION_DESC(UINT64 size, UINT64 alignment, + VIRTUAL_ALLOCATION_FLAGS flags = VIRTUAL_ALLOCATION_FLAG_NONE, + void* privateData = NULL) noexcept + { + Flags = flags; + Size = size; + Alignment = alignment; + pPrivateData = privateData; + } +}; + +#endif // #ifndef D3D12MA_NO_HELPERS + } // namespace D3D12MA /// \cond INTERNAL @@ -1599,17 +1800,232 @@ DEFINE_ENUM_FLAG_OPERATORS(D3D12MA::VIRTUAL_ALLOCATION_FLAGS); /// \endcond /** +\page faq Frequently asked questions + +What is %D3D12MA? + +D3D12 Memory Allocator (%D3D12MA) is a software library for developers who use the DirectX(R) 12 graphics API in their code. +It is written in C++. + +What is the license of %D3D12MA? + +%D3D12MA is licensed under MIT, which means it is open source and free software. + +What is the purpose of %D3D12MA? + +%D3D12MA helps with handling one aspect of DX12 usage, which is GPU memory management - +allocation of `ID3D12Heap` objects and creation of `ID3D12Resource` objects - buffers and textures. + +Do I need to use %D3D12MA? + +You don't need to, but it may be beneficial in many cases. +DX12 is a complex and low-level API, so libraries like this that abstract certain aspects of the API +and bring them to a higher level are useful. +When developing any non-trivial graphics application, you may benefit from using a memory allocator. +Using %D3D12MA can save time compared to implementing your own. + +In DX12 you can create each resource separately with its own implicit memory heap by calling `CreateCommittedResource`, +but this may not be the optimal solution. +For more information, see [Committed versus placed resources](@ref optimal_allocation_committed_vs_placed). + +When should I not use %D3D12MA?
+ +While %D3D12MA is useful for many applications that use the DX12 API, there are cases +when it may be a better choice not to use it. +For example, if the application is very simple, e.g. serving as a sample or a learning exercise +to help you understand or teach others the basics of DX12, +and it creates only a small number of buffers and textures, then including %D3D12MA may be an overkill. +Developing your own memory allocator may also be a good learning exercise. + +What are the benefits of using %D3D12MA? + +-# %D3D12MA allocates large blocks of `ID3D12Heap` memory and sub-allocates parts of them to create your placed resources. + Allocating a new block of GPU memory may be a time-consuming operation. + Sub-allocating parts of a memory block requires implementing an allocation algorithm, + which is a non-trivial task. + %D3D12MA does that, using an advanced and efficient algorithm that works well in various use cases. +-# %D3D12MA offers a simple API that allows creating placed buffers and textures within one function call + like D3D12MA::Allocator::CreateResource. + +The library is doing much more under the hood. +For example, it keeps buffers separate from textures when needed, respecting `D3D12_RESOURCE_HEAP_TIER`. +It also makes use of the "small texture alignment" automatically, so you don't need to think about it. + +Which version should I pick? + +You can just pick [the latest version from the "master" branch](https://github.com/GPUOpen-LibrariesAndSDKs/D3D12MemoryAllocator). +It is kept in a good shape most of the time, compiling and working correctly, +with no compatibility-breaking changes and no unfinished code. + +If you want an even more stable version, you can pick +[the latest official release](https://github.com/GPUOpen-LibrariesAndSDKs/D3D12MemoryAllocator/releases). 
+Current code from the master branch is occasionally tagged as a release, +with [CHANGELOG](https://github.com/GPUOpen-LibrariesAndSDKs/D3D12MemoryAllocator/blob/master/CHANGELOG.md) +carefully curated to enumerate all important changes since the previous version. + +The library uses [Semantic Versioning](https://semver.org/), +which means versions that only differ in the patch number are forward and backward compatible +(e.g., only fixing some bugs), while versions that differ in the minor number are backward compatible +(e.g., only adding new functions to the API, but not removing or changing existing ones). + +How to integrate it with my code? + +%D3D12MA is a small library fully implemented in a single pair of CPP + H files. + +You can pull the entire GitHub repository, e.g. using Git submodules. +The repository contains ancillary files like the Cmake script, Doxygen config file, +sample application, test suite, and others. +You can compile it as a library and link with your project. + +However, a simpler way is taking only files "include\D3D12MemAlloc.h", "src\D3D12MemAlloc.cpp" +and including them in your project. +These files contain all you need: a copyright notice, +declarations of the public library interface (API), its internal implementation, +and even the documentation in the form of Doxygen-style comments. + +I am not a fan of modern C++. Can I still use it? + +Very likely yes. +We acknowledge that many C++ developers, especially in the games industry, +do not appreciate all the latest features that the language has to offer. + +- %D3D12MA doesn't throw or catch any C++ exceptions. + It reports errors by returning a `HRESULT` value instead, just like DX12. + If you don't use exceptions in your project, your code is not exception-safe, + or even if you disable exception handling in the compiler options, you can still use %D3D12MA.
+- %D3D12MA doesn't use C++ run-time type information like `typeid` or `dynamic_cast`, + so if you disable RTTI in the compiler options, you can still use the library. +- %D3D12MA uses only a limited subset of standard C and C++ library. + It doesn't use STL containers like `std::vector`, `map`, or `string`, + either in the public interface or in the internal implementation. + It implements its own containers instead. +- If you don't use the default heap memory allocator through `malloc/free` or `new/delete` + but implement your own allocator instead, you can pass it to %D3D12MA as + D3D12MA::ALLOCATOR_DESC::pAllocationCallbacks + and the library will use your functions for every dynamic heap allocation made internally. + +Is it available for other programming languages? + +%D3D12MA is a C++ library in a style similar to DX12. +Bindings to other programming languages are out of scope of this project, +but they are welcome as external projects. +Some of them are listed in [README.md, "See also" section](https://github.com/GPUOpen-LibrariesAndSDKs/D3D12MemoryAllocator/?tab=readme-ov-file#see-also), +including binding to C. +Before using any of them, please check if they are still maintained and updated to use a recent version of %D3D12MA. + +What platforms does it support? + +%D3D12MA relies only on DX12 and some parts of the standard C and C++ library, +so it could support any platform where a C++ compiler and DX12 are available. +However, it is developed and tested only on Microsoft(R) Windows(R). + +Does it only work on AMD GPUs? + +No! While %D3D12MA is published by AMD, it works on any GPU that supports DX12, +whether a discrete PC graphics card or a processor integrated graphics. +It doesn't give AMD GPUs any advantage over any other GPUs. + +What DirectX 12 versions are supported?
+ +%D3D12MA is updated to support latest versions of DirectX 12, as available through recent retail versions of the +[DirectX 12 Agility SDK](https://devblogs.microsoft.com/directx/directx12agility/). +Support for new features added in the preview version of the Agility SDK is developed on separate branches until they are included in the retail version. + +The library also supports older versions down to the base DX12 shipping with Windows SDK. +Features added by later versions of the Agility SDK are automatically enabled conditionally using +`#ifdef` preprocessor macros depending on the version of the SDK that you compile your project with. + +Does it support other graphics APIs, like Vulkan(R)? + +No, but we offer an equivalent library for Vulkan: +[Vulkan Memory Allocator](https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator). +It uses the same core allocation algorithm. +It also shares many features with %D3D12MA, like the support for custom pools and virtual allocator. +However, it is not identical in terms of the features supported. +Its API also looks different, because while the interface of %D3D12MA is similar in style to DX12, +the interface of VMA is similar to Vulkan. + +Is the library lightweight? + +Yes. +%D3D12MA is implemented with high-performance and real-time applications like video games in mind. +The CPU performance overhead of using this library is low. +It uses a high-quality allocation algorithm called Two-Level Segregated Fit (TLSF), +which in most cases can find a free place for a new allocation in few steps. +The library also doesn't perform too many CPU heap allocations. +In many cases, the allocation happens with 0 new CPU heap allocations performed by the library. +Even the creation of a D3D12MA::Allocation object doesn't typically feature a CPU allocation, +because these objects are returned out of a dedicated memory pool.
+ +That said, %D3D12MA needs some extra memory and extra time +to maintain the metadata about the occupied and free regions of the memory blocks, +and the algorithms and data structures used must be generic enough to work well in most cases. + +Does it have a documentation? + +Yes! %D3D12MA comes with full documentation of all elements of the API (classes, structures, enums), +as well as many generic chapters that provide an introduction, +describe core concepts of the library, good practices, etc. +The entire documentation is written in form of code comments inside "D3D12MemAlloc.h", in Doxygen format. +You can access it in multiple ways: + +- Browsable online: https://gpuopen-librariesandsdks.github.io/D3D12MemoryAllocator/html/ +- Local HTML pages available after you clone the repository and open file "docs\html\index.html". +- You can rebuild the documentation in HTML or some other format from the source code using Doxygen. + Configuration file "Doxyfile" is part of the repository. +- Finally, you can just read the comments preceding declarations of any public classes and functions of the library. + +Is it a mature project? + +Yes! The library is in development since May 2019, has over 300 commits, and multiple contributors. +It is used by many software projects, including some large and popular ones like Qt or Godot Engine, +as well as some AAA games. + +How can I contribute to the project? + +If you have an idea for improvement or a feature request, +you can go to [the library repository](https://github.com/GPUOpen-LibrariesAndSDKs/D3D12MemoryAllocator) +and create an Issue ticket, describing your idea. +You can also implement it yourself by forking the repository, making changes to the code, +and creating a Pull request. + +If you want to ask a question, you can also create a ticket the same way. 
+Before doing this, please make sure you read the relevant part of the DX12 documentation and %D3D12MA documentation, +where you may find the answers to your question. + +If you want to report a suspected bug, you can also create a ticket the same way. +Before doing this, please put some effort into the investigation of whether the bug is really +in the library and not in your code or in the DX12 implementation (the GPU driver) on your platform: + +- Enable D3D Debug Layer and make sure it is free from any errors. +- Make sure `D3D12MA_ASSERT` is defined to an implementation that can report a failure and not ignore it. +- Try making your allocation using pure DX12 functions like `CreateCommittedResource()` rather than %D3D12MA and see if the bug persists. + +I found some compilation warnings. How can we fix them? + +Seeing compiler warnings may be annoying to some developers, +but it is a design decision to not fix all of them. +Due to the nature of the C++ language, certain preprocessor macros can make some variables unused, +function parameters unreferenced, or conditional expressions constant in some configurations. +The code of this library should not be bigger or more complicated just to silence these warnings. +It is recommended to disable such warnings instead. +For more information, see [Features not supported](@ref general_considerations_features_not_supported). + +However, if you observe a warning that is really dangerous, e.g., +about an implicit conversion from a larger to a smaller integer type, please report it and it will be fixed ASAP. + + \page quick_start Quick start \section quick_start_project_setup Project setup and initialization -This is a small, standalone C++ library. It consists of a pair of 2 files: +This is a small, standalone C++ library. It consists of 2 files: "D3D12MemAlloc.h" header file with public interface and "D3D12MemAlloc.cpp" with internal implementation. 
The only external dependencies are WinAPI, Direct3D 12, and parts of C/C++ standard library (but STL containers, exceptions, or RTTI are not used). -The library is developed and tested using Microsoft Visual Studio 2019, but it +The library is developed and tested using Microsoft Visual Studio 2022, but it should work with other compilers as well. It is designed for 64-bit code. To use the library in your project: @@ -1631,22 +2047,25 @@ D3D12MA::Allocator object. Please note that all symbols of the library are declared inside #D3D12MA namespace. \code -IDXGIAdapter* adapter = (...) -ID3D12Device* device = (...) +IDXGIAdapter* adapter = ... +ID3D12Device* device = ... D3D12MA::ALLOCATOR_DESC allocatorDesc = {}; allocatorDesc.pDevice = device; allocatorDesc.pAdapter = adapter; -// These flags are optional but recommended. -allocatorDesc.Flags = D3D12MA::ALLOCATOR_FLAG_MSAA_TEXTURES_ALWAYS_COMMITTED | - D3D12MA::ALLOCATOR_FLAG_DEFAULT_POOLS_NOT_ZEROED; +allocatorDesc.Flags = D3D12MA_RECOMMENDED_ALLOCATOR_FLAGS; D3D12MA::Allocator* allocator; HRESULT hr = D3D12MA::CreateAllocator(&allocatorDesc, &allocator); +// Check hr... \endcode (5.) Right before destroying the D3D12 device, destroy the allocator object. +\code +allocator->Release(); +\endcode + Objects of this library must be destroyed by calling `Release` method. They are somewhat compatible with COM: they implement `IUnknown` interface with its virtual methods: `AddRef`, `Release`, `QueryInterface`, and they are reference-counted internally. @@ -1654,10 +2073,6 @@ You can use smart pointers designed for COM with objects of this library - e.g. The reference counter is thread-safe. `QueryInterface` method supports only `IUnknown`, as classes of this library don't define their own GUIDs. -\code -allocator->Release(); -\endcode - \section quick_start_creating_resources Creating resources @@ -1699,8 +2114,10 @@ HRESULT hr = allocator->CreateResource( NULL, &allocation, IID_NULL, NULL); +// Check hr... 
-// Use allocation->GetResource()... +ID3D12Resource* res = allocation->GetResource(); +// Use res... \endcode You need to release the allocation object when no longer needed. @@ -1720,10 +2137,10 @@ parts of them are occupied and which parts are free to be used for new resources It is important to remember that resources created as placed don't have their memory initialized to zeros, but may contain garbage data, so they need to be fully initialized before usage, e.g. using Clear (`ClearRenderTargetView`), Discard (`DiscardResource`), -or copy (`CopyResource`). +or Copy (`CopyResource`). The library also automatically handles resource heap tier. -When `D3D12_FEATURE_DATA_D3D12_OPTIONS::ResourceHeapTier` equals `D3D12_RESOURCE_HEAP_TIER_1`, +When `D3D12_FEATURE_DATA_D3D12_OPTIONS::ResourceHeapTier == D3D12_RESOURCE_HEAP_TIER_1`, resources of 3 types: buffers, textures that are render targets or depth-stencil, and other textures must be kept in separate heaps. When `D3D12_RESOURCE_HEAP_TIER_2`, they can be kept together. By using this library, you don't need to handle this @@ -1746,7 +2163,7 @@ When only D3D12MA::Allocation object is obtained from a function call like D3D12 it remembers the `ID3D12Resource` that was created with it and holds a reference to it. The resource can be obtained by calling `allocation->GetResource()`, which doesn't increment the resource reference counter. -Calling `allocation->Release()` will decrease the resource reference counter, which is = 1 in this case, +Calling `allocation->Release()` will decrease the resource reference counter, which is 1 in this case, so the resource will be released. Second option is to retrieve a pointer to the resource along with D3D12MA::Allocation. 
@@ -1812,7 +2229,7 @@ Example for buffer created and filled in `UPLOAD` heap type: \code const UINT64 bufSize = 65536; -const float* bufData = (...); +const float* bufData = ...; D3D12_RESOURCE_DESC resourceDesc = {}; resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; @@ -1840,8 +2257,9 @@ HRESULT hr = allocator->CreateResource( &allocation, IID_PPV_ARGS(&resource)); +D3D12_RANGE emptyRange = {0, 0}; void* mappedPtr; -hr = resource->Map(0, NULL, &mappedPtr); +hr = resource->Map(0, &emptyRange, &mappedPtr); memcpy(mappedPtr, bufData, bufSize); @@ -1849,6 +2267,68 @@ resource->Unmap(0, NULL); \endcode +\section quick_start_helper_structures Helper structures + +DirectX 12 Agility SDK offers a library of helpers in files "build\native\include\d3dx12\*.h". +They include structures that help with complete and concise initialization of the core D3D12 `*_DESC` structures +by using some basic C++ features (constructors, static methods, default parameters). +They inherit from these structures, so they support implicit casting to them. +For example, structure `CD3DX12_RESOURCE_DESC` can be used to conveniently fill in structure `D3D12_RESOURCE_DESC`. + +Similarly, this library provides a set of helper structures that aid in initialization of some of the `*_DESC` structures defined in the library. +These are: + +- D3D12MA::CALLOCATION_DESC, which inherits from D3D12MA::ALLOCATION_DESC. +- D3D12MA::CPOOL_DESC, which inherits from D3D12MA::POOL_DESC. +- D3D12MA::CVIRTUAL_BLOCK_DESC, which inherits from D3D12MA::VIRTUAL_BLOCK_DESC. +- D3D12MA::CVIRTUAL_ALLOCATION_DESC, which inherits from D3D12MA::VIRTUAL_ALLOCATION_DESC. 
+ +For example, when you want to create a buffer in the `UPLOAD` heap using minimal allocation time, you can use base structures: + +\code +D3D12MA::ALLOCATION_DESC allocDesc; +allocDesc.Flags = D3D12MA::ALLOCATION_FLAG_STRATEGY_MIN_TIME; +allocDesc.HeapType = D3D12_HEAP_TYPE_UPLOAD; +allocDesc.ExtraHeapFlags = D3D12_HEAP_FLAG_NONE; +allocDesc.CustomPool = NULL; +allocDesc.pPrivateData = NULL; + +D3D12_RESOURCE_DESC resDesc; +resDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; +resDesc.Alignment = 0; +resDesc.Width = myBufSize; +resDesc.Height = 1; +resDesc.DepthOrArraySize = 1; +resDesc.MipLevels = 1; +resDesc.Format = DXGI_FORMAT_UNKNOWN; +resDesc.SampleDesc.Count = 1; +resDesc.SampleDesc.Quality = 0; +resDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; +resDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + +D3D12MA::Allocation* alloc; +ID3D12Resource* res; +HRESULT hr = allocator->CreateResource(&allocDesc, &resDesc, + D3D12_RESOURCE_STATE_COMMON, NULL, &alloc, IID_PPV_ARGS(&res)); +// Check hr... +\endcode + +Or you can use helper structs from both D3DX12 library and this library to make the code shorter: + +\code +D3D12MA::CALLOCATION_DESC allocDesc = D3D12MA::CALLOCATION_DESC{ + D3D12_HEAP_TYPE_UPLOAD, + D3D12MA::ALLOCATION_FLAG_STRATEGY_MIN_TIME }; + +CD3DX12_RESOURCE_DESC resDesc = CD3DX12_RESOURCE_DESC::Buffer(myBufSize); + +D3D12MA::Allocation* alloc; +ID3D12Resource* res; +HRESULT hr = allocator->CreateResource(&allocDesc, &resDesc, + D3D12_RESOURCE_STATE_COMMON, NULL, &alloc, IID_PPV_ARGS(&res)); +// Check hr... +\endcode + +\page custom_pools Custom memory pools A "pool" is a collection of memory blocks that share certain properties. @@ -1864,9 +2344,8 @@ to obtain object D3D12MA::Pool. Example: \code POOL_DESC poolDesc = {}; poolDesc.HeapProperties.Type = D3D12_HEAP_TYPE_DEFAULT; -// These flags are optional but recommended.
-poolDesc.Flags = D3D12MA::POOL_FLAG_MSAA_TEXTURES_ALWAYS_COMMITTED; -poolDesc.HeapFlags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; +poolDesc.Flags = D3D12MA_RECOMMENDED_POOL_FLAGS; +poolDesc.HeapFlags = D3D12MA_RECOMMENDED_HEAP_FLAGS | D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; Pool* pool; HRESULT hr = allocator->CreatePool(&poolDesc, &pool); @@ -1933,6 +2412,458 @@ in some cases, e.g. to have separate memory usage statistics for some group of r extended allocation parameters, like custom `D3D12_HEAP_PROPERTIES`, which are available only in custom pools. +\page optimal_allocation Optimal resource allocation + +This library tries to automatically make optimal choices for the resources you create, +so you don't need to care about them. +There are some advanced features of Direct3D 12 that you may use to optimize your memory management. +There are also some settings in %D3D12MA that you may change to alter its default behavior. +This page provides miscellaneous advice about features of D3D12 and %D3D12MA that are +non-essential, but may improve the stability or performance of your app. + +\section optimal_allocation_avoiding_running_out_of_memory Avoiding running out of memory + +When trying to allocate more memory than available in the current heap +(e.g., video memory on the graphics card, system memory), one of few bad things can happen: + +- The allocation (resource creation) function call can fail with `HRESULT` value other than `S_OK`. +- The allocation may succeed, but take long time (even a significant fraction of a second). +- Some resources are automatically demoted from video memory to system memory, degrading the app performance. +- Even a crash of the entire graphics driver can happen, resulting in the D3D12 "device removal", which is usually + catastrophic for the application. + +Unfortunately, there is no way to be 100% protected against memory overcommitment. +The best approach is to avoid allocating too much memory. 
+ +The full capacity of the memory can be queried using function D3D12MA::Allocator::GetMemoryCapacity. +However, it is not recommended, because the amount of memory available to the application +is typically smaller than the full capacity, as some portion of it is reserved by the operating system +or used by other processes. + +Because of this, the recommended way of fetching the **memory budget** available to the application +is using function D3D12MA::Allocator::GetBudget. +Preventing value D3D12MA::Budget::UsageBytes from exceeding the D3D12MA::Budget::BudgetBytes +is probably the best we can do in trying to avoid the consequences of over-commitment. +For more information, see also: \subpage statistics. + +Example: + +\code +D3D12MA::Budget videoMemBudget = {}; +allocator->GetBudget(&videoMemBudget, NULL); + +UINT64 freeBytes = videoMemBudget.BudgetBytes - videoMemBudget.UsageBytes; +gameStreamingSystem->SetAvailableFreeMemory(freeBytes); +\endcode + +\par Implementation detail +DXGI interface offers function `IDXGIAdapter3::QueryVideoMemoryInfo` that queries the current memory usage and budget. +This library automatically makes use of it when available (when you use recent enough version of the DirectX SDK). +If not, it falls back to estimating the usage and budget based on the total amount of the allocated memory +and 80% of the full memory capacity, respectively. + +\par Implementation detail +Allocating large heaps and creating placed resources in them is one of the main features of this library. +However, if allocating new such block would exceed the budget, it will automatically prefer creating the resource as committed +to have exactly the right size, which can lower the chance of getting into trouble in case of over-commitment. + +When creating non-essential resources, you can use D3D12MA::ALLOCATION_FLAG_WITHIN_BUDGET. 
+Then, in case the allocation would exceed the budget, the library will return failure from the function +without attempting to allocate the actual D3D12 memory. + +It may also be a good idea to support failed resource creation. +For non-essential resources, when function D3D12MA::Allocator::CreateResource fails with a result other than `S_OK`, +it is worth implementing some way of recovery instead of terminating or crashing the entire app. + +\section optimal_allocation_allocation_Performance Allocation performance + +Creating D3D12 resources (buffers and textures) can be a time-consuming operation. +The duration can be unpredictable, spanning from a small fraction of a millisecond to a significant fraction of a second. +Thus, it is recommended to allocate all the memory and create all the resources needed upfront +rather than doing it during application runtime. +For example, a video game can try to create its resources on startup or when loading a new level. +Of course, it is not always possible. +For example, open-world games may require loading and unloading some graphical assets in the background (often called "streaming"). + +Creating and releasing D3D12 resources **on a separate thread** in the background may help. +Both `ID3D12Device` and D3D12MA::Allocator objects are thread-safe, synchronized internally. +However, cases were observed where resource creation calls like `ID3D12Device::CreateCommittedResource` +were blocking other D3D12 calls like `ExecuteCommandLists` or `Present` +somewhere inside the graphics driver, so hitches can happen even when using multithreading. + +The most expensive part is typically **the allocation of a new D3D12 memory heap**. +This library tackles this problem by automatically allocating large heaps (64 MB by default) +and creating resources as placed inside of them.
+When a new requested resource can be placed in a free space of an existing heap and doesn't require allocating a new heap, +this operation is typically much faster, as it only requires creating a new `ID3D12Resource` object +and not allocating new memory. +This is the main benefit of using %D3D12MA compared to the naive approach of using Direct3D 12 directly +and creating each resource as committed with `CreateCommittedResource`, which would result in a separate allocation of an implicit heap every time. + +When **a large number of small buffers** needs to be created, the overhead of creating even just separate `ID3D12Resource` objects can be significant. +It can be avoided by creating one or few larger buffers and manually sub-allocating parts of them for specific needs. +This library can also help with it. See section "Sub-allocating buffers" below. + +\par Implementation detail +The CPU performance overhead of using this library is low. +It uses a high-quality allocation algorithm called Two-Level Segregated Fit (TLSF), +which in most cases can find a free place for a new allocation in few steps. +The library also doesn't perform too many CPU heap allocations. +In many cases, the allocation happens with 0 new CPU heap allocations performed by the library. +Even the creation of a D3D12MA::Allocation object itself doesn't typically feature a CPU allocation, +because these objects are returned out of a dedicated memory pool. + +Another reason for the slowness of D3D12 memory allocation is the guarantee that the **newly allocated memory is filled with zeros**. +When creating and destroying resources placed in an existing heap, this overhead is not present, +and the memory is not zeroed - it may contain random data left by the resource previously allocated in that place. +In recent versions of the DirectX 12 SDK, clearing the memory of the newly created D3D12 heaps can also be disabled for the improved performance.
+%D3D12MA can use this feature when: + +- D3D12MA::ALLOCATOR_FLAG_DEFAULT_POOLS_NOT_ZEROED is used during the creation of the main allocator object. +- `D3D12_HEAP_FLAG_CREATE_NOT_ZEROED` is passed to D3D12MA::POOL_DESC::HeapFlags during the creation of a custom pool. + +It is recommended to always use these flags. +The downside is that when the memory is not filled with zeros, while you don't properly clear it or otherwise initialize its content before use +(which is required by D3D12), you may observe incorrect behavior. +This problem mostly affects render-target and depth-stencil textures. + +When an allocation needs to be made in a performance-critical code, you can use D3D12MA::ALLOCATION_FLAG_STRATEGY_MIN_TIME. +It influences multiple heuristics inside the library to prefer faster allocation +at the expense of possibly less optimal placement in the memory. + +If the resource to be created is non-essential, while the performance is paramount, +you can also use D3D12MA::ALLOCATION_FLAG_NEVER_ALLOCATE. +It will create the resource only if it can be placed inside an existing memory heap +and return failure from the function if a new heap would need to be allocated, +which should guarantee good performance of such function call. + +\section optimal_allocation_suballocating_buffers Sub-allocating buffers + +When a large number of small buffers needs to be created, the overhead of creating separate `ID3D12Resource` objects can be significant. +It can also cause a significant waste of memory, as placed buffers need to be aligned to `D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT` = 64 KB by default. +These problems can be avoided by creating one or few larger buffers and manually sub-allocating parts of them for specific needs. + +It requires implementing a custom allocator for the data inside the buffer and using offsets to individual regions. +When all the regions can be allocated linearly and freed all at once, implementing such allocator is trivial.
+When every region has the same size, implementing an allocator is also quite simple when using a "free list" algorithm. +However, when regions can have different sizes and can be allocated and freed in random order, +it requires a full allocation algorithm. +%D3D12MA can help with it by exposing its core allocation algorithm for custom usages. +For more details and example code, see chapter: \subpage virtual_allocator. +It can be used for all the cases mentioned above without too much performance overhead, +because the D3D12MA::VirtualAllocation object is just a lightweight handle. + +When sub-allocating a buffer, you need to remember to explicitly request proper alignment required for each region. +For example, data used as a constant buffer must be aligned to `D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT` = 256 B. + +\section optimal_allocation_residency_priority Residency priority + +When too much video memory is allocated, one of the things that can happen is the system +demoting some heaps to the system memory. +Moving data between memory pools or reaching out directly to the system memory through PCI Express bus can have large performance overhead, +which can slow down the application, or even make the game unplayable any more. +Unfortunately, it is not possible to fully control or prevent this demotion. +Best thing to do is avoiding memory over-commitment. +For more information, see section "Avoiding running out of memory" above. + +Recent versions of DirectX 12 SDK offer function `ID3D12Device1::SetResidencyPriority` that sets a hint +about the priority of a resource - how important it is to stay resident in the video memory. +Setting the priority happens at the level of an entire memory heap. +%D3D12MA offers an interface to set this priority in form of D3D12MA::POOL_DESC::ResidencyPriority parameter. +It affects all allocations made out of the custom pool created with it, both placed inside large heaps +and created as committed. 
+ +It is recommended to create a custom pool for the purpose of using high residency priority +of all resources that are critical for the performance, especially those that are written by the GPU, +like render-target, depth-stencil textures, UAV textures and buffers. +It is also worth creating them as committed, so that each one will have its own implicit heap. +This can minimize the chance that an entire large heap is demoted to system memory, degrading performance +of all the resources placed in it. + +Example: + +\code +D3D12MA::CPOOL_DESC poolDesc = D3D12MA::CPOOL_DESC{ + D3D12_HEAP_TYPE_DEFAULT, + D3D12MA_RECOMMENDED_HEAP_FLAGS | D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS }; +poolDesc.ResidencyPriority = D3D12_RESIDENCY_PRIORITY_HIGH; // !!! + +D3D12MA::Pool* pool; +HRESULT hr = allocator->CreatePool(&poolDesc, &pool); +// Check hr... + +D3D12MA::CALLOCATION_DESC allocDesc = D3D12MA::CALLOCATION_DESC{ + pool, + ALLOCATION_FLAG_COMMITTED }; // !!! + +CD3DX12_RESOURCE_DESC resDesc = CD3DX12_RESOURCE_DESC::Buffer( + 1048576); // Requested buffer size. + +D3D12MA::Allocation* alloc; +hr = allocator->CreateResource(&allocDesc, &resDesc, D3D12_RESOURCE_STATE_COMMON, + NULL, &alloc, IID_NULL, NULL); +// Check hr... +\endcode + +When you have a committed allocation created, you can also set the residency priority of its resource +using the D3D12 function: + +\code +D3D12MA::Allocation* committedAlloc = ... +ID3D12Pageable* res = committedAlloc->GetResource(); +D3D12_RESIDENCY_PRIORITY priority = D3D12_RESIDENCY_PRIORITY_HIGH; +device1->SetResidencyPriority(1, &res, &priority); +\endcode + +Note this is not the same as explicit eviction controlled using `ID3D12Device::Evict` and `MakeResident` functions. +Resources evicted explicitly are illegal to access until they are made resident again, +while the demotion described here happens automatically and only slows down the execution. 
+ +\section optimal_allocation_gpu_upload_heap GPU upload heap + +Direct3D 12 offers a fixed set of memory heap types: + +- `D3D12_HEAP_TYPE_DEFAULT`: Represents the video memory. It is available and fast to access for the GPU. + It should be used for all resources that are written by the GPU (like render-target and depth-stencil textures, + UAV) and resources that are frequently read by the GPU (like textures intended for sampling, + vertex, index, and constant buffers). +- `D3D12_HEAP_TYPE_UPLOAD`: Represents the system memory that is uncached and write-combined. + It can be mapped and accessed by the CPU code using a pointer. + It supports only buffers, not textures. + It is intended for "staging buffers" that are filled by the CPU code and then used as a source of copy operations to the `DEFAULT` heap. + It can also be accessed directly by the GPU - shaders can read from buffers created in this memory. +- `D3D12_HEAP_TYPE_READBACK`: Represents the system memory that is cached. + It is intended for buffers used as a destination of copy operations from the `DEFAULT` heap. + +Note that in systems with a discrete graphics card, access to system memory is fast from the CPU code +(like the C++ code mapping D3D12 buffers and accessing them through a pointer), +while access to the video memory is fast from the GPU code (like shaders reading and writing buffers and textures). +Any copy operation or direct access between these memory heap types happens through PCI Express bus, which can be relatively slow. + +Modern systems offer a feature called **Resizable BAR (ReBAR)** that gives the CPU direct access to the full video memory. +To be available, this feature needs to be supported by the whole hardware-software environment, including: + +- Supporting motherboard and its UEFI. +- Supporting graphics card and its graphics driver. +- Supporting operating system. +- The feature needs to be enabled in the UEFI settings. 
It is typically called "Above 4G Decoding" and "Resizable Bar". + +Recent versions of DirectX 12 SDK give access to this feature in form of a new, 4th memory pool: `D3D12_HEAP_TYPE_GPU_UPLOAD`. +Resources created in it behave logically similar to the `D3D12_HEAP_TYPE_UPLOAD` heap: + +- They support mapping and direct access from the CPU code through a pointer. +- The mapped memory is uncached and write-combined, so it should be only written sequentially + (e.g., number-by-number or using `memcpy`). It shouldn't be accessed randomly or read, + because it is extremely slow for uncached memory. +- Only buffers are supported. +- Those buffers can be used as a source of copy operations or directly accessed by the GPU. + +The main difference is that resources created in the new `D3D12_HEAP_TYPE_GPU_UPLOAD` are placed in the video memory, +while resources created in the old `D3D12_HEAP_TYPE_UPLOAD` are placed in the system memory. +This implies which budgets are consumed by new resources allocated in those heaps. +This also implies which operations involve transferring data through the PCI Express bus. + +- As `D3D12_HEAP_TYPE_UPLOAD` uses the system memory, writes from the CPU code through a mapped pointer are faster, + while copies or direct access from the GPU are slower because they need to go through PCIe. +- As the new `D3D12_HEAP_TYPE_GPU_UPLOAD` uses the video memory, + copies or direct access from the GPU are faster, + while writes from the CPU code through a mapped pointer can be slower, because they need to go through PCIe. + For maximum performance of copy operations from this heap, a graphics or compute queue should be used, not a copy queue. + +GPU Upload Heap can be used for performance optimization of some resources that need to be written by the CPU and read by the GPU. +It can be beneficial especially for resources that need to change frequently (often called "dynamic"). 
+ +%D3D12MA supports GPU upload heap when recent enough version of DirectX 12 SDK is used and when the current system supports it. +The support can be queried using function D3D12MA::Allocator::IsGPUUploadHeapSupported(). +When it returns `TRUE`, you can create resources using `D3D12_HEAP_TYPE_GPU_UPLOAD`. +You can also just try creating such resource. Example: + +\code + D3D12MA::CALLOCATION_DESC allocDesc = D3D12MA::CALLOCATION_DESC{ + D3D12_HEAP_TYPE_GPU_UPLOAD }; // !!! + + CD3DX12_RESOURCE_DESC resDesc = CD3DX12_RESOURCE_DESC::Buffer( + 1048576); // Requested buffer size. + + D3D12MA::Allocation* alloc; + ID3D12Resource* res; + hr = allocator->CreateResource(&allocDesc, &resDesc, D3D12_RESOURCE_STATE_COMMON, + NULL, &alloc, IID_PPV_ARGS(&res)); + if(SUCCEEDED(hr)) + { + // Fast path for data upload. + + D3D12_RANGE emptyRange = {0, 0}; + void* mappedPtr = NULL; + hr = res->Map(0, &emptyRange, &mappedPtr); + memcpy(mappedPtr, srcData, 1048576); + res->Unmap(0, NULL); // Optional. You can leave it persistently mapped. + + D3D12_GPU_VIRTUAL_ADDRESS gpuva = res->GetGPUVirtualAddress(); + // Use gpuva to access the buffer on the GPU... + } + else if(hr == E_NOTIMPL) + { + // GPU Upload Heap not supported in this system. + // Fall back to creating a staging buffer in UPLOAD and another copy in DEFAULT. + + allocDesc.HeapType = D3D12_HEAP_TYPE_UPLOAD; + // ... + } + else + // Some other error code e.g., out of memory... +\endcode + +\section optimal_allocation_committed_vs_placed Committed versus placed resources + +When using D3D12 API directly, there are 3 ways of creating resources: + +1. **Committed**, using function `ID3D12Device::CreateCommittedResource`. + It creates the resource with its own memory heap, which is called an "implicit heap" and cannot be accessed directly. +2. **Placed**, using function `ID3D12Device::CreatePlacedResource`. + A `ID3D12Heap` needs to be created beforehand using `ID3D12Device::CreateHeap`. 
+ Then, the resource can be created as placed inside the heap at a specific offset. +3. **Reserved**, using function `ID3D12Device::CreateReservedResource`. + This library doesn't support them directly. + +A naive solution would be to create all the resources as committed. +It works, because in D3D12 there is no strict limit on the number of resources or heaps that can be created. +However, there are certain advantages and disadvantages of using committed versus placed resources: + +- The biggest advantage of using placed resources is the allocation performance. + Once a heap is allocated, creating and releasing resources placed in it can be much faster than + creating them as committed, which would involve allocating a new heap for each resource. + - Using a large number of small heaps can put an extra burden on the software stack, + including D3D12 runtime, graphics driver, operating system, and developer tools like Radeon Memory Visualizer (RMV). +- The advantage of committed resources is that their implicit heaps have exactly the right size, + while creating resources as placed inside larger heaps can lead to some memory wasted because: + - Some part of the allocated heap memory is unused. + - After placed resources of various sizes are created and released in random order, + gaps between remaining resources can be too small to fit new allocations. + This is also known as "fragmentation". A solution to this problem is implementing \subpage defragmentation. + - The alignment required by placed resources can leave gaps between them, while the driver can pack individual committed resources better. + For details, see section "Resource alignment" below. +- The advantage of committed resources is that they are always created with a new heap, which is initialized with zeros. + When a resource is created as placed, the memory may contain random data left by the resource previously allocated in that place.
+ When the memory is not filled with zeros and you don't properly clear it or otherwise initialize its content before use + (which is required by D3D12), you may observe incorrect behavior. + On the other hand, using committed resources and having every new resource filled with zeros can leave this kind of bug undetected. +- Manual eviction with `ID3D12Device::Evict` and `MakeResident` functions works at the level of the entire heap, + and so does `ID3D12Device1::SetResidencyPriority`, so creating resources as committed allows more fine-grained control + over the eviction and residency priority of individual resources. +- The advantage of placed resources is that they can be created in a region of a heap overlapping with some other resources. + This approach is commonly called "aliasing". + It can save memory, but it needs careful control over the resources that overlap in memory + to make sure they are not used at the same time, there is an aliasing barrier issued between their usage, + and the resource used after aliasing is correctly cleared every time. + Committed resources don't offer this possibility, because every committed resource has its own exclusive memory heap. + For more information, see chapter \subpage resource_aliasing. + +When creating resources with the help of %D3D12MA using function D3D12MA::Allocator::CreateResource, +you typically don't need to care about all this. +The library automatically makes the choice of creating the new resource as committed or placed. +However, in cases when you need the information or the control over this choice between committed and placed, +the library offers facilities to do that, described below. + +\par Implementation detail +%D3D12MA creates large heaps (default size is 64 MB) and creates resources as placed in them.
+However, it may decide that it is required or preferred to create the specific resource as committed for many reasons, including: +- When the resource is large (larger than half of the default heap size). +- When allocating an entire new heap would exceed the current budget or when we are already over the budget. +- When the resource is a very small buffer. Placed buffers need to be aligned to 64 KB by default, + while creating them as committed can allow the driver to pack them better. + This heuristic can be disabled for an individual resource by using D3D12MA::ALLOCATION_FLAG_STRATEGY_MIN_TIME + and for the entire allocator by using D3D12MA::ALLOCATOR_FLAG_DONT_PREFER_SMALL_BUFFERS_COMMITTED. +- When the resource uses non-standard flags specified via D3D12MA::ALLOCATION_DESC::ExtraHeapFlags. + +You can check whether an allocation was created as a committed resource by checking if its heap is null. +Committed resources have an implicit heap that is not directly accessible. + +\code +bool isCommitted = allocation->GetHeap() == NULL; +\endcode + +You can request a new resource to be created as committed by using D3D12MA::ALLOCATION_FLAG_COMMITTED. +Note that committed resources can also be created out of \subpage custom_pools. + +You can also request all resources to be created as committed globally for the entire allocator +by using D3D12MA::ALLOCATOR_FLAG_ALWAYS_COMMITTED. +However, this contradicts the main purpose of using this library. +It can also prevent certain other features of the library from being used. +This flag should be used only for debugging purposes. + +You can create a custom pool with an explicit block size by specifying a non-zero D3D12MA::POOL_DESC::BlockSize. +When doing this, all **resources created in such a pool are placed** in those blocks (heaps) and never created as committed.
+Example: + +\code +D3D12MA::CPOOL_DESC poolDesc = D3D12MA::CPOOL_DESC{ + D3D12_HEAP_TYPE_DEFAULT, + D3D12MA_RECOMMENDED_HEAP_FLAGS | D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS }; +poolDesc.BlockSize = 100llu * 1024 * 1024; // 100 MB. Explicit BlockSize guarantees placed. + +D3D12MA::Pool* pool; +HRESULT hr = allocator->CreatePool(&poolDesc, &pool); +// Check hr... + +D3D12MA::CALLOCATION_DESC allocDesc = D3D12MA::CALLOCATION_DESC{ pool }; + +CD3DX12_RESOURCE_DESC resDesc = CD3DX12_RESOURCE_DESC::Buffer( + 90llu * 1024 * 1024); // 90 MB + +D3D12MA::Allocation* alloc; +ID3D12Resource* res; +hr = allocator->CreateResource(&allocDesc, &resDesc, D3D12_RESOURCE_STATE_COMMON, + NULL, &alloc, IID_PPV_ARGS(&res)); +// Check hr... + +// Even a large buffer like this, filling 90% of the block, was created as placed! +assert(alloc->GetHeap() != NULL); +\endcode + +You can request a new resource to be created as placed by using D3D12MA::ALLOCATION_FLAG_CAN_ALIAS. +This is required especially if you plan to create another resource in the same region of memory, aliasing with your resource - +hence the name of this flag. + +Note that D3D12MA::ALLOCATION_FLAG_CAN_ALIAS can even be combined with D3D12MA::ALLOCATION_FLAG_COMMITTED. +In this case, the resource is not created as committed, but it is also not placed as part of a larger heap. +What happens instead is that a new heap is created with the exact size required for the resource, +and the resource is created in it, placed at offset 0. + +\section optimal_allocation_resource_alignment Resource alignment + +Certain types of resources require certain alignment in memory. +An alignment is a requirement for the address or offset to the beginning of the resource to be a multiple of some value, which is always a power of 2. +For committed resources, the problem is non-existent, because committed resources have their own implicit heaps +where they are created at offset 0, which meets any alignment requirement.
+For placed resources, %D3D12MA takes care of the alignment automatically. + +\par Implementation detail +Default alignment required for MSAA textures is `D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT` = 4 MB. +Default alignment required for buffers and other textures is `D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT` = 64 KB. + +Because the alignment required for buffers is 64 KB, **small buffers** can waste a lot of memory in between when created as placed. +When such small buffers are created as committed, some graphics drivers are able to pack them better. +%D3D12MA automatically takes advantage of this by preferring to create small buffers as committed. +This heuristic is enabled by default. It is also a tradeoff - it can make the allocation of these buffers slower. +It can be disabled for an individual resource by using D3D12MA::ALLOCATION_FLAG_STRATEGY_MIN_TIME +and for the entire allocator by using D3D12MA::ALLOCATOR_FLAG_DONT_PREFER_SMALL_BUFFERS_COMMITTED. + +For certain textures that meet a complex set of requirements, special **"small alignment"** can be applied. +Details can be found in Microsoft documentation of the `D3D12_RESOURCE_DESC` structure. +For MSAA textures, the small alignment is `D3D12_SMALL_MSAA_RESOURCE_PLACEMENT_ALIGNMENT` = 64 KB. +For other textures, the small alignment is `D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT` = 4 KB. +%D3D12MA uses this feature automatically. +Detailed behavior can be disabled or controlled by predefining macro #D3D12MA_USE_SMALL_RESOURCE_PLACEMENT_ALIGNMENT. + +D3D12 also has a concept of **alignment of the entire heap**, passed through `D3D12_HEAP_DESC::Alignment`. +This library automatically sets the alignment as small as possible. +Unfortunately, any heap that has a chance of hosting an MSAA texture needs to have the alignment set to 4 MB.
+This problem can be overcome by passing D3D12MA::ALLOCATOR_FLAG_MSAA_TEXTURES_ALWAYS_COMMITTED on the creation of the main allocator object +and D3D12MA::POOL_FLAG_MSAA_TEXTURES_ALWAYS_COMMITTED on the creation of any custom heap that supports textures, not only buffers. +With those flags, the alignment of the heaps created by %D3D12MA can be lower, but any MSAA textures are created as committed. +You should always use these flags in your code unless you really need to create some MSAA textures as placed. + \page defragmentation Defragmentation Interleaved allocations and deallocations of many objects of varying size can @@ -2351,6 +3282,7 @@ As an extra feature, the core allocation algorithm of the library is exposed thr It doesn't allocate any real GPU memory. It just keeps track of used and free regions of a "virtual block". You can use it to allocate your own memory or other objects, even completely unrelated to D3D12. A common use case is sub-allocation of pieces of one large GPU buffer. +Another suggested use case is allocating descriptors in a `ID3D12DescriptorHeap`. \section virtual_allocator_creating_virtual_block Creating virtual block @@ -2535,7 +3467,7 @@ void CustomFree(void* pMemory, void* pPrivateData) _aligned_free(pMemory); } -(...) +... D3D12MA::ALLOCATION_CALLBACKS allocationCallbacks = {}; allocationCallbacks.pAllocate = &CustomAllocate; @@ -2544,10 +3476,12 @@ allocationCallbacks.pFree = &CustomFree; D3D12MA::ALLOCATOR_DESC allocatorDesc = {}; allocatorDesc.pDevice = device; allocatorDesc.pAdapter = adapter; +allocatorDesc.Flags = D3D12MA_RECOMMENDED_ALLOCATOR_FLAGS; allocatorDesc.pAllocationCallbacks = &allocationCallbacks; D3D12MA::Allocator* allocator; HRESULT hr = D3D12MA::CreateAllocator(&allocatorDesc, &allocator); +// Check hr... 
\endcode diff --git a/3rdparty/d3d12memalloc/src/D3D12MemAlloc.cpp b/3rdparty/d3d12memalloc/src/D3D12MemAlloc.cpp index f13f568496..5de26f290f 100644 --- a/3rdparty/d3d12memalloc/src/D3D12MemAlloc.cpp +++ b/3rdparty/d3d12memalloc/src/D3D12MemAlloc.cpp @@ -1,5 +1,5 @@ // -// Copyright (c) 2019-2024 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2019-2025 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -33,6 +33,13 @@ #include #endif +// Includes needed for MinGW - see #71. +#ifndef _MSC_VER + #include + // guiddef.h must be included first. + #include +#endif + //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// // @@ -42,6 +49,14 @@ //////////////////////////////////////////////////////////////////////////////// #ifndef _D3D12MA_CONFIGURATION +#if !defined(D3D12MA_CPP20) + #if __cplusplus >= 202002L || _MSVC_LANG >= 202002L // C++20 + #define D3D12MA_CPP20 1 + #else + #define D3D12MA_CPP20 0 + #endif +#endif + #ifdef _WIN32 #if !defined(WINVER) || WINVER < 0x0600 #error Required at least WinAPI version supporting: client = Windows Vista, server = Windows Server 2008. @@ -64,6 +79,10 @@ #define D3D12MA_ASSERT(cond) assert(cond) #endif +#if D3D12MA_CPP20 + #include +#endif + // Assert that will be called very often, like inside data structures e.g. operator[]. // Making it non-empty can make program slow. #ifndef D3D12MA_HEAVY_ASSERT @@ -107,6 +126,14 @@ especially to test compatibility with D3D12_RESOURCE_HEAP_TIER_1 on modern GPUs. 
#define D3D12MA_DEFAULT_BLOCK_SIZE (64ull * 1024 * 1024) #endif +#ifndef D3D12MA_OPTIONS16_SUPPORTED + #if D3D12_SDK_VERSION >= 610 + #define D3D12MA_OPTIONS16_SUPPORTED 1 + #else + #define D3D12MA_OPTIONS16_SUPPORTED 0 + #endif +#endif + #ifndef D3D12MA_DEBUG_LOG #define D3D12MA_DEBUG_LOG(format, ...) /* @@ -128,10 +155,6 @@ especially to test compatibility with D3D12_RESOURCE_HEAP_TIER_1 on modern GPUs. #define D3D12MA_IID_PPV_ARGS(ppType) __uuidof(**(ppType)), reinterpret_cast(ppType) -#ifdef __ID3D12Device8_INTERFACE_DEFINED__ - #define D3D12MA_CREATE_NOT_ZEROED_AVAILABLE 1 -#endif - namespace D3D12MA { static constexpr UINT HEAP_TYPE_COUNT = 5; @@ -292,6 +315,10 @@ static UINT8 BitScanLSB(UINT64 mask) if (_BitScanForward64(&pos, mask)) return static_cast(pos); return UINT8_MAX; +#elif D3D12MA_CPP20 + if (mask != 0) + return static_cast(std::countr_zero(mask)); + return UINT8_MAX; #elif defined __GNUC__ || defined __clang__ return static_cast(__builtin_ffsll(mask)) - 1U; #else @@ -314,6 +341,10 @@ static UINT8 BitScanLSB(UINT32 mask) if (_BitScanForward(&pos, mask)) return static_cast(pos); return UINT8_MAX; +#elif D3D12MA_CPP20 + if (mask != 0) + return static_cast(std::countr_zero(mask)); + return UINT8_MAX; #elif defined __GNUC__ || defined __clang__ return static_cast(__builtin_ffs(mask)) - 1U; #else @@ -336,6 +367,9 @@ static UINT8 BitScanMSB(UINT64 mask) unsigned long pos; if (_BitScanReverse64(&pos, mask)) return static_cast(pos); +#elif D3D12MA_CPP20 + if (mask != 0) + return 63 - static_cast(std::countl_zero(mask)); #elif defined __GNUC__ || defined __clang__ if (mask) return 63 - static_cast(__builtin_clzll(mask)); @@ -358,6 +392,9 @@ static UINT8 BitScanMSB(UINT32 mask) unsigned long pos; if (_BitScanReverse(&pos, mask)) return static_cast(pos); +#elif D3D12MA_CPP20 + if (mask != 0) + return 31 - static_cast(std::countl_zero(mask)); #elif defined __GNUC__ || defined __clang__ if (mask) return 31 - static_cast(__builtin_clz(mask)); @@ -2791,7 +2828,7 
@@ class AllocationObjectAllocator D3D12MA_CLASS_NO_COPY(AllocationObjectAllocator); public: AllocationObjectAllocator(const ALLOCATION_CALLBACKS& allocationCallbacks, bool useMutex) - : m_Allocator(allocationCallbacks, 1024), m_UseMutex(useMutex) {} + : m_UseMutex(useMutex), m_Allocator(allocationCallbacks, 1024) {} template Allocation* Allocate(Types... args); @@ -2982,7 +3019,7 @@ void BlockMetadata::DebugLogAllocation(UINT64 offset, UINT64 size, void* private LPCWSTR name = allocation->GetName(); D3D12MA_DEBUG_LOG(L"UNFREED ALLOCATION; Offset: %llu; Size: %llu; PrivateData: %p; Name: %s", - offset, size, privateData, name ? name : L"D3D12MA_Empty"); + offset, size, privateData, name ? name : L""); } } @@ -5369,8 +5406,8 @@ struct CREATE_RESOURCE_PARAMS { CREATE_RESOURCE_PARAMS() = delete; CREATE_RESOURCE_PARAMS( - const D3D12_RESOURCE_DESC* pResourceDesc, - D3D12_RESOURCE_STATES InitialResourceState, + const D3D12_RESOURCE_DESC* pResourceDesc, + D3D12_RESOURCE_STATES InitialResourceState, const D3D12_CLEAR_VALUE* pOptimizedClearValue) : Variant(VARIANT_WITH_STATE) , pResourceDesc(pResourceDesc) @@ -5380,8 +5417,8 @@ struct CREATE_RESOURCE_PARAMS } #ifdef __ID3D12Device8_INTERFACE_DEFINED__ CREATE_RESOURCE_PARAMS( - const D3D12_RESOURCE_DESC1* pResourceDesc, - D3D12_RESOURCE_STATES InitialResourceState, + const D3D12_RESOURCE_DESC1* pResourceDesc, + D3D12_RESOURCE_STATES InitialResourceState, const D3D12_CLEAR_VALUE* pOptimizedClearValue) : Variant(VARIANT_WITH_STATE_AND_DESC1) , pResourceDesc1(pResourceDesc) @@ -5396,7 +5433,7 @@ struct CREATE_RESOURCE_PARAMS D3D12_BARRIER_LAYOUT InitialLayout, const D3D12_CLEAR_VALUE* pOptimizedClearValue, UINT32 NumCastableFormats, - DXGI_FORMAT* pCastableFormats) + const DXGI_FORMAT* pCastableFormats) : Variant(VARIANT_WITH_LAYOUT) , pResourceDesc1(pResourceDesc) , InitialLayout(InitialLayout) @@ -5466,7 +5503,7 @@ struct CREATE_RESOURCE_PARAMS D3D12MA_ASSERT(Variant >= VARIANT_WITH_LAYOUT); return NumCastableFormats; } - 
DXGI_FORMAT* GetCastableFormats() const + const DXGI_FORMAT* GetCastableFormats() const { D3D12MA_ASSERT(Variant >= VARIANT_WITH_LAYOUT); return pCastableFormats; @@ -5491,7 +5528,7 @@ private: const D3D12_CLEAR_VALUE* pOptimizedClearValue; #ifdef __ID3D12Device10_INTERFACE_DEFINED__ UINT32 NumCastableFormats; - DXGI_FORMAT* pCastableFormats; + const DXGI_FORMAT* pCastableFormats; #endif }; @@ -5541,6 +5578,7 @@ public: UINT64 size, UINT64 alignment, const ALLOCATION_DESC& allocDesc, + bool committedAllowed, size_t allocationCount, Allocation** pAllocations); @@ -5551,6 +5589,7 @@ public: UINT64 alignment, const ALLOCATION_DESC& allocDesc, const CREATE_RESOURCE_PARAMS& createParams, + bool committedAllowed, Allocation** ppAllocation, REFIID riidResource, void** ppvResource); @@ -5601,6 +5640,7 @@ private: UINT64 size, UINT64 alignment, const ALLOCATION_DESC& allocDesc, + bool committedAllowed, Allocation** pAllocation); HRESULT AllocateFromBlock( @@ -5706,29 +5746,31 @@ HRESULT CurrentBudgetData::UpdateBudget(IDXGIAdapter3* adapter3, bool useMutex) DXGI_QUERY_VIDEO_MEMORY_INFO infoLocal = {}; DXGI_QUERY_VIDEO_MEMORY_INFO infoNonLocal = {}; const HRESULT hrLocal = adapter3->QueryVideoMemoryInfo(0, DXGI_MEMORY_SEGMENT_GROUP_LOCAL, &infoLocal); + if (FAILED(hrLocal)) + { + return hrLocal; + } const HRESULT hrNonLocal = adapter3->QueryVideoMemoryInfo(0, DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL, &infoNonLocal); + if (FAILED(hrNonLocal)) + { + return hrNonLocal; + } - if (SUCCEEDED(hrLocal) || SUCCEEDED(hrNonLocal)) { MutexLockWrite lockWrite(m_BudgetMutex, useMutex); - if (SUCCEEDED(hrLocal)) - { - m_D3D12Usage[0] = infoLocal.CurrentUsage; - m_D3D12Budget[0] = infoLocal.Budget; - } - if (SUCCEEDED(hrNonLocal)) - { - m_D3D12Usage[1] = infoNonLocal.CurrentUsage; - m_D3D12Budget[1] = infoNonLocal.Budget; - } + m_D3D12Usage[0] = infoLocal.CurrentUsage; + m_D3D12Budget[0] = infoLocal.Budget; + + m_D3D12Usage[1] = infoNonLocal.CurrentUsage; + m_D3D12Budget[1] = 
infoNonLocal.Budget; m_BlockBytesAtD3D12Fetch[0] = m_BlockBytes[0]; m_BlockBytesAtD3D12Fetch[1] = m_BlockBytes[1]; m_OperationsSinceBudgetFetch = 0; } - return FAILED(hrLocal) ? hrLocal : hrNonLocal; + return S_OK; } #endif // #if D3D12MA_DXGI_1_4 @@ -5847,6 +5889,7 @@ public: AllocatorPimpl* GetAllocator() const { return m_Allocator; } const POOL_DESC& GetDesc() const { return m_Desc; } + bool AlwaysCommitted() const { return (m_Desc.Flags & POOL_FLAG_ALWAYS_COMMITTED) != 0; } bool SupportsCommittedAllocations() const { return m_Desc.BlockSize == 0; } LPCWSTR GetName() const { return m_Name; } @@ -5903,6 +5946,12 @@ public: #endif #ifdef __ID3D12Device8_INTERFACE_DEFINED__ ID3D12Device8* GetDevice8() const { return m_Device8; } +#endif +#ifdef __ID3D12Device10_INTERFACE_DEFINED__ + ID3D12Device10* GetDevice10() const { return m_Device10; } +#endif +#ifdef __ID3D12Device12_INTERFACE_DEFINED__ + ID3D12Device12* GetDevice12() const { return m_Device12; } #endif // Shortcut for "Allocation Callbacks", because this function is called so often. 
const ALLOCATION_CALLBACKS& GetAllocs() const { return m_AllocationCallbacks; } @@ -6011,6 +6060,9 @@ private: #endif #ifdef __ID3D12Device10_INTERFACE_DEFINED__ ID3D12Device10* m_Device10 = NULL; // AddRef, optional +#endif +#ifdef __ID3D12Device12_INTERFACE_DEFINED__ + ID3D12Device12* m_Device12 = NULL; // AddRef, optional #endif IDXGIAdapter* m_Adapter; // AddRef #if D3D12MA_DXGI_1_4 @@ -6071,12 +6123,26 @@ private: HRESULT UpdateD3D12Budget(); D3D12_RESOURCE_ALLOCATION_INFO GetResourceAllocationInfoNative(const D3D12_RESOURCE_DESC& resourceDesc) const; + HRESULT GetResourceAllocationInfoMiddle(D3D12_RESOURCE_DESC& inOutResourceDesc, + UINT32 NumCastableFormats, const DXGI_FORMAT* pCastableFormats, + D3D12_RESOURCE_ALLOCATION_INFO& outAllocInfo) const; + #ifdef __ID3D12Device8_INTERFACE_DEFINED__ - D3D12_RESOURCE_ALLOCATION_INFO GetResourceAllocationInfoNative(const D3D12_RESOURCE_DESC1& resourceDesc) const; + D3D12_RESOURCE_ALLOCATION_INFO GetResourceAllocationInfo2Native(const D3D12_RESOURCE_DESC1& resourceDesc) const; + HRESULT GetResourceAllocationInfoMiddle(D3D12_RESOURCE_DESC1& inOutResourceDesc, + UINT32 NumCastableFormats, const DXGI_FORMAT* pCastableFormats, + D3D12_RESOURCE_ALLOCATION_INFO& outAllocInfo) const; +#endif + +#ifdef __ID3D12Device12_INTERFACE_DEFINED__ + D3D12_RESOURCE_ALLOCATION_INFO GetResourceAllocationInfo3Native(const D3D12_RESOURCE_DESC1& resourceDesc, + UINT32 NumCastableFormats, const DXGI_FORMAT* pCastableFormats) const; #endif template - D3D12_RESOURCE_ALLOCATION_INFO GetResourceAllocationInfo(D3D12_RESOURCE_DESC_T& inOutResourceDesc) const; + HRESULT GetResourceAllocationInfo(D3D12_RESOURCE_DESC_T& inOutResourceDesc, + UINT32 NumCastableFormats, const DXGI_FORMAT* pCastableFormats, + D3D12_RESOURCE_ALLOCATION_INFO& outAllocInfo) const; bool NewAllocationWithinBudget(D3D12_HEAP_TYPE heapType, UINT64 size); @@ -6148,6 +6214,10 @@ HRESULT AllocatorPimpl::Init(const ALLOCATOR_DESC& desc) 
m_Device->QueryInterface(D3D12MA_IID_PPV_ARGS(&m_Device10)); #endif +#ifdef __ID3D12Device12_INTERFACE_DEFINED__ + m_Device->QueryInterface(D3D12MA_IID_PPV_ARGS(&m_Device12)); +#endif + HRESULT hr = m_Adapter->GetDesc(&m_AdapterDesc); if (FAILED(hr)) { @@ -6163,9 +6233,6 @@ HRESULT AllocatorPimpl::Init(const ALLOCATOR_DESC& desc) m_D3D12Options.ResourceHeapTier = (D3D12MA_FORCE_RESOURCE_HEAP_TIER); #endif -// You must define this macro to like `#define D3D12MA_OPTIONS16_SUPPORTED 1` to enable GPU Upload Heaps! -// Unfortunately there is no way to programmatically check if the included defines D3D12_FEATURE_DATA_D3D12_OPTIONS16 or not. -// Main interfaces have respective macros like __ID3D12Device4_INTERFACE_DEFINED__, but structures like this do not. #if D3D12MA_OPTIONS16_SUPPORTED { D3D12_FEATURE_DATA_D3D12_OPTIONS16 options16 = {}; @@ -6175,7 +6242,7 @@ HRESULT AllocatorPimpl::Init(const ALLOCATOR_DESC& desc) m_GPUUploadHeapSupported = options16.GPUUploadHeapSupported; } } -#endif +#endif // #if D3D12MA_OPTIONS16_SUPPORTED hr = m_Device->CheckFeatureSupport(D3D12_FEATURE_ARCHITECTURE, &m_D3D12Architecture, sizeof(m_D3D12Architecture)); if (FAILED(hr)) @@ -6223,6 +6290,9 @@ HRESULT AllocatorPimpl::Init(const ALLOCATOR_DESC& desc) AllocatorPimpl::~AllocatorPimpl() { +#ifdef __ID3D12Device12_INTERFACE_DEFINED__ + SAFE_RELEASE(m_Device12); +#endif #ifdef __ID3D12Device10_INTERFACE_DEFINED__ SAFE_RELEASE(m_Device10); #endif @@ -6319,12 +6389,15 @@ HRESULT AllocatorPimpl::CreatePlacedResourceWrap( { return E_NOINTERFACE; } + // Microsoft defined pCastableFormats parameter as pointer to non-const and only fixed it in later Agility SDK, + // thus we need const_cast. 
return m_Device10->CreatePlacedResource2(pHeap, HeapOffset, createParams.GetResourceDesc1(), createParams.GetInitialLayout(), createParams.GetOptimizedClearValue(), createParams.GetNumCastableFormats(), - createParams.GetCastableFormats(), riidResource, ppvResource); - } else + const_cast(createParams.GetCastableFormats()), riidResource, ppvResource); + } #endif + #ifdef __ID3D12Device8_INTERFACE_DEFINED__ if (createParams.Variant == CREATE_RESOURCE_PARAMS::VARIANT_WITH_STATE_AND_DESC1) { @@ -6335,21 +6408,19 @@ HRESULT AllocatorPimpl::CreatePlacedResourceWrap( return m_Device8->CreatePlacedResource1(pHeap, HeapOffset, createParams.GetResourceDesc1(), createParams.GetInitialResourceState(), createParams.GetOptimizedClearValue(), riidResource, ppvResource); - } else + } #endif + if (createParams.Variant == CREATE_RESOURCE_PARAMS::VARIANT_WITH_STATE) { return m_Device->CreatePlacedResource(pHeap, HeapOffset, createParams.GetResourceDesc(), createParams.GetInitialResourceState(), createParams.GetOptimizedClearValue(), riidResource, ppvResource); } - else - { - D3D12MA_ASSERT(0); - return E_INVALIDARG; - } -} + D3D12MA_ASSERT(0); + return E_INVALIDARG; +} HRESULT AllocatorPimpl::CreateResource( const ALLOCATION_DESC* pAllocDesc, @@ -6366,6 +6437,7 @@ HRESULT AllocatorPimpl::CreateResource( *ppvResource = NULL; } + HRESULT hr = E_NOINTERFACE; CREATE_RESOURCE_PARAMS finalCreateParams = createParams; D3D12_RESOURCE_DESC finalResourceDesc; #ifdef __ID3D12Device8_INTERFACE_DEFINED__ @@ -6376,45 +6448,49 @@ HRESULT AllocatorPimpl::CreateResource( { finalResourceDesc = *createParams.GetResourceDesc(); finalCreateParams.AccessResourceDesc() = &finalResourceDesc; - resAllocInfo = GetResourceAllocationInfo(finalResourceDesc); + hr = GetResourceAllocationInfo(finalResourceDesc, 0, NULL, resAllocInfo); } #ifdef __ID3D12Device8_INTERFACE_DEFINED__ else if (createParams.Variant == CREATE_RESOURCE_PARAMS::VARIANT_WITH_STATE_AND_DESC1) { - if (!m_Device8) + if (m_Device8 != NULL) { - 
return E_NOINTERFACE; + finalResourceDesc1 = *createParams.GetResourceDesc1(); + finalCreateParams.AccessResourceDesc1() = &finalResourceDesc1; + hr = GetResourceAllocationInfo(finalResourceDesc1, 0, NULL, resAllocInfo); } - finalResourceDesc1 = *createParams.GetResourceDesc1(); - finalCreateParams.AccessResourceDesc1() = &finalResourceDesc1; - resAllocInfo = GetResourceAllocationInfo(finalResourceDesc1); } #endif #ifdef __ID3D12Device10_INTERFACE_DEFINED__ else if (createParams.Variant == CREATE_RESOURCE_PARAMS::VARIANT_WITH_LAYOUT) { - if (!m_Device10) + if (m_Device10 != NULL) { - return E_NOINTERFACE; + finalResourceDesc1 = *createParams.GetResourceDesc1(); + finalCreateParams.AccessResourceDesc1() = &finalResourceDesc1; + hr = GetResourceAllocationInfo(finalResourceDesc1, + createParams.GetNumCastableFormats(), createParams.GetCastableFormats(), resAllocInfo); } - finalResourceDesc1 = *createParams.GetResourceDesc1(); - finalCreateParams.AccessResourceDesc1() = &finalResourceDesc1; - resAllocInfo = GetResourceAllocationInfo(finalResourceDesc1); } #endif else { D3D12MA_ASSERT(0); - return E_INVALIDARG; + hr = E_INVALIDARG; } + + if (FAILED(hr)) + return hr; + D3D12MA_ASSERT(IsPow2(resAllocInfo.Alignment)); + // We've seen UINT64_MAX returned when the call to GetResourceAllocationInfo was invalid. 
+ D3D12MA_ASSERT(resAllocInfo.SizeInBytes != UINT64_MAX); D3D12MA_ASSERT(resAllocInfo.SizeInBytes > 0); BlockVector* blockVector = NULL; CommittedAllocationParameters committedAllocationParams = {}; bool preferCommitted = false; - HRESULT hr; #ifdef __ID3D12Device8_INTERFACE_DEFINED__ if (createParams.Variant >= CREATE_RESOURCE_PARAMS::VARIANT_WITH_STATE_AND_DESC1) { @@ -6445,7 +6521,7 @@ HRESULT AllocatorPimpl::CreateResource( if (blockVector != NULL) { hr = blockVector->CreateResource(resAllocInfo.SizeInBytes, resAllocInfo.Alignment, - *pAllocDesc, finalCreateParams, + *pAllocDesc, finalCreateParams, committedAllocationParams.IsValid(), ppAllocation, riidResource, ppvResource); if (SUCCEEDED(hr)) return hr; @@ -6488,7 +6564,7 @@ HRESULT AllocatorPimpl::AllocateMemory( if (blockVector != NULL) { hr = blockVector->Allocate(pAllocInfo->SizeInBytes, pAllocInfo->Alignment, - *pAllocDesc, 1, (Allocation**)ppAllocation); + *pAllocDesc, committedAllocationParams.IsValid(), 1, (Allocation**)ppAllocation); if (SUCCEEDED(hr)) return hr; } @@ -6510,6 +6586,7 @@ HRESULT AllocatorPimpl::CreateAliasingResource( { *ppvResource = NULL; + HRESULT hr = E_NOINTERFACE; CREATE_RESOURCE_PARAMS finalCreateParams = createParams; D3D12_RESOURCE_DESC finalResourceDesc; #ifdef __ID3D12Device8_INTERFACE_DEFINED__ @@ -6520,37 +6597,40 @@ HRESULT AllocatorPimpl::CreateAliasingResource( { finalResourceDesc = *createParams.GetResourceDesc(); finalCreateParams.AccessResourceDesc() = &finalResourceDesc; - resAllocInfo = GetResourceAllocationInfo(finalResourceDesc); + hr = GetResourceAllocationInfo(finalResourceDesc, 0, NULL, resAllocInfo); } #ifdef __ID3D12Device8_INTERFACE_DEFINED__ else if (createParams.Variant == CREATE_RESOURCE_PARAMS::VARIANT_WITH_STATE_AND_DESC1) { - if (!m_Device8) + if (m_Device8 != NULL) { - return E_NOINTERFACE; + finalResourceDesc1 = *createParams.GetResourceDesc1(); + finalCreateParams.AccessResourceDesc1() = &finalResourceDesc1; + hr = 
GetResourceAllocationInfo(finalResourceDesc1, 0, NULL, resAllocInfo); } - finalResourceDesc1 = *createParams.GetResourceDesc1(); - finalCreateParams.AccessResourceDesc1() = &finalResourceDesc1; - resAllocInfo = GetResourceAllocationInfo(finalResourceDesc1); } #endif #ifdef __ID3D12Device10_INTERFACE_DEFINED__ else if (createParams.Variant == CREATE_RESOURCE_PARAMS::VARIANT_WITH_LAYOUT) { - if (!m_Device10) + if (m_Device10 != NULL) { - return E_NOINTERFACE; + finalResourceDesc1 = *createParams.GetResourceDesc1(); + finalCreateParams.AccessResourceDesc1() = &finalResourceDesc1; + hr = GetResourceAllocationInfo(finalResourceDesc1, + createParams.GetNumCastableFormats(), createParams.GetCastableFormats(), resAllocInfo); } - finalResourceDesc1 = *createParams.GetResourceDesc1(); - finalCreateParams.AccessResourceDesc1() = &finalResourceDesc1; - resAllocInfo = GetResourceAllocationInfo(finalResourceDesc1); } #endif else { D3D12MA_ASSERT(0); - return E_INVALIDARG; + hr = E_INVALIDARG; } + + if (FAILED(hr)) + return hr; + D3D12MA_ASSERT(IsPow2(resAllocInfo.Alignment)); D3D12MA_ASSERT(resAllocInfo.SizeInBytes > 0); @@ -6773,42 +6853,41 @@ void AllocatorPimpl::GetBudget(Budget* outLocalBudget, Budget* outNonLocalBudget outLocalBudget ? &outLocalBudget->BudgetBytes : NULL, outNonLocalBudget ? &outNonLocalBudget->UsageBytes : NULL, outNonLocalBudget ? &outNonLocalBudget->BudgetBytes : NULL); + return; } - else + + if (SUCCEEDED(UpdateD3D12Budget())) { - UpdateD3D12Budget(); - GetBudget(outLocalBudget, outNonLocalBudget); // Recursion + GetBudget(outLocalBudget, outNonLocalBudget); // Recursion. + return; } } - else #endif + + // Fallback path - manual calculation, not real budget. + if (outLocalBudget) { - if (outLocalBudget) - { - outLocalBudget->UsageBytes = outLocalBudget->Stats.BlockBytes; - outLocalBudget->BudgetBytes = GetMemoryCapacity(DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY) * 8 / 10; // 80% heuristics. 
- } - if (outNonLocalBudget) - { - outNonLocalBudget->UsageBytes = outNonLocalBudget->Stats.BlockBytes; - outNonLocalBudget->BudgetBytes = GetMemoryCapacity(DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL_COPY) * 8 / 10; // 80% heuristics. - } + outLocalBudget->UsageBytes = outLocalBudget->Stats.BlockBytes; + outLocalBudget->BudgetBytes = GetMemoryCapacity(DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY) * 8 / 10; // 80% heuristics. + } + if (outNonLocalBudget) + { + outNonLocalBudget->UsageBytes = outNonLocalBudget->Stats.BlockBytes; + outNonLocalBudget->BudgetBytes = GetMemoryCapacity(DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL_COPY) * 8 / 10; // 80% heuristics. } } void AllocatorPimpl::GetBudgetForHeapType(Budget& outBudget, D3D12_HEAP_TYPE heapType) { - switch (heapType) + const bool isLocal = StandardHeapTypeToMemorySegmentGroup(heapType) == + DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY; + if (isLocal) { - case D3D12_HEAP_TYPE_DEFAULT: - case D3D12_HEAP_TYPE_GPU_UPLOAD_COPY: GetBudget(&outBudget, NULL); - break; - case D3D12_HEAP_TYPE_UPLOAD: - case D3D12_HEAP_TYPE_READBACK: + } + else + { GetBudget(NULL, &outBudget); - break; - default: D3D12MA_ASSERT(0); } } @@ -7248,12 +7327,15 @@ HRESULT AllocatorPimpl::AllocateCommittedResource( { return E_NOINTERFACE; } + + // Microsoft defined pCastableFormats parameter as pointer to non-const and only fixed it in later Agility SDK, + // thus we need const_cast. 
hr = m_Device10->CreateCommittedResource3( &committedAllocParams.m_HeapProperties, committedAllocParams.m_HeapFlags & ~RESOURCE_CLASS_HEAP_FLAGS, createParams.GetResourceDesc1(), createParams.GetInitialLayout(), createParams.GetOptimizedClearValue(), committedAllocParams.m_ProtectedSession, - createParams.GetNumCastableFormats(), createParams.GetCastableFormats(), + createParams.GetNumCastableFormats(), const_cast<DXGI_FORMAT*>(createParams.GetCastableFormats()), D3D12MA_IID_PPV_ARGS(&res)); } else #endif @@ -7396,8 +7478,8 @@ HRESULT AllocatorPimpl::CalcAllocationParams(const ALLOCATION_DESC& allocDesc, U outCommittedAllocationParams = CommittedAllocationParameters(); outPreferCommitted = false; - D3D12MA_ASSERT((allocDesc.HeapType != D3D12_HEAP_TYPE_GPU_UPLOAD_COPY || IsGPUUploadHeapSupported()) && - "Trying to allocate from D3D12_HEAP_TYPE_GPU_UPLOAD while GPUUploadHeapSupported == FALSE or D3D12MA_OPTIONS16_SUPPORTED macro was not defined when compiling D3D12MA library."); + if (allocDesc.HeapType == D3D12_HEAP_TYPE_GPU_UPLOAD_COPY && !IsGPUUploadHeapSupported()) + return E_NOTIMPL; bool msaaAlwaysCommitted; if (allocDesc.CustomPool != NULL) @@ -7405,7 +7487,8 @@ HRESULT AllocatorPimpl::CalcAllocationParams(const ALLOCATION_DESC& allocDesc, U PoolPimpl* const pool = allocDesc.CustomPool->m_Pimpl; msaaAlwaysCommitted = pool->GetBlockVector()->DeniesMsaaTextures(); - outBlockVector = pool->GetBlockVector(); + if(!pool->AlwaysCommitted()) + outBlockVector = pool->GetBlockVector(); const auto& desc = pool->GetDesc(); outCommittedAllocationParams.m_ProtectedSession = desc.pProtectedSession; @@ -7444,12 +7527,6 @@ HRESULT AllocatorPimpl::CalcAllocationParams(const ALLOCATION_DESC& allocDesc, U outPreferCommitted = true; } } - - const D3D12_HEAP_FLAGS extraHeapFlags = allocDesc.ExtraHeapFlags & ~RESOURCE_CLASS_HEAP_FLAGS; - if (outBlockVector != NULL && extraHeapFlags != 0) - { - outBlockVector = NULL; - } } if ((allocDesc.Flags & ALLOCATION_FLAG_COMMITTED) != 0 || @@ -7479,12
+7556,7 @@ UINT AllocatorPimpl::CalcDefaultPoolIndex(const ALLOCATION_DESC& allocDesc, Reso D3D12_HEAP_FLAGS extraHeapFlags = allocDesc.ExtraHeapFlags & ~RESOURCE_CLASS_HEAP_FLAGS; #if D3D12MA_CREATE_NOT_ZEROED_AVAILABLE - // If allocator was created with ALLOCATOR_FLAG_DEFAULT_POOLS_NOT_ZEROED, also ignore - // D3D12_HEAP_FLAG_CREATE_NOT_ZEROED. - if(m_DefaultPoolsNotZeroed) - { - extraHeapFlags &= ~D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; - } + extraHeapFlags &= ~D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; #endif if (extraHeapFlags != 0) @@ -7603,7 +7675,7 @@ D3D12_RESOURCE_ALLOCATION_INFO AllocatorPimpl::GetResourceAllocationInfoNative(c } #ifdef __ID3D12Device8_INTERFACE_DEFINED__ -D3D12_RESOURCE_ALLOCATION_INFO AllocatorPimpl::GetResourceAllocationInfoNative(const D3D12_RESOURCE_DESC1& resourceDesc) const +D3D12_RESOURCE_ALLOCATION_INFO AllocatorPimpl::GetResourceAllocationInfo2Native(const D3D12_RESOURCE_DESC1& resourceDesc) const { D3D12MA_ASSERT(m_Device8 != NULL); D3D12_RESOURCE_ALLOCATION_INFO1 info1Unused; @@ -7619,8 +7691,71 @@ D3D12_RESOURCE_ALLOCATION_INFO AllocatorPimpl::GetResourceAllocationInfoNative(c } #endif // #ifdef __ID3D12Device8_INTERFACE_DEFINED__ +#ifdef __ID3D12Device12_INTERFACE_DEFINED__ +D3D12_RESOURCE_ALLOCATION_INFO AllocatorPimpl::GetResourceAllocationInfo3Native(const D3D12_RESOURCE_DESC1& resourceDesc, + UINT32 NumCastableFormats, const DXGI_FORMAT* pCastableFormats) const +{ + D3D12MA_ASSERT(m_Device12 != NULL); + D3D12_RESOURCE_ALLOCATION_INFO1 info1Unused; + + // This is how new D3D12 headers define GetResourceAllocationInfo function - + // different signature depending on these macros. 
+#if defined(_MSC_VER) || !defined(_WIN32) + return m_Device12->GetResourceAllocationInfo3(0, 1, &resourceDesc, + &NumCastableFormats, &pCastableFormats, &info1Unused); +#else + D3D12_RESOURCE_ALLOCATION_INFO retVal; + return *m_Device12->GetResourceAllocationInfo3(&retVal, 0, 1, &resourceDesc, + &NumCastableFormats, &pCastableFormats, &info1Unused); +#endif +} +#endif // #ifdef __ID3D12Device12_INTERFACE_DEFINED__ + +HRESULT AllocatorPimpl::GetResourceAllocationInfoMiddle( + D3D12_RESOURCE_DESC& inOutResourceDesc, + UINT32 NumCastableFormats, const DXGI_FORMAT* pCastableFormats, + D3D12_RESOURCE_ALLOCATION_INFO& outAllocInfo) const +{ + if (NumCastableFormats > 0) + { + return E_NOTIMPL; + } + + outAllocInfo = GetResourceAllocationInfoNative(inOutResourceDesc); + return outAllocInfo.SizeInBytes != UINT64_MAX ? S_OK : E_INVALIDARG; +} + +#ifdef __ID3D12Device8_INTERFACE_DEFINED__ + +HRESULT AllocatorPimpl::GetResourceAllocationInfoMiddle( + D3D12_RESOURCE_DESC1& inOutResourceDesc, + UINT32 NumCastableFormats, const DXGI_FORMAT* pCastableFormats, + D3D12_RESOURCE_ALLOCATION_INFO& outAllocInfo) const +{ + if (NumCastableFormats > 0) + { +#ifdef __ID3D12Device12_INTERFACE_DEFINED__ + if (m_Device12 != NULL) + { + outAllocInfo = GetResourceAllocationInfo3Native(inOutResourceDesc, NumCastableFormats, pCastableFormats); + return outAllocInfo.SizeInBytes != UINT64_MAX ? S_OK : E_INVALIDARG; + } +#else + return E_NOTIMPL; +#endif + } + + outAllocInfo = GetResourceAllocationInfo2Native(inOutResourceDesc); + return outAllocInfo.SizeInBytes != UINT64_MAX ? 
S_OK : E_INVALIDARG; +} + +#endif // #ifdef __ID3D12Device8_INTERFACE_DEFINED__ + template<typename D3D12_RESOURCE_DESC_T> -D3D12_RESOURCE_ALLOCATION_INFO AllocatorPimpl::GetResourceAllocationInfo(D3D12_RESOURCE_DESC_T& inOutResourceDesc) const +HRESULT AllocatorPimpl::GetResourceAllocationInfo( + D3D12_RESOURCE_DESC_T& inOutResourceDesc, + UINT32 NumCastableFormats, const DXGI_FORMAT* pCastableFormats, + D3D12_RESOURCE_ALLOCATION_INFO& outAllocInfo) const { #ifdef __ID3D12Device1_INTERFACE_DEFINED__ /* Optional optimization: Microsoft documentation says: @@ -7634,12 +7769,15 @@ D3D12_RESOURCE_ALLOCATION_INFO AllocatorPimpl::GetResourceAllocationInfo(D3D12_R if (inOutResourceDesc.Alignment == 0 && inOutResourceDesc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) { - return { + outAllocInfo = { AlignUp(inOutResourceDesc.Width, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT), // SizeInBytes D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT }; // Alignment + return S_OK; } #endif // #ifdef __ID3D12Device1_INTERFACE_DEFINED__ + HRESULT hr = S_OK; + #if D3D12MA_USE_SMALL_RESOURCE_PLACEMENT_ALIGNMENT if (inOutResourceDesc.Alignment == 0 && inOutResourceDesc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE2D && @@ -7657,17 +7795,19 @@ D3D12_RESOURCE_ALLOCATION_INFO AllocatorPimpl::GetResourceAllocationInfo(D3D12_R D3D12_SMALL_MSAA_RESOURCE_PLACEMENT_ALIGNMENT : D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT; inOutResourceDesc.Alignment = smallAlignmentToTry; - const D3D12_RESOURCE_ALLOCATION_INFO smallAllocInfo = GetResourceAllocationInfoNative(inOutResourceDesc); + hr = GetResourceAllocationInfoMiddle( + inOutResourceDesc, NumCastableFormats, pCastableFormats, outAllocInfo); // Check if alignment requested has been granted.
- if (smallAllocInfo.Alignment == smallAlignmentToTry) + if (SUCCEEDED(hr) && outAllocInfo.Alignment == smallAlignmentToTry) { - return smallAllocInfo; + return S_OK; } inOutResourceDesc.Alignment = 0; // Restore original } #endif // #if D3D12MA_USE_SMALL_RESOURCE_PLACEMENT_ALIGNMENT - return GetResourceAllocationInfoNative(inOutResourceDesc); + return GetResourceAllocationInfoMiddle( + inOutResourceDesc, NumCastableFormats, pCastableFormats, outAllocInfo); } bool AllocatorPimpl::NewAllocationWithinBudget(D3D12_HEAP_TYPE heapType, UINT64 size) @@ -7988,6 +8128,7 @@ HRESULT BlockVector::Allocate( UINT64 size, UINT64 alignment, const ALLOCATION_DESC& allocDesc, + bool committedAllowed, size_t allocationCount, Allocation** pAllocations) { @@ -8002,6 +8143,7 @@ HRESULT BlockVector::Allocate( size, alignment, allocDesc, + committedAllowed, pAllocations + allocIndex); if (FAILED(hr)) { @@ -8090,40 +8232,43 @@ HRESULT BlockVector::CreateResource( UINT64 alignment, const ALLOCATION_DESC& allocDesc, const CREATE_RESOURCE_PARAMS& createParams, + bool committedAllowed, Allocation** ppAllocation, REFIID riidResource, void** ppvResource) { - HRESULT hr = Allocate(size, alignment, allocDesc, 1, ppAllocation); + HRESULT hr = Allocate(size, alignment, allocDesc, committedAllowed, 1, ppAllocation); + if (FAILED(hr)) + { + return hr; + } + + ID3D12Resource* res = NULL; + hr = m_hAllocator->CreatePlacedResourceWrap( + (*ppAllocation)->m_Placed.block->GetHeap(), + (*ppAllocation)->GetOffset(), + createParams, + D3D12MA_IID_PPV_ARGS(&res)); if (SUCCEEDED(hr)) { - ID3D12Resource* res = NULL; - hr = m_hAllocator->CreatePlacedResourceWrap( - (*ppAllocation)->m_Placed.block->GetHeap(), - (*ppAllocation)->GetOffset(), - createParams, - D3D12MA_IID_PPV_ARGS(&res)); + if (ppvResource != NULL) + { + hr = res->QueryInterface(riidResource, ppvResource); + } if (SUCCEEDED(hr)) { - if (ppvResource != NULL) - { - hr = res->QueryInterface(riidResource, ppvResource); - } - if (SUCCEEDED(hr)) - { - 
(*ppAllocation)->SetResourcePointer(res, createParams.GetBaseResourceDesc()); - } - else - { - res->Release(); - SAFE_RELEASE(*ppAllocation); - } + (*ppAllocation)->SetResourcePointer(res, createParams.GetBaseResourceDesc()); } else { + res->Release(); SAFE_RELEASE(*ppAllocation); } } + else + { + SAFE_RELEASE(*ppAllocation); + } return hr; } @@ -8241,6 +8386,7 @@ HRESULT BlockVector::AllocatePage( UINT64 size, UINT64 alignment, const ALLOCATION_DESC& allocDesc, + bool committedAllowed, Allocation** pAllocation) { // Early reject: requested allocation size is larger that maximum block size for this block vector. @@ -8257,13 +8403,19 @@ HRESULT BlockVector::AllocatePage( freeMemory = (budget.UsageBytes < budget.BudgetBytes) ? (budget.BudgetBytes - budget.UsageBytes) : 0; } - const bool canCreateNewBlock = + const bool canExceedFreeMemory = !committedAllowed; + + bool canCreateNewBlock = ((allocDesc.Flags & ALLOCATION_FLAG_NEVER_ALLOCATE) == 0) && - (m_Blocks.size() < m_MaxBlockCount) && - // Even if we don't have to stay within budget with this allocation, when the - // budget would be exceeded, we don't want to allocate new blocks, but always - // create resources as committed. - freeMemory >= size; + (m_Blocks.size() < m_MaxBlockCount); + + // Even if we don't have to stay within budget with this allocation, when the + // budget would be exceeded, we don't want to allocate new blocks, but always + // create resources as committed. + if (freeMemory < size && !canExceedFreeMemory) + { + canCreateNewBlock = false; + } // 1. Search existing allocations { @@ -8313,26 +8465,29 @@ HRESULT BlockVector::AllocatePage( } } - size_t newBlockIndex = 0; - HRESULT hr = newBlockSize <= freeMemory ? - CreateBlock(newBlockSize, &newBlockIndex) : E_OUTOFMEMORY; + size_t newBlockIndex = SIZE_MAX; + HRESULT hr = E_OUTOFMEMORY; + if (newBlockSize <= freeMemory || canExceedFreeMemory) + { + hr = CreateBlock(newBlockSize, &newBlockIndex); + } // Allocation of this size failed? 
Try 1/2, 1/4, 1/8 of m_PreferredBlockSize. if (!m_ExplicitBlockSize) { while (FAILED(hr) && newBlockSizeShift < NEW_BLOCK_SIZE_SHIFT_MAX) { const UINT64 smallerNewBlockSize = newBlockSize / 2; - if (smallerNewBlockSize >= size) - { - newBlockSize = smallerNewBlockSize; - ++newBlockSizeShift; - hr = newBlockSize <= freeMemory ? - CreateBlock(newBlockSize, &newBlockIndex) : E_OUTOFMEMORY; - } - else + if (smallerNewBlockSize < size) { break; } + + newBlockSize = smallerNewBlockSize; + ++newBlockSizeShift; + if (newBlockSize <= freeMemory || canExceedFreeMemory) + { + hr = CreateBlock(newBlockSize, &newBlockIndex); + } } } @@ -9509,6 +9664,8 @@ HRESULT Pool::BeginDefragmentation(const DEFRAGMENTATION_DESC* pDesc, Defragment // Check for support if (m_Pimpl->GetBlockVector()->GetAlgorithm() & POOL_FLAG_ALGORITHM_LINEAR) return E_NOINTERFACE; + if(m_Pimpl->AlwaysCommitted()) + return E_NOINTERFACE; AllocatorPimpl* allocator = m_Pimpl->GetAllocator(); *ppContext = D3D12MA_NEW(allocator->GetAllocs(), DefragmentationContext)(allocator, *pDesc, m_Pimpl->GetBlockVector()); @@ -9612,7 +9769,7 @@ HRESULT Allocator::CreateResource3( D3D12_BARRIER_LAYOUT InitialLayout, const D3D12_CLEAR_VALUE* pOptimizedClearValue, UINT32 NumCastableFormats, - DXGI_FORMAT* pCastableFormats, + const DXGI_FORMAT* pCastableFormats, Allocation** ppAllocation, REFIID riidResource, void** ppvResource) @@ -9702,7 +9859,7 @@ HRESULT Allocator::CreateAliasingResource2( D3D12_BARRIER_LAYOUT InitialLayout, const D3D12_CLEAR_VALUE* pOptimizedClearValue, UINT32 NumCastableFormats, - DXGI_FORMAT* pCastableFormats, + const DXGI_FORMAT* pCastableFormats, REFIID riidResource, void** ppvResource) { @@ -9732,6 +9889,12 @@ HRESULT Allocator::CreatePool( D3D12MA_ASSERT(0 && "Invalid arguments passed to Allocator::CreatePool."); return E_INVALIDARG; } + if ((pPoolDesc->Flags & POOL_FLAG_ALWAYS_COMMITTED) != 0 && + (pPoolDesc->BlockSize != 0 || pPoolDesc->MinBlockCount > 0)) + { + D3D12MA_ASSERT(0 && "Invalid 
arguments passed to Allocator::CreatePool while POOL_FLAG_ALWAYS_COMMITTED is specified."); + return E_INVALIDARG; + } if (!m_Pimpl->HeapFlagsFulfillResourceHeapTier(pPoolDesc->HeapFlags)) { D3D12MA_ASSERT(0 && "Invalid pPoolDesc->HeapFlags passed to Allocator::CreatePool. Did you forget to handle ResourceHeapTier=1?");