Add GPUMemoryStats for GPU video memory stats
Add a new `Total/GPU` category to the memory profiler. Implement GPU memory stats for D3D12 and Vulkan using their memory allocators.
This commit is contained in:
@@ -361,8 +361,7 @@ void GPUDevice::OnRequestingExit()
|
|||||||
Engine::FatalError != FatalErrorType::GPUHang &&
|
Engine::FatalError != FatalErrorType::GPUHang &&
|
||||||
Engine::FatalError != FatalErrorType::GPUOutOfMemory)
|
Engine::FatalError != FatalErrorType::GPUOutOfMemory)
|
||||||
return;
|
return;
|
||||||
// TODO: get and log actual GPU memory used by the engine (API-specific)
|
OnCrash();
|
||||||
DumpResourcesToLog();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
GPUDevice::GPUDevice(RendererType type, ShaderProfile profile)
|
GPUDevice::GPUDevice(RendererType type, ShaderProfile profile)
|
||||||
@@ -751,6 +750,11 @@ void GPUDevice::RenderEnd()
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void GPUDevice::OnCrash()
|
||||||
|
{
|
||||||
|
DumpResourcesToLog();
|
||||||
|
}
|
||||||
|
|
||||||
GPUTasksContext* GPUDevice::CreateTasksContext()
|
GPUTasksContext* GPUDevice::CreateTasksContext()
|
||||||
{
|
{
|
||||||
return New<GPUTasksContext>(this);
|
return New<GPUTasksContext>(this);
|
||||||
@@ -822,6 +826,16 @@ uint64 GPUDevice::GetMemoryUsage() const
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Base implementation of the GPU memory stats query.
// Dedicated memory is reported from GetMemoryUsage() and TotalGraphicsMemory,
// while the system (non-local) memory values are reported as zero.
GPUMemoryStats GPUDevice::GetMemoryStats()
{
    GPUMemoryStats result;

    // No system (non-local) memory information is filled in here
    result.UsedSystemMemory = 0;
    result.TotalSystemMemory = 0;

    // Dedicated memory: usage reported by GetMemoryUsage() against the total graphics memory of the device
    result.UsedDedicatedMemory = GetMemoryUsage();
    result.TotalDedicatedMemory = TotalGraphicsMemory;

    return result;
}
|
||||||
|
|
||||||
Array<GPUResource*> GPUDevice::GetResources() const
|
Array<GPUResource*> GPUDevice::GetResources() const
|
||||||
{
|
{
|
||||||
_resourcesLock.Lock();
|
_resourcesLock.Lock();
|
||||||
|
|||||||
@@ -33,6 +33,34 @@ class Model;
|
|||||||
class Material;
|
class Material;
|
||||||
class MaterialBase;
|
class MaterialBase;
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Contains information about current GPU memory usage and budget.
|
||||||
|
/// </summary>
|
||||||
|
API_STRUCT(NoDefault) struct GPUMemoryStats
|
||||||
|
{
|
||||||
|
DECLARE_SCRIPTING_TYPE_MINIMAL(GPUMemoryStats);
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Amount of used dedicated video memory in bytes. Memory local to the device, and represents the fastest available memory to the GPU.
|
||||||
|
/// </summary>
|
||||||
|
API_FIELD() uint64 UsedDedicatedMemory = 0;
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Total amount of dedicated memory budget in bytes. Memory local to the device, and represents the fastest available memory to the GPU.
|
||||||
|
/// </summary>
|
||||||
|
API_FIELD() uint64 TotalDedicatedMemory = 0;
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Amount of used system video memory in bytes. Memory non-local to the device, and may have slower performance than the dedicated/local.
|
||||||
|
/// </summary>
|
||||||
|
API_FIELD() uint64 UsedSystemMemory = 0;
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Total amount of system memory budget in bytes. Memory non-local to the device, and may have slower performance than the dedicated/local.
|
||||||
|
/// </summary>
|
||||||
|
API_FIELD() uint64 TotalSystemMemory = 0;
|
||||||
|
};
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Graphics device object for rendering on GPU.
|
/// Graphics device object for rendering on GPU.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
@@ -272,10 +300,15 @@ public:
|
|||||||
API_PROPERTY() virtual void* GetNativePtr() const = 0;
|
API_PROPERTY() virtual void* GetNativePtr() const = 0;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Gets the amount of memory usage by all the GPU resources (in bytes).
|
/// Gets the amount of memory used by all the GPU resources (in bytes). The returned value is an estimate based on the resources created by the engine and might not be accurate. Use GetMemoryStats (GPUMemoryStats) for more detailed memory budget usage.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
API_PROPERTY() uint64 GetMemoryUsage() const;
|
API_PROPERTY() uint64 GetMemoryUsage() const;
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Gets the current GPU memory stats.
|
||||||
|
/// </summary>
|
||||||
|
API_PROPERTY() virtual GPUMemoryStats GetMemoryStats();
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Gets the list with all active GPU resources.
|
/// Gets the list with all active GPU resources.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
@@ -417,6 +450,11 @@ protected:
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
virtual void RenderEnd();
|
virtual void RenderEnd();
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Called when the program crashes due to a GPU error (out of memory, hang, or other error - see Engine::FatalError). By default, it dumps all GPU resources to the log. Can be used to update platform-specific stats or extract crash info.
|
||||||
|
/// </summary>
|
||||||
|
virtual void OnCrash();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Creates the texture.
|
/// Creates the texture.
|
||||||
|
|||||||
@@ -578,6 +578,28 @@ GPUDeviceDX12::GPUDeviceDX12(IDXGIFactory4* dxgiFactory, GPUAdapterDX* adapter)
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reports GPU memory usage and budget using the values returned by the D3D12 memory allocator.
GPUMemoryStats GPUDeviceDX12::GetMemoryStats()
{
    GPUMemoryStats result;

    // Query the allocator for the local and non-local budgets
    D3D12MA::Budget local, nonLocal;
    Allocator->GetBudget(&local, &nonLocal);

    if (!Allocator->IsUMA())
    {
        // Discrete GPU: local budget maps to dedicated memory, non-local budget maps to system memory
        result.UsedDedicatedMemory = local.UsageBytes;
        result.TotalDedicatedMemory = local.BudgetBytes;
        result.UsedSystemMemory = nonLocal.UsageBytes;
        result.TotalSystemMemory = nonLocal.BudgetBytes;
        return result;
    }

    // UMA (Unified Memory Architecture) means no dedicated video memory and the system memory is shared between CPU and GPU,
    // so the local budget is reported as system memory and the dedicated values stay at their defaults
    result.UsedSystemMemory = local.UsageBytes;
    result.TotalSystemMemory = local.BudgetBytes;
    return result;
}
|
||||||
|
|
||||||
bool GPUDeviceDX12::Init()
|
bool GPUDeviceDX12::Init()
|
||||||
{
|
{
|
||||||
#if PLATFORM_XBOX_SCARLETT || PLATFORM_XBOX_ONE
|
#if PLATFORM_XBOX_SCARLETT || PLATFORM_XBOX_ONE
|
||||||
@@ -972,6 +994,19 @@ void GPUDeviceDX12::RenderEnd()
|
|||||||
heap->EndQueryBatchAndResolveQueryData(_mainContext);
|
heap->EndQueryBatchAndResolveQueryData(_mainContext);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void GPUDeviceDX12::OnCrash()
|
||||||
|
{
|
||||||
|
// Dump allocator stats
|
||||||
|
//D3D12MA::TotalStatistics statistics;
|
||||||
|
//Allocator->CalculateStatistics(&statistics);
|
||||||
|
D3D12MA::Budget localBudget, nonLocalBudget;
|
||||||
|
Allocator->GetBudget(&localBudget, &nonLocalBudget);
|
||||||
|
LOG(Info, "[D3D12 Memory] Local budget: {} / {} bytes (blocks: {}, allocs: {})", localBudget.UsageBytes, localBudget.BudgetBytes, localBudget.Stats.BlockCount, localBudget.Stats.AllocationCount);
|
||||||
|
LOG(Info, "[D3D12 Memory] Non-local budget: {} / {} bytes (blocks: {}, allocs: {})", nonLocalBudget.UsageBytes, nonLocalBudget.BudgetBytes, nonLocalBudget.Stats.BlockCount, nonLocalBudget.Stats.AllocationCount);
|
||||||
|
|
||||||
|
GPUDeviceDX::OnCrash();
|
||||||
|
}
|
||||||
|
|
||||||
GPUDeviceDX12::~GPUDeviceDX12()
|
GPUDeviceDX12::~GPUDeviceDX12()
|
||||||
{
|
{
|
||||||
// Ensure to be disposed
|
// Ensure to be disposed
|
||||||
|
|||||||
@@ -33,6 +33,7 @@ namespace D3D12MA
|
|||||||
};
|
};
|
||||||
#define _D3D12MA_JSON_WRITER 0
|
#define _D3D12MA_JSON_WRITER 0
|
||||||
#define _D3D12MA_STRING_BUILDER 0
|
#define _D3D12MA_STRING_BUILDER 0
|
||||||
|
#define D3D12MA_NO_HELPERS 1
|
||||||
#if !BUILD_DEBUG
|
#if !BUILD_DEBUG
|
||||||
#define D3D12MA_ASSERT(cond)
|
#define D3D12MA_ASSERT(cond)
|
||||||
#endif
|
#endif
|
||||||
@@ -191,7 +192,6 @@ private:
|
|||||||
void updateRes2Dispose();
|
void updateRes2Dispose();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
// [GPUDeviceDX]
|
// [GPUDeviceDX]
|
||||||
GPUContext* GetMainContext() override
|
GPUContext* GetMainContext() override
|
||||||
{
|
{
|
||||||
@@ -201,9 +201,11 @@ public:
|
|||||||
{
|
{
|
||||||
return _device;
|
return _device;
|
||||||
}
|
}
|
||||||
|
GPUMemoryStats GetMemoryStats() override;
|
||||||
bool Init() override;
|
bool Init() override;
|
||||||
void DrawBegin() override;
|
void DrawBegin() override;
|
||||||
void RenderEnd() override;
|
void RenderEnd() override;
|
||||||
|
void OnCrash() override;
|
||||||
void Dispose() final override;
|
void Dispose() final override;
|
||||||
void WaitForGPU() override;
|
void WaitForGPU() override;
|
||||||
bool GetQueryResult(uint64 queryID, uint64& result, bool wait = false) override;
|
bool GetQueryResult(uint64 queryID, uint64& result, bool wait = false) override;
|
||||||
|
|||||||
@@ -1537,6 +1537,31 @@ void* GPUDeviceVulkan::GetNativePtr() const
|
|||||||
return _nativePtr;
|
return _nativePtr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reports GPU memory usage and budget by summing the per-heap budgets returned by the Vulkan memory allocator.
GPUMemoryStats GPUDeviceVulkan::GetMemoryStats()
{
    GPUMemoryStats result;

    // Fetch the per-heap usage/budget and the memory heap descriptions from the allocator
    VmaBudget heapBudgets[VK_MAX_MEMORY_HEAPS];
    vmaGetHeapBudgets(Allocator, heapBudgets);
    const VkPhysicalDeviceMemoryProperties* props;
    vmaGetMemoryProperties(Allocator, &props);

    for (uint32 heapIndex = 0; heapIndex < props->memoryHeapCount; heapIndex++)
    {
        const VkMemoryHeap heapInfo = props->memoryHeaps[heapIndex];
        const VmaBudget& heapBudget = heapBudgets[heapIndex];

        // Only device-local heaps on a discrete GPU count as dedicated memory; every other heap counts as system memory
        const bool isDedicated = (heapInfo.flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) && Adapter->GpuProps.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
        auto& used = isDedicated ? result.UsedDedicatedMemory : result.UsedSystemMemory;
        auto& total = isDedicated ? result.TotalDedicatedMemory : result.TotalSystemMemory;
        used += heapBudget.usage;
        total += heapBudget.budget;
    }

    return result;
}
|
||||||
|
|
||||||
static int32 GetMaxSampleCount(VkSampleCountFlags counts)
|
static int32 GetMaxSampleCount(VkSampleCountFlags counts)
|
||||||
{
|
{
|
||||||
if (counts & VK_SAMPLE_COUNT_64_BIT)
|
if (counts & VK_SAMPLE_COUNT_64_BIT)
|
||||||
|
|||||||
@@ -569,6 +569,7 @@ public:
|
|||||||
GPUContext* GetMainContext() override;
|
GPUContext* GetMainContext() override;
|
||||||
GPUAdapter* GetAdapter() const override;
|
GPUAdapter* GetAdapter() const override;
|
||||||
void* GetNativePtr() const override;
|
void* GetNativePtr() const override;
|
||||||
|
GPUMemoryStats GetMemoryStats() override;
|
||||||
bool Init() override;
|
bool Init() override;
|
||||||
void DrawBegin() override;
|
void DrawBegin() override;
|
||||||
void Dispose() override;
|
void Dispose() override;
|
||||||
|
|||||||
@@ -41,6 +41,7 @@
|
|||||||
#define VMA_SYSTEM_ALIGNED_FREE(ptr) Platform::Free(ptr)
|
#define VMA_SYSTEM_ALIGNED_FREE(ptr) Platform::Free(ptr)
|
||||||
#define VMA_NULLABLE
|
#define VMA_NULLABLE
|
||||||
#define VMA_NOT_NULL
|
#define VMA_NOT_NULL
|
||||||
|
#define VMA_STATS_STRING_ENABLED 0
|
||||||
#include <ThirdParty/VulkanMemoryAllocator/vk_mem_alloc.h>
|
#include <ThirdParty/VulkanMemoryAllocator/vk_mem_alloc.h>
|
||||||
|
|
||||||
#if PLATFORM_APPLE_FAMILY
|
#if PLATFORM_APPLE_FAMILY
|
||||||
|
|||||||
@@ -15,6 +15,7 @@
|
|||||||
#include "Engine/Scripting/ManagedCLR/MCore.h"
|
#include "Engine/Scripting/ManagedCLR/MCore.h"
|
||||||
#include "Engine/Threading/ThreadLocal.h"
|
#include "Engine/Threading/ThreadLocal.h"
|
||||||
#include "Engine/Utilities/StringConverter.h"
|
#include "Engine/Utilities/StringConverter.h"
|
||||||
|
#include "Engine/Graphics/GPUDevice.h"
|
||||||
#include <ThirdParty/tracy/tracy/Tracy.hpp>
|
#include <ThirdParty/tracy/tracy/Tracy.hpp>
|
||||||
|
|
||||||
#define GROUPS_COUNT (int32)ProfilerMemory::Groups::MAX
|
#define GROUPS_COUNT (int32)ProfilerMemory::Groups::MAX
|
||||||
@@ -338,11 +339,17 @@ void TickProfilerMemory()
|
|||||||
memory.UsedPhysicalMemory -= GroupMemory[(int32)ProfilerMemory::Groups::Profiler];
|
memory.UsedPhysicalMemory -= GroupMemory[(int32)ProfilerMemory::Groups::Profiler];
|
||||||
GroupMemory[(int32)ProfilerMemory::Groups::Total] = memory.UsedPhysicalMemory;
|
GroupMemory[(int32)ProfilerMemory::Groups::Total] = memory.UsedPhysicalMemory;
|
||||||
GroupMemory[(int32)ProfilerMemory::Groups::TotalUntracked] = Math::Max<int64>(memory.UsedPhysicalMemory - GroupMemory[(int32)ProfilerMemory::Groups::TotalTracked], 0);
|
GroupMemory[(int32)ProfilerMemory::Groups::TotalUntracked] = Math::Max<int64>(memory.UsedPhysicalMemory - GroupMemory[(int32)ProfilerMemory::Groups::TotalTracked], 0);
|
||||||
|
if (GPUDevice::Instance)
|
||||||
|
{
|
||||||
|
auto memoryGPU = GPUDevice::Instance->GetMemoryStats();
|
||||||
|
GroupMemory[(int32)ProfilerMemory::Groups::TotalGPU] = memoryGPU.UsedDedicatedMemory + memoryGPU.UsedSystemMemory;
|
||||||
|
}
|
||||||
|
|
||||||
// Update peeks
|
// Update peeks
|
||||||
UPDATE_PEEK(ProfilerMemory::Groups::Profiler);
|
UPDATE_PEEK(ProfilerMemory::Groups::Profiler);
|
||||||
UPDATE_PEEK(ProfilerMemory::Groups::Total);
|
UPDATE_PEEK(ProfilerMemory::Groups::Total);
|
||||||
UPDATE_PEEK(ProfilerMemory::Groups::TotalUntracked);
|
UPDATE_PEEK(ProfilerMemory::Groups::TotalUntracked);
|
||||||
|
UPDATE_PEEK(ProfilerMemory::Groups::TotalGPU);
|
||||||
GroupMemoryPeek[(int32)ProfilerMemory::Groups::Total] = Math::Max(GroupMemoryPeek[(int32)ProfilerMemory::Groups::Total], GroupMemoryPeek[(int32)ProfilerMemory::Groups::TotalTracked]);
|
GroupMemoryPeek[(int32)ProfilerMemory::Groups::Total] = Math::Max(GroupMemoryPeek[(int32)ProfilerMemory::Groups::Total], GroupMemoryPeek[(int32)ProfilerMemory::Groups::TotalTracked]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -28,6 +28,8 @@ public:
|
|||||||
TotalTracked,
|
TotalTracked,
|
||||||
// Total amount of untracked memory (gap between total system memory usage and tracked memory size).
|
// Total amount of untracked memory (gap between total system memory usage and tracked memory size).
|
||||||
TotalUntracked,
|
TotalUntracked,
|
||||||
|
// Total amount of used GPU video memory (sum of dedicated and system memory).
|
||||||
|
TotalGPU,
|
||||||
// Initial memory used by program upon startup (eg. executable size, static variables).
|
// Initial memory used by program upon startup (eg. executable size, static variables).
|
||||||
ProgramSize,
|
ProgramSize,
|
||||||
// Profiling tool memory overhead.
|
// Profiling tool memory overhead.
|
||||||
|
|||||||
Reference in New Issue
Block a user