diff --git a/Source/Editor/Windows/Profiler/GPU.cs b/Source/Editor/Windows/Profiler/GPU.cs index 5cc9cd681..671866e41 100644 --- a/Source/Editor/Windows/Profiler/GPU.cs +++ b/Source/Editor/Windows/Profiler/GPU.cs @@ -49,7 +49,7 @@ namespace FlaxEditor.Windows.Profiler { Title = "Draw (GPU)", AnchorPreset = AnchorPresets.HorizontalStretchTop, - Offsets = new Margin(0, 0, _drawTimeCPU.Height + 2, 0), + Offsets = new Margin(0, 0, _drawTimeCPU.Height + 2, SingleChart.DefaultHeight), FormatSample = v => (Mathf.RoundToInt(v * 10.0f) / 10.0f) + " ms", Parent = mainPanel, }; diff --git a/Source/Engine/Graphics/Async/DefaultGPUTasksExecutor.cpp b/Source/Engine/Graphics/Async/DefaultGPUTasksExecutor.cpp index 18bf65ca3..89daf6d23 100644 --- a/Source/Engine/Graphics/Async/DefaultGPUTasksExecutor.cpp +++ b/Source/Engine/Graphics/Async/DefaultGPUTasksExecutor.cpp @@ -7,6 +7,7 @@ #include "Engine/Graphics/GPUDevice.h" #include "Engine/Graphics/GPUPass.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerGPU.h" DefaultGPUTasksExecutor::DefaultGPUTasksExecutor() : _context(nullptr) @@ -33,6 +34,7 @@ void DefaultGPUTasksExecutor::FrameBegin() const int32 count = GPUDevice::Instance->GetTasksManager()->RequestWork(buffer, 32); if (count == 0) return; + PROFILE_GPU("GPUTasks"); GPUMemoryPass pass(_context->GPU); for (int32 i = 0; i < count; i++) { diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp index 4c8d71fc5..e17dd6605 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp @@ -499,6 +499,7 @@ void GPUContextDX11::UpdateCB(GPUConstantBuffer* cb, const void* data) return; _context->UpdateSubresource(cbDX11->GetBuffer(), 0, nullptr, data, size, 1); + RENDER_STAT_DATA_UPLOAD(size); } void GPUContextDX11::Dispatch(GPUShaderProgramCS* shader, uint32 threadGroupCountX, uint32 
threadGroupCountY, uint32 threadGroupCountZ) @@ -904,6 +905,7 @@ void GPUContextDX11::UpdateBuffer(GPUBuffer* buffer, const void* data, uint32 si box.bottom = 1; _context->UpdateSubresource(bufferDX11->GetResource(), 0, &box, data, size, 0); } + RENDER_STAT_DATA_UPLOAD(size); } void GPUContextDX11::CopyBuffer(GPUBuffer* dstBuffer, GPUBuffer* srcBuffer, uint32 size, uint32 dstOffset, uint32 srcOffset) @@ -934,6 +936,7 @@ void GPUContextDX11::UpdateTexture(GPUTexture* texture, int32 arrayIndex, int32 if (texture->IsVolume()) depthPitch /= Math::Max(1, texture->Depth() >> mipIndex); _context->UpdateSubresource(textureDX11->GetResource(), subresourceIndex, nullptr, data, (UINT)rowPitch, (UINT)depthPitch); + RENDER_STAT_DATA_UPLOAD(slicePitch); //D3D11_MAPPED_SUBRESOURCE mapped; //_device->GetIM()->Map(_resource, textureMipIndex, D3D11_MAP_WRITE_DISCARD, 0, &mapped); diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp index 2e61471b7..5ddd83285 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp @@ -1131,6 +1131,7 @@ void GPUContextDX12::UpdateCB(GPUConstantBuffer* cb, const void* data) // Allocate bytes for the buffer auto allocation = _device->UploadBuffer.Allocate(size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); + RENDER_STAT_DATA_UPLOAD(size); // Copy data Platform::MemoryCopy(allocation.CPUAddress, data, allocation.Size); @@ -1388,6 +1389,7 @@ void GPUContextDX12::UpdateBuffer(GPUBuffer* buffer, const void* data, uint32 si flushRBs(); _device->UploadBuffer.UploadBuffer(GetCommandList(), bufferDX12->GetResource(), offset, data, size); + RENDER_STAT_DATA_UPLOAD(size); } void GPUContextDX12::CopyBuffer(GPUBuffer* dstBuffer, GPUBuffer* srcBuffer, uint32 size, uint32 dstOffset, uint32 srcOffset) @@ -1414,6 +1416,7 @@ void GPUContextDX12::UpdateTexture(GPUTexture* texture, int32 arrayIndex, 
int32 flushRBs(); _device->UploadBuffer.UploadTexture(GetCommandList(), textureDX12->GetResource(), data, rowPitch, slicePitch, mipIndex, arrayIndex); + RENDER_STAT_DATA_UPLOAD(slicePitch); } void GPUContextDX12::CopyTexture(GPUTexture* dstResource, uint32 dstSubresource, uint32 dstX, uint32 dstY, uint32 dstZ, GPUTexture* srcResource, uint32 srcSubresource) @@ -1469,6 +1472,7 @@ void GPUContextDX12::ResetCounter(GPUBuffer* buffer) uint32 value = 0; _device->UploadBuffer.UploadBuffer(GetCommandList(), counter->GetResource(), 0, &value, 4); + RENDER_STAT_DATA_UPLOAD(4); SetResourceState(counter, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); } diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/ResourceOwnerDX12.h b/Source/Engine/GraphicsDevice/DirectX/DX12/ResourceOwnerDX12.h index 80f2fea42..217c42e77 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/ResourceOwnerDX12.h +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/ResourceOwnerDX12.h @@ -13,7 +13,6 @@ namespace D3D12MA } class GPUResource; class GPUContextDX12; -class GPUAsyncContextDX12; /// <summary> /// Default amount of frames to wait until resource delete. 
@@ -59,7 +58,6 @@ public: class ResourceOwnerDX12 { friend GPUContextDX12; - friend GPUAsyncContextDX12; protected: D3D12MA::Allocation* _allocation = nullptr; diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp index 901e38157..b1c901786 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp @@ -682,6 +682,7 @@ void GPUContextVulkan::UpdateDescriptorSets(const SpirvShaderDescriptorInfo& des auto cb = (GPUConstantBufferVulkan*)_device->HelperResources.GetDummyConstantBuffer(); // TODO: cache this allocation within a frame const auto allocation = _device->UniformBufferUploader->Allocate(cb->GetSize(), 0, this); + RENDER_STAT_DATA_UPLOAD(allocation.Size); Platform::MemoryClear(allocation.CPUAddress, allocation.Size); cb->Allocation = allocation; handle = cb; @@ -1187,6 +1188,7 @@ void GPUContextVulkan::UpdateCB(GPUConstantBuffer* cb, const void* data) // Allocate bytes for the buffer const auto allocation = _device->UniformBufferUploader->Allocate(size, 0, this); + RENDER_STAT_DATA_UPLOAD(size); // Copy data Platform::MemoryCopy(allocation.CPUAddress, data, allocation.Size); @@ -1538,6 +1540,7 @@ void GPUContextVulkan::UpdateBuffer(GPUBuffer* buffer, const void* data, uint32 region.dstOffset = offset; vkCmdCopyBuffer(cmdBuffer->GetHandle(), allocation.Buffer, ((GPUBufferVulkan*)buffer)->GetHandle(), 1, &region); } + RENDER_STAT_DATA_UPLOAD(size); // Memory transfer barrier to ensure buffer is ready to read (eg. 
by Draw or Dispatch) if (_pass == 0) @@ -1586,6 +1589,7 @@ void GPUContextVulkan::UpdateTexture(GPUTexture* texture, int32 arrayIndex, int3 FlushBarriers(); auto allocation = _device->UploadBuffer.Upload(data, slicePitch, 512); + RENDER_STAT_DATA_UPLOAD(slicePitch); // Setup buffer copy region int32 mipWidth, mipHeight, mipDepth; diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.cpp b/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.cpp index 4365b8c7e..3d007cea0 100644 --- a/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.cpp +++ b/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.cpp @@ -364,6 +364,7 @@ void GPUContextWebGPU::UpdateCB(GPUConstantBuffer* cb, const void* data) cbWebGPU->AllocationSize = alignedSize; // TODO: consider holding CPU-side staging buffer and copying data to the GPU buffer in a single batch for all uniforms (before flushing the active command encoder) wgpuQueueWriteBuffer(_device->Queue, allocation.Buffer, allocation.Offset, data, size); + RENDER_STAT_DATA_UPLOAD(size); _bindGroupDirty = true; } } @@ -593,6 +594,7 @@ void GPUContextWebGPU::UpdateBuffer(GPUBuffer* buffer, const void* data, uint32 // Efficient upload via queue wgpuQueueWriteBuffer(_device->Queue, bufferWebGPU->Buffer, offset, data, size); } + RENDER_STAT_DATA_UPLOAD(size); } void GPUContextWebGPU::CopyBuffer(GPUBuffer* dstBuffer, GPUBuffer* srcBuffer, uint32 size, uint32 dstOffset, uint32 srcOffset) @@ -633,6 +635,7 @@ void GPUContextWebGPU::UpdateTexture(GPUTexture* texture, int32 arrayIndex, int3 dataLayout.rowsPerImage = mipHeight; WGPUExtent3D writeSize = { (uint32_t)mipWidth, (uint32_t)mipHeight, (uint32_t)mipDepth }; wgpuQueueWriteTexture(_device->Queue, &copyInfo, data, slicePitch, &dataLayout, &writeSize); + RENDER_STAT_DATA_UPLOAD(slicePitch); } void GPUContextWebGPU::CopyTexture(GPUTexture* dstResource, uint32 dstSubresource, uint32 dstX, uint32 dstY, uint32 dstZ, GPUTexture* srcResource, uint32 srcSubresource) diff --git 
a/Source/Engine/Profiler/ProfilerGPU.cpp b/Source/Engine/Profiler/ProfilerGPU.cpp index 2cf40afb2..35b7267d2 100644 --- a/Source/Engine/Profiler/ProfilerGPU.cpp +++ b/Source/Engine/Profiler/ProfilerGPU.cpp @@ -439,6 +439,8 @@ void GraphicsDumping::Print() sb.AppendFormat(TEXT(", 1 tri, {} verts"), FormatValue(NameBuffers[0], item.Stats.Vertices)); else if (item.Stats.Triangles != 0) sb.AppendFormat(TEXT(", {} tris, {} verts"), FormatValue(NameBuffers[0], item.Stats.Triangles), FormatValue(NameBuffers[1], item.Stats.Vertices)); + if (item.Stats.DataUpload > 4096) + sb.AppendFormat(TEXT(", {} sent"), Utilities::BytesToText(item.Stats.DataUpload)); } else { diff --git a/Source/Engine/Profiler/RenderStats.h b/Source/Engine/Profiler/RenderStats.h index 7afbeb1d4..8441ec5e0 100644 --- a/Source/Engine/Profiler/RenderStats.h +++ b/Source/Engine/Profiler/RenderStats.h @@ -16,39 +16,32 @@ API_STRUCT() struct RenderStatsData /// <summary> /// The draw calls count. /// </summary> - API_FIELD() int64 DrawCalls; + API_FIELD() int64 DrawCalls = 0; /// <summary> /// The compute shader dispatch calls count. /// </summary> - API_FIELD() int64 DispatchCalls; + API_FIELD() int64 DispatchCalls = 0; /// <summary> /// The vertices drawn count. /// </summary> - API_FIELD() int64 Vertices; + API_FIELD() int64 Vertices = 0; /// <summary> /// The triangles drawn count. /// </summary> - API_FIELD() int64 Triangles; + API_FIELD() int64 Triangles = 0; /// <summary> /// The pipeline state changes count. /// </summary> - API_FIELD() int64 PipelineStateChanges; + API_FIELD() int64 PipelineStateChanges = 0; /// <summary> - /// Initializes a new instance of the struct. + /// The amount of bytes uploaded to GPU (to buffers and textures). /// </summary> - RenderStatsData() - : DrawCalls(0) - , DispatchCalls(0) - , Vertices(0) - , Triangles(0) - , PipelineStateChanges(0) - { - } + API_FIELD() int64 DataUpload = 0; /// <summary> /// The global rendering stats counter. 
@@ -67,6 +60,7 @@ API_STRUCT() struct RenderStatsData MIX(Vertices); MIX(Triangles); MIX(PipelineStateChanges); + MIX(DataUpload); #undef MIX } @@ -78,6 +72,7 @@ API_STRUCT() struct RenderStatsData MIX(Vertices); MIX(Triangles); MIX(PipelineStateChanges); + MIX(DataUpload); #undef MIX return *this; } @@ -90,6 +85,7 @@ API_STRUCT() struct RenderStatsData MIX(Vertices); MIX(Triangles); MIX(PipelineStateChanges); + MIX(DataUpload); #undef MIX return *this; } @@ -97,6 +93,7 @@ API_STRUCT() struct RenderStatsData #define RENDER_STAT_DISPATCH_CALL() Platform::InterlockedIncrement(&RenderStatsData::Counter.DispatchCalls) #define RENDER_STAT_PS_STATE_CHANGE() Platform::InterlockedIncrement(&RenderStatsData::Counter.PipelineStateChanges) +#define RENDER_STAT_DATA_UPLOAD(bytes) Platform::InterlockedAdd(&RenderStatsData::Counter.DataUpload, bytes) #define RENDER_STAT_DRAW_CALL(vertices, triangles) \ Platform::InterlockedIncrement(&RenderStatsData::Counter.DrawCalls); \ Platform::InterlockedAdd(&RenderStatsData::Counter.Vertices, vertices); \ @@ -106,6 +103,7 @@ API_STRUCT() struct RenderStatsData #define RENDER_STAT_DISPATCH_CALL() #define RENDER_STAT_PS_STATE_CHANGE() +#define RENDER_STAT_DATA_UPLOAD(bytes) #define RENDER_STAT_DRAW_CALL(vertices, primitives) #endif