diff --git a/Source/Engine/Graphics/GPUContext.cpp b/Source/Engine/Graphics/GPUContext.cpp
index 3ef903be2..bec817cb9 100644
--- a/Source/Engine/Graphics/GPUContext.cpp
+++ b/Source/Engine/Graphics/GPUContext.cpp
@@ -2,6 +2,7 @@
 
 #include "GPUContext.h"
 #include "GPUDevice.h"
+#include "GPUPass.h"
 #include "RenderTask.h"
 #include "Textures/GPUTexture.h"
 
@@ -137,3 +138,9 @@ void GPUContext::SetResourceState(GPUResource* resource, uint64 state, int32 sub
 void GPUContext::ForceRebindDescriptors()
 {
 }
+
+// Default implementation binds the pass attachments as regular render targets (attachment actions are not used here).
+void GPUContext::BeginDrawPass(GPUDrawPass& pass)
+{
+    SetRenderTarget(pass.DepthBuffer, ToSpan(pass.RenderTargets, pass.RenderTargetsCount));
+}
diff --git a/Source/Engine/Graphics/GPUContext.h b/Source/Engine/Graphics/GPUContext.h
index 68cdfa77b..3a103d3da 100644
--- a/Source/Engine/Graphics/GPUContext.h
+++ b/Source/Engine/Graphics/GPUContext.h
@@ -27,6 +27,7 @@ class GPUTextureView;
 class GPUBufferView;
 class GPUVertexLayout;
 struct GPUPass;
+struct GPUDrawPass;
 enum class GPUResourceAccess;
 enum class GPUQueryType;
 
@@ -697,4 +698,12 @@ public:
     virtual void OverlapUA(bool end)
     {
     }
+
+    // Begins draw pass rendering. See GPUDrawPass.
+    virtual void BeginDrawPass(GPUDrawPass& pass);
+
+    // Ends draw pass rendering. See GPUDrawPass.
+    virtual void EndDrawPass()
+    {
+    }
 };
diff --git a/Source/Engine/Graphics/GPUPass.h b/Source/Engine/Graphics/GPUPass.h
index 5a0520ec0..7f98c15ce 100644
--- a/Source/Engine/Graphics/GPUPass.h
+++ b/Source/Engine/Graphics/GPUPass.h
@@ -71,4 +71,95 @@ struct FLAXENGINE_API GPUComputePass : GPUPass
     }
 };
 
-// TODO: add GPUDrawPass for render targets and depth/stencil setup with optimized clear for faster drawing on tiled-GPUs (mobile)
+/// <summary>
+/// GPU pass operations on attached render targets and depth buffer. Defines the load/store actions for each attachment to optimize GPU rendering by reducing memory bandwidth usage.
+/// </summary>
+enum class GPUDrawPassAction
+{
+    // No action, the content of the render target or depth buffer is undefined. Discards the resulting value of the render pass for this attachment.
+    None = 0,
+
+    // Loads the existing value for this attachment into the draw pass.
+    Load = 1,
+
+    // Loads the clear value for this attachment into the draw pass. Clear value is provided by the GPUContext::Clear performed on the texture before pass begins.
+    Clear = 2,
+
+    // Stores the resulting value of the render pass for this attachment.
+    Store = 4,
+
+    // Resolves the resulting MSAA value and stores final value into the attachment.
+    ResolveMultisample = 8,
+
+    // Mask of flags allowed by load operation (reading data from attachment).
+    LoadMask = None | Load | Clear,
+
+    // Mask of flags allowed by store operation (writing data to attachment).
+    StoreMask = None | Store | ResolveMultisample,
+
+    // Loads the existing value for this attachment into the draw pass and stores the resulting value of the render pass for this attachment.
+    LoadStore = Load | Store,
+
+    // Loads the clear value for this attachment into the draw pass and stores the resulting value of the render pass for this attachment.
+    ClearStore = Clear | Store,
+};
+
+/// <summary>
+/// GPU pass that explicitly defines render targets and depth buffer within a rendering pass. Can be used to optimize GPU rendering on tiled GPUs with manual control over attachment operations (load, store, clear, discard, etc.) that reduce memory bandwidth usage.
+/// </summary>
+/// <remarks>Draw Pass discards any render targets or depth buffer bound to the context (reduces state-tracking).</remarks>
+struct FLAXENGINE_API GPUDrawPass : GPUPass
+{
+    // Depth buffer attachment (null when the pass has no depth buffer).
+    GPUTextureView* DepthBuffer;
+    // Color attachments (RenderTargetsCount entries). Points to the caller-provided array (not copied).
+    GPUTextureView** RenderTargets;
+    // Per-attachment actions matching RenderTargets, or null when no explicit actions were provided. Points to the caller-provided array (not copied).
+    GPUDrawPassAction* RenderTargetsActions;
+    int32 RenderTargetsCount;
+    // Action used for the depth buffer attachment.
+    GPUDrawPassAction DepthAction;
+
+    // Begins the pass with color attachments only (no depth buffer, no explicit attachment actions).
+    GPUDrawPass(GPUContext* context, Span<GPUTextureView*> renderTargets)
+        : GPUPass(context)
+        , DepthBuffer(nullptr)
+        , RenderTargets(renderTargets.Get())
+        , RenderTargetsActions(nullptr)
+        , RenderTargetsCount(renderTargets.Length())
+        , DepthAction(GPUDrawPassAction::None)
+    {
+        Context->BeginDrawPass(*this);
+    }
+
+    // Begins the pass with depth buffer (LoadStore action) and color attachments (no explicit attachment actions).
+    GPUDrawPass(GPUContext* context, GPUTextureView* depthBuffer, Span<GPUTextureView*> renderTargets)
+        : GPUPass(context)
+        , DepthBuffer(depthBuffer)
+        , RenderTargets(renderTargets.Get())
+        , RenderTargetsActions(nullptr)
+        , RenderTargetsCount(renderTargets.Length())
+        , DepthAction(GPUDrawPassAction::LoadStore)
+    {
+        Context->BeginDrawPass(*this);
+    }
+
+    // Begins the pass with explicit actions for the depth buffer and for every color attachment (both spans must have the same length).
+    GPUDrawPass(GPUContext* context, GPUTextureView* depthBuffer, GPUDrawPassAction depthAction, Span<GPUTextureView*> renderTargets, Span<GPUDrawPassAction> renderTargetsActions)
+        : GPUPass(context)
+        , DepthBuffer(depthBuffer)
+        , RenderTargets(renderTargets.Get())
+        , RenderTargetsActions(renderTargetsActions.Get())
+        , RenderTargetsCount(renderTargets.Length())
+        , DepthAction(depthAction)
+    {
+        ASSERT_LOW_LAYER(renderTargets.Length() == renderTargetsActions.Length());
+        Context->BeginDrawPass(*this);
+    }
+
+    // Ends the pass on the context it was started on.
+    ~GPUDrawPass()
+    {
+        Context->EndDrawPass();
+    }
+};
diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp
index e54a1b9a3..6b552d2b7 100644
--- a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp
+++ b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp
@@ -17,6 +17,7 @@
 #include "GPUShaderProgramVulkan.h"
 #include "GPUTextureVulkan.h"
 #include "QueueVulkan.h"
+#include "Engine/Graphics/GPUPass.h"
 #include "Engine/Graphics/PixelFormatExtensions.h"
 #include "Engine/Debug/Exceptions/NotImplementedException.h"
 
@@ -436,11 +437,24 @@ void GPUContextVulkan::BeginRenderPass()
             layout.RTVsFormats[i] = handle->GetFormat();
             framebufferKey.Attachments[i] = handle->GetFramebufferView();
             AddImageBarrier(handle, handle->LayoutRTV);
+            uint64 mask = 1ull << i;
             if (FindClear(handle, clear))
             {
-                layout.ClearFlags |= 1 << i;
+                layout.LoadClear |= mask;
                 clearValues[i] = clear.Value;
             }
+            if (_drawPass && _drawPass->RenderTargetsActions)
+            {
+                GPUDrawPassAction action = _drawPass->RenderTargetsActions[i];
+                // NOTE(review): if Clear is requested without a pending GPUContext::Clear then clearValues[i] is not written here - confirm it is initialized elsewhere
+                if ((uint32)action & (uint32)GPUDrawPassAction::Clear)
+                    layout.LoadClear |= mask;
+                else if (((uint32)action & (uint32)GPUDrawPassAction::LoadMask) == 0)
+                    layout.LoadDontCare |= mask;
+                // Store action is independent from the load action (eg. None discards on both load and store)
+                if (((uint32)action & (uint32)GPUDrawPassAction::StoreMask) == 0)
+                    layout.StoreDontCare |= mask;
+            }
         }
         else
         {
@@ -459,11 +473,24 @@ void GPUContextVulkan::BeginRenderPass()
         framebufferKey.AttachmentCount++;
         framebufferKey.Attachments[_rtCount] = handle->GetFramebufferView();
         AddImageBarrier(handle, handle->LayoutRTV);
+        uint64 mask = 1ull << _rtCount;
         if (FindClear(handle, clear))
         {
-            layout.ClearFlags |= 1 << _rtCount;
+            layout.LoadClear |= mask;
             clearValues[_rtCount] = clear.Value;
         }
+        if (_drawPass)
+        {
+            GPUDrawPassAction action = _drawPass->DepthAction;
+            // NOTE(review): the RenderPassVulkan changes in this patch only read LoadClear/LoadDontCare for depth - confirm StoreDontCare is applied to the depth storeOp
+            if ((uint32)action & (uint32)GPUDrawPassAction::Clear)
+                layout.LoadClear |= mask;
+            else if (((uint32)action & (uint32)GPUDrawPassAction::LoadMask) == 0)
+                layout.LoadDontCare |= mask;
+            // Store action is independent from the load action (eg. None discards on both load and store)
+            if (((uint32)action & (uint32)GPUDrawPassAction::StoreMask) == 0)
+                layout.StoreDontCare |= mask;
+        }
     }
     else
     {
@@ -1004,7 +1031,7 @@ void GPUContextVulkan::SetRenderTarget(GPUTextureView* depthBuffer, GPUTextureVi
 
 void GPUContextVulkan::SetRenderTarget(GPUTextureView* depthBuffer, const Span<GPUTextureView*>& rts)
 {
-    ASSERT(Math::IsInRange(rts.Length(), 1, GPU_MAX_RT_BINDED));
+    ASSERT(Math::IsInRange(rts.Length(), 0, GPU_MAX_RT_BINDED));
 
     const auto depthBufferVulkan = static_cast<GPUTextureViewVulkan*>(depthBuffer);
 
@@ -1963,4 +1990,21 @@ void GPUContextVulkan::OverlapUA(bool end)
         AddUABarrier();
 }
 
+void GPUContextVulkan::BeginDrawPass(GPUDrawPass& pass)
+{
+    // Same limit as enforced by SetRenderTarget; validates the count before the handles are copied into _rtHandles
+    ASSERT(Math::IsInRange(pass.RenderTargetsCount, 0, GPU_MAX_RT_BINDED));
+    _drawPass = &pass;
+    _rtDirtyFlag = true;
+    _psDirtyFlag = true;
+    _rtCount = pass.RenderTargetsCount;
+    _rtDepth = (GPUTextureViewVulkan*)pass.DepthBuffer;
+    Platform::MemoryCopy(_rtHandles, pass.RenderTargets, pass.RenderTargetsCount * sizeof(void*));
+}
+
+void GPUContextVulkan::EndDrawPass()
+{
+    _drawPass = nullptr;
+}
+
 #endif
diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h
index 2ce1aaaea..819a8ed6c 100644
--- a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h
+++ b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h
@@ -91,6 +91,7 @@ private:
     uint32 _stencilRef;
 
     RenderPassVulkan* _renderPass;
+    GPUDrawPass* _drawPass = nullptr;
     GPUPipelineStateVulkan* _currentState;
     GPUShaderProgramCSVulkan* _currentCompute;
     GPUVertexLayoutVulkan* _vertexLayout;
@@ -219,6 +220,8 @@ public:
     void Transition(GPUResource* resource, GPUResourceAccess access) override;
     void MemoryBarrier() override;
     void OverlapUA(bool end) override;
+    void BeginDrawPass(GPUDrawPass& pass) override;
+    void EndDrawPass() override;
 };
 
 #endif
diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp
index a0f232251..4876e719a 100644
--- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp
+++ b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp
@@ -537,9 +537,9 @@ RenderPassVulkan::RenderPassVulkan(GPUDeviceVulkan* device, const RenderTargetLa
         attachment.flags = 0;
         attachment.format = RenderToolsVulkan::ToVulkanFormat(layout.RTVsFormats[i]);
         attachment.samples = (VkSampleCountFlagBits)layout.MSAA;
-        // TODO: we need render passes into high-level rendering api to perform more optimal rendering (esp. for tiled gpus)
-        attachment.loadOp = layout.ClearFlags & 1 << i ? VK_ATTACHMENT_LOAD_OP_CLEAR : VK_ATTACHMENT_LOAD_OP_LOAD;
-        attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
+        uint64 mask = 1ull << i;
+        attachment.loadOp = layout.LoadClear & mask ? VK_ATTACHMENT_LOAD_OP_CLEAR : (layout.LoadDontCare & mask ? VK_ATTACHMENT_LOAD_OP_DONT_CARE : VK_ATTACHMENT_LOAD_OP_LOAD);
+        attachment.storeOp = layout.StoreDontCare & mask ? VK_ATTACHMENT_STORE_OP_DONT_CARE : VK_ATTACHMENT_STORE_OP_STORE;
         attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
         attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
         attachment.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
@@ -592,10 +592,11 @@ RenderPassVulkan::RenderPassVulkan(GPUDeviceVulkan* device, const RenderTargetLa
         // TODO: use VK_ATTACHMENT_STORE_OP_NONE for readonly depth/stencil but check for 'VK_KHR_load_store_op_none' extension
         attachment.stencilLoadOp = layout.ReadStencil ? VK_ATTACHMENT_LOAD_OP_LOAD : VK_ATTACHMENT_LOAD_OP_DONT_CARE;
         attachment.stencilStoreOp = layout.WriteStencil ? VK_ATTACHMENT_STORE_OP_STORE : VK_ATTACHMENT_STORE_OP_DONT_CARE;
-        if (layout.ClearFlags & 1 << colorAttachmentsCount)
-        {
+        uint64 mask = 1ull << colorAttachmentsCount;
+        if (layout.LoadClear & mask)
             attachment.loadOp = attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
-        }
+        else if (layout.LoadDontCare & mask)
+            attachment.loadOp = attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
         attachment.initialLayout = depthStencilLayout;
         attachment.finalLayout = depthStencilLayout;
         depthStencilReference.attachment = colorAttachmentsCount;
diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h
index 61869e2e9..953ee79ee 100644
--- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h
+++ b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h
@@ -198,16 +198,18 @@ struct RenderTargetLayoutVulkan
     {
         struct
         {
-            uint32 Layers : 10; // Limited by GPU_MAX_TEXTURE_ARRAY_SIZE
-            uint32 RTsCount : 3; // Limited by GPU_MAX_RT_BINDED
-            uint32 ReadDepth : 1;
-            uint32 WriteDepth : 1;
-            uint32 ReadStencil : 1;
-            uint32 WriteStencil : 1;
-            uint32 ClearFlags : 7; // GPU_MAX_RT_BINDED + 1
+            uint64 Layers : 10; // Limited by GPU_MAX_TEXTURE_ARRAY_SIZE
+            uint64 RTsCount : 3; // Limited by GPU_MAX_RT_BINDED
+            uint64 ReadDepth : 1;
+            uint64 WriteDepth : 1;
+            uint64 ReadStencil : 1;
+            uint64 WriteStencil : 1;
+            uint64 LoadClear : 7; // GPU_MAX_RT_BINDED + 1
+            uint64 LoadDontCare : 7; // GPU_MAX_RT_BINDED + 1
+            uint64 StoreDontCare : 7; // GPU_MAX_RT_BINDED + 1
         };
 
-        uint32 Flags;
+        uint64 Flags;
     };
 
     MSAALevel MSAA;
diff --git a/Source/Engine/Renderer/GBufferPass.cpp b/Source/Engine/Renderer/GBufferPass.cpp
index 1f760619b..fd0b7987f 100644
--- a/Source/Engine/Renderer/GBufferPass.cpp
+++ b/Source/Engine/Renderer/GBufferPass.cpp
@@ -11,17 +11,18 @@
 #include "Engine/Core/Collections/Sorting.h"
 #include "Engine/Graphics/GPUDevice.h"
 #include "Engine/Graphics/GPUContext.h"
+#include "Engine/Graphics/GPUPass.h"
 #include "Engine/Graphics/Shaders/GPUShader.h"
 #include "Engine/Graphics/RenderTask.h"
 #include "Engine/Graphics/RenderBuffers.h"
 #include "Engine/Graphics/RenderTargetPool.h"
+#include "Engine/Graphics/Graphics.h"
 #include "Engine/Content/Assets/Shader.h"
 #include "Engine/Content/Content.h"
 #include "Engine/Content/Assets/Model.h"
 #include "Engine/Level/Actors/Decal.h"
 #include "Engine/Level/Actors/Sky.h"
 #include "Engine/Engine/Engine.h"
-#include "Engine/Graphics/Graphics.h"
 
 GPU_CB_STRUCT(GBufferPassData {
     ShaderGBufferData GBuffer;
@@ -167,13 +168,20 @@ void GBufferPass::Fill(RenderContext& renderContext, GPUTexture* lightBuffer)
         renderContext.Buffers->GBuffer2->View(),
         renderContext.Buffers->GBuffer3->View(),
     };
+    GPUDrawPassAction targetActions[5] =
+    {
+        GPUDrawPassAction::ClearStore,
+        GPUDrawPassAction::ClearStore,
+        GPUDrawPassAction::ClearStore,
+        GPUDrawPassAction::ClearStore,
+        GPUDrawPassAction::ClearStore,
+    };
+    static_assert(ARRAY_COUNT(targetActions) == ARRAY_COUNT(targetBuffers), "Every GBuffer render target needs a draw pass action.");
     renderContext.View.Pass = DrawPass::GBuffer;
     context->SetViewportAndScissors(renderContext.Buffers->GetViewport());
 
     // Clear GBuffer
     {
-        PROFILE_GPU_CPU_NAMED("Clear");
-
         context->ClearDepth(*renderContext.Buffers->DepthBuffer);
         context->Clear(lightBuffer->View(), Color::Transparent);
         context->Clear(renderContext.Buffers->GBuffer0->View(), Color::Transparent);
@@ -211,8 +219,15 @@ void GBufferPass::Fill(RenderContext& renderContext, GPUTexture* lightBuffer)
 #endif
 
     // Draw objects that can get decals
-    context->SetRenderTarget(*renderContext.Buffers->DepthBuffer, ToSpan(targetBuffers, ARRAY_COUNT(targetBuffers)));
-    renderContext.List->ExecuteDrawCalls(renderContext, DrawCallsListType::GBuffer);
+    {
+        GPUDrawPass drawPass(
+            context,
+            *renderContext.Buffers->DepthBuffer,
+            GPUDrawPassAction::ClearStore,
+            ToSpan(targetBuffers, ARRAY_COUNT(targetBuffers)),
+            ToSpan(targetActions, ARRAY_COUNT(targetActions)));
+        renderContext.List->ExecuteDrawCalls(renderContext, DrawCallsListType::GBuffer);
+    }
 
     // Draw decals
     DrawDecals(renderContext, lightBuffer->View());