diff --git a/Source/Engine/Graphics/GPUContext.cpp b/Source/Engine/Graphics/GPUContext.cpp
index 3ef903be2..bec817cb9 100644
--- a/Source/Engine/Graphics/GPUContext.cpp
+++ b/Source/Engine/Graphics/GPUContext.cpp
@@ -2,6 +2,7 @@
#include "GPUContext.h"
#include "GPUDevice.h"
+#include "GPUPass.h"
#include "RenderTask.h"
#include "Textures/GPUTexture.h"
@@ -137,3 +138,8 @@ void GPUContext::SetResourceState(GPUResource* resource, uint64 state, int32 sub
void GPUContext::ForceRebindDescriptors()
{
}
+
+// Default way of beginning a draw pass: the pass attachments are bound as regular render targets (attachment actions are not read here).
+void GPUContext::BeginDrawPass(GPUDrawPass& pass)
+{
+    const auto colorTargets = ToSpan(pass.RenderTargets, pass.RenderTargetsCount);
+    SetRenderTarget(pass.DepthBuffer, colorTargets);
+}
diff --git a/Source/Engine/Graphics/GPUContext.h b/Source/Engine/Graphics/GPUContext.h
index 68cdfa77b..3a103d3da 100644
--- a/Source/Engine/Graphics/GPUContext.h
+++ b/Source/Engine/Graphics/GPUContext.h
@@ -27,6 +27,7 @@ class GPUTextureView;
class GPUBufferView;
class GPUVertexLayout;
struct GPUPass;
+struct GPUDrawPass;
enum class GPUResourceAccess;
enum class GPUQueryType;
@@ -697,4 +698,12 @@ public:
virtual void OverlapUA(bool end)
{
}
+
+ // Begins draw pass rendering. See GPUDrawPass.
+ // The base implementation binds the pass depth buffer and render targets with SetRenderTarget and does not read the attachment actions.
+ virtual void BeginDrawPass(GPUDrawPass& pass);
+
+ // Ends draw pass rendering. See GPUDrawPass.
+ // The base implementation is empty.
+ virtual void EndDrawPass()
+ {
+ }
};
diff --git a/Source/Engine/Graphics/GPUPass.h b/Source/Engine/Graphics/GPUPass.h
index 5a0520ec0..7f98c15ce 100644
--- a/Source/Engine/Graphics/GPUPass.h
+++ b/Source/Engine/Graphics/GPUPass.h
@@ -71,4 +71,87 @@ struct FLAXENGINE_API GPUComputePass : GPUPass
}
};
-// TODO: add GPUDrawPass for render targets and depth/stencil setup with optimized clear for faster drawing on tiled-GPUs (mobile)
+/// <summary>
+/// Load/store operations applied to a render target or depth buffer attached to a GPU draw pass. Picking the cheapest action for each attachment reduces memory bandwidth usage.
+/// </summary>
+enum class GPUDrawPassAction
+{
+    // Nothing is loaded and nothing is kept: attachment contents are undefined when the pass begins and the result of the pass is discarded.
+    None = 0,
+
+    // The pass starts from the existing contents of the attachment.
+    Load = 1 << 0,
+
+    // The pass starts from the clear value of the attachment. The value comes from the GPUContext::Clear performed on the texture before the pass begins.
+    Clear = 1 << 1,
+
+    // The result of the pass is written back to the attachment.
+    Store = 1 << 2,
+
+    // The multisampled (MSAA) result is resolved and the final value is stored into the attachment.
+    ResolveMultisample = 1 << 3,
+
+    // All bits that describe the load operation (reading data from the attachment).
+    LoadMask = Load | Clear,
+
+    // All bits that describe the store operation (writing data to the attachment).
+    StoreMask = Store | ResolveMultisample,
+
+    // Start from the existing contents and keep the result of the pass.
+    LoadStore = Load | Store,
+
+    // Start from the clear value and keep the result of the pass.
+    ClearStore = Clear | Store,
+};
+
+/// <summary>
+/// GPU pass that explicitly defines render targets and depth buffer within a rendering pass. Can be used to optimize GPU rendering on tiled GPUs with manual control over attachment operations (load, store, clear, discard, etc.) that reduce memory bandwidth usage.
+/// </summary>
+/// <remarks>Draw Pass discards any render targets or depth buffer bound to the context (reduces state-tracking).</remarks>
+struct FLAXENGINE_API GPUDrawPass : GPUPass
+{
+    // Depth buffer attachment (null when the pass was created without one).
+    GPUTextureView* DepthBuffer;
+    // Render target attachments. Points into the span passed to the constructor (not copied).
+    GPUTextureView** RenderTargets;
+    // Per render target actions (null when none were provided). Points into the span passed to the constructor (not copied).
+    GPUDrawPassAction* RenderTargetsActions;
+    // Amount of entries in RenderTargets (and in RenderTargetsActions when set).
+    int32 RenderTargetsCount;
+    // Action used for the depth buffer attachment.
+    GPUDrawPassAction DepthAction;
+
+    // NOTE(review): a context may keep a pointer to this object and read the arrays above until the pass ends (the Vulkan context does),
+    // so the pass should not be copied while active and the spans have to outlive it.
+
+    // Begins a draw pass on render targets only: no depth buffer and no per-target actions.
+    GPUDrawPass(GPUContext* context, Span<GPUTextureView*> renderTargets)
+        : GPUPass(context)
+        , DepthBuffer(nullptr)
+        , RenderTargets(renderTargets.Get())
+        , RenderTargetsActions(nullptr)
+        , RenderTargetsCount(renderTargets.Length())
+        , DepthAction(GPUDrawPassAction::None)
+    {
+        Context->BeginDrawPass(*this);
+    }
+
+    // Begins a draw pass on a depth buffer and render targets. Depth uses LoadStore and no per-target actions are set.
+    GPUDrawPass(GPUContext* context, GPUTextureView* depthBuffer, Span<GPUTextureView*> renderTargets)
+        : GPUPass(context)
+        , DepthBuffer(depthBuffer)
+        , RenderTargets(renderTargets.Get())
+        , RenderTargetsActions(nullptr)
+        , RenderTargetsCount(renderTargets.Length())
+        , DepthAction(GPUDrawPassAction::LoadStore)
+    {
+        Context->BeginDrawPass(*this);
+    }
+
+    // Begins a draw pass with explicit actions for the depth buffer and for every render target (one action per target, same order).
+    GPUDrawPass(GPUContext* context, GPUTextureView* depthBuffer, GPUDrawPassAction depthAction, Span<GPUTextureView*> renderTargets, Span<GPUDrawPassAction> renderTargetsActions)
+        : GPUPass(context)
+        , DepthBuffer(depthBuffer)
+        , RenderTargets(renderTargets.Get())
+        , RenderTargetsActions(renderTargetsActions.Get())
+        , RenderTargetsCount(renderTargets.Length())
+        , DepthAction(depthAction)
+    {
+        // Actions are indexed by render target so both spans need the same length
+        ASSERT_LOW_LAYER(renderTargets.Length() == renderTargetsActions.Length());
+        Context->BeginDrawPass(*this);
+    }
+
+    // Ends the draw pass on the context it was started on.
+    ~GPUDrawPass()
+    {
+        Context->EndDrawPass();
+    }
+};
diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp
index e54a1b9a3..6b552d2b7 100644
--- a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp
+++ b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp
@@ -17,6 +17,7 @@
#include "GPUShaderProgramVulkan.h"
#include "GPUTextureVulkan.h"
#include "QueueVulkan.h"
+#include "Engine/Graphics/GPUPass.h"
#include "Engine/Graphics/PixelFormatExtensions.h"
#include "Engine/Debug/Exceptions/NotImplementedException.h"
@@ -436,11 +437,22 @@ void GPUContextVulkan::BeginRenderPass()
layout.RTVsFormats[i] = handle->GetFormat();
framebufferKey.Attachments[i] = handle->GetFramebufferView();
AddImageBarrier(handle, handle->LayoutRTV);
+ uint64 mask = 1ull << i;
if (FindClear(handle, clear))
{
- layout.ClearFlags |= 1 << i;
+ layout.LoadClear |= mask;
clearValues[i] = clear.Value;
}
+ if (_drawPass && _drawPass->RenderTargetsActions)
+ {
+     // Load and store operations are independent: pick the load op first, then decide about the store op
+     const uint32 action = (uint32)_drawPass->RenderTargetsActions[i];
+     if (action & (uint32)GPUDrawPassAction::Clear)
+         layout.LoadClear |= mask;
+     else if ((action & (uint32)GPUDrawPassAction::LoadMask) == 0)
+         layout.LoadDontCare |= mask;
+     // No store bits (eg. None or Clear alone) means the pass result is discarded for this attachment
+     if ((action & (uint32)GPUDrawPassAction::StoreMask) == 0)
+         layout.StoreDontCare |= mask;
+ }
}
else
{
@@ -459,11 +471,22 @@ void GPUContextVulkan::BeginRenderPass()
framebufferKey.AttachmentCount++;
framebufferKey.Attachments[_rtCount] = handle->GetFramebufferView();
AddImageBarrier(handle, handle->LayoutRTV);
+ uint64 mask = 1ull << _rtCount;
if (FindClear(handle, clear))
{
- layout.ClearFlags |= 1 << _rtCount;
+ layout.LoadClear |= mask;
clearValues[_rtCount] = clear.Value;
}
+ if (_drawPass)
+ {
+     // Load and store operations are independent: pick the load op first, then decide about the store op
+     const uint32 action = (uint32)_drawPass->DepthAction;
+     if (action & (uint32)GPUDrawPassAction::Clear)
+         layout.LoadClear |= mask;
+     else if ((action & (uint32)GPUDrawPassAction::LoadMask) == 0)
+         layout.LoadDontCare |= mask;
+     // No store bits (eg. None or Clear alone) means the pass result is discarded for the depth attachment
+     if ((action & (uint32)GPUDrawPassAction::StoreMask) == 0)
+         layout.StoreDontCare |= mask;
+ }
}
else
{
@@ -1004,7 +1027,7 @@ void GPUContextVulkan::SetRenderTarget(GPUTextureView* depthBuffer, GPUTextureVi
void GPUContextVulkan::SetRenderTarget(GPUTextureView* depthBuffer, const Span& rts)
{
- ASSERT(Math::IsInRange(rts.Length(), 1, GPU_MAX_RT_BINDED));
+ ASSERT(Math::IsInRange(rts.Length(), 0, GPU_MAX_RT_BINDED));
const auto depthBufferVulkan = static_cast(depthBuffer);
@@ -1963,4 +1986,19 @@ void GPUContextVulkan::OverlapUA(bool end)
AddUABarrier();
}
+// Begins the draw pass: replaces the bound depth buffer and render targets with the pass attachments and remembers the pass.
+void GPUContextVulkan::BeginDrawPass(GPUDrawPass& pass)
+{
+    // Same bound that SetRenderTarget asserts; guards the copy into _rtHandles below
+    // NOTE(review): assumes _rtHandles holds GPU_MAX_RT_BINDED entries - confirm
+    ASSERT(Math::IsInRange(pass.RenderTargetsCount, 0, GPU_MAX_RT_BINDED));
+
+    // Remember the pass so BeginRenderPass can read its attachment actions
+    _drawPass = &pass;
+
+    // Take over the attachments from the pass
+    _rtCount = pass.RenderTargetsCount;
+    _rtDepth = static_cast<GPUTextureViewVulkan*>(pass.DepthBuffer);
+    Platform::MemoryCopy(_rtHandles, pass.RenderTargets, pass.RenderTargetsCount * sizeof(void*));
+
+    // Flag render targets and pipeline state as dirty
+    _rtDirtyFlag = true;
+    _psDirtyFlag = true;
+}
+
+// Ends the draw pass by forgetting the pass object set in BeginDrawPass.
+void GPUContextVulkan::EndDrawPass()
+{
+ // BeginRenderPass reads attachment actions only while _drawPass is set
+ _drawPass = nullptr;
+}
+
#endif
diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h
index 2ce1aaaea..819a8ed6c 100644
--- a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h
+++ b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h
@@ -91,6 +91,7 @@ private:
uint32 _stencilRef;
RenderPassVulkan* _renderPass;
+ GPUDrawPass* _drawPass = nullptr;
GPUPipelineStateVulkan* _currentState;
GPUShaderProgramCSVulkan* _currentCompute;
GPUVertexLayoutVulkan* _vertexLayout;
@@ -219,6 +220,8 @@ public:
void Transition(GPUResource* resource, GPUResourceAccess access) override;
void MemoryBarrier() override;
void OverlapUA(bool end) override;
+ void BeginDrawPass(GPUDrawPass& pass) override;
+ void EndDrawPass() override;
};
#endif
diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp
index a0f232251..4876e719a 100644
--- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp
+++ b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp
@@ -537,9 +537,9 @@ RenderPassVulkan::RenderPassVulkan(GPUDeviceVulkan* device, const RenderTargetLa
attachment.flags = 0;
attachment.format = RenderToolsVulkan::ToVulkanFormat(layout.RTVsFormats[i]);
attachment.samples = (VkSampleCountFlagBits)layout.MSAA;
- // TODO: we need render passes into high-level rendering api to perform more optimal rendering (esp. for tiled gpus)
- attachment.loadOp = layout.ClearFlags & 1 << i ? VK_ATTACHMENT_LOAD_OP_CLEAR : VK_ATTACHMENT_LOAD_OP_LOAD;
- attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
+ uint64 mask = 1ull << i;
+ attachment.loadOp = layout.LoadClear & mask ? VK_ATTACHMENT_LOAD_OP_CLEAR : (layout.LoadDontCare & mask ? VK_ATTACHMENT_LOAD_OP_DONT_CARE : VK_ATTACHMENT_LOAD_OP_LOAD);
+ attachment.storeOp = layout.StoreDontCare & mask ? VK_ATTACHMENT_STORE_OP_DONT_CARE : VK_ATTACHMENT_STORE_OP_STORE;
attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachment.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
@@ -592,10 +592,11 @@ RenderPassVulkan::RenderPassVulkan(GPUDeviceVulkan* device, const RenderTargetLa
// TODO: use VK_ATTACHMENT_STORE_OP_NONE for readonly depth/stencil but check for 'VK_KHR_load_store_op_none' extension
attachment.stencilLoadOp = layout.ReadStencil ? VK_ATTACHMENT_LOAD_OP_LOAD : VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachment.stencilStoreOp = layout.WriteStencil ? VK_ATTACHMENT_STORE_OP_STORE : VK_ATTACHMENT_STORE_OP_DONT_CARE;
- if (layout.ClearFlags & 1 << colorAttachmentsCount)
- {
+ uint64 mask = 1ull << colorAttachmentsCount;
+ if (layout.LoadClear & mask)
attachment.loadOp = attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
- }
+ else if (layout.LoadDontCare & mask)
+ attachment.loadOp = attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachment.initialLayout = depthStencilLayout;
attachment.finalLayout = depthStencilLayout;
depthStencilReference.attachment = colorAttachmentsCount;
diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h
index 61869e2e9..953ee79ee 100644
--- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h
+++ b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h
@@ -198,16 +198,18 @@ struct RenderTargetLayoutVulkan
{
struct
{
- uint32 Layers : 10; // Limited by GPU_MAX_TEXTURE_ARRAY_SIZE
- uint32 RTsCount : 3; // Limited by GPU_MAX_RT_BINDED
- uint32 ReadDepth : 1;
- uint32 WriteDepth : 1;
- uint32 ReadStencil : 1;
- uint32 WriteStencil : 1;
- uint32 ClearFlags : 7; // GPU_MAX_RT_BINDED + 1
+ uint64 Layers : 10; // Limited by GPU_MAX_TEXTURE_ARRAY_SIZE
+ uint64 RTsCount : 3; // Limited by GPU_MAX_RT_BINDED
+ uint64 ReadDepth : 1;
+ uint64 WriteDepth : 1;
+ uint64 ReadStencil : 1;
+ uint64 WriteStencil : 1;
+ uint64 LoadClear : 7; // GPU_MAX_RT_BINDED + 1
+ uint64 LoadDontCare : 7; // GPU_MAX_RT_BINDED + 1
+ uint64 StoreDontCare : 7; // GPU_MAX_RT_BINDED + 1
};
- uint32 Flags;
+ uint64 Flags;
};
MSAALevel MSAA;
diff --git a/Source/Engine/Renderer/GBufferPass.cpp b/Source/Engine/Renderer/GBufferPass.cpp
index 1f760619b..fd0b7987f 100644
--- a/Source/Engine/Renderer/GBufferPass.cpp
+++ b/Source/Engine/Renderer/GBufferPass.cpp
@@ -11,17 +11,18 @@
#include "Engine/Core/Collections/Sorting.h"
#include "Engine/Graphics/GPUDevice.h"
#include "Engine/Graphics/GPUContext.h"
+#include "Engine/Graphics/GPUPass.h"
#include "Engine/Graphics/Shaders/GPUShader.h"
#include "Engine/Graphics/RenderTask.h"
#include "Engine/Graphics/RenderBuffers.h"
#include "Engine/Graphics/RenderTargetPool.h"
+#include "Engine/Graphics/Graphics.h"
#include "Engine/Content/Assets/Shader.h"
#include "Engine/Content/Content.h"
#include "Engine/Content/Assets/Model.h"
#include "Engine/Level/Actors/Decal.h"
#include "Engine/Level/Actors/Sky.h"
#include "Engine/Engine/Engine.h"
-#include "Engine/Graphics/Graphics.h"
GPU_CB_STRUCT(GBufferPassData {
ShaderGBufferData GBuffer;
@@ -167,13 +168,19 @@ void GBufferPass::Fill(RenderContext& renderContext, GPUTexture* lightBuffer)
renderContext.Buffers->GBuffer2->View(),
renderContext.Buffers->GBuffer3->View(),
};
+ GPUDrawPassAction targetActions[5] =
+ {
+ GPUDrawPassAction::ClearStore,
+ GPUDrawPassAction::ClearStore,
+ GPUDrawPassAction::ClearStore,
+ GPUDrawPassAction::ClearStore,
+ GPUDrawPassAction::ClearStore,
+ };
renderContext.View.Pass = DrawPass::GBuffer;
context->SetViewportAndScissors(renderContext.Buffers->GetViewport());
// Clear GBuffer
{
- PROFILE_GPU_CPU_NAMED("Clear");
-
context->ClearDepth(*renderContext.Buffers->DepthBuffer);
context->Clear(lightBuffer->View(), Color::Transparent);
context->Clear(renderContext.Buffers->GBuffer0->View(), Color::Transparent);
@@ -211,8 +218,15 @@ void GBufferPass::Fill(RenderContext& renderContext, GPUTexture* lightBuffer)
#endif
// Draw objects that can get decals
- context->SetRenderTarget(*renderContext.Buffers->DepthBuffer, ToSpan(targetBuffers, ARRAY_COUNT(targetBuffers)));
- renderContext.List->ExecuteDrawCalls(renderContext, DrawCallsListType::GBuffer);
+ {
+ GPUDrawPass drawPass(
+ context,
+ *renderContext.Buffers->DepthBuffer,
+ GPUDrawPassAction::ClearStore,
+ ToSpan(targetBuffers, ARRAY_COUNT(targetBuffers)),
+ ToSpan(targetActions, ARRAY_COUNT(targetActions)));
+ renderContext.List->ExecuteDrawCalls(renderContext, DrawCallsListType::GBuffer);
+ }
// Draw decals
DrawDecals(renderContext, lightBuffer->View());