Optimize Vulkan render passes with a new GPUDrawPass that gives the higher-level API manual control over attachment operations
This commit is contained in:
@@ -2,6 +2,7 @@
|
||||
|
||||
#include "GPUContext.h"
|
||||
#include "GPUDevice.h"
|
||||
#include "GPUPass.h"
|
||||
#include "RenderTask.h"
|
||||
#include "Textures/GPUTexture.h"
|
||||
|
||||
@@ -137,3 +138,8 @@ void GPUContext::SetResourceState(GPUResource* resource, uint64 state, int32 sub
|
||||
// Base implementation does nothing.
void GPUContext::ForceRebindDescriptors()
{
    // No-op
}
|
||||
|
||||
// Default way of beginning a draw pass: binds the depth buffer and the render targets of the pass through SetRenderTarget.
// The per-attachment actions stored in the pass are not read here.
void GPUContext::BeginDrawPass(GPUDrawPass& pass)
{
    const auto renderTargets = ToSpan(pass.RenderTargets, pass.RenderTargetsCount);
    SetRenderTarget(pass.DepthBuffer, renderTargets);
}
|
||||
|
||||
@@ -27,6 +27,7 @@ class GPUTextureView;
|
||||
class GPUBufferView;
|
||||
class GPUVertexLayout;
|
||||
struct GPUPass;
|
||||
struct GPUDrawPass;
|
||||
enum class GPUResourceAccess;
|
||||
enum class GPUQueryType;
|
||||
|
||||
@@ -697,4 +698,12 @@ public:
|
||||
virtual void OverlapUA(bool end)
|
||||
{
|
||||
}
|
||||
|
||||
// Begins draw pass rendering. See GPUDrawPass.
|
||||
virtual void BeginDrawPass(GPUDrawPass& pass);
|
||||
|
||||
// Ends draw pass rendering. See GPUDrawPass.
|
||||
virtual void EndDrawPass()
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
@@ -71,4 +71,87 @@ struct FLAXENGINE_API GPUComputePass : GPUPass
|
||||
}
|
||||
};
|
||||
|
||||
// TODO: add GPUDrawPass for render targets and depth/stencil setup with optimized clear for faster drawing on tiled-GPUs (mobile)
|
||||
/// <summary>
/// Load/store actions applied to an attached render target or depth buffer by a GPU draw pass. Picking the right action for each attachment reduces memory bandwidth usage during GPU rendering.
/// </summary>
enum class GPUDrawPassAction
{
    // No action: attachment contents are undefined on load and the result of the render pass is discarded for this attachment.
    None = 0,

    // Load: the existing contents of the attachment are read into the draw pass.
    Load = 1 << 0,

    // Load: the attachment starts with the clear value. Clear value is provided by the GPUContext::Clear performed on the texture before pass begins.
    Clear = 1 << 1,

    // Store: the result of the render pass is written back to the attachment.
    Store = 1 << 2,

    // Store: the MSAA result is resolved and the final value is written into the attachment.
    ResolveMultisample = 1 << 3,

    // All flags that describe a load operation (reading data from attachment).
    LoadMask = Load | Clear,

    // All flags that describe a store operation (writing data to attachment).
    StoreMask = Store | ResolveMultisample,

    // Keep the existing contents on load and store the result of the render pass.
    LoadStore = Load | Store,

    // Start from the clear value and store the result of the render pass.
    ClearStore = Clear | Store,
};
|
||||
|
||||
/// <summary>
|
||||
/// GPU pass that explicitly defines render targets and depth buffer within a rendering pass. Can be used to optimize GPU rendering on tiled GPUs with manual control over attachment operations (load, store, clear, discard, etc.) that reduce memory bandwidth usage.
|
||||
/// </summary>
|
||||
/// <remarks>Draw Pass discards any render targets or depth buffer bound to the context (reduces state-tracking).</remarks>
|
||||
struct FLAXENGINE_API GPUDrawPass : GPUPass
|
||||
{
|
||||
GPUTextureView* DepthBuffer;
|
||||
GPUTextureView** RenderTargets;
|
||||
GPUDrawPassAction* RenderTargetsActions;
|
||||
int32 RenderTargetsCount;
|
||||
GPUDrawPassAction DepthAction;
|
||||
|
||||
GPUDrawPass(GPUContext* context, Span<GPUTextureView*> renderTargets)
|
||||
: GPUPass(context)
|
||||
, DepthBuffer(nullptr)
|
||||
, RenderTargets(renderTargets.Get())
|
||||
, RenderTargetsActions(nullptr)
|
||||
, RenderTargetsCount(renderTargets.Length())
|
||||
, DepthAction(GPUDrawPassAction::None)
|
||||
{
|
||||
Context->BeginDrawPass(*this);
|
||||
}
|
||||
|
||||
GPUDrawPass(GPUContext* context, GPUTextureView* depthBuffer, Span<GPUTextureView*> renderTargets)
|
||||
: GPUPass(context)
|
||||
, DepthBuffer(depthBuffer)
|
||||
, RenderTargets(renderTargets.Get())
|
||||
, RenderTargetsActions(nullptr)
|
||||
, RenderTargetsCount(renderTargets.Length())
|
||||
, DepthAction(GPUDrawPassAction::LoadStore)
|
||||
{
|
||||
Context->BeginDrawPass(*this);
|
||||
}
|
||||
|
||||
GPUDrawPass(GPUContext* context, GPUTextureView* depthBuffer, GPUDrawPassAction depthAction, Span<GPUTextureView*> renderTargets, Span<GPUDrawPassAction> renderTargetsActions)
|
||||
: GPUPass(context)
|
||||
, DepthBuffer(depthBuffer)
|
||||
, RenderTargets(renderTargets.Get())
|
||||
, RenderTargetsActions(renderTargetsActions.Get())
|
||||
, RenderTargetsCount(renderTargets.Length())
|
||||
, DepthAction(depthAction)
|
||||
{
|
||||
ASSERT_LOW_LAYER(renderTargets.Length() == renderTargetsActions.Length());
|
||||
Context->BeginDrawPass(*this);
|
||||
}
|
||||
|
||||
~GPUDrawPass()
|
||||
{
|
||||
Context->EndDrawPass();
|
||||
}
|
||||
};
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
#include "GPUShaderProgramVulkan.h"
|
||||
#include "GPUTextureVulkan.h"
|
||||
#include "QueueVulkan.h"
|
||||
#include "Engine/Graphics/GPUPass.h"
|
||||
#include "Engine/Graphics/PixelFormatExtensions.h"
|
||||
#include "Engine/Debug/Exceptions/NotImplementedException.h"
|
||||
|
||||
@@ -436,11 +437,22 @@ void GPUContextVulkan::BeginRenderPass()
|
||||
layout.RTVsFormats[i] = handle->GetFormat();
|
||||
framebufferKey.Attachments[i] = handle->GetFramebufferView();
|
||||
AddImageBarrier(handle, handle->LayoutRTV);
|
||||
uint64 mask = 1ull << i;
|
||||
if (FindClear(handle, clear))
|
||||
{
|
||||
layout.ClearFlags |= 1 << i;
|
||||
layout.LoadClear |= mask;
|
||||
clearValues[i] = clear.Value;
|
||||
}
|
||||
if (_drawPass && _drawPass->RenderTargetsActions)
|
||||
{
|
||||
GPUDrawPassAction action = _drawPass->RenderTargetsActions[i];
|
||||
if ((uint32)action & (uint32)GPUDrawPassAction::Clear)
|
||||
layout.LoadClear |= mask;
|
||||
else if (((uint32)action & (uint32)GPUDrawPassAction::LoadMask) == 0)
|
||||
layout.LoadDontCare |= mask;
|
||||
else if (((uint32)action & (uint32)GPUDrawPassAction::StoreMask) == 0)
|
||||
layout.StoreDontCare |= mask;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -459,11 +471,22 @@ void GPUContextVulkan::BeginRenderPass()
|
||||
framebufferKey.AttachmentCount++;
|
||||
framebufferKey.Attachments[_rtCount] = handle->GetFramebufferView();
|
||||
AddImageBarrier(handle, handle->LayoutRTV);
|
||||
uint64 mask = 1ull << _rtCount;
|
||||
if (FindClear(handle, clear))
|
||||
{
|
||||
layout.ClearFlags |= 1 << _rtCount;
|
||||
layout.LoadClear |= mask;
|
||||
clearValues[_rtCount] = clear.Value;
|
||||
}
|
||||
if (_drawPass)
|
||||
{
|
||||
GPUDrawPassAction action = _drawPass->DepthAction;
|
||||
if ((uint32)action & (uint32)GPUDrawPassAction::Clear)
|
||||
layout.LoadClear |= mask;
|
||||
else if (((uint32)action & (uint32)GPUDrawPassAction::LoadMask) == 0)
|
||||
layout.LoadDontCare |= mask;
|
||||
else if (((uint32)action & (uint32)GPUDrawPassAction::StoreMask) == 0)
|
||||
layout.StoreDontCare |= mask;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -1004,7 +1027,7 @@ void GPUContextVulkan::SetRenderTarget(GPUTextureView* depthBuffer, GPUTextureVi
|
||||
|
||||
void GPUContextVulkan::SetRenderTarget(GPUTextureView* depthBuffer, const Span<GPUTextureView*>& rts)
|
||||
{
|
||||
ASSERT(Math::IsInRange(rts.Length(), 1, GPU_MAX_RT_BINDED));
|
||||
ASSERT(Math::IsInRange(rts.Length(), 0, GPU_MAX_RT_BINDED));
|
||||
|
||||
const auto depthBufferVulkan = static_cast<GPUTextureViewVulkan*>(depthBuffer);
|
||||
|
||||
@@ -1963,4 +1986,19 @@ void GPUContextVulkan::OverlapUA(bool end)
|
||||
AddUABarrier();
|
||||
}
|
||||
|
||||
// Begins the draw pass: remembers the pass (so the next render pass setup can read its attachment actions)
// and replaces the currently bound depth buffer and render targets with the ones from the pass.
void GPUContextVulkan::BeginDrawPass(GPUDrawPass& pass)
{
    // Same limit as SetRenderTarget: validate the count before copying the handles into _rtHandles
    ASSERT(Math::IsInRange(pass.RenderTargetsCount, 0, GPU_MAX_RT_BINDED));

    _drawPass = &pass;

    // Mark render targets and pipeline state as dirty
    _rtDirtyFlag = true;
    _psDirtyFlag = true;

    // Bind attachments from the pass
    _rtCount = pass.RenderTargetsCount;
    _rtDepth = static_cast<GPUTextureViewVulkan*>(pass.DepthBuffer);
    Platform::MemoryCopy(_rtHandles, pass.RenderTargets, pass.RenderTargetsCount * sizeof(void*));
}
|
||||
|
||||
void GPUContextVulkan::EndDrawPass()
|
||||
{
|
||||
_drawPass = nullptr;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -91,6 +91,7 @@ private:
|
||||
uint32 _stencilRef;
|
||||
|
||||
RenderPassVulkan* _renderPass;
|
||||
GPUDrawPass* _drawPass = nullptr;
|
||||
GPUPipelineStateVulkan* _currentState;
|
||||
GPUShaderProgramCSVulkan* _currentCompute;
|
||||
GPUVertexLayoutVulkan* _vertexLayout;
|
||||
@@ -219,6 +220,8 @@ public:
|
||||
void Transition(GPUResource* resource, GPUResourceAccess access) override;
|
||||
void MemoryBarrier() override;
|
||||
void OverlapUA(bool end) override;
|
||||
void BeginDrawPass(GPUDrawPass& pass) override;
|
||||
void EndDrawPass() override;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
@@ -537,9 +537,9 @@ RenderPassVulkan::RenderPassVulkan(GPUDeviceVulkan* device, const RenderTargetLa
|
||||
attachment.flags = 0;
|
||||
attachment.format = RenderToolsVulkan::ToVulkanFormat(layout.RTVsFormats[i]);
|
||||
attachment.samples = (VkSampleCountFlagBits)layout.MSAA;
|
||||
// TODO: we need render passes into high-level rendering api to perform more optimal rendering (esp. for tiled gpus)
|
||||
attachment.loadOp = layout.ClearFlags & 1 << i ? VK_ATTACHMENT_LOAD_OP_CLEAR : VK_ATTACHMENT_LOAD_OP_LOAD;
|
||||
attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
|
||||
uint64 mask = 1ull << i;
|
||||
attachment.loadOp = layout.LoadClear & mask ? VK_ATTACHMENT_LOAD_OP_CLEAR : (layout.LoadDontCare & mask ? VK_ATTACHMENT_LOAD_OP_DONT_CARE : VK_ATTACHMENT_LOAD_OP_LOAD);
|
||||
attachment.storeOp = layout.StoreDontCare & mask ? VK_ATTACHMENT_STORE_OP_DONT_CARE : VK_ATTACHMENT_STORE_OP_STORE;
|
||||
attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
|
||||
attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
|
||||
attachment.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
|
||||
@@ -592,10 +592,11 @@ RenderPassVulkan::RenderPassVulkan(GPUDeviceVulkan* device, const RenderTargetLa
|
||||
// TODO: use VK_ATTACHMENT_STORE_OP_NONE for readonly depth/stencil but check for 'VK_KHR_load_store_op_none' extension
|
||||
attachment.stencilLoadOp = layout.ReadStencil ? VK_ATTACHMENT_LOAD_OP_LOAD : VK_ATTACHMENT_LOAD_OP_DONT_CARE;
|
||||
attachment.stencilStoreOp = layout.WriteStencil ? VK_ATTACHMENT_STORE_OP_STORE : VK_ATTACHMENT_STORE_OP_DONT_CARE;
|
||||
if (layout.ClearFlags & 1 << colorAttachmentsCount)
|
||||
{
|
||||
uint64 mask = 1ull << colorAttachmentsCount;
|
||||
if (layout.LoadClear & mask)
|
||||
attachment.loadOp = attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
|
||||
}
|
||||
else if (layout.LoadDontCare & mask)
|
||||
attachment.loadOp = attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
|
||||
attachment.initialLayout = depthStencilLayout;
|
||||
attachment.finalLayout = depthStencilLayout;
|
||||
depthStencilReference.attachment = colorAttachmentsCount;
|
||||
|
||||
@@ -198,16 +198,18 @@ struct RenderTargetLayoutVulkan
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint32 Layers : 10; // Limited by GPU_MAX_TEXTURE_ARRAY_SIZE
|
||||
uint32 RTsCount : 3; // Limited by GPU_MAX_RT_BINDED
|
||||
uint32 ReadDepth : 1;
|
||||
uint32 WriteDepth : 1;
|
||||
uint32 ReadStencil : 1;
|
||||
uint32 WriteStencil : 1;
|
||||
uint32 ClearFlags : 7; // GPU_MAX_RT_BINDED + 1
|
||||
uint64 Layers : 10; // Limited by GPU_MAX_TEXTURE_ARRAY_SIZE
|
||||
uint64 RTsCount : 3; // Limited by GPU_MAX_RT_BINDED
|
||||
uint64 ReadDepth : 1;
|
||||
uint64 WriteDepth : 1;
|
||||
uint64 ReadStencil : 1;
|
||||
uint64 WriteStencil : 1;
|
||||
uint64 LoadClear : 7; // GPU_MAX_RT_BINDED + 1
|
||||
uint64 LoadDontCare : 7; // GPU_MAX_RT_BINDED + 1
|
||||
uint64 StoreDontCare : 7; // GPU_MAX_RT_BINDED + 1
|
||||
};
|
||||
|
||||
uint32 Flags;
|
||||
uint64 Flags;
|
||||
};
|
||||
|
||||
MSAALevel MSAA;
|
||||
|
||||
@@ -11,17 +11,18 @@
|
||||
#include "Engine/Core/Collections/Sorting.h"
|
||||
#include "Engine/Graphics/GPUDevice.h"
|
||||
#include "Engine/Graphics/GPUContext.h"
|
||||
#include "Engine/Graphics/GPUPass.h"
|
||||
#include "Engine/Graphics/Shaders/GPUShader.h"
|
||||
#include "Engine/Graphics/RenderTask.h"
|
||||
#include "Engine/Graphics/RenderBuffers.h"
|
||||
#include "Engine/Graphics/RenderTargetPool.h"
|
||||
#include "Engine/Graphics/Graphics.h"
|
||||
#include "Engine/Content/Assets/Shader.h"
|
||||
#include "Engine/Content/Content.h"
|
||||
#include "Engine/Content/Assets/Model.h"
|
||||
#include "Engine/Level/Actors/Decal.h"
|
||||
#include "Engine/Level/Actors/Sky.h"
|
||||
#include "Engine/Engine/Engine.h"
|
||||
#include "Engine/Graphics/Graphics.h"
|
||||
|
||||
GPU_CB_STRUCT(GBufferPassData {
|
||||
ShaderGBufferData GBuffer;
|
||||
@@ -167,13 +168,19 @@ void GBufferPass::Fill(RenderContext& renderContext, GPUTexture* lightBuffer)
|
||||
renderContext.Buffers->GBuffer2->View(),
|
||||
renderContext.Buffers->GBuffer3->View(),
|
||||
};
|
||||
GPUDrawPassAction targetActions[5] =
|
||||
{
|
||||
GPUDrawPassAction::ClearStore,
|
||||
GPUDrawPassAction::ClearStore,
|
||||
GPUDrawPassAction::ClearStore,
|
||||
GPUDrawPassAction::ClearStore,
|
||||
GPUDrawPassAction::ClearStore,
|
||||
};
|
||||
renderContext.View.Pass = DrawPass::GBuffer;
|
||||
context->SetViewportAndScissors(renderContext.Buffers->GetViewport());
|
||||
|
||||
// Clear GBuffer
|
||||
{
|
||||
PROFILE_GPU_CPU_NAMED("Clear");
|
||||
|
||||
context->ClearDepth(*renderContext.Buffers->DepthBuffer);
|
||||
context->Clear(lightBuffer->View(), Color::Transparent);
|
||||
context->Clear(renderContext.Buffers->GBuffer0->View(), Color::Transparent);
|
||||
@@ -211,8 +218,15 @@ void GBufferPass::Fill(RenderContext& renderContext, GPUTexture* lightBuffer)
|
||||
#endif
|
||||
|
||||
// Draw objects that can get decals
|
||||
context->SetRenderTarget(*renderContext.Buffers->DepthBuffer, ToSpan(targetBuffers, ARRAY_COUNT(targetBuffers)));
|
||||
renderContext.List->ExecuteDrawCalls(renderContext, DrawCallsListType::GBuffer);
|
||||
{
|
||||
GPUDrawPass drawPass(
|
||||
context,
|
||||
*renderContext.Buffers->DepthBuffer,
|
||||
GPUDrawPassAction::ClearStore,
|
||||
ToSpan(targetBuffers, ARRAY_COUNT(targetBuffers)),
|
||||
ToSpan(targetActions, ARRAY_COUNT(targetActions)));
|
||||
renderContext.List->ExecuteDrawCalls(renderContext, DrawCallsListType::GBuffer);
|
||||
}
|
||||
|
||||
// Draw decals
|
||||
DrawDecals(renderContext, lightBuffer->View());
|
||||
|
||||
Reference in New Issue
Block a user