Optimize Vulkan texture clears with render pass attachments clear operation

This commit is contained in:
2026-04-23 09:44:31 +02:00
parent 63b6fafa1b
commit 141a8de0da
4 changed files with 85 additions and 57 deletions
@@ -422,9 +422,12 @@ void GPUContextVulkan::BeginRenderPass()
FramebufferVulkan::Key framebufferKey;
framebufferKey.AttachmentCount = _rtCount;
RenderTargetLayoutVulkan layout;
Platform::MemoryClear(&layout, sizeof(layout));
layout.Flags = 0;
layout.RTsCount = _rtCount;
layout.BlendEnable = _currentState && _currentState->BlendEnable;
layout.DepthFormat = _rtDepth ? _rtDepth->GetFormat() : PixelFormat::Unknown;
VkClearValue clearValues[GPU_MAX_RT_BINDED + 1];
PendingClear clear;
for (int32 i = 0; i < GPU_MAX_RT_BINDED; i++)
{
auto handle = _rtHandles[i];
@@ -433,6 +436,11 @@ void GPUContextVulkan::BeginRenderPass()
layout.RTVsFormats[i] = handle->GetFormat();
framebufferKey.Attachments[i] = handle->GetFramebufferView();
AddImageBarrier(handle, handle->LayoutRTV);
if (FindClear(handle, clear))
{
layout.ClearFlags |= 1 << i;
clearValues[i] = clear.Value;
}
}
else
{
@@ -448,17 +456,14 @@ void GPUContextVulkan::BeginRenderPass()
layout.ReadStencil = PixelFormatExtensions::HasStencil(handle->GetFormat());
layout.WriteDepth = handle->LayoutRTV == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL || handle->LayoutRTV == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL || handle->LayoutRTV == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL;
layout.WriteStencil = handle->LayoutRTV == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL || handle->LayoutRTV == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL || handle->LayoutRTV == VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL;
if (_currentState && 0)
{
// TODO: use this but only if state doesn't change during whole render pass (eg. 1st draw call might not draw depth but 2nd might)
layout.ReadDepth &= _currentState->DepthReadEnable;
layout.ReadStencil &= _currentState->StencilReadEnable;
layout.WriteDepth &= _currentState->DepthWriteEnable;
layout.WriteStencil &= _currentState->StencilWriteEnable;
}
framebufferKey.AttachmentCount++;
framebufferKey.Attachments[_rtCount] = handle->GetFramebufferView();
AddImageBarrier(handle, handle->LayoutRTV);
if (FindClear(handle, clear))
{
layout.ClearFlags |= 1 << _rtCount;
clearValues[_rtCount] = clear.Value;
}
}
else
{
@@ -471,6 +476,11 @@ void GPUContextVulkan::BeginRenderPass()
layout.Extent.height = handle->Extent.height;
layout.Layers = handle->Layers;
// Clear textures that are not bound to the render pass
for (auto& e : _pendingClears)
ManualClear(e);
_pendingClears.Clear();
// Get or create objects
auto renderPass = _device->GetOrCreateRenderPass(layout);
framebufferKey.RenderPass = renderPass;
@@ -479,8 +489,7 @@ void GPUContextVulkan::BeginRenderPass()
FlushBarriers();
// TODO: use clear values for render pass begin to improve performance
cmdBuffer->BeginRenderPass(renderPass, framebuffer, 0, nullptr);
cmdBuffer->BeginRenderPass(renderPass, framebuffer, ARRAY_COUNT(clearValues), clearValues);
}
void GPUContextVulkan::EndRenderPass()
@@ -494,6 +503,41 @@ void GPUContextVulkan::EndRenderPass()
vkCmdPipelineBarrier(cmdBuffer->GetHandle(), VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, nullptr, 0, nullptr);
}
bool GPUContextVulkan::FindClear(const GPUTextureViewVulkan* view, PendingClear& clear)
{
// Get last clear for render pass (the following ones will be done manually if the same resource was cleared twice)
for (int32 i = _pendingClears.Count() - 1; i >= 0; i--)
{
auto& e = _pendingClears.Get()[i];
if (e.View == view)
{
clear = e;
_pendingClears.RemoveAtKeepOrder(i);
return true;
}
}
return false;
}
// Performs a queued clear immediately with a transfer clear command instead of a render pass load operation.
// Ends the active render pass (if any), moves the view to the transfer-destination layout and clears its subresource range.
void GPUContextVulkan::ManualClear(const PendingClear& clear)
{
    // Defensive: a queued entry may carry a null view, never dereference it below
    if (clear.View == nullptr)
        return;

    // Image clear commands have to be recorded outside of a render pass
    const auto cmdBuffer = _cmdBufferManager->GetCmdBuffer();
    if (cmdBuffer->IsInsideRenderPass())
        EndRenderPass();

    // Transition the image into the layout passed to the clear commands
    AddImageBarrier(clear.View, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
    FlushBarriers();

    // Pick the clear command (and the matching member of the clear value) based on the parent texture kind
    if (((GPUTextureVulkan*)clear.View->GetParent())->IsDepthStencil())
    {
        vkCmdClearDepthStencilImage(cmdBuffer->GetHandle(), clear.View->Image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear.Value.depthStencil, 1, &clear.View->Info.subresourceRange);
    }
    else
    {
        vkCmdClearColorImage(cmdBuffer->GetHandle(), clear.View->Image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear.Value.color, 1, &clear.View->Info.subresourceRange);
    }
}
void GPUContextVulkan::UpdateDescriptorSets(const SpirvShaderDescriptorInfo& descriptorInfo, DescriptorSetWriterVulkan& dsWriter, bool& needsWrite)
{
for (uint32 i = 0; i < descriptorInfo.DescriptorTypesCount; i++)
@@ -762,6 +806,7 @@ void GPUContextVulkan::FrameBegin()
Platform::MemoryClear(_uaHandles, sizeof(_uaHandles));
Platform::MemoryCopy(_samplerHandles, _device->HelperResources.GetStaticSamplers(), sizeof(VkSampler) * GPU_STATIC_SAMPLERS_COUNT);
Platform::MemoryClear(_samplerHandles + GPU_STATIC_SAMPLERS_COUNT, sizeof(_samplerHandles) - sizeof(VkSampler) * GPU_STATIC_SAMPLERS_COUNT);
_pendingClears.Clear();
// Init command buffer
const auto cmdBuffer = _cmdBufferManager->GetCmdBuffer();
@@ -835,44 +880,17 @@ bool GPUContextVulkan::IsDepthBufferBinded()
// Queues a color clear of the given render target view.
// The clear is not recorded here: the entry is stored in _pendingClears and consumed later, either as a
// render pass clear value (FindClear) or as a transfer clear command (ManualClear).
void GPUContextVulkan::Clear(GPUTextureView* rt, const Color& color)
{
    // Nothing to clear - do not queue an entry with a null view
    auto rtVulkan = static_cast<GPUTextureViewVulkan*>(rt);
    if (!rtVulkan)
        return;

    // NOTE(review): _pendingClears is declared with FixedAllocation<16> - confirm that no more than 16 clears can be queued between flushes
    auto& clear = _pendingClears.AddOne();
    clear.View = rtVulkan;
    Platform::MemoryCopy(clear.Value.color.float32, color.Raw, sizeof(color.Raw));
}
// Queues a depth/stencil clear of the given depth buffer view.
// The clear is not recorded here: the entry is stored in _pendingClears and consumed later, either as a
// render pass clear value (FindClear) or as a transfer clear command (ManualClear).
void GPUContextVulkan::ClearDepth(GPUTextureView* depthBuffer, float depthValue, uint8 stencilValue)
{
    // Nothing to clear - do not queue an entry with a null view
    const auto rtVulkan = static_cast<GPUTextureViewVulkan*>(depthBuffer);
    if (!rtVulkan)
        return;

    // NOTE(review): _pendingClears is declared with FixedAllocation<16> - confirm that no more than 16 clears can be queued between flushes
    auto& clear = _pendingClears.AddOne();
    clear.View = rtVulkan;
    clear.Value.depthStencil.depth = depthValue;
    clear.Value.depthStencil.stencil = stencilValue;
}
void GPUContextVulkan::ClearUA(GPUBuffer* buf, const Float4& value)
@@ -1427,9 +1445,12 @@ void GPUContextVulkan::FlushState()
{
const auto cmdBuffer = _cmdBufferManager->GetCmdBuffer();
if (cmdBuffer->IsInsideRenderPass())
{
EndRenderPass();
}
// Flush pending clears
for (auto& clear : _pendingClears)
ManualClear(clear);
_pendingClears.Clear();
FlushBarriers();
}
@@ -70,6 +70,12 @@ struct PipelineBarrierVulkan
class GPUContextVulkan : public GPUContext
{
private:
// A texture clear recorded by Clear/ClearDepth that has not been executed yet.
struct PendingClear
{
// Texture view to clear.
GPUTextureViewVulkan* View;
// Clear value: Clear fills the color member, ClearDepth fills the depthStencil member.
VkClearValue Value;
};
GPUDeviceVulkan* _device;
QueueVulkan* _queue;
CmdBufferManagerVulkan* _cmdBufferManager;
@@ -101,6 +107,7 @@ private:
#if COMPILE_WITH_PROFILER
void* _tracyContext;
#endif
Array<PendingClear, FixedAllocation<16>> _pendingClears;
typedef Array<DescriptorPoolVulkan*> DescriptorPoolArray;
Dictionary<uint32, DescriptorPoolArray> _descriptorPools;
@@ -143,10 +150,11 @@ public:
DescriptorPoolVulkan* AllocateDescriptorSets(const VkDescriptorSetAllocateInfo& descriptorSetAllocateInfo, const DescriptorSetLayoutVulkan& layout, VkDescriptorSet* outSets);
void BeginRenderPass();
void EndRenderPass();
private:
bool FindClear(const GPUTextureViewVulkan* view, PendingClear& clear);
void ManualClear(const PendingClear& clear);
void UpdateDescriptorSets(const struct SpirvShaderDescriptorInfo& descriptorInfo, class DescriptorSetWriterVulkan& dsWriter, bool& needsWrite);
void UpdateDescriptorSets(ComputePipelineStateVulkan* pipelineState);
void OnDrawCall();
@@ -144,7 +144,7 @@ static VKAPI_ATTR VkBool32 VKAPI_PTR DebugUtilsCallback(VkDebugUtilsMessageSever
case 5: // SPIR-V module not valid: MemoryBarrier: Vulkan specification requires Memory Semantics to have one of the following bits set: Acquire, Release, AcquireRelease or SequentiallyConsistent
case -1666394502: // After query pool creation, each query must be reset before it is used. Queries must also be reset between uses.
case 1203141749:
case 602160055: // Attachment 4 not written by fragment shader; undefined values will be written to attachment. TODO: investigate it for PS_GBuffer shader from Deferred material with USE_LIGHTMAP=1
case 602160055: // Attachment 4 not written by fragment shader; undefined values will be written to attachment.
case 7060244: // Image Operand Offset can only be used with OpImage*Gather operations
case -1539028524: // SortedIndices is null so Vulkan backend sets it to default R32_SFLOAT format which is not good for UINT format of the buffer
case -1810835948: // SortedIndices is null so Vulkan backend sets it to default R32_SFLOAT format which is not good for UINT format of the buffer
@@ -537,14 +537,8 @@ RenderPassVulkan::RenderPassVulkan(GPUDeviceVulkan* device, const RenderTargetLa
attachment.flags = 0;
attachment.format = RenderToolsVulkan::ToVulkanFormat(layout.RTVsFormats[i]);
attachment.samples = (VkSampleCountFlagBits)layout.MSAA;
#if PLATFORM_ANDROID
attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; // TODO: Adreno 640 has glitches when blend is disabled and rt data not loaded
#elif PLATFORM_MAC || PLATFORM_IOS
attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; // MoltenVK seams to have glitches (tiled arch of gpu)
#else
// TODO: we need render passes into high-level rendering api to perform more optimal rendering (esp. for tiled gpus)
attachment.loadOp = layout.BlendEnable ? VK_ATTACHMENT_LOAD_OP_LOAD : VK_ATTACHMENT_LOAD_OP_DONT_CARE;
#endif
attachment.loadOp = layout.ClearFlags & 1 << i ? VK_ATTACHMENT_LOAD_OP_CLEAR : VK_ATTACHMENT_LOAD_OP_LOAD;
attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
@@ -595,8 +589,13 @@ RenderPassVulkan::RenderPassVulkan(GPUDeviceVulkan* device, const RenderTargetLa
attachment.loadOp = layout.ReadDepth || layout.ReadStencil ? VK_ATTACHMENT_LOAD_OP_LOAD : VK_ATTACHMENT_LOAD_OP_DONT_CARE;
//attachment.storeOp = layout.WriteDepth || layout.WriteStencil ? VK_ATTACHMENT_STORE_OP_STORE : VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; // For some reason, read-only depth results in artifacts
// TODO: use VK_ATTACHMENT_STORE_OP_NONE for readonly depth/stencil but check for 'VK_KHR_load_store_op_none' extension
attachment.stencilLoadOp = layout.ReadStencil ? VK_ATTACHMENT_LOAD_OP_LOAD : VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachment.stencilStoreOp = layout.WriteStencil ? VK_ATTACHMENT_STORE_OP_STORE : VK_ATTACHMENT_STORE_OP_DONT_CARE;
if (layout.ClearFlags & 1 << colorAttachmentsCount)
{
attachment.loadOp = attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
}
attachment.initialLayout = depthStencilLayout;
attachment.finalLayout = depthStencilLayout;
depthStencilReference.attachment = colorAttachmentsCount;
@@ -204,7 +204,7 @@ struct RenderTargetLayoutVulkan
uint32 WriteDepth : 1;
uint32 ReadStencil : 1;
uint32 WriteStencil : 1;
uint32 BlendEnable : 1;
uint32 ClearFlags : 7; // GPU_MAX_RT_BINDED + 1
};
uint32 Flags;