Optimize Vulkan texture clears with render pass attachments clear operation
This commit is contained in:
@@ -422,9 +422,12 @@ void GPUContextVulkan::BeginRenderPass()
|
||||
FramebufferVulkan::Key framebufferKey;
|
||||
framebufferKey.AttachmentCount = _rtCount;
|
||||
RenderTargetLayoutVulkan layout;
|
||||
Platform::MemoryClear(&layout, sizeof(layout));
|
||||
layout.Flags = 0;
|
||||
layout.RTsCount = _rtCount;
|
||||
layout.BlendEnable = _currentState && _currentState->BlendEnable;
|
||||
layout.DepthFormat = _rtDepth ? _rtDepth->GetFormat() : PixelFormat::Unknown;
|
||||
VkClearValue clearValues[GPU_MAX_RT_BINDED + 1];
|
||||
PendingClear clear;
|
||||
for (int32 i = 0; i < GPU_MAX_RT_BINDED; i++)
|
||||
{
|
||||
auto handle = _rtHandles[i];
|
||||
@@ -433,6 +436,11 @@ void GPUContextVulkan::BeginRenderPass()
|
||||
layout.RTVsFormats[i] = handle->GetFormat();
|
||||
framebufferKey.Attachments[i] = handle->GetFramebufferView();
|
||||
AddImageBarrier(handle, handle->LayoutRTV);
|
||||
if (FindClear(handle, clear))
|
||||
{
|
||||
layout.ClearFlags |= 1 << i;
|
||||
clearValues[i] = clear.Value;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -448,17 +456,14 @@ void GPUContextVulkan::BeginRenderPass()
|
||||
layout.ReadStencil = PixelFormatExtensions::HasStencil(handle->GetFormat());
|
||||
layout.WriteDepth = handle->LayoutRTV == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL || handle->LayoutRTV == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL || handle->LayoutRTV == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL;
|
||||
layout.WriteStencil = handle->LayoutRTV == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL || handle->LayoutRTV == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL || handle->LayoutRTV == VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL;
|
||||
if (_currentState && 0)
|
||||
{
|
||||
// TODO: use this but only if state doesn't change during whole render pass (eg. 1st draw call might not draw depth but 2nd might)
|
||||
layout.ReadDepth &= _currentState->DepthReadEnable;
|
||||
layout.ReadStencil &= _currentState->StencilReadEnable;
|
||||
layout.WriteDepth &= _currentState->DepthWriteEnable;
|
||||
layout.WriteStencil &= _currentState->StencilWriteEnable;
|
||||
}
|
||||
framebufferKey.AttachmentCount++;
|
||||
framebufferKey.Attachments[_rtCount] = handle->GetFramebufferView();
|
||||
AddImageBarrier(handle, handle->LayoutRTV);
|
||||
if (FindClear(handle, clear))
|
||||
{
|
||||
layout.ClearFlags |= 1 << _rtCount;
|
||||
clearValues[_rtCount] = clear.Value;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -471,6 +476,11 @@ void GPUContextVulkan::BeginRenderPass()
|
||||
layout.Extent.height = handle->Extent.height;
|
||||
layout.Layers = handle->Layers;
|
||||
|
||||
// Clear textures that are not bound to the render pass
|
||||
for (auto& e : _pendingClears)
|
||||
ManualClear(e);
|
||||
_pendingClears.Clear();
|
||||
|
||||
// Get or create objects
|
||||
auto renderPass = _device->GetOrCreateRenderPass(layout);
|
||||
framebufferKey.RenderPass = renderPass;
|
||||
@@ -479,8 +489,7 @@ void GPUContextVulkan::BeginRenderPass()
|
||||
|
||||
FlushBarriers();
|
||||
|
||||
// TODO: use clear values for render pass begin to improve performance
|
||||
cmdBuffer->BeginRenderPass(renderPass, framebuffer, 0, nullptr);
|
||||
cmdBuffer->BeginRenderPass(renderPass, framebuffer, ARRAY_COUNT(clearValues), clearValues);
|
||||
}
|
||||
|
||||
void GPUContextVulkan::EndRenderPass()
|
||||
@@ -494,6 +503,41 @@ void GPUContextVulkan::EndRenderPass()
|
||||
vkCmdPipelineBarrier(cmdBuffer->GetHandle(), VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, nullptr, 0, nullptr);
|
||||
}
|
||||
|
||||
bool GPUContextVulkan::FindClear(const GPUTextureViewVulkan* view, PendingClear& clear)
|
||||
{
|
||||
// Get last clear for render pass (the following ones will be done manually if the same resource was cleared twice)
|
||||
for (int32 i = _pendingClears.Count() - 1; i >= 0; i--)
|
||||
{
|
||||
auto& e = _pendingClears.Get()[i];
|
||||
if (e.View == view)
|
||||
{
|
||||
clear = e;
|
||||
_pendingClears.RemoveAtKeepOrder(i);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void GPUContextVulkan::ManualClear(const PendingClear& clear)
|
||||
{
|
||||
const auto cmdBuffer = _cmdBufferManager->GetCmdBuffer();
|
||||
if (cmdBuffer->IsInsideRenderPass())
|
||||
EndRenderPass();
|
||||
|
||||
AddImageBarrier(clear.View, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
|
||||
FlushBarriers();
|
||||
|
||||
if (((GPUTextureVulkan*)clear.View->GetParent())->IsDepthStencil())
|
||||
{
|
||||
vkCmdClearDepthStencilImage(cmdBuffer->GetHandle(), clear.View->Image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear.Value.depthStencil, 1, &clear.View->Info.subresourceRange);
|
||||
}
|
||||
else
|
||||
{
|
||||
vkCmdClearColorImage(cmdBuffer->GetHandle(), clear.View->Image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear.Value.color, 1, &clear.View->Info.subresourceRange);
|
||||
}
|
||||
}
|
||||
|
||||
void GPUContextVulkan::UpdateDescriptorSets(const SpirvShaderDescriptorInfo& descriptorInfo, DescriptorSetWriterVulkan& dsWriter, bool& needsWrite)
|
||||
{
|
||||
for (uint32 i = 0; i < descriptorInfo.DescriptorTypesCount; i++)
|
||||
@@ -762,6 +806,7 @@ void GPUContextVulkan::FrameBegin()
|
||||
Platform::MemoryClear(_uaHandles, sizeof(_uaHandles));
|
||||
Platform::MemoryCopy(_samplerHandles, _device->HelperResources.GetStaticSamplers(), sizeof(VkSampler) * GPU_STATIC_SAMPLERS_COUNT);
|
||||
Platform::MemoryClear(_samplerHandles + GPU_STATIC_SAMPLERS_COUNT, sizeof(_samplerHandles) - sizeof(VkSampler) * GPU_STATIC_SAMPLERS_COUNT);
|
||||
_pendingClears.Clear();
|
||||
|
||||
// Init command buffer
|
||||
const auto cmdBuffer = _cmdBufferManager->GetCmdBuffer();
|
||||
@@ -835,44 +880,17 @@ bool GPUContextVulkan::IsDepthBufferBinded()
|
||||
|
||||
// Requests a color clear of the given texture view.
// rt    - texture view to clear; a null view is ignored.
// color - clear color, copied into the queued clear value as four floats.
// The clear is not recorded here: it is appended to the pending clears queue so it can be merged into
// a render pass attachment clear, or executed later via ManualClear when the queue gets flushed.
void GPUContextVulkan::Clear(GPUTextureView* rt, const Color& color)
{
    auto rtVulkan = static_cast<GPUTextureViewVulkan*>(rt);
    if (!rtVulkan)
        return;

    // Queue the clear exactly once (recording an immediate clear as well would clear the image twice)
    // NOTE(review): _pendingClears is declared with a fixed allocation of 16 entries - confirm it cannot overflow between flushes
    auto& clear = _pendingClears.AddOne();
    clear.View = rtVulkan;
    Platform::MemoryCopy(clear.Value.color.float32, color.Raw, sizeof(color.Raw));
}
|
||||
|
||||
// Requests a depth/stencil clear of the given depth buffer view.
// depthBuffer  - depth buffer view to clear; a null view is ignored.
// depthValue   - value written to the depth aspect.
// stencilValue - value written to the stencil aspect.
// The clear is not recorded here: it is appended to the pending clears queue so it can be merged into
// a render pass attachment clear, or executed later via ManualClear when the queue gets flushed.
void GPUContextVulkan::ClearDepth(GPUTextureView* depthBuffer, float depthValue, uint8 stencilValue)
{
    const auto rtVulkan = static_cast<GPUTextureViewVulkan*>(depthBuffer);
    if (!rtVulkan)
        return;

    // Queue the clear exactly once (recording an immediate clear as well would clear the image twice)
    // NOTE(review): _pendingClears is declared with a fixed allocation of 16 entries - confirm it cannot overflow between flushes
    auto& clear = _pendingClears.AddOne();
    clear.View = rtVulkan;
    clear.Value.depthStencil.depth = depthValue;
    clear.Value.depthStencil.stencil = stencilValue;
}
|
||||
|
||||
void GPUContextVulkan::ClearUA(GPUBuffer* buf, const Float4& value)
|
||||
@@ -1427,9 +1445,12 @@ void GPUContextVulkan::FlushState()
|
||||
{
|
||||
const auto cmdBuffer = _cmdBufferManager->GetCmdBuffer();
|
||||
if (cmdBuffer->IsInsideRenderPass())
|
||||
{
|
||||
EndRenderPass();
|
||||
}
|
||||
|
||||
// Flush pending clears
|
||||
for (auto& clear : _pendingClears)
|
||||
ManualClear(clear);
|
||||
_pendingClears.Clear();
|
||||
|
||||
FlushBarriers();
|
||||
}
|
||||
|
||||
@@ -70,6 +70,12 @@ struct PipelineBarrierVulkan
|
||||
class GPUContextVulkan : public GPUContext
|
||||
{
|
||||
private:
|
||||
struct PendingClear
|
||||
{
|
||||
GPUTextureViewVulkan* View;
|
||||
VkClearValue Value;
|
||||
};
|
||||
|
||||
GPUDeviceVulkan* _device;
|
||||
QueueVulkan* _queue;
|
||||
CmdBufferManagerVulkan* _cmdBufferManager;
|
||||
@@ -101,6 +107,7 @@ private:
|
||||
#if COMPILE_WITH_PROFILER
|
||||
void* _tracyContext;
|
||||
#endif
|
||||
Array<PendingClear, FixedAllocation<16>> _pendingClears;
|
||||
|
||||
typedef Array<DescriptorPoolVulkan*> DescriptorPoolArray;
|
||||
Dictionary<uint32, DescriptorPoolArray> _descriptorPools;
|
||||
@@ -143,10 +150,11 @@ public:
|
||||
DescriptorPoolVulkan* AllocateDescriptorSets(const VkDescriptorSetAllocateInfo& descriptorSetAllocateInfo, const DescriptorSetLayoutVulkan& layout, VkDescriptorSet* outSets);
|
||||
|
||||
void BeginRenderPass();
|
||||
|
||||
void EndRenderPass();
|
||||
|
||||
private:
|
||||
bool FindClear(const GPUTextureViewVulkan* view, PendingClear& clear);
|
||||
void ManualClear(const PendingClear& clear);
|
||||
void UpdateDescriptorSets(const struct SpirvShaderDescriptorInfo& descriptorInfo, class DescriptorSetWriterVulkan& dsWriter, bool& needsWrite);
|
||||
void UpdateDescriptorSets(ComputePipelineStateVulkan* pipelineState);
|
||||
void OnDrawCall();
|
||||
|
||||
@@ -144,7 +144,7 @@ static VKAPI_ATTR VkBool32 VKAPI_PTR DebugUtilsCallback(VkDebugUtilsMessageSever
|
||||
case 5: // SPIR-V module not valid: MemoryBarrier: Vulkan specification requires Memory Semantics to have one of the following bits set: Acquire, Release, AcquireRelease or SequentiallyConsistent
|
||||
case -1666394502: // After query pool creation, each query must be reset before it is used. Queries must also be reset between uses.
|
||||
case 1203141749:
|
||||
case 602160055: // Attachment 4 not written by fragment shader; undefined values will be written to attachment. TODO: investigate it for PS_GBuffer shader from Deferred material with USE_LIGHTMAP=1
|
||||
case 602160055: // Attachment 4 not written by fragment shader; undefined values will be written to attachment.
|
||||
case 7060244: // Image Operand Offset can only be used with OpImage*Gather operations
|
||||
case -1539028524: // SortedIndices is null so Vulkan backend sets it to default R32_SFLOAT format which is not good for UINT format of the buffer
|
||||
case -1810835948: // SortedIndices is null so Vulkan backend sets it to default R32_SFLOAT format which is not good for UINT format of the buffer
|
||||
@@ -537,14 +537,8 @@ RenderPassVulkan::RenderPassVulkan(GPUDeviceVulkan* device, const RenderTargetLa
|
||||
attachment.flags = 0;
|
||||
attachment.format = RenderToolsVulkan::ToVulkanFormat(layout.RTVsFormats[i]);
|
||||
attachment.samples = (VkSampleCountFlagBits)layout.MSAA;
|
||||
#if PLATFORM_ANDROID
|
||||
attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; // TODO: Adreno 640 has glitches when blend is disabled and rt data not loaded
|
||||
#elif PLATFORM_MAC || PLATFORM_IOS
|
||||
attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; // MoltenVK seems to have glitches (tiled arch of gpu)
|
||||
#else
|
||||
// TODO: we need render passes into high-level rendering api to perform more optimal rendering (esp. for tiled gpus)
|
||||
attachment.loadOp = layout.BlendEnable ? VK_ATTACHMENT_LOAD_OP_LOAD : VK_ATTACHMENT_LOAD_OP_DONT_CARE;
|
||||
#endif
|
||||
attachment.loadOp = layout.ClearFlags & 1 << i ? VK_ATTACHMENT_LOAD_OP_CLEAR : VK_ATTACHMENT_LOAD_OP_LOAD;
|
||||
attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
|
||||
attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
|
||||
attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
|
||||
@@ -595,8 +589,13 @@ RenderPassVulkan::RenderPassVulkan(GPUDeviceVulkan* device, const RenderTargetLa
|
||||
attachment.loadOp = layout.ReadDepth || layout.ReadStencil ? VK_ATTACHMENT_LOAD_OP_LOAD : VK_ATTACHMENT_LOAD_OP_DONT_CARE;
|
||||
//attachment.storeOp = layout.WriteDepth || layout.WriteStencil ? VK_ATTACHMENT_STORE_OP_STORE : VK_ATTACHMENT_STORE_OP_DONT_CARE;
|
||||
attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; // For some reason, read-only depth results in artifacts
|
||||
// TODO: use VK_ATTACHMENT_STORE_OP_NONE for readonly depth/stencil but check for 'VK_KHR_load_store_op_none' extension
|
||||
attachment.stencilLoadOp = layout.ReadStencil ? VK_ATTACHMENT_LOAD_OP_LOAD : VK_ATTACHMENT_LOAD_OP_DONT_CARE;
|
||||
attachment.stencilStoreOp = layout.WriteStencil ? VK_ATTACHMENT_STORE_OP_STORE : VK_ATTACHMENT_STORE_OP_DONT_CARE;
|
||||
if (layout.ClearFlags & 1 << colorAttachmentsCount)
|
||||
{
|
||||
attachment.loadOp = attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
|
||||
}
|
||||
attachment.initialLayout = depthStencilLayout;
|
||||
attachment.finalLayout = depthStencilLayout;
|
||||
depthStencilReference.attachment = colorAttachmentsCount;
|
||||
|
||||
@@ -204,7 +204,7 @@ struct RenderTargetLayoutVulkan
|
||||
uint32 WriteDepth : 1;
|
||||
uint32 ReadStencil : 1;
|
||||
uint32 WriteStencil : 1;
|
||||
uint32 BlendEnable : 1;
|
||||
uint32 ClearFlags : 7; // GPU_MAX_RT_BINDED + 1
|
||||
};
|
||||
|
||||
uint32 Flags;
|
||||
|
||||
Reference in New Issue
Block a user