diff --git a/Source/Engine/Core/Collections/Sorting.h b/Source/Engine/Core/Collections/Sorting.h index 171c855a3..b6c2694a1 100644 --- a/Source/Engine/Core/Collections/Sorting.h +++ b/Source/Engine/Core/Collections/Sorting.h @@ -325,8 +325,8 @@ public: Platform::Free(tmp); } - template - FORCE_INLINE static void MergeSort(Array& data, Array* tmp = nullptr) + template + FORCE_INLINE static void MergeSort(Array& data, Array* tmp = nullptr) { if (tmp) tmp->Resize(data.Count()); diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index e96a7a9a9..1f45e7f78 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -26,12 +26,7 @@ static_assert(sizeof(ShaderObjectData) == sizeof(Float4) * ARRAY_COUNT(ShaderObj namespace { - // Cached data for the draw calls sorting - Array SortingKeys[2]; - Array SortingIndices; - Array SortingBatches; Array FreeRenderList; - Array> MemPool; CriticalSection MemPoolLocker; } @@ -199,12 +194,15 @@ void RendererAllocation::Free(void* ptr, uintptr size) RenderList* RenderList::GetFromPool() { + MemPoolLocker.Lock(); if (FreeRenderList.HasItems()) { const auto result = FreeRenderList.Last(); FreeRenderList.RemoveLast(); + MemPoolLocker.Unlock(); return result; } + MemPoolLocker.Unlock(); return New(); } @@ -213,10 +211,12 @@ void RenderList::ReturnToPool(RenderList* cache) { if (!cache) return; + cache->Clear(); + MemPoolLocker.Lock(); ASSERT(!FreeRenderList.Contains(cache)); FreeRenderList.Add(cache); - cache->Clear(); + MemPoolLocker.Unlock(); } void RenderList::CleanupCache() @@ -224,13 +224,12 @@ void RenderList::CleanupCache() // Don't call it during rendering (data may be already in use) ASSERT(GPUDevice::Instance == nullptr || GPUDevice::Instance->CurrentTask == nullptr); - SortingKeys[0].Resize(0); - SortingKeys[1].Resize(0); - SortingIndices.Resize(0); + MemPoolLocker.Lock(); FreeRenderList.ClearDelete(); for (auto& e : MemPool) Platform::Free(e.First); 
MemPool.Clear(); + MemPoolLocker.Unlock(); } bool RenderList::BlendableSettings::operator<(const BlendableSettings& other) const @@ -648,12 +647,12 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD const int32 listSize = list.Indices.Count(); ZoneValue(listSize); - // Peek shared memory -#define PREPARE_CACHE(list) (list).Clear(); (list).Resize(listSize) - PREPARE_CACHE(SortingKeys[0]); - PREPARE_CACHE(SortingKeys[1]); - PREPARE_CACHE(SortingIndices); -#undef PREPARE_CACHE + // Use shared memory from renderer allocator + Array SortingKeys[2]; + Array SortingIndices; + SortingKeys[0].Resize(listSize); + SortingKeys[1].Resize(listSize); + SortingIndices.Resize(listSize); uint64* sortedKeys = SortingKeys[0].Get(); // Setup sort keys @@ -726,7 +725,8 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD if (stable) { // Sort draw calls batches by depth - Sorting::MergeSort(list.Batches, &SortingBatches); + Array sortingBatches; + Sorting::MergeSort(list.Batches, &sortingBatches); } } diff --git a/Source/Engine/Renderer/Renderer.cpp b/Source/Engine/Renderer/Renderer.cpp index d4e5b2590..13921e079 100644 --- a/Source/Engine/Renderer/Renderer.cpp +++ b/Source/Engine/Renderer/Renderer.cpp @@ -425,29 +425,69 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont #endif } - // Sort draw calls + // Process draw calls (sorting, objects buffer building) { - PROFILE_CPU_NAMED("Sort Draw Calls"); - // TODO: run all of these functions in async via jobs - for (int32 i = 0; i < renderContextBatch.Contexts.Count(); i++) - renderContextBatch.Contexts[i].List->BuildObjectsBuffer(); - renderContext.List->SortDrawCalls(renderContext, false, DrawCallsListType::GBuffer); - renderContext.List->SortDrawCalls(renderContext, false, DrawCallsListType::GBufferNoDecals); - renderContext.List->SortDrawCalls(renderContext, true, DrawCallsListType::Forward); - renderContext.List->SortDrawCalls(renderContext, 
false, DrawCallsListType::Distortion); - if (setup.UseMotionVectors) - renderContext.List->SortDrawCalls(renderContext, false, DrawCallsListType::MotionVectors); - for (int32 i = 1; i < renderContextBatch.Contexts.Count(); i++) + PROFILE_CPU_NAMED("Process Draw Calls"); + + // Utility that runs specific rendering routines as async jobs + struct DrawCallsProcessor { - auto& shadowContext = renderContextBatch.Contexts.Get()[i]; - shadowContext.List->SortDrawCalls(shadowContext, false, DrawCallsListType::Depth, DrawPass::Depth); - shadowContext.List->SortDrawCalls(shadowContext, false, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls, DrawPass::Depth); - } + RenderContextBatch& RenderContextBatch; + Pair MainContextSorting[5] = + { + // Draw List + Reverse Distance sorting + ToPair(DrawCallsListType::GBuffer, false), + ToPair(DrawCallsListType::GBufferNoDecals, false), + ToPair(DrawCallsListType::Forward, true), + ToPair(DrawCallsListType::Distortion, false), + ToPair(DrawCallsListType::MotionVectors, false), + }; + + void BuildObjectsBufferJob(int32 index) + { + RenderContextBatch.Contexts[index].List->BuildObjectsBuffer(); + } + + void SortDrawCallsJob(int32 index) + { + RenderContext& renderContext = RenderContextBatch.GetMainContext(); + if (index < ARRAY_COUNT(MainContextSorting)) + { + // Main context sorting + RenderSetup& setup = renderContext.List->Setup; + auto sorting = MainContextSorting[index]; + if (sorting.First == DrawCallsListType::MotionVectors && !setup.UseMotionVectors) + return; + renderContext.List->SortDrawCalls(renderContext, sorting.Second, sorting.First); + } + else + { + // Shadow context sorting + auto& shadowContext = RenderContextBatch.Contexts[index - ARRAY_COUNT(MainContextSorting)]; + shadowContext.List->SortDrawCalls(shadowContext, false, DrawCallsListType::Depth, DrawPass::Depth); + shadowContext.List->SortDrawCalls(shadowContext, false, shadowContext.List->ShadowDepthDrawCallsList, 
renderContext.List->DrawCalls, DrawPass::Depth); + } + } + } processor = { renderContextBatch }; + + // Dispatch async jobs + Function func; + func.Bind(&processor); + const int64 buildObjectsBufferJob = JobSystem::Dispatch(func, renderContextBatch.Contexts.Count()); + func.Bind(&processor); + const int64 sortDrawCallsJob = JobSystem::Dispatch(func, ARRAY_COUNT(DrawCallsProcessor::MainContextSorting) + renderContextBatch.Contexts.Count()); + + // Upload objects buffers to the GPU + JobSystem::Wait(buildObjectsBufferJob); { PROFILE_CPU_NAMED("FlushObjectsBuffer"); - for (int32 i = 0; i < renderContextBatch.Contexts.Count(); i++) - renderContextBatch.Contexts[i].List->ObjectBuffer.Flush(context); + for (auto& e : renderContextBatch.Contexts) + e.List->ObjectBuffer.Flush(context); } + + // Wait for async jobs to finish + // TODO: use per-pass wait labels (e.g. don't wait for shadow pass draw calls sorting until ShadowPass needs it) + JobSystem::Wait(sortDrawCallsJob); } // Get the light accumulation buffer