From 3c48484870759476c56a48a77b2b7b16ccc8a281 Mon Sep 17 00:00:00 2001 From: stefnotch Date: Sun, 6 Jun 2021 21:52:29 +0200 Subject: [PATCH 01/15] Use Stopwatch instead of DateTime.Now for increased accuracy DateTime.Now is decent, but not accurate enough for timings involving only a few milliseconds. --- Source/Tools/Flax.Build/Build/Profiling.cs | 7 +++++-- Source/Tools/Flax.Build/Program.cs | 8 ++++---- Source/Tools/Flax.Build/Utilities/Utilities.cs | 6 +++--- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/Source/Tools/Flax.Build/Build/Profiling.cs b/Source/Tools/Flax.Build/Build/Profiling.cs index c5cc89bcb..785badad3 100644 --- a/Source/Tools/Flax.Build/Build/Profiling.cs +++ b/Source/Tools/Flax.Build/Build/Profiling.cs @@ -5,6 +5,7 @@ using System.IO; using System.Collections.Generic; using System.Text; using System.Threading; +using System.Diagnostics; namespace Flax.Build { @@ -71,6 +72,8 @@ namespace Flax.Build private static int _depth; private static readonly List _events = new List(1024); + private static readonly DateTime _startTime = DateTime.Now; + private static readonly Stopwatch _stopwatch = Stopwatch.StartNew(); // https://stackoverflow.com/questions/1416139/how-to-get-timestamp-of-tick-precision-in-net-c /// /// Begins the profiling event. @@ -81,7 +84,7 @@ namespace Flax.Build { Event e; e.Name = name; - e.StartTime = DateTime.Now; + e.StartTime = _startTime.AddTicks(_stopwatch.Elapsed.Ticks); e.Duration = TimeSpan.Zero; e.Depth = _depth++; e.ThreadId = Thread.CurrentThread.ManagedThreadId; @@ -95,7 +98,7 @@ namespace Flax.Build /// The event identifier returned by . 
public static void End(int id) { - var endTime = DateTime.Now; + var endTime = _startTime.AddTicks(_stopwatch.Elapsed.Ticks); var e = _events[id]; e.Duration = endTime - e.StartTime; _events[id] = e; diff --git a/Source/Tools/Flax.Build/Program.cs b/Source/Tools/Flax.Build/Program.cs index 89e71ace0..7411edb3b 100644 --- a/Source/Tools/Flax.Build/Program.cs +++ b/Source/Tools/Flax.Build/Program.cs @@ -1,6 +1,7 @@ // Copyright (c) 2012-2021 Wojciech Figat. All rights reserved. using System; +using System.Diagnostics; using System.IO; using System.Net; using System.Threading; @@ -25,7 +26,7 @@ namespace Flax.Build } Mutex singleInstanceMutex = null; - var startTime = DateTime.Now; + Stopwatch stopwatch = Stopwatch.StartNew(); bool failed = false; try @@ -166,9 +167,8 @@ namespace Flax.Build singleInstanceMutex.Dispose(); singleInstanceMutex = null; } - - var endTime = DateTime.Now; - Log.Info(string.Format("Total time: {0}", endTime - startTime)); + stopwatch.Stop(); + Log.Info(string.Format("Total time: {0}", stopwatch.Elapsed)); Log.Verbose("End."); Log.Dispose(); } diff --git a/Source/Tools/Flax.Build/Utilities/Utilities.cs b/Source/Tools/Flax.Build/Utilities/Utilities.cs index 8390377a8..e08755175 100644 --- a/Source/Tools/Flax.Build/Utilities/Utilities.cs +++ b/Source/Tools/Flax.Build/Utilities/Utilities.cs @@ -326,7 +326,7 @@ namespace Flax.Build } } - var startTime = DateTime.UtcNow; + Stopwatch stopwatch = Stopwatch.StartNew(); if (!options.HasFlag(RunOptions.NoLoggingOfRunCommand)) { Log.Verbose("Running: " + app + " " + (string.IsNullOrEmpty(commandLine) ? 
"" : commandLine)); @@ -397,11 +397,11 @@ namespace Flax.Build if (!options.HasFlag(RunOptions.NoWaitForExit)) { - var buildDuration = (DateTime.UtcNow - startTime).TotalMilliseconds; + stopwatch.Stop(); result = proc.ExitCode; if (!options.HasFlag(RunOptions.NoLoggingOfRunCommand) || options.HasFlag(RunOptions.NoLoggingOfRunDuration)) { - Log.Info(string.Format("Took {0}s to run {1}, ExitCode={2}", buildDuration / 1000, Path.GetFileName(app), result)); + Log.Info(string.Format("Took {0}s to run {1}, ExitCode={2}", stopwatch.Elapsed.TotalSeconds, Path.GetFileName(app), result)); } } From 4cd31ce2b4a65ac287a12bf2fcc9f7511aa82f91 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 6 Jun 2021 12:30:01 +0200 Subject: [PATCH 02/15] Remove some unused lines --- Source/Editor/Editor.cs | 4 ++-- Source/Engine/Platform/Base/ThreadBase.h | 4 ---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/Source/Editor/Editor.cs b/Source/Editor/Editor.cs index e7b33852b..d549bb907 100644 --- a/Source/Editor/Editor.cs +++ b/Source/Editor/Editor.cs @@ -334,7 +334,7 @@ namespace FlaxEditor } // Load scene - + // scene cmd line argument var scene = ContentDatabase.Find(_startupSceneCmdLine); if (scene is SceneItem) @@ -1334,7 +1334,7 @@ namespace FlaxEditor { Instance.StateMachine.StateChanged += RequestStartPlayOnEditMode; } - + [MethodImpl(MethodImplOptions.InternalCall)] internal static extern int Internal_ReadOutputLogs(string[] outMessages, byte[] outLogTypes, long[] outLogTimes); diff --git a/Source/Engine/Platform/Base/ThreadBase.h b/Source/Engine/Platform/Base/ThreadBase.h index 8cd683d7c..6a56e33ad 100644 --- a/Source/Engine/Platform/Base/ThreadBase.h +++ b/Source/Engine/Platform/Base/ThreadBase.h @@ -45,7 +45,6 @@ public: /// /// Gets priority level of the thread. /// - /// The thread priority level. 
FORCE_INLINE ThreadPriority GetPriority() const { return _priority; @@ -60,7 +59,6 @@ public: /// /// Gets thread ID /// - /// Thread ID FORCE_INLINE uint64 GetID() const { return _id; @@ -69,7 +67,6 @@ public: /// /// Gets thread running state. /// - /// True if thread is running, otherwise false FORCE_INLINE bool IsRunning() const { return _isRunning; @@ -78,7 +75,6 @@ public: /// /// Gets name of the thread. /// - /// The thread name. FORCE_INLINE const String& GetName() const { return _name; From cdc85a19616a0e1b1aa71dffa486d949db7f43f4 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 6 Jun 2021 12:30:27 +0200 Subject: [PATCH 03/15] Add profiler entry for Audio Source update --- Source/Engine/Audio/AudioSource.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Source/Engine/Audio/AudioSource.cpp b/Source/Engine/Audio/AudioSource.cpp index a6370cc8b..192fd845f 100644 --- a/Source/Engine/Audio/AudioSource.cpp +++ b/Source/Engine/Audio/AudioSource.cpp @@ -6,6 +6,7 @@ #include "Engine/Graphics/RenderTask.h" #include "Engine/Engine/Time.h" #include "Engine/Level/Scene/Scene.h" +#include "Engine/Profiler/ProfilerCPU.h" #include "AudioBackend.h" #include "Audio.h" @@ -340,6 +341,8 @@ bool AudioSource::IntersectsItself(const Ray& ray, float& distance, Vector3& nor void AudioSource::Update() { + PROFILE_CPU(); + // Update the velocity const Vector3 pos = GetPosition(); const float dt = Math::Max(Time::Update.UnscaledDeltaTime.GetTotalSeconds(), ZeroTolerance); From e0b587251af3f70ffca06f531a9d3dcad3425ab9 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 6 Jun 2021 12:30:53 +0200 Subject: [PATCH 04/15] Add RingBuffer template --- Source/Engine/Core/Collections/RingBuffer.h | 95 +++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 Source/Engine/Core/Collections/RingBuffer.h diff --git a/Source/Engine/Core/Collections/RingBuffer.h b/Source/Engine/Core/Collections/RingBuffer.h new file mode 100644 index 000000000..bffc70e40 --- 
/dev/null +++ b/Source/Engine/Core/Collections/RingBuffer.h @@ -0,0 +1,95 @@ +// Copyright (c) 2012-2021 Wojciech Figat. All rights reserved. + +#pragma once + +#include "Engine/Platform/Platform.h" +#include "Engine/Core/Memory/Memory.h" +#include "Engine/Core/Memory/Allocation.h" + +/// +/// Template for ring buffer with variable capacity. +/// +template +class RingBuffer +{ +public: + + typedef T ItemType; + typedef typename AllocationType::template Data AllocationData; + +private: + + int32 _front = 0, _back = 0, _count = 0, _capacity = 0; + AllocationData _allocation; + +public: + + ~RingBuffer() + { + Memory::DestructItems(Get() + Math::Min(_front, _back), _count); + } + + FORCE_INLINE T* Get() + { + return _allocation.Get(); + } + + FORCE_INLINE int32 Count() const + { + return _count; + } + + FORCE_INLINE int32 Capacity() const + { + return _capacity; + } + + void PushBack(const T& data) + { + if (_capacity == 0 || _capacity == _count) + { + const int32 capacity = _allocation.CalculateCapacityGrow(_capacity, 0); + AllocationData alloc; + alloc.Allocate(capacity); + const int32 frontCount = Math::Min(_capacity - _front, _count); + Memory::MoveItems(alloc.Get(), _allocation.Get() + _front, frontCount); + Memory::DestructItems(_allocation.Get() + _front, frontCount); + const int32 backCount = _count - frontCount; + Memory::MoveItems(alloc.Get() + frontCount, _allocation.Get(), backCount); + Memory::DestructItems(_allocation.Get(), backCount); + _allocation.Swap(alloc); + _front = 0; + _back = _count; + _capacity = capacity; + } + Memory::ConstructItems(_allocation.Get() + _back, &data, 1); + _back = (_back + 1) % _capacity; + _count++; + } + + FORCE_INLINE T& PeekFront() + { + ASSERT(_front != _back); + return _allocation.Get()[_front]; + } + + FORCE_INLINE const T& PeekFront() const + { + ASSERT(_front != _back); + return _allocation.Get()[_front]; + } + + void PopFront() + { + ASSERT(_front != _back); + Memory::DestructItems(_allocation.Get() + _front, 1); + 
_front = (_front + 1) % _capacity; + _count--; + } + + void Clear() + { + Memory::DestructItems(Get() + Math::Min(_front, _back), _count); + _front = _back = _count = 0; + } +}; From 982b22b4b19d28972647725cb16f8cc0e31673c6 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 6 Jun 2021 13:29:14 +0200 Subject: [PATCH 05/15] Add Job System --- Source/Engine/Threading/JobSystem.cpp | 200 ++++++++++++++++++++++++++ Source/Engine/Threading/JobSystem.h | 32 +++++ 2 files changed, 232 insertions(+) create mode 100644 Source/Engine/Threading/JobSystem.cpp create mode 100644 Source/Engine/Threading/JobSystem.h diff --git a/Source/Engine/Threading/JobSystem.cpp b/Source/Engine/Threading/JobSystem.cpp new file mode 100644 index 000000000..b878bbdea --- /dev/null +++ b/Source/Engine/Threading/JobSystem.cpp @@ -0,0 +1,200 @@ +// Copyright (c) 2012-2021 Wojciech Figat. All rights reserved. + +#include "JobSystem.h" +#include "IRunnable.h" +#include "Engine/Core/Collections/RingBuffer.h" +#include "Engine/Platform/CPUInfo.h" +#include "Engine/Platform/Thread.h" +#include "Engine/Platform/ConditionVariable.h" +#include "Engine/Engine/EngineService.h" +#include "Engine/Profiler/ProfilerCPU.h" + +class JobSystemService : public EngineService +{ +public: + + JobSystemService() + : EngineService(TEXT("JobSystem"), -800) + { + } + + bool Init() override; + void BeforeExit() override; + void Dispose() override; +}; + +struct JobData +{ + Function Job; + int32 Index; + int32 Count; +}; + +template<> +struct TIsPODType +{ + enum { Value = true }; +}; + +class JobSystemThread : public IRunnable +{ +public: + int32 Index; + +public: + + // [IRunnable] + String ToString() const override + { + return TEXT("JobSystemThread"); + } + + int32 Run() override; + + void AfterWork(bool wasKilled) override + { + Delete(this); + } +}; + +namespace +{ + JobSystemService JobSystemInstance; + Thread* Threads[32] = {}; + int32 ThreadsCount = 0; + volatile int64 ExitFlag = 0; + volatile int64 DoneLabel = 
0; + volatile int64 NextLabel = 0; + CriticalSection JobsLocker; + ConditionVariable JobsSignal; + ConditionVariable WaitSignal; + RingBuffer> Jobs; +} + +bool JobSystemService::Init() +{ + ThreadsCount = Math::Min(Platform::GetCPUInfo().LogicalProcessorCount, ARRAY_COUNT(Threads)); + for (int32 i = 0; i < ThreadsCount; i++) + { + auto runnable = New(); + runnable->Index = i; + auto thread = Thread::Create(runnable, String::Format(TEXT("Job System {0}"), i), ThreadPriority::AboveNormal); + if (thread == nullptr) + return true; + Threads[i] = thread; + } + return false; +} + +void JobSystemService::BeforeExit() +{ + Platform::AtomicStore(&ExitFlag, 1); + JobsSignal.NotifyAll(); +} + +void JobSystemService::Dispose() +{ + Platform::AtomicStore(&ExitFlag, 1); + JobsSignal.NotifyAll(); + Platform::Sleep(1); + + for (int32 i = 0; i < ThreadsCount; i++) + { + if (Threads[i] && Threads[i]->IsRunning()) + Threads[i]->Kill(true); + Threads[i] = nullptr; + } +} + +int32 JobSystemThread::Run() +{ + Platform::SetThreadAffinityMask(1 << Index); + + JobData data; + CriticalSection mutex; + while (Platform::AtomicRead(&ExitFlag) == 0) + { + // Try to get a job + JobsLocker.Lock(); + if (Jobs.Count() != 0) + { + auto& front = Jobs.PeekFront(); + data = front; + front.Index++; + if (front.Index == front.Count) + { + Jobs.PopFront(); + } + } + JobsLocker.Unlock(); + + if (data.Job.IsBinded()) + { + // Run job + data.Job(data.Index); + data.Job.Unbind(); + + if (data.Index + 1 == data.Count) + { + // Move forward with the job queue + Platform::InterlockedIncrement(&DoneLabel); + WaitSignal.NotifyAll(); + } + } + else + { + // Wait for signal + mutex.Lock(); + JobsSignal.Wait(mutex); + mutex.Unlock(); + } + } + return 0; +} + +int64 JobSystem::Dispatch(const Function& job, int32 jobCount) +{ + PROFILE_CPU(); + if (jobCount <= 0) + return 0; + + JobData data; + data.Job = job; + data.Index = 0; + data.Count = jobCount; + + JobsLocker.Lock(); + const auto label = 
Platform::InterlockedIncrement(&NextLabel); + Jobs.PushBack(data); + JobsLocker.Unlock(); + + if (jobCount == 1) + JobsSignal.NotifyOne(); + else + JobsSignal.NotifyAll(); + + return label; +} + +void JobSystem::Wait() +{ + Wait(Platform::AtomicRead(&NextLabel)); +} + +void JobSystem::Wait(int64 label) +{ + PROFILE_CPU(); + + // Early out + if (label <= Platform::AtomicRead(&DoneLabel)) + return; + + // Wait on signal until input label is not yet done + CriticalSection mutex; + while (label > Platform::AtomicRead(&DoneLabel) && Platform::AtomicRead(&ExitFlag) == 0) + { + mutex.Lock(); + WaitSignal.Wait(mutex); + mutex.Unlock(); + } +} diff --git a/Source/Engine/Threading/JobSystem.h b/Source/Engine/Threading/JobSystem.h new file mode 100644 index 000000000..cf9b79073 --- /dev/null +++ b/Source/Engine/Threading/JobSystem.h @@ -0,0 +1,32 @@ +// Copyright (c) 2012-2021 Wojciech Figat. All rights reserved. + +#pragma once + +#include "Engine/Core/Delegate.h" + +/// +/// Lightweight multi-threaded jobs execution scheduler. Uses a pool of threads and supports work-stealing concept. +/// +API_CLASS(Static) class FLAXENGINE_API JobSystem +{ +DECLARE_SCRIPTING_TYPE_MINIMAL(JobSystem); + + /// + /// Dispatches the job for the execution. + /// + /// The job. Argument is an index of the job execution. + /// The job executions count. + /// The label identifying this dispatch. Can be used to wait for the execution end. + API_FUNCTION() static int64 Dispatch(const Function& job, int32 jobCount = 1); + + /// + /// Waits for all dispatched jobs to finish. + /// + API_FUNCTION() static void Wait(); + + /// + /// Waits for all dispatched jobs until a given label to finish (i.e. waits for a Dispatch that returned that label). + /// + /// The label. 
+ API_FUNCTION() static void Wait(int64 label); +}; From 07ad94de13612d27255dacdd6299797b7a6292c1 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 6 Jun 2021 23:15:41 +0200 Subject: [PATCH 06/15] Optimize Animated Model bones matrices buffer update --- Source/Engine/Animations/Animations.cpp | 2 -- .../Graphics/Models/SkinnedMeshDrawData.cpp | 35 ++++++++++--------- .../Graphics/Models/SkinnedMeshDrawData.h | 7 +++- Source/Engine/Level/Actors/AnimatedModel.cpp | 14 ++++---- 4 files changed, 33 insertions(+), 25 deletions(-) diff --git a/Source/Engine/Animations/Animations.cpp b/Source/Engine/Animations/Animations.cpp index 9ef229d23..d2e095fa6 100644 --- a/Source/Engine/Animations/Animations.cpp +++ b/Source/Engine/Animations/Animations.cpp @@ -7,7 +7,6 @@ #include "Engine/Engine/EngineService.h" Array UpdateList; -Array UpdateBones; class AnimationsService : public EngineService { @@ -81,7 +80,6 @@ void AnimationsService::Update() void AnimationsService::Dispose() { UpdateList.Resize(0); - UpdateBones.Resize(0); } void Animations::AddToUpdate(AnimatedModel* obj) diff --git a/Source/Engine/Graphics/Models/SkinnedMeshDrawData.cpp b/Source/Engine/Graphics/Models/SkinnedMeshDrawData.cpp index a99984a0c..a8fc48889 100644 --- a/Source/Engine/Graphics/Models/SkinnedMeshDrawData.cpp +++ b/Source/Engine/Graphics/Models/SkinnedMeshDrawData.cpp @@ -41,10 +41,27 @@ void SkinnedMeshDrawData::SetData(const Matrix* bones, bool dropHistory) { if (!bones) return; - ASSERT(BonesCount > 0); - ANIM_GRAPH_PROFILE_EVENT("SetSkinnedMeshData"); + // Copy bones to the buffer + const int32 count = BonesCount; + const int32 preFetchStride = 2; + const Matrix* input = bones; + const auto output = (Matrix3x4*)Data.Get(); + ASSERT(Data.Count() == count * sizeof(Matrix3x4)); + for (int32 i = 0; i < count; i++) + { + Matrix3x4* bone = output + i; + Platform::Prefetch(bone + preFetchStride); + Platform::Prefetch((byte*)(bone + preFetchStride) + PLATFORM_CACHE_LINE_SIZE); + 
bone->SetMatrixTranspose(input[i]); + } + + OnDataChanged(dropHistory); +} + +void SkinnedMeshDrawData::OnDataChanged(bool dropHistory) +{ // Setup previous frame bone matrices if needed if (_hasValidData && !dropHistory) { @@ -64,20 +81,6 @@ void SkinnedMeshDrawData::SetData(const Matrix* bones, bool dropHistory) SAFE_DELETE_GPU_RESOURCE(PrevBoneMatrices); } - // Copy bones to the buffer - const int32 count = BonesCount; - const int32 preFetchStride = 2; - const Matrix* input = bones; - const auto output = (Matrix3x4*)Data.Get(); - ASSERT(Data.Count() == count * sizeof(Matrix3x4)); - for (int32 i = 0; i < count; i++) - { - Matrix3x4* bone = output + i; - Platform::Prefetch(bone + preFetchStride); - Platform::Prefetch((byte*)(bone + preFetchStride) + PLATFORM_CACHE_LINE_SIZE); - bone->SetMatrixTranspose(input[i]); - } - _isDirty = true; _hasValidData = true; } diff --git a/Source/Engine/Graphics/Models/SkinnedMeshDrawData.h b/Source/Engine/Graphics/Models/SkinnedMeshDrawData.h index f9d434fee..a9a882684 100644 --- a/Source/Engine/Graphics/Models/SkinnedMeshDrawData.h +++ b/Source/Engine/Graphics/Models/SkinnedMeshDrawData.h @@ -54,7 +54,6 @@ public: /// /// Determines whether this instance is ready for rendering. /// - /// True if has valid data and can be rendered, otherwise false. FORCE_INLINE bool IsReady() const { return BoneMatrices != nullptr && BoneMatrices->IsAllocated(); @@ -73,6 +72,12 @@ public: /// True if drop previous update bones used for motion blur, otherwise will keep them and do the update. void SetData(const Matrix* bones, bool dropHistory); + /// + /// After bones Data has been modified externally. Updates the bone matrices data for the GPU buffer. Ensure to call Flush before rendering. + /// + /// True if drop previous update bones used for motion blur, otherwise will keep them and do the update. + void OnDataChanged(bool dropHistory); + /// /// Flushes the bones data buffer with the GPU by sending the data fro the CPU. 
/// diff --git a/Source/Engine/Level/Actors/AnimatedModel.cpp b/Source/Engine/Level/Actors/AnimatedModel.cpp index ece54b78b..701df3d6e 100644 --- a/Source/Engine/Level/Actors/AnimatedModel.cpp +++ b/Source/Engine/Level/Actors/AnimatedModel.cpp @@ -2,6 +2,7 @@ #include "AnimatedModel.h" #include "BoneSocket.h" +#include "Engine/Core/Math/Matrix3x4.h" #include "Engine/Animations/Animations.h" #include "Engine/Engine/Engine.h" #if USE_EDITOR @@ -13,8 +14,6 @@ #include "Engine/Level/SceneObjectsFactory.h" #include "Engine/Serialization/Serialization.h" -extern Array UpdateBones; - AnimatedModel::AnimatedModel(const SpawnParams& params) : ModelInstanceActor(params) , _actualMode(AnimationUpdateMode::Never) @@ -470,14 +469,17 @@ void AnimatedModel::OnAnimationUpdated() // Calculate the final bones transformations and update skinning { ANIM_GRAPH_PROFILE_EVENT("Final Pose"); - UpdateBones.Resize(skeleton.Bones.Count(), false); - for (int32 boneIndex = 0; boneIndex < skeleton.Bones.Count(); boneIndex++) + const int32 bonesCount = skeleton.Bones.Count(); + Matrix3x4* output = (Matrix3x4*)_skinningData.Data.Get(); + ASSERT(_skinningData.Data.Count() == bonesCount * sizeof(Matrix3x4)); + for (int32 boneIndex = 0; boneIndex < bonesCount; boneIndex++) { auto& bone = skeleton.Bones[boneIndex]; - UpdateBones[boneIndex] = bone.OffsetMatrix * GraphInstance.NodesPose[bone.NodeIndex]; + Matrix matrix = bone.OffsetMatrix * GraphInstance.NodesPose[bone.NodeIndex]; + output[boneIndex].SetMatrixTranspose(matrix); } + _skinningData.OnDataChanged(!PerBoneMotionBlur); } - _skinningData.SetData(UpdateBones.Get(), !PerBoneMotionBlur); UpdateBounds(); UpdateSockets(); From 544cb1ff6ddb4db183a979cb205ea92462608a8e Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 10 Jun 2021 19:08:48 +0200 Subject: [PATCH 07/15] Finish job system --- Source/Engine/Core/Collections/ChunkedArray.h | 44 ++++--- Source/Engine/Core/Collections/RingBuffer.h | 14 ++- Source/Engine/Threading/JobSystem.cpp | 119 
++++++++++++++---- 3 files changed, 135 insertions(+), 42 deletions(-) diff --git a/Source/Engine/Core/Collections/ChunkedArray.h b/Source/Engine/Core/Collections/ChunkedArray.h index 1ed78ffe2..0d24e980d 100644 --- a/Source/Engine/Core/Collections/ChunkedArray.h +++ b/Source/Engine/Core/Collections/ChunkedArray.h @@ -46,7 +46,6 @@ public: /// /// Gets the amount of the elements in the collection. /// - /// The amount of the elements in the collection. FORCE_INLINE int32 Count() const { return _count; @@ -55,7 +54,6 @@ public: /// /// Gets the amount of the elements that can be hold by collection without resizing. /// - /// The current capacity of the collection. FORCE_INLINE int32 Capacity() const { return _chunks.Count() * ChunkSize; @@ -64,7 +62,6 @@ public: /// /// Returns true if array isn't empty. /// - /// True if array has any elements added, otherwise it is empty. FORCE_INLINE bool HasItems() const { return _count != 0; @@ -73,7 +70,6 @@ public: /// /// Returns true if collection is empty. /// - /// True if array is empty, otherwise it has any elements added. FORCE_INLINE bool IsEmpty() const { return _count == 0; @@ -154,20 +150,12 @@ public: public: - /// - /// Checks if iterator is in the end of the collection. - /// - /// True if is in the end, otherwise false. bool IsEnd() const { ASSERT(_collection); return Index() == _collection->Count(); } - /// - /// Checks if iterator is not in the end of the collection. - /// - /// True if is not in the end, otherwise false. bool IsNotEnd() const { ASSERT(_collection); @@ -331,6 +319,36 @@ public: return &chunk->At(chunk->Count() - 1); } + /// + /// Adds the one item to the collection and returns the reference to it. + /// + /// The reference to the added item. 
+ T& AddOne() + { + // Find first chunk with some space + Chunk* chunk = nullptr; + for (int32 i = 0; i < _chunks.Count(); i++) + { + if (_chunks[i]->Count() < ChunkSize) + { + chunk = _chunks[i]; + break; + } + } + + // Allocate chunk if missing + if (chunk == nullptr) + { + chunk = New(); + chunk->SetCapacity(ChunkSize); + _chunks.Add(chunk); + } + + // Add item + _count++; + return chunk->AddOne(); + } + /// /// Removes the element at specified iterator position. /// @@ -408,7 +426,6 @@ public: /// The new size. void Resize(int32 newSize) { - // Check if shrink if (newSize < Count()) { MISSING_CODE("shrinking ChunkedArray on Resize"); @@ -439,7 +456,6 @@ public: chunkIndex++; } } - ASSERT(newSize == Count()); } diff --git a/Source/Engine/Core/Collections/RingBuffer.h b/Source/Engine/Core/Collections/RingBuffer.h index bffc70e40..8a8deb86f 100644 --- a/Source/Engine/Core/Collections/RingBuffer.h +++ b/Source/Engine/Core/Collections/RingBuffer.h @@ -48,7 +48,7 @@ public: { if (_capacity == 0 || _capacity == _count) { - const int32 capacity = _allocation.CalculateCapacityGrow(_capacity, 0); + const int32 capacity = _allocation.CalculateCapacityGrow(_capacity, _count + 1); AllocationData alloc; alloc.Allocate(capacity); const int32 frontCount = Math::Min(_capacity - _front, _count); @@ -79,6 +79,18 @@ public: return _allocation.Get()[_front]; } + FORCE_INLINE T& operator[](int32 index) + { + ASSERT(index >= 0 && index < _count); + return _allocation.Get()[(_front + index) % _capacity]; + } + + FORCE_INLINE const T& operator[](int32 index) const + { + ASSERT(index >= 0 && index < _count); + return _allocation.Get()[(_front + index) % _capacity]; + } + void PopFront() { ASSERT(_front != _back); diff --git a/Source/Engine/Threading/JobSystem.cpp b/Source/Engine/Threading/JobSystem.cpp index b878bbdea..0a29bdecf 100644 --- a/Source/Engine/Threading/JobSystem.cpp +++ b/Source/Engine/Threading/JobSystem.cpp @@ -2,12 +2,35 @@ #include "JobSystem.h" #include "IRunnable.h" 
-#include "Engine/Core/Collections/RingBuffer.h" #include "Engine/Platform/CPUInfo.h" #include "Engine/Platform/Thread.h" #include "Engine/Platform/ConditionVariable.h" #include "Engine/Engine/EngineService.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Scripting/ManagedCLR/MCore.h" +#if USE_MONO +#include "Engine/Scripting/ManagedCLR/MDomain.h" +#include +#include +#endif + +// Jobs storage perf info: +// (500 jobs, i7 9th gen) +// JOB_SYSTEM_USE_MUTEX=1, enqueue=130-280 cycles, dequeue=2-6 cycles +// JOB_SYSTEM_USE_MUTEX=0, enqueue=300-700 cycles, dequeue=10-16 cycles +// So using RingBuffer+Mutex+Signals is better than moodycamel::ConcurrentQueue + +#define JOB_SYSTEM_USE_MUTEX 1 +#define JOB_SYSTEM_USE_STATS 0 + +#if JOB_SYSTEM_USE_STATS +#include "Engine/Core/Log.h" +#endif +#if JOB_SYSTEM_USE_MUTEX +#include "Engine/Core/Collections/RingBuffer.h" +#else +#include "ConcurrentQueue.h" +#endif class JobSystemService : public EngineService { @@ -27,7 +50,6 @@ struct JobData { Function Job; int32 Index; - int32 Count; }; template<> @@ -39,7 +61,7 @@ struct TIsPODType class JobSystemThread : public IRunnable { public: - int32 Index; + uint64 Index; public: @@ -65,10 +87,18 @@ namespace volatile int64 ExitFlag = 0; volatile int64 DoneLabel = 0; volatile int64 NextLabel = 0; - CriticalSection JobsLocker; ConditionVariable JobsSignal; ConditionVariable WaitSignal; +#if JOB_SYSTEM_USE_MUTEX + CriticalSection JobsLocker; RingBuffer> Jobs; +#else + ConcurrentQueue Jobs; +#endif +#if JOB_SYSTEM_USE_STATS + int64 DequeueCount = 0; + int64 DequeueSum = 0; +#endif } bool JobSystemService::Init() @@ -77,7 +107,7 @@ bool JobSystemService::Init() for (int32 i = 0; i < ThreadsCount; i++) { auto runnable = New(); - runnable->Index = i; + runnable->Index = (uint64)i; auto thread = Thread::Create(runnable, String::Format(TEXT("Job System {0}"), i), ThreadPriority::AboveNormal); if (thread == nullptr) return true; @@ -108,38 +138,57 @@ void JobSystemService::Dispose() 
int32 JobSystemThread::Run() { - Platform::SetThreadAffinityMask(1 << Index); + Platform::SetThreadAffinityMask(1ull << Index); JobData data; CriticalSection mutex; + bool attachMonoThread = true; +#if !JOB_SYSTEM_USE_MUTEX + moodycamel::ConsumerToken consumerToken(Jobs); +#endif while (Platform::AtomicRead(&ExitFlag) == 0) { // Try to get a job +#if JOB_SYSTEM_USE_STATS + const auto start = Platform::GetTimeCycles(); +#endif +#if JOB_SYSTEM_USE_MUTEX JobsLocker.Lock(); if (Jobs.Count() != 0) { - auto& front = Jobs.PeekFront(); - data = front; - front.Index++; - if (front.Index == front.Count) - { - Jobs.PopFront(); - } + data = Jobs.PeekFront(); + Jobs.PopFront(); } JobsLocker.Unlock(); +#else + if (!Jobs.try_dequeue(consumerToken, data)) + data.Job.Unbind(); +#endif +#if JOB_SYSTEM_USE_STATS + Platform::InterlockedIncrement(&DequeueCount); + Platform::InterlockedAdd(&DequeueSum, Platform::GetTimeCycles() - start); +#endif if (data.Job.IsBinded()) { +#if USE_MONO + // Ensure to have C# thread attached to this thead (late init due to MCore being initialized after Job System) + if (attachMonoThread && !mono_domain_get()) + { + const auto domain = MCore::Instance()->GetActiveDomain(); + mono_thread_attach(domain->GetNative()); + attachMonoThread = false; + } +#endif + // Run job data.Job(data.Index); - data.Job.Unbind(); - if (data.Index + 1 == data.Count) - { - // Move forward with the job queue - Platform::InterlockedIncrement(&DoneLabel); - WaitSignal.NotifyAll(); - } + // Move forward with the job queue + Platform::InterlockedIncrement(&DoneLabel); + WaitSignal.NotifyAll(); + + data.Job.Unbind(); } else { @@ -157,16 +206,27 @@ int64 JobSystem::Dispatch(const Function& job, int32 jobCount) PROFILE_CPU(); if (jobCount <= 0) return 0; +#if JOB_SYSTEM_USE_STATS + const auto start = Platform::GetTimeCycles(); +#endif JobData data; data.Job = job; - data.Index = 0; - data.Count = jobCount; +#if JOB_SYSTEM_USE_MUTEX JobsLocker.Lock(); - const auto label = 
Platform::InterlockedIncrement(&NextLabel); - Jobs.PushBack(data); + for (data.Index = 0; data.Index < jobCount; data.Index++) + Jobs.PushBack(data); JobsLocker.Unlock(); +#else + for (data.Index = 0; data.Index < jobCount; data.Index++) + Jobs.enqueue(data); +#endif + const auto label = Platform::InterlockedAdd(&NextLabel, (int64)jobCount) + jobCount; + +#if JOB_SYSTEM_USE_STATS + LOG(Info, "Job enqueue time: {0} cycles", (int64)(Platform::GetTimeCycles() - start)); +#endif if (jobCount == 1) JobsSignal.NotifyOne(); @@ -191,10 +251,15 @@ void JobSystem::Wait(int64 label) // Wait on signal until input label is not yet done CriticalSection mutex; - while (label > Platform::AtomicRead(&DoneLabel) && Platform::AtomicRead(&ExitFlag) == 0) + do { mutex.Lock(); - WaitSignal.Wait(mutex); + WaitSignal.Wait(mutex, 1); mutex.Unlock(); - } + } while (label > Platform::AtomicRead(&DoneLabel) && Platform::AtomicRead(&ExitFlag) == 0); + +#if JOB_SYSTEM_USE_STATS + LOG(Info, "Job average dequeue time: {0} cycles", DequeueSum / DequeueCount); + DequeueSum = DequeueCount = 0; +#endif } From b8ad4bdd2a6785c2faeb46537e4f799608aa3ec7 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 10 Jun 2021 19:10:39 +0200 Subject: [PATCH 08/15] Refactor AnimGraph debug flows to use scripting API event --- Source/Editor/Editor.cs | 16 --------- Source/Editor/Managed/ManagedEditor.cpp | 36 ------------------- .../Windows/Assets/AnimationGraphWindow.cs | 24 +++++++++---- Source/Engine/Animations/Animations.cpp | 2 ++ Source/Engine/Animations/Animations.h | 15 ++++++-- .../Animations/Graph/AnimGraph.Custom.cpp | 4 --- Source/Engine/Animations/Graph/AnimGraph.cpp | 2 +- Source/Engine/Animations/Graph/AnimGraph.h | 4 --- 8 files changed, 32 insertions(+), 71 deletions(-) diff --git a/Source/Editor/Editor.cs b/Source/Editor/Editor.cs index d549bb907..70f52268c 100644 --- a/Source/Editor/Editor.cs +++ b/Source/Editor/Editor.cs @@ -1306,22 +1306,6 @@ namespace FlaxEditor 
VisualScriptingDebugFlow?.Invoke(debugFlow); } - [StructLayout(LayoutKind.Sequential)] - internal struct AnimGraphDebugFlowInfo - { - public Asset Asset; - public FlaxEngine.Object Object; - public uint NodeId; - public int BoxId; - } - - internal static event Action AnimGraphDebugFlow; - - internal static void Internal_OnAnimGraphDebugFlow(ref AnimGraphDebugFlowInfo debugFlow) - { - AnimGraphDebugFlow?.Invoke(debugFlow); - } - private static void RequestStartPlayOnEditMode() { if (Instance.StateMachine.IsEditMode) diff --git a/Source/Editor/Managed/ManagedEditor.cpp b/Source/Editor/Managed/ManagedEditor.cpp index 18a905d4b..6a83ceeb5 100644 --- a/Source/Editor/Managed/ManagedEditor.cpp +++ b/Source/Editor/Managed/ManagedEditor.cpp @@ -34,7 +34,6 @@ MMethod* Internal_GetGameWinPtr = nullptr; MMethod* Internal_GetGameWindowSize = nullptr; MMethod* Internal_OnAppExit = nullptr; MMethod* Internal_OnVisualScriptingDebugFlow = nullptr; -MMethod* Internal_OnAnimGraphDebugFlow = nullptr; MMethod* Internal_RequestStartPlayOnEditMode = nullptr; void OnLightmapsBake(ShadowsOfMordor::BuildProgressStep step, float stepProgress, float totalProgress, bool isProgressEvent) @@ -138,38 +137,6 @@ void OnVisualScriptingDebugFlow() } } -struct AnimGraphDebugFlowInfo -{ - MonoObject* Asset; - MonoObject* Object; - uint32 NodeId; - int32 BoxId; -}; - -void OnAnimGraphDebugFlow(Asset* asset, ScriptingObject* object, uint32 nodeId, uint32 boxId) -{ - if (Internal_OnAnimGraphDebugFlow == nullptr) - { - Internal_OnAnimGraphDebugFlow = ManagedEditor::GetStaticClass()->GetMethod("Internal_OnAnimGraphDebugFlow", 1); - ASSERT(Internal_OnAnimGraphDebugFlow); - } - - AnimGraphDebugFlowInfo flowInfo; - flowInfo.Asset = asset ? asset->GetOrCreateManagedInstance() : nullptr; - flowInfo.Object = object ? 
object->GetOrCreateManagedInstance() : nullptr; - flowInfo.NodeId = nodeId; - flowInfo.BoxId = boxId; - MonoObject* exception = nullptr; - void* params[1]; - params[0] = &flowInfo; - Internal_OnAnimGraphDebugFlow->Invoke(nullptr, params, &exception); - if (exception) - { - MException ex(exception); - ex.Log(LogType::Error, TEXT("OnAnimGraphDebugFlow")); - } -} - void OnLogMessage(LogType type, const StringView& msg); ManagedEditor::ManagedEditor() @@ -187,7 +154,6 @@ ManagedEditor::ManagedEditor() CSG::Builder::OnBrushModified.Bind(); Log::Logger::OnMessage.Bind(); VisualScripting::DebugFlow.Bind(); - AnimGraphExecutor::DebugFlow.Bind(); } ManagedEditor::~ManagedEditor() @@ -204,7 +170,6 @@ ManagedEditor::~ManagedEditor() CSG::Builder::OnBrushModified.Unbind(); Log::Logger::OnMessage.Unbind(); VisualScripting::DebugFlow.Unbind(); - AnimGraphExecutor::DebugFlow.Unbind(); } void ManagedEditor::Init() @@ -530,7 +495,6 @@ void ManagedEditor::DestroyManaged() Internal_GetGameWinPtr = nullptr; Internal_OnAppExit = nullptr; Internal_OnVisualScriptingDebugFlow = nullptr; - Internal_OnAnimGraphDebugFlow = nullptr; // Base PersistentScriptingObject::DestroyManaged(); diff --git a/Source/Editor/Windows/Assets/AnimationGraphWindow.cs b/Source/Editor/Windows/Assets/AnimationGraphWindow.cs index 634202061..603fc6c7c 100644 --- a/Source/Editor/Windows/Assets/AnimationGraphWindow.cs +++ b/Source/Editor/Windows/Assets/AnimationGraphWindow.cs @@ -2,6 +2,7 @@ using System; using System.Collections.Generic; +using System.Runtime.InteropServices; using FlaxEditor.Content; using FlaxEditor.CustomEditors; using FlaxEditor.CustomEditors.Editors; @@ -13,6 +14,7 @@ using FlaxEditor.Viewport.Cameras; using FlaxEditor.Viewport.Previews; using FlaxEngine; using FlaxEngine.GUI; +using Object = FlaxEngine.Object; // ReSharper disable UnusedMember.Local // ReSharper disable UnusedMember.Global @@ -206,11 +208,18 @@ namespace FlaxEditor.Windows.Assets } } + [StructLayout(LayoutKind.Sequential)] + 
private struct AnimGraphDebugFlowInfo + { + public uint NodeId; + public int BoxId; + } + private FlaxObjectRefPickerControl _debugPicker; private NavigationBar _navigationBar; private PropertiesProxy _properties; private Tab _previewTab; - private readonly List _debugFlows = new List(); + private readonly List _debugFlows = new List(); /// /// Gets the animated model actor used for the animation preview. @@ -285,7 +294,7 @@ namespace FlaxEditor.Windows.Assets Parent = this }; - Editor.AnimGraphDebugFlow += OnDebugFlow; + Animations.DebugFlow += OnDebugFlow; } private void OnSurfaceContextChanged(VisjectSurfaceContext context) @@ -293,26 +302,27 @@ namespace FlaxEditor.Windows.Assets _surface.UpdateNavigationBar(_navigationBar, _toolstrip); } - private bool OnCheckValid(FlaxEngine.Object obj, ScriptType type) + private bool OnCheckValid(Object obj, ScriptType type) { return obj is AnimatedModel player && player.AnimationGraph == OriginalAsset; } - private void OnDebugFlow(Editor.AnimGraphDebugFlowInfo flowInfo) + private void OnDebugFlow(Asset asset, Object obj, uint nodeId, uint boxId) { // Filter the flow if (_debugPicker.Value != null) { - if (flowInfo.Asset != OriginalAsset || _debugPicker.Value != flowInfo.Object) + if (asset != OriginalAsset || _debugPicker.Value != obj) return; } else { - if (flowInfo.Asset != Asset || _preview.PreviewActor != flowInfo.Object) + if (asset != Asset || _preview.PreviewActor != obj) return; } // Register flow to show it in UI on a surface + var flowInfo = new AnimGraphDebugFlowInfo { NodeId = nodeId, BoxId = (int)boxId }; lock (_debugFlows) { _debugFlows.Add(flowInfo); @@ -457,7 +467,7 @@ namespace FlaxEditor.Windows.Assets /// public override void OnDestroy() { - Editor.AnimGraphDebugFlow -= OnDebugFlow; + Animations.DebugFlow -= OnDebugFlow; _properties = null; _navigationBar = null; diff --git a/Source/Engine/Animations/Animations.cpp b/Source/Engine/Animations/Animations.cpp index d2e095fa6..037c87a5f 100644 --- 
a/Source/Engine/Animations/Animations.cpp +++ b/Source/Engine/Animations/Animations.cpp @@ -1,6 +1,7 @@ // Copyright (c) 2012-2021 Wojciech Figat. All rights reserved. #include "Animations.h" +#include "Engine/Engine/Engine.h" #include "Engine/Profiler/ProfilerCPU.h" #include "Engine/Level/Actors/AnimatedModel.h" #include "Engine/Engine/Time.h" @@ -22,6 +23,7 @@ public: }; AnimationsService AnimationManagerInstance; +Delegate Animations::DebugFlow; void AnimationsService::Update() { diff --git a/Source/Engine/Animations/Animations.h b/Source/Engine/Animations/Animations.h index a1b90e753..8b5b79c67 100644 --- a/Source/Engine/Animations/Animations.h +++ b/Source/Engine/Animations/Animations.h @@ -2,14 +2,23 @@ #pragma once +#include "Engine/Scripting/ScriptingType.h" +#include "Engine/Core/Delegate.h" + class AnimatedModel; +class Asset; /// -/// The animations service. +/// The animations playback service. /// -class FLAXENGINE_API Animations +API_CLASS(Static) class FLAXENGINE_API Animations { -public: +DECLARE_SCRIPTING_TYPE_NO_SPAWN(Content); + +#if USE_EDITOR + // Custom event that is called every time the Anim Graph signal flows over the graph (including the data connections). Can be used to read and visualize the animation blending logic. Args are: anim graph asset, animated object, node id, box id + API_EVENT() static Delegate DebugFlow; +#endif /// /// Adds an animated model to update. 
diff --git a/Source/Engine/Animations/Graph/AnimGraph.Custom.cpp b/Source/Engine/Animations/Graph/AnimGraph.Custom.cpp index 22fc6af28..60ae46f16 100644 --- a/Source/Engine/Animations/Graph/AnimGraph.Custom.cpp +++ b/Source/Engine/Animations/Graph/AnimGraph.Custom.cpp @@ -80,10 +80,6 @@ namespace AnimGraphInternal } } -#if USE_EDITOR -Delegate AnimGraphExecutor::DebugFlow; -#endif - void AnimGraphExecutor::initRuntime() { ADD_INTERNAL_CALL("FlaxEngine.AnimationGraph::Internal_HasConnection", &AnimGraphInternal::HasConnection); diff --git a/Source/Engine/Animations/Graph/AnimGraph.cpp b/Source/Engine/Animations/Graph/AnimGraph.cpp index 2932f7c3f..321c5a944 100644 --- a/Source/Engine/Animations/Graph/AnimGraph.cpp +++ b/Source/Engine/Animations/Graph/AnimGraph.cpp @@ -312,7 +312,7 @@ VisjectExecutor::Value AnimGraphExecutor::eatBox(Node* caller, Box* box) _callStack.Add(caller); #if USE_EDITOR - DebugFlow(_graph._owner, _data->Object, box->GetParent()->ID, box->ID); + Animations::DebugFlow(_graph._owner, context.Data->Object, box->GetParent()->ID, box->ID); #endif // Call per group custom processing event diff --git a/Source/Engine/Animations/Graph/AnimGraph.h b/Source/Engine/Animations/Graph/AnimGraph.h index 8ca4b7247..a8f055033 100644 --- a/Source/Engine/Animations/Graph/AnimGraph.h +++ b/Source/Engine/Animations/Graph/AnimGraph.h @@ -828,10 +828,6 @@ private: public: -#if USE_EDITOR - // Custom event that is called every time the Anim Graph signal flows over the graph (including the data connections). Can be used to read and visualize the animation blending logic. - static Delegate DebugFlow; -#endif /// /// Initializes the managed runtime calls. 
From 41ad835d865fe148c97140021a0376bf8fed1369 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 12 Jun 2021 12:29:47 +0200 Subject: [PATCH 09/15] Refactor AnimGraph to support asynchronous execution --- .../Animations/Graph/AnimGraph.Base.cpp | 19 --- .../Animations/Graph/AnimGraph.Custom.cpp | 33 ++-- Source/Engine/Animations/Graph/AnimGraph.cpp | 124 ++++++++++----- Source/Engine/Animations/Graph/AnimGraph.h | 127 ++++------------ .../Animations/Graph/AnimGroup.Animation.cpp | 141 +++++++++--------- Source/Engine/Visject/VisjectGraph.h | 7 - 6 files changed, 199 insertions(+), 252 deletions(-) diff --git a/Source/Engine/Animations/Graph/AnimGraph.Base.cpp b/Source/Engine/Animations/Graph/AnimGraph.Base.cpp index 22b74e6f8..8748e99bb 100644 --- a/Source/Engine/Animations/Graph/AnimGraph.Base.cpp +++ b/Source/Engine/Animations/Graph/AnimGraph.Base.cpp @@ -9,25 +9,6 @@ #include "Engine/Utilities/Delaunay2D.h" #include "Engine/Serialization/MemoryReadStream.h" -void AnimGraphBase::ClearCache() -{ - // Clear sub-graphs - for (int32 i = 0; i < SubGraphs.Count(); i++) - { - SubGraphs[i]->ClearCache(); - } - - // Clear cache - for (int32 i = 0; i < Nodes.Count(); i++) - { - auto& node = Nodes[i]; - for (int32 j = 0; j < node.Boxes.Count(); j++) - { - node.Boxes[j].InvalidateCache(); - } - } -} - AnimSubGraph* AnimGraphBase::LoadSubGraph(const void* data, int32 dataLength, const Char* name) { if (data == nullptr || dataLength == 0) diff --git a/Source/Engine/Animations/Graph/AnimGraph.Custom.cpp b/Source/Engine/Animations/Graph/AnimGraph.Custom.cpp index 60ae46f16..5c082a8b5 100644 --- a/Source/Engine/Animations/Graph/AnimGraph.Custom.cpp +++ b/Source/Engine/Animations/Graph/AnimGraph.Custom.cpp @@ -89,13 +89,10 @@ void AnimGraphExecutor::initRuntime() void AnimGraphExecutor::ProcessGroupCustom(Box* boxBase, Node* nodeBase, Value& value) { - auto box = (AnimGraphBox*)boxBase; - if (box->IsCacheValid()) - { - // Return cache - value = box->Cache; + auto& context = 
Context.Get(); + if (context.ValueCache.TryGet(boxBase, value)) return; - } + auto box = (AnimGraphBox*)boxBase; auto node = (AnimGraphNode*)nodeBase; auto& data = node->Data.Custom; value = Value::Null; @@ -105,16 +102,16 @@ void AnimGraphExecutor::ProcessGroupCustom(Box* boxBase, Node* nodeBase, Value& return; // Prepare node context - InternalContext context; - context.Graph = &_graph; - context.GraphExecutor = this; - context.Node = node; - context.NodeId = node->ID; - context.BoxId = box->ID; - context.DeltaTime = _deltaTime; - context.CurrentFrameIndex = _currentFrameIndex;; - context.BaseModel = _graph.BaseModel->GetOrCreateManagedInstance(); - context.Instance = _data->Object ? _data->Object->GetOrCreateManagedInstance() : nullptr; + InternalContext internalContext; + internalContext.Graph = &_graph; + internalContext.GraphExecutor = this; + internalContext.Node = node; + internalContext.NodeId = node->ID; + internalContext.BoxId = box->ID; + internalContext.DeltaTime = context.DeltaTime; + internalContext.CurrentFrameIndex = context.CurrentFrameIndex; + internalContext.BaseModel = _graph.BaseModel->GetOrCreateManagedInstance(); + internalContext.Instance = context.Data->Object ? 
context.Data->Object->GetOrCreateManagedInstance() : nullptr; // Peek managed object const auto obj = mono_gchandle_get_target(data.Handle); @@ -126,7 +123,7 @@ void AnimGraphExecutor::ProcessGroupCustom(Box* boxBase, Node* nodeBase, Value& // Evaluate node void* params[1]; - params[0] = &context; + params[0] = &internalContext; MonoObject* exception = nullptr; MonoObject* result = data.Evaluate->Invoke(obj, params, &exception); if (exception) @@ -138,7 +135,7 @@ void AnimGraphExecutor::ProcessGroupCustom(Box* boxBase, Node* nodeBase, Value& // Extract result value = MUtils::UnboxVariant(result); - box->Cache = value; + context.ValueCache.Add(boxBase, value); } bool AnimGraph::IsReady() const diff --git a/Source/Engine/Animations/Graph/AnimGraph.cpp b/Source/Engine/Animations/Graph/AnimGraph.cpp index 321c5a944..6d95cbdfc 100644 --- a/Source/Engine/Animations/Graph/AnimGraph.cpp +++ b/Source/Engine/Animations/Graph/AnimGraph.cpp @@ -1,11 +1,14 @@ // Copyright (c) 2012-2021 Wojciech Figat. All rights reserved. 
#include "AnimGraph.h" +#include "Engine/Animations/Animations.h" #include "Engine/Content/Assets/SkinnedModel.h" #include "Engine/Graphics/Models/SkeletonData.h" #include "Engine/Scripting/Scripting.h" #include "Engine/Engine/Time.h" +ThreadLocal AnimGraphExecutor::Context; + RootMotionData RootMotionData::Identity = { Vector3(0.0f), Quaternion(0.0f, 0.0f, 0.0f, 1.0f) }; RootMotionData& RootMotionData::operator+=(const RootMotionData& b) @@ -78,16 +81,44 @@ void AnimGraphImpulse::SetNodeModelTransformation(SkeletonData& skeleton, int32 parentTransform.WorldToLocal(value, Nodes[nodeIndex]); } +void AnimGraphInstanceData::Clear() +{ + Version = 0; + LastUpdateTime = -1; + CurrentFrame = 0; + RootTransform = Transform::Identity; + RootMotion = RootMotionData::Identity; + Parameters.Resize(0); + State.Resize(0); + NodesPose.Resize(0); +} + +void AnimGraphInstanceData::ClearState() +{ + Version = 0; + LastUpdateTime = -1; + CurrentFrame = 0; + RootTransform = Transform::Identity; + RootMotion = RootMotionData::Identity; + State.Resize(0); + NodesPose.Resize(0); +} + +void AnimGraphInstanceData::Invalidate() +{ + LastUpdateTime = -1; + CurrentFrame = 0; +} + AnimGraphImpulse* AnimGraphNode::GetNodes(AnimGraphExecutor* executor) { - // Ensure to have memory + auto& context = AnimGraphExecutor::Context.Get(); const int32 count = executor->_skeletonNodesCount; - if (Nodes.Nodes.Count() != count) - { - Nodes.Nodes.Resize(count, false); - } - - return &Nodes; + if (context.PoseCacheSize == context.PoseCache.Count()) + context.PoseCache.AddOne(); + auto& nodes = context.PoseCache[context.PoseCacheSize++]; + nodes.Nodes.Resize(count, false); + return &nodes; } bool AnimGraph::Load(ReadStream* stream, bool loadMeta) @@ -181,20 +212,24 @@ void AnimGraphExecutor::Update(AnimGraphInstanceData& data, float dt) // Initialize auto& skeleton = _graph.BaseModel->Skeleton; + auto& context = Context.Get(); { ANIM_GRAPH_PROFILE_EVENT("Init"); - // Prepare graph data for the evaluation + 
// Init data from base model _skeletonNodesCount = skeleton.Nodes.Count(); - _graphStack.Clear(); - _graphStack.Push((Graph*)&_graph); - _data = &data; - _deltaTime = dt; _rootMotionMode = (RootMotionMode)(int32)_graph._rootNode->Values[0]; - _currentFrameIndex = ++data.CurrentFrame; - _callStack.Clear(); - _functions.Clear(); - _graph.ClearCache(); + + // Prepare context data for the evaluation + context.GraphStack.Clear(); + context.GraphStack.Push((Graph*)&_graph); + context.Data = &data; + context.DeltaTime = dt; + context.CurrentFrameIndex = ++data.CurrentFrame; + context.CallStack.Clear(); + context.Functions.Clear(); + context.PoseCacheSize = 0; + context.ValueCache.Clear(); // Prepare instance data if (data.Version != _graph.Version) @@ -208,18 +243,18 @@ void AnimGraphExecutor::Update(AnimGraphInstanceData& data, float dt) data.State.Resize(_graph.BucketsCountTotal, false); // Initialize buckets - ResetBuckets(&_graph); + ResetBuckets(context, &_graph); } // Init empty nodes data - _emptyNodes.RootMotion = RootMotionData::Identity; - _emptyNodes.Position = 0.0f; - _emptyNodes.Length = 0.0f; - _emptyNodes.Nodes.Resize(_skeletonNodesCount, false); + context.EmptyNodes.RootMotion = RootMotionData::Identity; + context.EmptyNodes.Position = 0.0f; + context.EmptyNodes.Length = 0.0f; + context.EmptyNodes.Nodes.Resize(_skeletonNodesCount, false); for (int32 i = 0; i < _skeletonNodesCount; i++) { auto& node = skeleton.Nodes[i]; - _emptyNodes.Nodes[i] = node.LocalTransform; + context.EmptyNodes.Nodes[i] = node.LocalTransform; } } @@ -244,7 +279,7 @@ void AnimGraphExecutor::Update(AnimGraphInstanceData& data, float dt) { ANIM_GRAPH_PROFILE_EVENT("Global Pose"); - _data->NodesPose.Resize(_skeletonNodesCount, false); + data.NodesPose.Resize(_skeletonNodesCount, false); // Note: this assumes that nodes are sorted (parents first) for (int32 nodeIndex = 0; nodeIndex < _skeletonNodesCount; nodeIndex++) @@ -254,18 +289,16 @@ void 
AnimGraphExecutor::Update(AnimGraphInstanceData& data, float dt) { nodesTransformations[nodeIndex] = nodesTransformations[parentIndex].LocalToWorld(nodesTransformations[nodeIndex]); } - nodesTransformations[nodeIndex].GetWorld(_data->NodesPose[nodeIndex]); + nodesTransformations[nodeIndex].GetWorld(data.NodesPose[nodeIndex]); } - } - // Process the root node transformation and the motion - { - _data->RootTransform = nodesTransformations[0]; - _data->RootMotion = animResult->RootMotion; + // Process the root node transformation and the motion + data.RootTransform = nodesTransformations[0]; + data.RootMotion = animResult->RootMotion; } // Cleanup - _data = nullptr; + context.Data = nullptr; } void AnimGraphExecutor::GetInputValue(Box* box, Value& result) @@ -273,29 +306,39 @@ void AnimGraphExecutor::GetInputValue(Box* box, Value& result) result = eatBox(box->GetParent(), box->FirstConnection()); } -void AnimGraphExecutor::ResetBucket(int32 bucketIndex) +AnimGraphImpulse* AnimGraphExecutor::GetEmptyNodes() { - auto& stateBucket = _data->State[bucketIndex]; - _graph._bucketInitializerList[bucketIndex](stateBucket); + return &Context.Get().EmptyNodes; } -void AnimGraphExecutor::ResetBuckets(AnimGraphBase* graph) +void AnimGraphExecutor::InitNodes(AnimGraphImpulse* nodes) const +{ + const auto& emptyNodes = Context.Get().EmptyNodes; + Platform::MemoryCopy(nodes->Nodes.Get(), emptyNodes.Nodes.Get(), sizeof(Transform) * _skeletonNodesCount); + nodes->RootMotion = emptyNodes.RootMotion; + nodes->Position = emptyNodes.Position; + nodes->Length = emptyNodes.Length; +} + +void AnimGraphExecutor::ResetBuckets(AnimGraphContext& context, AnimGraphBase* graph) { if (graph == nullptr) return; - ASSERT(_data); + auto& state = context.Data->State; for (int32 i = 0; i < graph->BucketsCountTotal; i++) { const int32 bucketIndex = graph->BucketsStart + i; - _graph._bucketInitializerList[bucketIndex](_data->State[bucketIndex]); + 
_graph._bucketInitializerList[bucketIndex](state[bucketIndex]); } } VisjectExecutor::Value AnimGraphExecutor::eatBox(Node* caller, Box* box) { + auto& context = Context.Get(); + // Check if graph is looped or is too deep - if (_callStack.Count() >= ANIM_GRAPH_MAX_CALL_STACK) + if (context.CallStack.Count() >= ANIM_GRAPH_MAX_CALL_STACK) { OnError(caller, box, TEXT("Graph is looped or too deep!")); return Value::Zero; @@ -309,7 +352,7 @@ VisjectExecutor::Value AnimGraphExecutor::eatBox(Node* caller, Box* box) #endif // Add to the calling stack - _callStack.Add(caller); + context.CallStack.Add(caller); #if USE_EDITOR Animations::DebugFlow(_graph._owner, context.Data->Object, box->GetParent()->ID, box->ID); @@ -322,12 +365,13 @@ VisjectExecutor::Value AnimGraphExecutor::eatBox(Node* caller, Box* box) (this->*func)(box, parentNode, value); // Remove from the calling stack - _callStack.RemoveLast(); + context.CallStack.RemoveLast(); return value; } VisjectExecutor::Graph* AnimGraphExecutor::GetCurrentGraph() const { - return _graphStack.Peek(); + auto& context = Context.Get(); + return context.GraphStack.Peek(); } diff --git a/Source/Engine/Animations/Graph/AnimGraph.h b/Source/Engine/Animations/Graph/AnimGraph.h index a8f055033..1e011aed3 100644 --- a/Source/Engine/Animations/Graph/AnimGraph.h +++ b/Source/Engine/Animations/Graph/AnimGraph.h @@ -4,6 +4,7 @@ #include "Engine/Visject/VisjectGraph.h" #include "Engine/Content/Assets/Animation.h" +#include "Engine/Core/Collections/ChunkedArray.h" #include "Engine/Animations/AlphaBlend.h" #include "Engine/Core/Math/Matrix.h" #include "../Config.h" @@ -362,40 +363,17 @@ public: /// /// Clears this container data. /// - void Clear() - { - Version = 0; - LastUpdateTime = -1; - CurrentFrame = 0; - RootTransform = Transform::Identity; - RootMotion = RootMotionData::Identity; - Parameters.Resize(0); - State.Resize(0); - NodesPose.Resize(0); - } + void Clear(); /// /// Clears this container state data. 
/// - void ClearState() - { - Version = 0; - LastUpdateTime = -1; - CurrentFrame = 0; - RootTransform = Transform::Identity; - RootMotion = RootMotionData::Identity; - State.Resize(0); - NodesPose.Resize(0); - } + void ClearState(); /// /// Invalidates the update timer. /// - void Invalidate() - { - LastUpdateTime = -1; - CurrentFrame = 0; - } + void Invalidate(); }; /// @@ -424,18 +402,6 @@ public: : VisjectGraphBox(parent, id, type) { } - -public: - - bool IsCacheValid() const - { - return Cache.Type.Type != VariantType::Pointer || Cache.AsPointer != nullptr; - } - - void InvalidateCache() - { - Cache = Variant::Null; - } }; class AnimGraphNode : public VisjectGraphNode @@ -575,13 +541,6 @@ public: /// int32 BucketIndex = -1; - // TODO: use shared allocator per AnimGraph to reduce dynamic memory allocation (also bones data would be closer in memory -> less cache misses) - - /// - /// The node transformations (layout matches the linked to graph skinned model skeleton). - /// - AnimGraphImpulse Nodes; - /// /// The custom data (depends on node type). Used to cache data for faster usage at runtime. /// @@ -661,17 +620,11 @@ public: /// /// Gets the root node of the graph (cache don load). /// - /// The root node. FORCE_INLINE Node* GetRootNode() const { return _rootNode; } - /// - /// Clear all cached values in the graph nodes and the sub-graphs data. - /// - void ClearCache(); - /// /// Loads the sub-graph. /// @@ -751,9 +704,9 @@ public: AnimGraph(Asset* owner, bool isFunction = false) : AnimGraphBase(this) , _isFunction(isFunction) + , _isRegisteredForScriptingEvents(false) , _bucketInitializerList(64) , _owner(owner) - , _isRegisteredForScriptingEvents(false) { } @@ -806,6 +759,24 @@ public: bool onParamCreated(Parameter* p) override; }; +/// +/// The Animation Graph evaluation context. 
+/// +struct AnimGraphContext +{ + float DeltaTime; + uint64 CurrentFrameIndex; + AnimGraphInstanceData* Data; + AnimGraphImpulse EmptyNodes; + AnimGraphTransitionData TransitionData; + Array> CallStack; + Array> GraphStack; + Dictionary Functions; + ChunkedArray PoseCache; + int32 PoseCacheSize; + Dictionary ValueCache; +}; + /// /// The Animation Graph executor runtime for animation pose evaluation. /// @@ -815,20 +786,14 @@ class AnimGraphExecutor : public VisjectExecutor private: AnimGraph& _graph; - float _deltaTime = 0.0f; - uint64 _currentFrameIndex = 0; - int32 _skeletonNodesCount = 0; RootMotionMode _rootMotionMode = RootMotionMode::NoExtraction; - AnimGraphInstanceData* _data = nullptr; - AnimGraphImpulse _emptyNodes; - AnimGraphTransitionData _transitionData; - Array> _callStack; - Array> _graphStack; - Dictionary _functions; + int32 _skeletonNodesCount = 0; + + // Per-thread context to allow async execution + static ThreadLocal Context; public: - /// /// Initializes the managed runtime calls. /// @@ -854,34 +819,10 @@ public: /// /// Gets the skeleton nodes transformations structure containing identity matrices. /// - FORCE_INLINE const AnimGraphImpulse* GetEmptyNodes() const - { - return &_emptyNodes; - } + AnimGraphImpulse* GetEmptyNodes(); - /// - /// Gets the skeleton nodes transformations structure containing identity matrices. - /// - /// The data. 
- FORCE_INLINE AnimGraphImpulse* GetEmptyNodes() - { - return &_emptyNodes; - } - - FORCE_INLINE void InitNodes(AnimGraphImpulse* nodes) const - { - // Initialize with cached node transformations - Platform::MemoryCopy(nodes->Nodes.Get(), _emptyNodes.Nodes.Get(), sizeof(Transform) * _skeletonNodesCount); - nodes->RootMotion = _emptyNodes.RootMotion; - nodes->Position = _emptyNodes.Position; - nodes->Length = _emptyNodes.Length; - } - - FORCE_INLINE void InitNode(AnimGraphImpulse* nodes, int32 index) const - { - // Initialize with cached node transformation - nodes->Nodes[index] = GetEmptyNodes()->Nodes[index]; - } + // Initialize impulse with cached node transformations + void InitNodes(AnimGraphImpulse* nodes) const; FORCE_INLINE void CopyNodes(AnimGraphImpulse* dstNodes, AnimGraphImpulse* srcNodes) const { @@ -899,16 +840,10 @@ public: CopyNodes(dstNodes, static_cast(value.AsPointer)); } - /// - /// Resets the state bucket. - /// - /// The zero-based index of the bucket. - void ResetBucket(int32 bucketIndex); - /// /// Resets all the state bucket used by the given graph including sub-graphs (total). Can eb used to reset the animation state of the nested graph (including children). 
/// - void ResetBuckets(AnimGraphBase* graph); + void ResetBuckets(AnimGraphContext& context, AnimGraphBase* graph); private: diff --git a/Source/Engine/Animations/Graph/AnimGroup.Animation.cpp b/Source/Engine/Animations/Graph/AnimGroup.Animation.cpp index d7baadfc0..732390112 100644 --- a/Source/Engine/Animations/Graph/AnimGroup.Animation.cpp +++ b/Source/Engine/Animations/Graph/AnimGroup.Animation.cpp @@ -14,7 +14,7 @@ int32 AnimGraphExecutor::GetRootNodeIndex(Animation* anim) if (anim->Data.RootNodeName.HasChars()) { auto& skeleton = _graph.BaseModel->Skeleton; - for (int32 i = 0; i < _skeletonNodesCount; i++) + for (int32 i = 0; i < skeleton.Nodes.Count(); i++) { if (skeleton.Nodes[i].Name == anim->Data.RootNodeName) { @@ -119,7 +119,7 @@ float GetAnimSamplePos(float length, Animation* anim, float pos, float speed) // Also, scale the animation to fit the total animation node length without cut in a middle const auto animLength = anim->GetLength(); const int32 cyclesCount = Math::FloorToInt(length / animLength); - const float cycleLength = animLength * cyclesCount; + const float cycleLength = animLength * (float)cyclesCount; const float adjustRateScale = length / cycleLength; auto animPos = pos * speed * adjustRateScale; while (animPos > animLength) @@ -152,10 +152,11 @@ Variant AnimGraphExecutor::SampleAnimation(AnimGraphNode* node, bool loop, float nodes->Position = pos; nodes->Length = length; const auto mapping = anim->GetMapping(_graph.BaseModel); - for (int32 i = 0; i < _skeletonNodesCount; i++) + const auto emptyNodes = GetEmptyNodes(); + for (int32 i = 0; i < nodes->Nodes.Count(); i++) { const int32 nodeToChannel = mapping->At(i); - InitNode(nodes, i); + nodes->Nodes[i] = emptyNodes->Nodes[i]; if (nodeToChannel != -1) { // Calculate the animated node transformation @@ -197,7 +198,7 @@ Variant AnimGraphExecutor::SampleAnimationsWithBlend(AnimGraphNode* node, bool l nodes->Length = length; const auto mappingA = animA->GetMapping(_graph.BaseModel); const 
auto mappingB = animB->GetMapping(_graph.BaseModel); - for (int32 i = 0; i < _skeletonNodesCount; i++) + for (int32 i = 0; i < nodes->Nodes.Count(); i++) { const int32 nodeToChannelA = mappingA->At(i); const int32 nodeToChannelB = mappingB->At(i); @@ -286,12 +287,13 @@ Variant AnimGraphExecutor::SampleAnimationsWithBlend(AnimGraphNode* node, bool l const auto mappingB = animB->GetMapping(_graph.BaseModel); const auto mappingC = animC->GetMapping(_graph.BaseModel); Transform tmp, t; - for (int32 i = 0; i < _skeletonNodesCount; i++) + const auto emptyNodes = GetEmptyNodes(); + for (int32 i = 0; i < nodes->Nodes.Count(); i++) { const int32 nodeToChannelA = mappingA->At(i); const int32 nodeToChannelB = mappingB->At(i); const int32 nodeToChannelC = mappingC->At(i); - tmp = t = GetEmptyNodes()->Nodes[i]; + tmp = t = emptyNodes->Nodes[i]; // Calculate the animated node transformations if (nodeToChannelA != -1) @@ -384,7 +386,7 @@ Variant AnimGraphExecutor::Blend(AnimGraphNode* node, const Value& poseA, const if (!ANIM_GRAPH_IS_VALID_PTR(poseB)) nodesB = GetEmptyNodes(); - for (int32 i = 0; i < _skeletonNodesCount; i++) + for (int32 i = 0; i < nodes->Nodes.Count(); i++) { Transform::Lerp(nodesA->Nodes[i], nodesB->Nodes[i], alpha, nodes->Nodes[i]); } @@ -443,6 +445,7 @@ void ComputeMultiBlendLength(float& length, AnimGraphNode* node) void AnimGraphExecutor::ProcessGroupParameters(Box* box, Node* node, Value& value) { + auto& context = Context.Get(); switch (node->TypeID) { // Get @@ -453,7 +456,7 @@ void AnimGraphExecutor::ProcessGroupParameters(Box* box, Node* node, Value& valu const auto param = _graph.GetParameter((Guid)node->Values[0], paramIndex); if (param) { - value = _data->Parameters[paramIndex].Value; + value = context.Data->Parameters[paramIndex].Value; switch (param->Type.Type) { case VariantType::Vector2: @@ -523,19 +526,20 @@ void AnimGraphExecutor::ProcessGroupParameters(Box* box, Node* node, Value& valu void AnimGraphExecutor::ProcessGroupTools(Box* box, 
Node* nodeBase, Value& value) { + auto& context = Context.Get(); auto node = (AnimGraphNode*)nodeBase; switch (node->TypeID) { // Time case 5: { - auto& bucket = _data->State[node->BucketIndex].Animation; - if (bucket.LastUpdateFrame != _currentFrameIndex) + auto& bucket = context.Data->State[node->BucketIndex].Animation; + if (bucket.LastUpdateFrame != context.CurrentFrameIndex) { - bucket.TimePosition += _deltaTime; - bucket.LastUpdateFrame = _currentFrameIndex; + bucket.TimePosition += context.DeltaTime; + bucket.LastUpdateFrame = context.CurrentFrameIndex; } - value = box->ID == 0 ? bucket.TimePosition : _deltaTime; + value = box->ID == 0 ? bucket.TimePosition : context.DeltaTime; break; } default: @@ -546,13 +550,10 @@ void AnimGraphExecutor::ProcessGroupTools(Box* box, Node* nodeBase, Value& value void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Value& value) { - auto box = (AnimGraphBox*)boxBase; - if (box->IsCacheValid()) - { - // Return cache - value = box->Cache; + auto& context = Context.Get(); + if (context.ValueCache.TryGet(boxBase, value)) return; - } + auto box = (AnimGraphBox*)boxBase; auto node = (AnimGraphNode*)nodeBase; switch (node->TypeID) { @@ -569,7 +570,7 @@ void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Valu case 2: { const auto anim = node->Assets[0].As(); - auto& bucket = _data->State[node->BucketIndex].Animation; + auto& bucket = context.Data->State[node->BucketIndex].Animation; const float speed = (float)tryGetValue(node->GetBox(5), node->Values[1]); const bool loop = (bool)tryGetValue(node->GetBox(6), node->Values[2]); const float startTimePos = (float)tryGetValue(node->GetBox(7), node->Values[3]); @@ -584,17 +585,17 @@ void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Valu const float length = anim ? 
anim->GetLength() : 0.0f; // Calculate new time position - if (speed < 0.0f && bucket.LastUpdateFrame < _currentFrameIndex - 1) + if (speed < 0.0f && bucket.LastUpdateFrame < context.CurrentFrameIndex - 1) { // If speed is negative and it's the first node update then start playing from end bucket.TimePosition = length; } - float newTimePos = bucket.TimePosition + _deltaTime * speed; + float newTimePos = bucket.TimePosition + context.DeltaTime * speed; value = SampleAnimation(node, loop, length, startTimePos, bucket.TimePosition, newTimePos, anim, 1.0f); bucket.TimePosition = newTimePos; - bucket.LastUpdateFrame = _currentFrameIndex; + bucket.LastUpdateFrame = context.CurrentFrameIndex; break; } @@ -615,7 +616,7 @@ void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Valu // Is Playing case 4: // If anim was updated during this or a previous frame - value = bucket.LastUpdateFrame >= _currentFrameIndex - 1; + value = bucket.LastUpdateFrame >= context.CurrentFrameIndex - 1; break; } break; @@ -643,7 +644,7 @@ void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Valu value = Value::Null; if (inputBox->HasConnection()) value = eatBox(nodeBase, inputBox->FirstConnection()); - box->Cache = value; + context.ValueCache.Add(boxBase, value); return; } const auto nodeIndex = _graph.BaseModel->Skeleton.Bones[boneIndex].NodeIndex; @@ -690,7 +691,7 @@ void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Valu // Transform every node const auto& skeleton = BaseModel->Skeleton; - for (int32 i = 0; i < _skeletonNodesCount; i++) + for (int32 i = 0; i < nodes->Nodes.Count(); i++) { const int32 parentIndex = skeleton.Nodes[i].ParentIndex; if (parentIndex != -1) @@ -729,7 +730,7 @@ void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Valu // Inv transform every node const auto& skeleton = BaseModel->Skeleton; - for (int32 i = _skeletonNodesCount - 1; i >= 0; i--) + for (int32 i = 
nodes->Nodes.Count() - 1; i >= 0; i--) { const int32 parentIndex = skeleton.Nodes[i].ParentIndex; if (parentIndex != -1) @@ -775,7 +776,7 @@ void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Valu { // Pass through the input value = input; - box->Cache = value; + context.ValueCache.Add(boxBase, value); return; } @@ -836,7 +837,7 @@ void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Valu if (!ANIM_GRAPH_IS_VALID_PTR(valueB)) nodesB = GetEmptyNodes(); - for (int32 i = 0; i < _skeletonNodesCount; i++) + for (int32 i = 0; i < nodes->Nodes.Count(); i++) { Transform::Lerp(nodesA->Nodes[i], nodesB->Nodes[i], alpha, nodes->Nodes[i]); } @@ -876,7 +877,7 @@ void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Valu const auto nodesA = static_cast(valueA.AsPointer); const auto nodesB = static_cast(valueB.AsPointer); Transform t, tA, tB; - for (int32 i = 0; i < _skeletonNodesCount; i++) + for (int32 i = 0; i < nodes->Nodes.Count(); i++) { tA = nodesA->Nodes[i]; tB = nodesB->Nodes[i]; @@ -921,7 +922,7 @@ void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Valu // Blend all nodes masked by the user Transform tA, tB; auto& nodesMask = mask->GetNodesMask(); - for (int32 nodeIndex = 0; nodeIndex < _skeletonNodesCount; nodeIndex++) + for (int32 nodeIndex = 0; nodeIndex < nodes->Nodes.Count(); nodeIndex++) { tA = nodesA->Nodes[nodeIndex]; if (nodesMask[nodeIndex]) @@ -956,7 +957,7 @@ void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Valu // [1]: Guid Animation // Prepare - auto& bucket = _data->State[node->BucketIndex].MultiBlend; + auto& bucket = context.Data->State[node->BucketIndex].MultiBlend; const auto range = node->Values[0].AsVector4(); const auto speed = (float)tryGetValue(node->GetBox(1), node->Values[1]); const auto loop = (bool)tryGetValue(node->GetBox(2), node->Values[2]); @@ -988,12 +989,12 @@ void AnimGraphExecutor::ProcessGroupAnimation(Box* 
boxBase, Node* nodeBase, Valu } // Calculate new time position - if (speed < 0.0f && bucket.LastUpdateFrame < _currentFrameIndex - 1) + if (speed < 0.0f && bucket.LastUpdateFrame < context.CurrentFrameIndex - 1) { // If speed is negative and it's the first node update then start playing from end bucket.TimePosition = data.Length; } - float newTimePos = bucket.TimePosition + _deltaTime * speed; + float newTimePos = bucket.TimePosition + context.DeltaTime * speed; ANIM_GRAPH_PROFILE_EVENT("Multi Blend 1D"); @@ -1035,7 +1036,7 @@ void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Valu } bucket.TimePosition = newTimePos; - bucket.LastUpdateFrame = _currentFrameIndex; + bucket.LastUpdateFrame = context.CurrentFrameIndex; break; } @@ -1054,7 +1055,7 @@ void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Valu // [1]: Guid Animation // Prepare - auto& bucket = _data->State[node->BucketIndex].MultiBlend; + auto& bucket = context.Data->State[node->BucketIndex].MultiBlend; const auto range = node->Values[0].AsVector4(); const auto speed = (float)tryGetValue(node->GetBox(1), node->Values[1]); const auto loop = (bool)tryGetValue(node->GetBox(2), node->Values[2]); @@ -1090,12 +1091,12 @@ void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Valu } // Calculate new time position - if (speed < 0.0f && bucket.LastUpdateFrame < _currentFrameIndex - 1) + if (speed < 0.0f && bucket.LastUpdateFrame < context.CurrentFrameIndex - 1) { // If speed is negative and it's the first node update then start playing from end bucket.TimePosition = data.Length; } - float newTimePos = bucket.TimePosition + _deltaTime * speed; + float newTimePos = bucket.TimePosition + context.DeltaTime * speed; ANIM_GRAPH_PROFILE_EVENT("Multi Blend 2D"); @@ -1227,7 +1228,7 @@ void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Valu } bucket.TimePosition = newTimePos; - bucket.LastUpdateFrame = _currentFrameIndex; + 
bucket.LastUpdateFrame = context.CurrentFrameIndex; break; } @@ -1246,7 +1247,7 @@ void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Valu // [3]: AlphaBlendMode Mode // Prepare - auto& bucket = _data->State[node->BucketIndex].BlendPose; + auto& bucket = context.Data->State[node->BucketIndex].BlendPose; const int32 poseIndex = (int32)tryGetValue(node->GetBox(1), node->Values[0]); const float blendDuration = (float)tryGetValue(node->GetBox(2), node->Values[1]); const int32 poseCount = Math::Clamp(node->Values[2].AsInt, 0, MaxBlendPoses); @@ -1259,7 +1260,7 @@ void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Valu } // Check if transition is not active (first update, pose not changing or transition ended) - bucket.TransitionPosition += _deltaTime; + bucket.TransitionPosition += context.DeltaTime; if (bucket.PreviousBlendPoseIndex == -1 || bucket.PreviousBlendPoseIndex == poseIndex || bucket.TransitionPosition >= blendDuration || blendDuration <= ANIM_GRAPH_BLEND_THRESHOLD) { bucket.TransitionPosition = 0.0f; @@ -1356,11 +1357,11 @@ void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Valu ANIM_GRAPH_PROFILE_EVENT("State Machine"); // Prepare - auto& bucket = _data->State[node->BucketIndex].StateMachine; + auto& bucket = context.Data->State[node->BucketIndex].StateMachine; auto& data = node->Data.StateMachine; int32 transitionsLeft = maxTransitionsPerUpdate == 0 ? 
MAX_uint16 : maxTransitionsPerUpdate; bool isFirstUpdate = bucket.LastUpdateFrame == 0 || bucket.CurrentState == nullptr; - if (bucket.LastUpdateFrame != _currentFrameIndex - 1 && reinitializeOnBecomingRelevant) + if (bucket.LastUpdateFrame != context.CurrentFrameIndex - 1 && reinitializeOnBecomingRelevant) { // Reset on becoming relevant isFirstUpdate = true; @@ -1384,19 +1385,19 @@ void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Valu bucket.TransitionPosition = 0.0f; // Reset all state buckets pof the graphs and nodes included inside the state machine - ResetBuckets(data.Graph); + ResetBuckets(context, data.Graph); } // Update the active transition if (bucket.ActiveTransition) { - bucket.TransitionPosition += _deltaTime; + bucket.TransitionPosition += context.DeltaTime; - // Check ofr transition end + // Check for transition end if (bucket.TransitionPosition >= bucket.ActiveTransition->BlendDuration) { // End transition - ResetBuckets(bucket.CurrentState->Data.State.Graph); + ResetBuckets(context, bucket.CurrentState->Data.State.Graph); bucket.CurrentState = bucket.ActiveTransition->Destination; bucket.ActiveTransition = nullptr; bucket.TransitionPosition = 0.0f; @@ -1422,7 +1423,7 @@ void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Valu // Evaluate source state transition data (position, length, etc.) const Value sourceStatePtr = SampleState(bucket.CurrentState); - auto& transitionData = _transitionData; // Note: this could support nested transitions but who uses state machine inside transition rule? + auto& transitionData = context.TransitionData; // Note: this could support nested transitions but who uses state machine inside transition rule? 
if (ANIM_GRAPH_IS_VALID_PTR(sourceStatePtr)) { // Use source state as data provider @@ -1475,7 +1476,7 @@ void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Valu if (bucket.ActiveTransition && bucket.ActiveTransition->BlendDuration <= ZeroTolerance) { // End transition - ResetBuckets(bucket.CurrentState->Data.State.Graph); + ResetBuckets(context, bucket.CurrentState->Data.State.Graph); bucket.CurrentState = bucket.ActiveTransition->Destination; bucket.ActiveTransition = nullptr; bucket.TransitionPosition = 0.0f; @@ -1498,7 +1499,7 @@ void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Valu } // Update bucket - bucket.LastUpdateFrame = _currentFrameIndex; + bucket.LastUpdateFrame = context.CurrentFrameIndex; break; } @@ -1537,7 +1538,7 @@ void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Valu // Transition Source State Anim case 23: { - const AnimGraphTransitionData& transitionsData = _transitionData; + const AnimGraphTransitionData& transitionsData = context.TransitionData; switch (box->ID) { // Length @@ -1587,7 +1588,7 @@ void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Valu if (callFunc == function) { value = Value::Zero; - box->Cache = value; + context.ValueCache.Add(boxBase, value); return; } } @@ -1606,12 +1607,12 @@ void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Valu Box* functionOutputBox = functionOutputNode->TryGetBox(0); // Cache relation between current node in the call stack to the actual function graph - _functions[nodeBase] = (Graph*)data.Graph; + context.Functions[nodeBase] = (Graph*)data.Graph; // Evaluate the function output - _graphStack.Push((Graph*)data.Graph); + context.GraphStack.Push((Graph*)data.Graph); value = functionOutputBox && functionOutputBox->HasConnection() ? 
eatBox(nodeBase, functionOutputBox->FirstConnection()) : Value::Zero; - _graphStack.Pop(); + context.GraphStack.Pop(); break; } // Transform Bone (local/model space) @@ -1635,7 +1636,7 @@ void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Valu value = Value::Null; if (inputBox->HasConnection()) value = eatBox(nodeBase, inputBox->FirstConnection()); - box->Cache = value; + context.ValueCache.Add(boxBase, value); return; } const auto nodes = node->GetNodes(this); @@ -1704,7 +1705,7 @@ void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Valu { // Pass through the input value = input; - box->Cache = value; + context.ValueCache.Add(boxBase, value); return; } @@ -1859,18 +1860,14 @@ void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Valu default: break; } - box->Cache = value; + context.ValueCache.Add(boxBase, value); } void AnimGraphExecutor::ProcessGroupFunction(Box* boxBase, Node* node, Value& value) { - auto box = (AnimGraphBox*)boxBase; - if (box->IsCacheValid()) - { - // Return cache - value = box->Cache; + auto& context = Context.Get(); + if (context.ValueCache.TryGet(boxBase, value)) return; - } switch (node->TypeID) { // Function Input @@ -1878,13 +1875,13 @@ void AnimGraphExecutor::ProcessGroupFunction(Box* boxBase, Node* node, Value& va { // Find the function call AnimGraphNode* functionCallNode = nullptr; - ASSERT(_graphStack.Count() >= 2); + ASSERT(context.GraphStack.Count() >= 2); Graph* graph; - for (int32 i = _callStack.Count() - 1; i >= 0; i--) + for (int32 i = context.CallStack.Count() - 1; i >= 0; i--) { - if (_callStack[i]->Type == GRAPH_NODE_MAKE_TYPE(9, 24) && _functions.TryGet(_callStack[i], graph) && _graphStack[_graphStack.Count() - 1] == (Graph*)graph) + if (context.CallStack[i]->Type == GRAPH_NODE_MAKE_TYPE(9, 24) && context.Functions.TryGet(context.CallStack[i], graph) && context.GraphStack.Last() == (Graph*)graph) { - functionCallNode = (AnimGraphNode*)_callStack[i]; + 
functionCallNode = (AnimGraphNode*)context.CallStack[i]; break; } } @@ -1926,19 +1923,19 @@ void AnimGraphExecutor::ProcessGroupFunction(Box* boxBase, Node* node, Value& va if (functionCallBox && functionCallBox->HasConnection()) { // Use provided input value from the function call - _graphStack.Pop(); + context.GraphStack.Pop(); value = eatBox(node, functionCallBox->FirstConnection()); - _graphStack.Push(graph); + context.GraphStack.Push(graph); } else { // Use the default value from the function graph value = tryGetValue(node->TryGetBox(1), Value::Zero); } + context.ValueCache.Add(boxBase, value); break; } default: break; } - box->Cache = value; } diff --git a/Source/Engine/Visject/VisjectGraph.h b/Source/Engine/Visject/VisjectGraph.h index 7be06566e..d38234c6d 100644 --- a/Source/Engine/Visject/VisjectGraph.h +++ b/Source/Engine/Visject/VisjectGraph.h @@ -18,13 +18,6 @@ class VisjectGraphNode; class VisjectGraphBox : public GraphBox { -public: - - /// - /// The cached value. - /// - Variant Cache; - public: VisjectGraphBox() From 47af31a8c4ddd7fd21d50fa1575487ae6eace435 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 12 Jun 2021 19:35:27 +0200 Subject: [PATCH 10/15] Tweaks --- Source/Engine/Profiler/ProfilerCPU.cpp | 2 +- Source/Engine/Threading/JobSystem.cpp | 15 +++++++++++++++ Source/Engine/Threading/ThreadLocal.h | 6 ++++-- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/Source/Engine/Profiler/ProfilerCPU.cpp b/Source/Engine/Profiler/ProfilerCPU.cpp index 2119a25d7..cab429d2c 100644 --- a/Source/Engine/Profiler/ProfilerCPU.cpp +++ b/Source/Engine/Profiler/ProfilerCPU.cpp @@ -12,7 +12,7 @@ bool ProfilerCPU::Enabled = false; ProfilerCPU::EventBuffer::EventBuffer() { - _capacity = Math::RoundUpToPowerOf2(10 * 1000); + _capacity = 8192; _capacityMask = _capacity - 1; _data = NewArray(_capacity); _head = 0; diff --git a/Source/Engine/Threading/JobSystem.cpp b/Source/Engine/Threading/JobSystem.cpp index 0a29bdecf..08bbef410 100644 --- 
a/Source/Engine/Threading/JobSystem.cpp +++ b/Source/Engine/Threading/JobSystem.cpp @@ -20,6 +20,7 @@ // JOB_SYSTEM_USE_MUTEX=0, enqueue=300-700 cycles, dequeue=10-16 cycles // So using RingBuffer+Mutex+Signals is better than moodycamel::ConcurrentQueue +#define JOB_SYSTEM_ENABLED 1 #define JOB_SYSTEM_USE_MUTEX 1 #define JOB_SYSTEM_USE_STATS 0 @@ -32,6 +33,8 @@ #include "ConcurrentQueue.h" #endif +#if JOB_SYSTEM_ENABLED + class JobSystemService : public EngineService { public: @@ -201,11 +204,14 @@ int32 JobSystemThread::Run() return 0; } +#endif + int64 JobSystem::Dispatch(const Function& job, int32 jobCount) { PROFILE_CPU(); if (jobCount <= 0) return 0; +#if JOB_SYSTEM_ENABLED #if JOB_SYSTEM_USE_STATS const auto start = Platform::GetTimeCycles(); #endif @@ -234,15 +240,23 @@ int64 JobSystem::Dispatch(const Function& job, int32 jobCount) JobsSignal.NotifyAll(); return label; +#else + for (int32 i = 0; i < jobCount; i++) + job(i); + return 0; +#endif } void JobSystem::Wait() { +#if JOB_SYSTEM_ENABLED Wait(Platform::AtomicRead(&NextLabel)); +#endif } void JobSystem::Wait(int64 label) { +#if JOB_SYSTEM_ENABLED PROFILE_CPU(); // Early out @@ -262,4 +276,5 @@ void JobSystem::Wait(int64 label) LOG(Info, "Job average dequeue time: {0} cycles", DequeueSum / DequeueCount); DequeueSum = DequeueCount = 0; #endif +#endif } diff --git a/Source/Engine/Threading/ThreadLocal.h b/Source/Engine/Threading/ThreadLocal.h index b37b18661..c3395ff56 100644 --- a/Source/Engine/Threading/ThreadLocal.h +++ b/Source/Engine/Threading/ThreadLocal.h @@ -69,7 +69,8 @@ public: return result; } - void GetValues(Array& result) const + template + void GetValues(Array& result) const { result.EnsureCapacity(MaxThreads); for (int32 i = 0; i < MaxThreads; i++) @@ -134,7 +135,8 @@ public: } } - void GetNotNullValues(Array& result) const + template + void GetNotNullValues(Array& result) const { result.EnsureCapacity(MaxThreads); for (int32 i = 0; i < MaxThreads; i++) From 
93cdb7ce8f982034b341f199635f45ab6f1df47e Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 12 Jun 2021 19:35:37 +0200 Subject: [PATCH 11/15] Bump up build number --- Flax.flaxproj | 2 +- Source/FlaxEngine.Gen.cs | 4 ++-- Source/FlaxEngine.Gen.h | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Flax.flaxproj b/Flax.flaxproj index 6826bedfe..a7178c5cd 100644 --- a/Flax.flaxproj +++ b/Flax.flaxproj @@ -3,7 +3,7 @@ "Version": { "Major": 1, "Minor": 1, - "Build": 6219 + "Build": 6220 }, "Company": "Flax", "Copyright": "Copyright (c) 2012-2021 Wojciech Figat. All rights reserved.", diff --git a/Source/FlaxEngine.Gen.cs b/Source/FlaxEngine.Gen.cs index f7f535a40..5206c98f4 100644 --- a/Source/FlaxEngine.Gen.cs +++ b/Source/FlaxEngine.Gen.cs @@ -13,5 +13,5 @@ using System.Runtime.InteropServices; [assembly: AssemblyCulture("")] [assembly: ComVisible(false)] [assembly: Guid("b8442186-4a70-7c85-704a-857c262d00f6")] -[assembly: AssemblyVersion("1.1.6219")] -[assembly: AssemblyFileVersion("1.1.6219")] +[assembly: AssemblyVersion("1.1.6220")] +[assembly: AssemblyFileVersion("1.1.6220")] diff --git a/Source/FlaxEngine.Gen.h b/Source/FlaxEngine.Gen.h index 46eefd64f..0e083b5ab 100644 --- a/Source/FlaxEngine.Gen.h +++ b/Source/FlaxEngine.Gen.h @@ -3,11 +3,11 @@ #pragma once #define FLAXENGINE_NAME "FlaxEngine" -#define FLAXENGINE_VERSION Version(1, 1, 6219) -#define FLAXENGINE_VERSION_TEXT "1.1.6219" +#define FLAXENGINE_VERSION Version(1, 1, 6220) +#define FLAXENGINE_VERSION_TEXT "1.1.6220" #define FLAXENGINE_VERSION_MAJOR 1 #define FLAXENGINE_VERSION_MINOR 1 -#define FLAXENGINE_VERSION_BUILD 6219 +#define FLAXENGINE_VERSION_BUILD 6220 #define FLAXENGINE_COMPANY "Flax" #define FLAXENGINE_COPYRIGHT "Copyright (c) 2012-2021 Wojciech Figat. All rights reserved." 
From 25c00a0d55f2cd82c4e8370d6d6046116431e113 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 12 Jun 2021 20:10:16 +0200 Subject: [PATCH 12/15] Fix C# profiler events from other threads --- Source/Engine/Profiler/ProfilerCPU.cpp | 19 +++++++++++++++---- Source/Engine/Profiler/ProfilerCPU.h | 10 ++++++++++ .../Engine/Scripting/Scripting.Internal.cpp | 16 ++++------------ 3 files changed, 29 insertions(+), 16 deletions(-) diff --git a/Source/Engine/Profiler/ProfilerCPU.cpp b/Source/Engine/Profiler/ProfilerCPU.cpp index cab429d2c..e3b1caad4 100644 --- a/Source/Engine/Profiler/ProfilerCPU.cpp +++ b/Source/Engine/Profiler/ProfilerCPU.cpp @@ -122,6 +122,14 @@ void ProfilerCPU::Thread::EndEvent(int32 index) e.End = time; } +void ProfilerCPU::Thread::EndEvent() +{ + const double time = Platform::GetTimeSeconds() * 1000.0; + _depth--; + Event& e = Buffer.Get(Buffer.GetCount() - 1); + e.End = time; +} + bool ProfilerCPU::IsProfilingCurrentThread() { return Enabled && Thread::Current != nullptr; @@ -194,11 +202,14 @@ int32 ProfilerCPU::BeginEvent(const char* name) void ProfilerCPU::EndEvent(int32 index) { - if (!Enabled) - return; + if (Enabled && Thread::Current) + Thread::Current->EndEvent(index); +} - ASSERT(Thread::Current); - Thread::Current->EndEvent(index); +void ProfilerCPU::EndEvent() +{ + if (Enabled && Thread::Current) + Thread::Current->EndEvent(); } void ProfilerCPU::Dispose() diff --git a/Source/Engine/Profiler/ProfilerCPU.h b/Source/Engine/Profiler/ProfilerCPU.h index feccdd79b..2a48e6c15 100644 --- a/Source/Engine/Profiler/ProfilerCPU.h +++ b/Source/Engine/Profiler/ProfilerCPU.h @@ -289,6 +289,11 @@ public: /// /// The event index returned by the BeginEvent method. void EndEvent(int32 index); + + /// + /// Ends the last event running on this thread. + /// + void EndEvent(); }; public: @@ -341,6 +346,11 @@ public: /// The event index returned by the BeginEvent method. static void EndEvent(int32 index); + /// + /// Ends the last event.
+ /// + static void EndEvent(); + /// /// Releases resources. Calls to the profiling API after Dispose are not valid. /// diff --git a/Source/Engine/Scripting/Scripting.Internal.cpp b/Source/Engine/Scripting/Scripting.Internal.cpp index 79e578401..781dc161e 100644 --- a/Source/Engine/Scripting/Scripting.Internal.cpp +++ b/Source/Engine/Scripting/Scripting.Internal.cpp @@ -14,29 +14,21 @@ namespace ProfilerInternal { - /// - /// The managed events IDs. - /// - Array ManagedEvents; - - /// - /// The managed events IDs for GPU profiling. - /// +#if COMPILE_WITH_PROFILER Array ManagedEventsGPU; +#endif void BeginEvent(MonoString* nameObj) { #if COMPILE_WITH_PROFILER - const auto index = ProfilerCPU::BeginEvent((const Char*)mono_string_chars(nameObj)); - ManagedEvents.Push(index); + ProfilerCPU::BeginEvent((const Char*)mono_string_chars(nameObj)); #endif } void EndEvent() { #if COMPILE_WITH_PROFILER - const auto index = ManagedEvents.Pop(); - ProfilerCPU::EndEvent(index); + ProfilerCPU::EndEvent(); #endif } From d7e7dcc823bb1547396fe79700c0d303afeab8eb Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 12 Jun 2021 22:43:37 +0200 Subject: [PATCH 13/15] Add Task Graph --- Source/Engine/Core/Collections/Array.h | 13 +-- Source/Engine/Threading/TaskGraph.cpp | 118 +++++++++++++++++++++++++ Source/Engine/Threading/TaskGraph.h | 95 ++++++++++++++++++++ 3 files changed, 215 insertions(+), 11 deletions(-) create mode 100644 Source/Engine/Threading/TaskGraph.cpp create mode 100644 Source/Engine/Threading/TaskGraph.h diff --git a/Source/Engine/Core/Collections/Array.h b/Source/Engine/Core/Collections/Array.h index 9b8c41245..5069c0ac2 100644 --- a/Source/Engine/Core/Collections/Array.h +++ b/Source/Engine/Core/Collections/Array.h @@ -493,17 +493,8 @@ public: /// Adds the other collection to the collection. /// /// The other collection to add. 
- FORCE_INLINE void Add(const Array& other) - { - Add(other.Get(), other.Count()); - } - - /// - /// Adds the other collection to the collection. - /// - /// The other collection to add. - template - FORCE_INLINE void Add(const Array& other) + template + FORCE_INLINE void Add(const Array& other) { Add(other.Get(), other.Count()); } diff --git a/Source/Engine/Threading/TaskGraph.cpp b/Source/Engine/Threading/TaskGraph.cpp new file mode 100644 index 000000000..aa86a94a9 --- /dev/null +++ b/Source/Engine/Threading/TaskGraph.cpp @@ -0,0 +1,118 @@ +// Copyright (c) 2012-2021 Wojciech Figat. All rights reserved. + +#include "TaskGraph.h" +#include "JobSystem.h" +#include "Engine/Core/Collections/Sorting.h" +#include "Engine/Profiler/ProfilerCPU.h" + +namespace +{ + bool SortTaskGraphSystem(TaskGraphSystem* const& a, TaskGraphSystem* const& b) + { + return b->Order < a->Order; + }; +} + +TaskGraphSystem::TaskGraphSystem(const SpawnParams& params) + : PersistentScriptingObject(params) +{ +} + +void TaskGraphSystem::AddDependency(TaskGraphSystem* system) +{ + _dependencies.Add(system); +} + +void TaskGraphSystem::PreExecute(TaskGraph* graph) +{ +} + +void TaskGraphSystem::Execute(TaskGraph* graph) +{ +} + +void TaskGraphSystem::PostExecute(TaskGraph* graph) +{ +} + +TaskGraph::TaskGraph(const SpawnParams& params) + : PersistentScriptingObject(params) +{ +} + +const Array>& TaskGraph::GetSystems() const +{ + return _systems; +} + +void TaskGraph::AddSystem(TaskGraphSystem* system) +{ + _systems.Add(system); +} + +void TaskGraph::RemoveSystem(TaskGraphSystem* system) +{ + _systems.Remove(system); +} + +void TaskGraph::Execute() +{ + PROFILE_CPU(); + + for (auto system : _systems) + system->PreExecute(this); + + _queue.Clear(); + _remaining.Clear(); + _remaining.Add(_systems); + + while (_remaining.HasItems()) + { + // Find systems without dependencies or with already executed dependencies + for (int32 i = _remaining.Count() - 1; i >= 0; i--) + { + auto e = _remaining[i]; + 
bool hasReadyDependencies = true; + for (auto d : e->_dependencies) + { + if (_remaining.Contains(d)) + { + hasReadyDependencies = false; + break; + } + } + if (hasReadyDependencies) + { + _queue.Add(e); + _remaining.RemoveAt(i); + } + } + + // End if no systems left + if (_queue.IsEmpty()) + break; + + // Execute in order + Sorting::QuickSort(_queue.Get(), _queue.Count(), &SortTaskGraphSystem); + _currentLabel = 0; + for (int32 i = 0; i < _queue.Count(); i++) + { + _currentSystem = _queue[i]; + _currentSystem->Execute(this); + } + _currentSystem = nullptr; + _queue.Clear(); + + // Wait for async jobs to finish + JobSystem::Wait(_currentLabel); + } + + for (auto system : _systems) + system->PostExecute(this); +} + +void TaskGraph::DispatchJob(const Function& job, int32 jobCount) +{ + ASSERT(_currentSystem); + _currentLabel = JobSystem::Dispatch(job, jobCount); +} diff --git a/Source/Engine/Threading/TaskGraph.h b/Source/Engine/Threading/TaskGraph.h new file mode 100644 index 000000000..7268e644c --- /dev/null +++ b/Source/Engine/Threading/TaskGraph.h @@ -0,0 +1,95 @@ +// Copyright (c) 2012-2021 Wojciech Figat. All rights reserved. + +#pragma once + +#include "Engine/Scripting/ScriptingObject.h" +#include "Engine/Core/Collections/Array.h" + +class TaskGraph; + +/// +/// System that can generate work into Task Graph for asynchronous execution. +/// +API_CLASS(Abstract) class FLAXENGINE_API TaskGraphSystem : public PersistentScriptingObject +{ +DECLARE_SCRIPTING_TYPE(TaskGraphSystem); + friend TaskGraph; +private: + Array> _dependencies; + +public: + /// + /// The execution order of the system (systems with higher order are executed earlier). + /// + API_FIELD() int32 Order = 0; + +public: + /// + /// Adds the dependency on the system execution. Before this system can be executed the given dependant system has to be executed first. + /// + /// The system to depend on. 
+ API_FUNCTION() void AddDependency(TaskGraphSystem* system); + + /// + /// Called before executing any systems of the graph. Can be used to initialize data (synchronous). + /// + /// The graph executing the system. + API_FUNCTION() virtual void PreExecute(TaskGraph* graph); + + /// + /// Executes the system logic and schedules the asynchronous work. + /// + /// The graph executing the system. + API_FUNCTION() virtual void Execute(TaskGraph* graph); + + /// + /// Called after executing all systems of the graph. Can be used to cleanup data (synchronous). + /// + /// The graph executing the system. + API_FUNCTION() virtual void PostExecute(TaskGraph* graph); +}; + +/// +/// Graph-based asynchronous tasks scheduler for high-performance computing and processing. +/// +API_CLASS() class FLAXENGINE_API TaskGraph : public PersistentScriptingObject +{ +DECLARE_SCRIPTING_TYPE(TaskGraph); +private: + Array> _systems; + Array> _remaining; + Array> _queue; + TaskGraphSystem* _currentSystem = nullptr; + int64 _currentLabel = 0; + +public: + /// + /// Gets the list of systems. + /// + API_PROPERTY() const Array>& GetSystems() const; + + /// + /// Adds the system to the graph for the execution. + /// + /// The system to add. + API_FUNCTION() void AddSystem(TaskGraphSystem* system); + + /// + /// Removes the system from the graph. + /// + /// The system to remove. + API_FUNCTION() void RemoveSystem(TaskGraphSystem* system); + + /// + /// Schedules the asynchronous systems execution including ordering and dependencies handling. + /// + API_FUNCTION() void Execute(); + + /// + /// Dispatches the job for the execution. + /// + /// Call only from system's Execute method to properly schedule job. + /// The job. Argument is an index of the job execution. + /// The job executions count.
+ API_FUNCTION() void DispatchJob(const Function& job, int32 jobCount = 1); +}; From 360f498e47c074cf68df4340e31cb813c071ffad Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 12 Jun 2021 22:44:16 +0200 Subject: [PATCH 14/15] Add `Engine::UpdateGraph` for async engine/game update --- Source/Engine/Engine/Engine.cpp | 5 +++++ Source/Engine/Engine/Engine.h | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/Source/Engine/Engine/Engine.cpp b/Source/Engine/Engine/Engine.cpp index a5fd460fb..bfa2338c6 100644 --- a/Source/Engine/Engine/Engine.cpp +++ b/Source/Engine/Engine/Engine.cpp @@ -27,6 +27,7 @@ #include "Engine/Graphics/RenderTargetPool.h" #include "Engine/Graphics/RenderTask.h" #include "Engine/Profiler/Profiler.h" +#include "Engine/Threading/TaskGraph.h" #if USE_EDITOR #include "Editor/Editor.h" #include "Editor/ProjectInfo.h" @@ -62,6 +63,7 @@ bool Engine::HasFocus = false; uint64 Engine::FrameCount = 0; Action Engine::FixedUpdate; Action Engine::Update; +TaskGraph* Engine::UpdateGraph = nullptr; Action Engine::LateUpdate; Action Engine::Draw; Action Engine::Pause; @@ -122,6 +124,7 @@ int32 Engine::Main(const Char* cmdLine) #endif // Initialize engine + UpdateGraph = New(); EngineService::OnInit(); if (Application::Init()) return -10; @@ -289,6 +292,7 @@ void Engine::OnUpdate() // Call event Update(); + UpdateGraph->Execute(); // Update services EngineService::OnUpdate(); @@ -436,6 +440,7 @@ void Engine::OnExit() // Unload Engine services EngineService::OnDispose(); + Delete(UpdateGraph); LOG_FLUSH(); diff --git a/Source/Engine/Engine/Engine.h b/Source/Engine/Engine/Engine.h index 13f3e1fe8..51860e5a2 100644 --- a/Source/Engine/Engine/Engine.h +++ b/Source/Engine/Engine/Engine.h @@ -6,6 +6,7 @@ #include "Engine/Core/Types/DateTime.h" #include "Engine/Scripting/ScriptingType.h" +class TaskGraph; class JsonAsset; /// @@ -43,6 +44,11 @@ public: /// static Action Update; + /// + /// Task graph for engine update. 
+ /// + API_FIELD(ReadOnly) static TaskGraph* UpdateGraph; + /// /// Event called after engine update. /// From 4a92850d9abbafc04562fa391c3cfb280f630f66 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 12 Jun 2021 23:18:04 +0200 Subject: [PATCH 15/15] Add async animations updating via Task Graph --- Source/Engine/Animations/Animations.cpp | 147 ++++++++++++------- Source/Engine/Animations/Animations.h | 6 + Source/Engine/Level/Actors/AnimatedModel.cpp | 38 +++-- Source/Engine/Level/Actors/AnimatedModel.h | 4 +- 4 files changed, 127 insertions(+), 68 deletions(-) diff --git a/Source/Engine/Animations/Animations.cpp b/Source/Engine/Animations/Animations.cpp index 037c87a5f..179e710e4 100644 --- a/Source/Engine/Animations/Animations.cpp +++ b/Source/Engine/Animations/Animations.cpp @@ -6,8 +6,7 @@ #include "Engine/Level/Actors/AnimatedModel.h" #include "Engine/Engine/Time.h" #include "Engine/Engine/EngineService.h" - -Array UpdateList; +#include "Engine/Threading/TaskGraph.h" class AnimationsService : public EngineService { @@ -18,70 +17,110 @@ public: { } - void Update() override; + bool Init() override; void Dispose() override; }; +class AnimationsSystem : public TaskGraphSystem +{ +public: + float DeltaTime, UnscaledDeltaTime, Time, UnscaledTime; + void Job(int32 index); + void Execute(TaskGraph* graph) override; + void PostExecute(TaskGraph* graph) override; +}; + AnimationsService AnimationManagerInstance; +Array UpdateList; +TaskGraphSystem* Animations::System = nullptr; Delegate Animations::DebugFlow; -void AnimationsService::Update() +bool AnimationsService::Init() { - PROFILE_CPU_NAMED("Animations"); - - // TODO: implement the thread jobs pipeline to run set of tasks at once (use it for multi-threaded rendering and animations evaluation) - - const auto& tickData = Time::Update; - const float deltaTime = tickData.DeltaTime.GetTotalSeconds(); - const float unscaledDeltaTime = tickData.UnscaledDeltaTime.GetTotalSeconds(); - const float time = 
tickData.Time.GetTotalSeconds(); - const float unscaledTime = tickData.UnscaledTime.GetTotalSeconds(); - - for (int32 i = 0; i < UpdateList.Count(); i++) - { - auto animatedModel = UpdateList[i]; - if (animatedModel->SkinnedModel == nullptr || !animatedModel->SkinnedModel->IsLoaded()) - continue; - - // Prepare skinning data - animatedModel->SetupSkinningData(); - - // Update the animation graph and the skinning - auto graph = animatedModel->AnimationGraph.Get(); - if (graph && graph->IsLoaded() && graph->Graph.CanUseWithSkeleton(animatedModel->SkinnedModel) -#if USE_EDITOR - && graph->Graph.Parameters.Count() == animatedModel->GraphInstance.Parameters.Count() // It may happen in editor so just add safe check to prevent any crashes -#endif - ) - { -#if USE_EDITOR - // Lock in editor only (more reloads during asset live editing) - ScopeLock lock(animatedModel->AnimationGraph->Locker); -#endif - - // Animation delta time can be based on a time since last update or the current delta - float dt = animatedModel->UseTimeScale ? deltaTime : unscaledDeltaTime; - float t = animatedModel->UseTimeScale ? 
time : unscaledTime; - const float lastUpdateTime = animatedModel->GraphInstance.LastUpdateTime; - if (lastUpdateTime > 0 && t > lastUpdateTime) - { - dt = t - lastUpdateTime; - } - animatedModel->GraphInstance.LastUpdateTime = t; - - // Evaluate animated nodes pose - graph->GraphExecutor.Update(animatedModel->GraphInstance, dt); - - // Update gameplay - animatedModel->OnAnimationUpdated(); - } - } - UpdateList.Clear(); + Animations::System = New(); + Engine::UpdateGraph->AddSystem(Animations::System); + return false; } void AnimationsService::Dispose() { UpdateList.Resize(0); + SAFE_DELETE(Animations::System); +} + +void AnimationsSystem::Job(int32 index) +{ + PROFILE_CPU_NAMED("Animations.Job"); + auto animatedModel = UpdateList[index]; + auto skinnedModel = animatedModel->SkinnedModel.Get(); + auto graph = animatedModel->AnimationGraph.Get(); + if (graph && graph->IsLoaded() && graph->Graph.CanUseWithSkeleton(skinnedModel) +#if USE_EDITOR + && graph->Graph.Parameters.Count() == animatedModel->GraphInstance.Parameters.Count() // It may happen in editor so just add safe check to prevent any crashes +#endif + ) + { + // Prepare skinning data + animatedModel->SetupSkinningData(); + + // Animation delta time can be based on a time since last update or the current delta + float dt = animatedModel->UseTimeScale ? DeltaTime : UnscaledDeltaTime; + float t = animatedModel->UseTimeScale ? 
Time : UnscaledTime; + const float lastUpdateTime = animatedModel->GraphInstance.LastUpdateTime; + if (lastUpdateTime > 0 && t > lastUpdateTime) + { + dt = t - lastUpdateTime; + } + animatedModel->GraphInstance.LastUpdateTime = t; + + // Evaluate animated nodes pose + graph->GraphExecutor.Update(animatedModel->GraphInstance, dt); + + // Update gameplay + animatedModel->OnAnimationUpdated_Async(); + } +} + +void AnimationsSystem::Execute(TaskGraph* graph) +{ + if (UpdateList.Count() == 0) + return; + + // Setup data for async update + const auto& tickData = Time::Update; + DeltaTime = tickData.DeltaTime.GetTotalSeconds(); + UnscaledDeltaTime = tickData.UnscaledDeltaTime.GetTotalSeconds(); + Time = tickData.Time.GetTotalSeconds(); + UnscaledTime = tickData.UnscaledTime.GetTotalSeconds(); + + // Schedule work to update all animated models in async + Function job; + job.Bind(this); + graph->DispatchJob(job, UpdateList.Count()); +} + +void AnimationsSystem::PostExecute(TaskGraph* graph) +{ + PROFILE_CPU_NAMED("Animations.PostExecute"); + + // Update gameplay + for (int32 index = 0; index < UpdateList.Count(); index++) + { + auto animatedModel = UpdateList[index]; + auto skinnedModel = animatedModel->SkinnedModel.Get(); + auto animGraph = animatedModel->AnimationGraph.Get(); + if (animGraph && animGraph->IsLoaded() && animGraph->Graph.CanUseWithSkeleton(skinnedModel) +#if USE_EDITOR + && animGraph->Graph.Parameters.Count() == animatedModel->GraphInstance.Parameters.Count() // It may happen in editor so just add safe check to prevent any crashes +#endif + ) + { + animatedModel->OnAnimationUpdated_Sync(); + } + } + + // Cleanup + UpdateList.Clear(); } void Animations::AddToUpdate(AnimatedModel* obj) diff --git a/Source/Engine/Animations/Animations.h b/Source/Engine/Animations/Animations.h index 8b5b79c67..014104eac 100644 --- a/Source/Engine/Animations/Animations.h +++ b/Source/Engine/Animations/Animations.h @@ -5,6 +5,7 @@ #include "Engine/Scripting/ScriptingType.h" 
#include "Engine/Core/Delegate.h" +class TaskGraphSystem; class AnimatedModel; class Asset; @@ -15,6 +16,11 @@ API_CLASS(Static) class FLAXENGINE_API Animations { DECLARE_SCRIPTING_TYPE_NO_SPAWN(Content); + /// + /// The system for Animations update. + /// + API_FIELD(ReadOnly) static TaskGraphSystem* System; + #if USE_EDITOR // Custom event that is called every time the Anim Graph signal flows over the graph (including the data connections). Can be used to read and visualize the animation blending logic. Args are: anim graph asset, animated object, node id, box id API_EVENT() static Delegate DebugFlow; diff --git a/Source/Engine/Level/Actors/AnimatedModel.cpp b/Source/Engine/Level/Actors/AnimatedModel.cpp index 701df3d6e..45bc701c6 100644 --- a/Source/Engine/Level/Actors/AnimatedModel.cpp +++ b/Source/Engine/Level/Actors/AnimatedModel.cpp @@ -113,16 +113,6 @@ void AnimatedModel::PreInitSkinningData() UpdateSockets(); } -void AnimatedModel::UpdateSockets() -{ - for (int32 i = 0; i < Children.Count(); i++) - { - auto socket = dynamic_cast(Children[i]); - if (socket) - socket->UpdateTransformation(); - } -} - void AnimatedModel::GetCurrentPose(Array& nodesTransformation, bool worldSpace) const { nodesTransformation = GraphInstance.NodesPose; @@ -451,9 +441,19 @@ void AnimatedModel::UpdateBounds() BoundingSphere::FromBox(_box, _sphere); } -void AnimatedModel::OnAnimationUpdated() +void AnimatedModel::UpdateSockets() { - ANIM_GRAPH_PROFILE_EVENT("OnAnimationUpdated"); + for (int32 i = 0; i < Children.Count(); i++) + { + auto socket = dynamic_cast(Children[i]); + if (socket) + socket->UpdateTransformation(); + } +} + +void AnimatedModel::OnAnimationUpdated_Async() +{ + // Update asynchronous stuff auto& skeleton = SkinnedModel->Skeleton; // Copy pose from the master @@ -482,12 +482,24 @@ void AnimatedModel::OnAnimationUpdated() } UpdateBounds(); + _blendShapes.Update(SkinnedModel.Get()); +} + +void AnimatedModel::OnAnimationUpdated_Sync() +{ + // Update synchronous 
stuff UpdateSockets(); ApplyRootMotion(GraphInstance.RootMotion); - _blendShapes.Update(SkinnedModel.Get()); AnimationUpdated(); } +void AnimatedModel::OnAnimationUpdated() +{ + ANIM_GRAPH_PROFILE_EVENT("OnAnimationUpdated"); + OnAnimationUpdated_Async(); + OnAnimationUpdated_Sync(); +} + void AnimatedModel::OnSkinnedModelChanged() { Entries.Release(); diff --git a/Source/Engine/Level/Actors/AnimatedModel.h b/Source/Engine/Level/Actors/AnimatedModel.h index 8c9023d23..771833f2c 100644 --- a/Source/Engine/Level/Actors/AnimatedModel.h +++ b/Source/Engine/Level/Actors/AnimatedModel.h @@ -15,7 +15,7 @@ API_CLASS() class FLAXENGINE_API AnimatedModel : public ModelInstanceActor { DECLARE_SCENE_OBJECT(AnimatedModel); - friend class AnimationsService; + friend class AnimationsSystem; public: /// @@ -306,6 +306,8 @@ private: void UpdateLocalBounds(); void UpdateBounds(); void UpdateSockets(); + void OnAnimationUpdated_Async(); + void OnAnimationUpdated_Sync(); void OnAnimationUpdated(); void OnSkinnedModelChanged();