Optimize Screen Space Reflections tracing with Hierarchical Z-Buffer

Improve SSR resolve filter quality and adjust scalability.
This commit is contained in:
2026-01-26 15:22:39 +01:00
parent da8376bba1
commit 143d714037
15 changed files with 482 additions and 145 deletions
@@ -116,7 +116,7 @@ void PS_Forward(
Texture2D sceneColorTexture = MATERIAL_REFLECTIONS_SSR_COLOR;
float2 screenUV = materialInput.SvPosition.xy * ScreenSize.zw;
float stepSize = ScreenSize.z; // 1 / screenWidth
float maxSamples = 48;
float maxSamples = 50;
float worldAntiSelfOcclusionBias = 0.1f;
float brdfBias = 0.82f;
float drawDistance = 5000.0f;
+38
View File
@@ -64,6 +64,7 @@ void RenderBuffers::ReleaseUnusedMemory()
UPDATE_LAZY_KEEP_RT(TemporalSSR);
UPDATE_LAZY_KEEP_RT(TemporalAA);
UPDATE_LAZY_KEEP_RT(HalfResDepth);
UPDATE_LAZY_KEEP_RT(HiZ);
UPDATE_LAZY_KEEP_RT(LuminanceMap);
#undef UPDATE_LAZY_KEEP_RT
for (int32 i = CustomBuffers.Count() - 1; i >= 0; i--)
@@ -112,6 +113,42 @@ GPUTexture* RenderBuffers::RequestHalfResDepth(GPUContext* context)
return HalfResDepth;
}
// Prepares the Hierarchical Z-Buffer for the current frame and returns it.
// The buffer is rebuilt at most once per frame for a given configuration. If the requested configuration
// (resolution, mip count, format, flags) differs from the existing buffer, the old one is released back to
// the pool and a new one is built, so callers should not keep a previously returned pointer across a call
// with different arguments.
GPUTexture* RenderBuffers::RequestHiZ(GPUContext* context, bool fullRes, int32 mipLevels)
{
    // Describe the requested buffer first so the cached one can be validated against it
    // TODO: migrate to inverse depth and try using r16 again as default (should have no artifacts anymore)
    auto format = PLATFORM_ANDROID || PLATFORM_IOS || PLATFORM_SWITCH ? PixelFormat::R16_UInt : PixelFormat::R32_Float;
    auto width = fullRes ? _width : Math::Max(_width >> 1, 1);
    auto height = fullRes ? _height : Math::Max(_height >> 1, 1);
    auto desc = GPUTextureDescription::New2D(width, height, mipLevels, format, GPUTextureFlags::ShaderResource);
    bool useCompute = false; // TODO: impl Compute Shader for downscaling depth to HiZ with a single dispatch (eg. FidelityFX Single Pass Downsampler)
    if (useCompute)
        desc.Flags |= GPUTextureFlags::UnorderedAccess;
    else
        desc.Flags |= GPUTextureFlags::RenderTarget | GPUTextureFlags::PerMipViews;

    const auto currentFrame = Engine::FrameCount;
    if (HiZ && HiZ->GetDescription() != desc)
    {
        // Existing buffer does not match the request (eg. resized or different fullRes/mipLevels) so drop it and rebuild below
        RenderTargetPool::Release(HiZ);
        HiZ = nullptr;
    }
    else if (HiZ && LastFrameHiZ == currentFrame)
    {
        // Skip if a matching buffer was already built in the current frame
        return HiZ;
    }
    LastFrameHiZ = currentFrame;

    // Allocate buffer if missing
    if (HiZ == nullptr)
    {
        HiZ = RenderTargetPool::Get(desc);
        RENDER_TARGET_POOL_SET_NAME(HiZ, "HiZ");
    }

    // Downscale
    MultiScaler::Instance()->BuildHiZ(context, DepthBuffer, HiZ);
    return HiZ;
}
PixelFormat RenderBuffers::GetOutputFormat() const
{
auto colorFormat = GraphicsSettings::Get()->RenderColorFormat;
@@ -244,6 +281,7 @@ void RenderBuffers::Release()
UPDATE_LAZY_KEEP_RT(TemporalSSR);
UPDATE_LAZY_KEEP_RT(TemporalAA);
UPDATE_LAZY_KEEP_RT(HalfResDepth);
UPDATE_LAZY_KEEP_RT(HiZ);
UPDATE_LAZY_KEEP_RT(LuminanceMap);
#undef UPDATE_LAZY_KEEP_RT
CustomBuffers.ClearDelete();
+15 -5
View File
@@ -43,6 +43,12 @@ API_CLASS() class FLAXENGINE_API RenderBuffers : public ScriptingObject
String ToString() const override;
};
private:
GPUTexture* HalfResDepth = nullptr;
GPUTexture* HiZ = nullptr;
uint64 LastFrameHalfResDepth = 0;
uint64 LastFrameHiZ = 0;
protected:
int32 _width = 0;
int32 _height = 0;
@@ -85,11 +91,6 @@ public:
float MaxDistance;
} VolumetricFogData;
// Helper buffer with half-resolution depth buffer shared by effects (eg. SSR, Motion Blur). Valid only during frame rendering and on request (see RequestHalfResDepth).
// Should be released if not used for a few frames.
GPUTexture* HalfResDepth = nullptr;
uint64 LastFrameHalfResDepth = 0;
// Helper target for the temporal SSR.
// Should be released if not used for a few frames.
GPUTexture* TemporalSSR = nullptr;
@@ -122,6 +123,15 @@ public:
/// <returns>The half-res depth buffer.</returns>
GPUTexture* RequestHalfResDepth(GPUContext* context);
/// <summary>
/// Requests the Hierarchical Z-Buffer (closest) to be prepared for the current frame.
/// </summary>
/// <param name="context">The context.</param>
/// <param name="fullRes">Generates the full-resolution buffer, otherwise HiZ starts at half-res of the original Depth Buffer.</param>
/// <param name="mipLevels">Maximum amount of mip levels to generate. Value 0 generates a full mip chain down to 1x1.</param>
/// <returns>The HiZ depth buffer.</returns>
GPUTexture* RequestHiZ(GPUContext* context, bool fullRes = false, int32 mipLevels = 0);
public:
/// <summary>
/// Gets the buffers width (in pixels).
@@ -411,6 +411,7 @@ void ReflectionsPass::Render(RenderContext& renderContext, GPUTextureView* light
else
{
// Combine reflections and light buffer (additive mode)
PROFILE_GPU("Combine");
if (_depthBounds)
{
context->SetRenderTarget(depthBufferRTV, lightBuffer);
@@ -22,19 +22,21 @@
#define TEXTURE1 5
#define TEXTURE2 6
#define SSR_USE_HZB 1
GPU_CB_STRUCT(Data {
ShaderGBufferData GBuffer;
float MaxColorMiplevel;
float TraceSizeMax;
float MaxTraceSamples;
float RoughnessFade;
Float2 SSRtexelSize;
Float2 SSRTexelSize;
float TemporalTime;
float BRDFBias;
float WorldAntiSelfOcclusionBias;
float EdgeFadeFactor;
float TemporalResponse;
float Dummy0;
uint32 DepthMips;
float RayTraceStep;
float TemporalEffect;
float Intensity;
@@ -157,10 +159,10 @@ GPUTexture* ScreenSpaceReflectionsPass::Render(RenderContext& renderContext, GPU
const auto colorBufferMips = MipLevelsCount(colorBufferWidth, colorBufferHeight);
// Prepare buffers
auto tempDesc = GPUTextureDescription::New2D(colorBufferWidth, colorBufferHeight, 0, PixelFormat::R11G11B10_Float, GPUTextureFlags::ShaderResource | GPUTextureFlags::RenderTarget | GPUTextureFlags::PerMipViews);
GPUTexture* colorBuffer0, *colorBuffer1;
if (settings.UseColorBufferMips)
{
auto tempDesc = GPUTextureDescription::New2D(colorBufferWidth, colorBufferHeight, 0, PixelFormat::R11G11B10_Float, GPUTextureFlags::ShaderResource | GPUTextureFlags::RenderTarget | GPUTextureFlags::PerMipViews);
colorBuffer0 = RenderTargetPool::Get(tempDesc);
RENDER_TARGET_POOL_SET_NAME(colorBuffer0, "SSR.ColorBuffer0");
// TODO: maybe allocate colorBuffer1 smaller because mip0 is not used (the same as PostProcessingPass for Bloom), keep in sync to use the same buffer in frame
@@ -170,27 +172,33 @@ GPUTexture* ScreenSpaceReflectionsPass::Render(RenderContext& renderContext, GPU
else
{
// Single mip
tempDesc.MipLevels = 1;
tempDesc.Flags &= ~GPUTextureFlags::PerMipViews;
auto tempDesc = GPUTextureDescription::New2D(colorBufferWidth, colorBufferHeight, 1, PixelFormat::R11G11B10_Float);
colorBuffer0 = RenderTargetPool::Get(tempDesc);
colorBuffer1 = nullptr;
}
tempDesc = GPUTextureDescription::New2D(traceWidth, traceHeight, PixelFormat::R16G16B16A16_Float);
auto traceBuffer = RenderTargetPool::Get(tempDesc);
RENDER_TARGET_POOL_SET_NAME(traceBuffer, "SSR.TraceBuffer");
tempDesc = GPUTextureDescription::New2D(resolveWidth, resolveHeight, PixelFormat::R16G16B16A16_Float);
auto resolveBuffer = RenderTargetPool::Get(tempDesc);
RENDER_TARGET_POOL_SET_NAME(resolveBuffer, "SSR.ResolveBuffer");
GPUTexture* traceBuffer, *resolveBuffer;
{
auto tempDesc = GPUTextureDescription::New2D(traceWidth, traceHeight, PixelFormat::R16G16B16A16_Float);
traceBuffer = RenderTargetPool::Get(tempDesc);
RENDER_TARGET_POOL_SET_NAME(traceBuffer, "SSR.TraceBuffer");
tempDesc.Width = resolveWidth;
tempDesc.Height = resolveHeight;
resolveBuffer = RenderTargetPool::Get(tempDesc);
RENDER_TARGET_POOL_SET_NAME(resolveBuffer, "SSR.ResolveBuffer");
}
// Pick effect settings
int32 maxTraceSamples = 60;
int32 resolveSamples = settings.ResolveSamples;
switch (Graphics::SSRQuality)
{
case Quality::Low:
maxTraceSamples = 20;
maxTraceSamples = 40;
resolveSamples = Math::Min(resolveSamples, 2);
break;
case Quality::Medium:
maxTraceSamples = 55;
resolveSamples = Math::Min(resolveSamples, 4);
break;
case Quality::High:
maxTraceSamples = 70;
@@ -199,7 +207,6 @@ GPUTexture* ScreenSpaceReflectionsPass::Render(RenderContext& renderContext, GPU
maxTraceSamples = 120;
break;
}
const int32 resolveSamples = settings.ResolveSamples;
int32 resolvePassIndex = 0;
if (resolveSamples >= 8)
resolvePassIndex = 3;
@@ -214,12 +221,12 @@ GPUTexture* ScreenSpaceReflectionsPass::Render(RenderContext& renderContext, GPU
data.RoughnessFade = Math::Saturate(settings.RoughnessThreshold);
data.MaxTraceSamples = static_cast<float>(maxTraceSamples);
data.BRDFBias = settings.BRDFBias;
data.WorldAntiSelfOcclusionBias = settings.WorldAntiSelfOcclusionBias;
data.WorldAntiSelfOcclusionBias = settings.WorldAntiSelfOcclusionBias * (int32)settings.DepthResolution;
data.EdgeFadeFactor = settings.EdgeFadeFactor;
data.SSRtexelSize = Float2(1.0f / (float)traceWidth, 1.0f / (float)traceHeight);
data.SSRTexelSize = Float2(1.0f / (float)traceWidth, 1.0f / (float)traceHeight);
data.TraceSizeMax = (float)Math::Max(traceWidth, traceHeight);
data.MaxColorMiplevel = settings.UseColorBufferMips ? (float)colorBufferMips - 2.0f : 0.0f;
data.RayTraceStep = static_cast<float>(settings.DepthResolution) / (float)width;
data.MaxColorMiplevel = settings.UseColorBufferMips ? (float)(colorBufferMips - 2) : 0.0f;
data.RayTraceStep = (float)settings.DepthResolution / (float)width;
data.Intensity = settings.Intensity;
data.FadeOutDistance = Math::Max(settings.FadeOutDistance, 100.0f);
data.TemporalResponse = settings.TemporalResponse;
@@ -245,8 +252,16 @@ GPUTexture* ScreenSpaceReflectionsPass::Render(RenderContext& renderContext, GPU
}
}
// Check if resize depth
// Prepare depth buffer
#if SSR_USE_HZB
int32 hzbMips = settings.DepthResolution == ResolutionMode::Full ? 5 : 4; // Using lower mips in tracing introduces blocky artifacts
bool hzbFullRes = settings.DepthResolution == ResolutionMode::Full;
GPUTexture* depthBufferTrace = buffers->RequestHiZ(context, hzbFullRes, hzbMips);
data.DepthMips = hzbMips - 1; // Offset to improve SSR range
#else
GPUTexture* depthBufferTrace = settings.DepthResolution == ResolutionMode::Half ? buffers->RequestHalfResDepth(context) : buffers->DepthBuffer;
data.DepthMips = 1;
#endif
// Prepare constants
context->UpdateCB(cb, &data);
@@ -259,16 +274,19 @@ GPUTexture* ScreenSpaceReflectionsPass::Render(RenderContext& renderContext, GPU
context->BindSR(3, depthBufferTrace);
// Combine pass
context->BindSR(TEXTURE0, lightBuffer);
context->BindSR(TEXTURE1, reflectionsRT);
context->BindSR(TEXTURE2, _preIntegratedGF->GetTexture());
context->SetViewportAndScissors((float)colorBufferWidth, (float)colorBufferHeight);
context->SetRenderTarget(colorBuffer0->View(0));
context->SetState(_psCombinePass);
context->DrawFullscreenTriangle();
context->UnBindSR(TEXTURE1);
context->UnBindSR(TEXTURE2);
context->ResetRenderTarget();
{
PROFILE_GPU("Combine");
context->BindSR(TEXTURE0, lightBuffer);
context->BindSR(TEXTURE1, reflectionsRT);
context->BindSR(TEXTURE2, _preIntegratedGF->GetTexture());
context->SetViewportAndScissors((float)colorBufferWidth, (float)colorBufferHeight);
context->SetRenderTarget(colorBuffer0->View(0));
context->SetState(_psCombinePass);
context->DrawFullscreenTriangle();
context->UnBindSR(TEXTURE1);
context->UnBindSR(TEXTURE2);
context->ResetRenderTarget();
}
// Blur Pass
if (settings.UseColorBufferMips)
@@ -298,37 +316,44 @@ GPUTexture* ScreenSpaceReflectionsPass::Render(RenderContext& renderContext, GPU
RenderTargetPool::Release(colorBuffer1);
// Ray Trace Pass
context->SetViewportAndScissors((float)traceWidth, (float)traceHeight);
context->SetRenderTarget(*traceBuffer);
context->BindSR(TEXTURE0, colorBuffer0->View());
if (useGlobalSurfaceAtlas)
{
context->BindSR(7, bindingDataSDF.Texture ? bindingDataSDF.Texture->ViewVolume() : nullptr);
context->BindSR(8, bindingDataSDF.TextureMip ? bindingDataSDF.TextureMip->ViewVolume() : nullptr);
context->BindSR(9, bindingDataSurfaceAtlas.Chunks ? bindingDataSurfaceAtlas.Chunks->View() : nullptr);
context->BindSR(10, bindingDataSurfaceAtlas.CulledObjects ? bindingDataSurfaceAtlas.CulledObjects->View() : nullptr);
context->BindSR(11, bindingDataSurfaceAtlas.Objects ? bindingDataSurfaceAtlas.Objects->View() : nullptr);
context->BindSR(12, bindingDataSurfaceAtlas.AtlasDepth->View());
context->BindSR(13, bindingDataSurfaceAtlas.AtlasLighting->View());
PROFILE_GPU("RayTrace");
context->SetViewportAndScissors((float)traceWidth, (float)traceHeight);
context->SetRenderTarget(*traceBuffer);
context->BindSR(TEXTURE0, colorBuffer0->View());
if (useGlobalSurfaceAtlas)
{
context->BindSR(7, bindingDataSDF.Texture ? bindingDataSDF.Texture->ViewVolume() : nullptr);
context->BindSR(8, bindingDataSDF.TextureMip ? bindingDataSDF.TextureMip->ViewVolume() : nullptr);
context->BindSR(9, bindingDataSurfaceAtlas.Chunks ? bindingDataSurfaceAtlas.Chunks->View() : nullptr);
context->BindSR(10, bindingDataSurfaceAtlas.CulledObjects ? bindingDataSurfaceAtlas.CulledObjects->View() : nullptr);
context->BindSR(11, bindingDataSurfaceAtlas.Objects ? bindingDataSurfaceAtlas.Objects->View() : nullptr);
context->BindSR(12, bindingDataSurfaceAtlas.AtlasDepth->View());
context->BindSR(13, bindingDataSurfaceAtlas.AtlasLighting->View());
}
context->SetState(_psRayTracePass.Get(useGlobalSurfaceAtlas ? 1 : 0));
context->DrawFullscreenTriangle();
context->ResetRenderTarget();
RenderTargetPool::Release(colorBuffer0);
}
context->SetState(_psRayTracePass.Get(useGlobalSurfaceAtlas ? 1 : 0));
context->DrawFullscreenTriangle();
context->ResetRenderTarget();
RenderTargetPool::Release(colorBuffer0);
// Resolve Pass
context->SetViewportAndScissors((float)resolveWidth, (float)resolveHeight);
context->SetRenderTarget(resolveBuffer->View());
context->BindSR(TEXTURE0, traceBuffer->View());
context->SetState(_psResolvePass.Get(resolvePassIndex));
context->DrawFullscreenTriangle();
context->ResetRenderTarget();
RenderTargetPool::Release(traceBuffer);
{
PROFILE_GPU("Resolve");
context->SetViewportAndScissors((float)resolveWidth, (float)resolveHeight);
context->SetRenderTarget(resolveBuffer->View());
context->BindSR(TEXTURE0, traceBuffer->View());
context->SetState(_psResolvePass.Get(resolvePassIndex));
context->DrawFullscreenTriangle();
context->ResetRenderTarget();
RenderTargetPool::Release(traceBuffer);
}
// Temporal Pass
GPUTexture* reflectionsBuffer = resolveBuffer;
if (useTemporal)
{
PROFILE_GPU("Temporal");
buffers->LastFrameTemporalSSR = Engine::FrameCount;
bool resetHistory = false;
if (!buffers->TemporalSSR || buffers->TemporalSSR->Width() != resolveWidth || buffers->TemporalSSR->Height() != resolveHeight)
@@ -336,7 +361,7 @@ GPUTexture* ScreenSpaceReflectionsPass::Render(RenderContext& renderContext, GPU
resetHistory = true;
if (buffers->TemporalSSR)
RenderTargetPool::Release(buffers->TemporalSSR);
tempDesc = GPUTextureDescription::New2D(resolveWidth, resolveHeight, PixelFormat::R16G16B16A16_Float);
auto tempDesc = GPUTextureDescription::New2D(resolveWidth, resolveHeight, PixelFormat::R16G16B16A16_Float);
buffers->TemporalSSR = RenderTargetPool::Get(tempDesc);
RENDER_TARGET_POOL_SET_NAME(buffers->TemporalSSR, "SSR.TemporalSSR");
}
+60 -16
View File
@@ -2,8 +2,8 @@
#include "MultiScaler.h"
#include "Engine/Graphics/Textures/GPUTexture.h"
#include "Engine/Content/Content.h"
#include "Engine/Graphics/GPUContext.h"
#include "Engine/Content/Content.h"
GPU_CB_STRUCT(Data {
Float2 TexelSize;
@@ -18,10 +18,10 @@ String MultiScaler::ToString() const
bool MultiScaler::Init()
{
// Create pipeline states
_psHalfDepth = GPUDevice::Instance->CreatePipelineState();
_psBlur5.CreatePipelineStates();
_psBlur9.CreatePipelineStates();
_psBlur13.CreatePipelineStates();
_psHalfDepth.CreatePipelineStates();
_psUpscale = GPUDevice::Instance->CreatePipelineState();
// Load asset
@@ -66,13 +66,20 @@ bool MultiScaler::setupResources()
if (_psUpscale->Init(psDesc))
return true;
}
if (!_psHalfDepth->IsValid())
if (!_psHalfDepth.IsValid())
{
psDesc.PS = shader->GetPS("PS_HalfDepth");
psDesc.PS = shader->GetPS("PS_HalfDepth", 0);
if (_psHalfDepth[0]->Init(psDesc))
return true;
psDesc.PS = shader->GetPS("PS_HalfDepth", 2);
psDesc.BlendMode.RenderTargetWriteMask = BlendingMode::ColorWrite::Red;
if (_psHalfDepth[2]->Init(psDesc))
return true;
psDesc.PS = shader->GetPS("PS_HalfDepth", 1);
psDesc.DepthWriteEnable = true;
psDesc.DepthEnable = true;
psDesc.DepthFunc = ComparisonFunc::Always;
if (_psHalfDepth->Init(psDesc))
if (_psHalfDepth[1]->Init(psDesc))
return true;
}
@@ -85,15 +92,15 @@ void MultiScaler::Dispose()
RendererPass::Dispose();
// Cleanup
SAFE_DELETE_GPU_RESOURCE(_psHalfDepth);
SAFE_DELETE_GPU_RESOURCE(_psUpscale);
_psBlur5.Delete();
_psBlur9.Delete();
_psBlur13.Delete();
_psHalfDepth.Delete();
_shader = nullptr;
}
void MultiScaler::Filter(const FilterMode mode, GPUContext* context, const int32 width, const int32 height, GPUTextureView* src, GPUTextureView* dst, GPUTextureView* tmp)
void MultiScaler::Filter(FilterMode mode, GPUContext* context, int32 width, int32 height, GPUTextureView* src, GPUTextureView* dst, GPUTextureView* tmp)
{
PROFILE_GPU_CPU("MultiScaler Filter");
@@ -152,18 +159,14 @@ void MultiScaler::Filter(const FilterMode mode, GPUContext* context, const int32
context->ResetRenderTarget();
}
void MultiScaler::Filter(const FilterMode mode, GPUContext* context, const int32 width, const int32 height, GPUTextureView* srcDst, GPUTextureView* tmp)
void MultiScaler::Filter(FilterMode mode, GPUContext* context, int32 width, int32 height, GPUTextureView* srcDst, GPUTextureView* tmp)
{
PROFILE_GPU_CPU("MultiScaler Filter");
context->SetViewportAndScissors((float)width, (float)height);
// Check if has missing resources
if (checkIfSkipPass())
{
// Skip
return;
}
// Select filter
GPUPipelineStatePermutationsPs<2>* ps;
@@ -211,11 +214,8 @@ void MultiScaler::Filter(const FilterMode mode, GPUContext* context, const int32
void MultiScaler::DownscaleDepth(GPUContext* context, int32 dstWidth, int32 dstHeight, GPUTexture* src, GPUTextureView* dst)
{
PROFILE_GPU_CPU("Downscale Depth");
// Check if has missing resources
if (checkIfSkipPass())
{
// Clear the output
context->ClearDepth(dst);
return;
}
@@ -224,6 +224,7 @@ void MultiScaler::DownscaleDepth(GPUContext* context, int32 dstWidth, int32 dstH
Data data;
data.TexelSize.X = 1.0f / (float)src->Width();
data.TexelSize.Y = 1.0f / (float)src->Height();
bool outputDepth = ((GPUTexture*)dst->GetParent())->IsDepthStencil();
auto cb = _shader->GetShader()->GetCB(0);
context->UpdateCB(cb, &data);
context->BindCB(0, cb);
@@ -232,7 +233,7 @@ void MultiScaler::DownscaleDepth(GPUContext* context, int32 dstWidth, int32 dstH
context->SetViewportAndScissors((float)dstWidth, (float)dstHeight);
context->SetRenderTarget(dst, (GPUTextureView*)nullptr);
context->BindSR(0, src);
context->SetState(_psHalfDepth);
context->SetState(_psHalfDepth[outputDepth ? 1 : 0]);
context->DrawFullscreenTriangle();
// Cleanup
@@ -240,6 +241,49 @@ void MultiScaler::DownscaleDepth(GPUContext* context, int32 dstWidth, int32 dstH
context->UnBindCB(0);
}
// Fills mip0 of dstHiZ from srcDepth and then renders every following mip from the previous one (using the HZB_CLOSEST permutation of PS_HalfDepth).
void MultiScaler::BuildHiZ(GPUContext* context, GPUTexture* srcDepth, GPUTexture* dstHiZ)
{
    PROFILE_GPU_CPU("Build HiZ");

    // Check if has missing resources (same guard as used by Filter and DownscaleDepth before touching pipeline states)
    if (checkIfSkipPass())
        return;

    const int32 dstWidth = dstHiZ->Width();
    const int32 dstHeight = dstHiZ->Height();

    // Copy mip0
    if (srcDepth->Size() == dstHiZ->Size() && srcDepth->Format() == dstHiZ->Format())
    {
        // Same size and format so a raw subresource copy is enough
        context->CopySubresource(dstHiZ, 0, srcDepth, 0);
    }
    else if (srcDepth->Size() == dstHiZ->Size())
    {
        // Same size but different format so draw the source into the destination
        context->Draw(dstHiZ, srcDepth);
    }
    else
    {
        // Different size so downscale the source depth into mip0
        // NOTE(review): no constant buffer is bound here - confirm the HZB_CLOSEST permutation of PS_HalfDepth does not rely on TexelSize
        context->SetViewportAndScissors((float)dstWidth, (float)dstHeight);
        context->SetRenderTarget(dstHiZ->View());
        context->BindSR(0, srcDepth);
        context->SetState(_psHalfDepth[2]);
        context->DrawFullscreenTriangle();
    }

    // Build mip chain (each mip is rendered from the previous one)
    const int32 mipLevels = dstHiZ->MipLevels();
    for (int32 mip = 1; mip < mipLevels; mip++)
    {
        const int32 mipWidth = Math::Max(dstWidth >> mip, 1);
        const int32 mipHeight = Math::Max(dstHeight >> mip, 1);

        // Unbind the previous output before it gets used as an input
        context->ResetRenderTarget();
        context->SetViewportAndScissors((float)mipWidth, (float)mipHeight);
        context->SetRenderTarget(dstHiZ->View(0, mip));
        context->BindSR(0, dstHiZ->View(0, mip - 1));
        context->SetState(_psHalfDepth[2]);
        context->DrawFullscreenTriangle();
    }

    // Cleanup (don't leave the source depth or a HiZ mip view bound as shader resource)
    context->ResetRenderTarget();
    context->UnBindSR(0);
    context->UnBindCB(0);
}
void MultiScaler::Upscale(GPUContext* context, const Viewport& viewport, GPUTexture* src, GPUTextureView* dst)
{
PROFILE_GPU_CPU("Upscale");
+15 -11
View File
@@ -12,16 +12,14 @@
class MultiScaler : public RendererPass<MultiScaler>
{
private:
AssetReference<Shader> _shader;
GPUPipelineState* _psHalfDepth = nullptr;
GPUPipelineStatePermutationsPs<2> _psBlur5;
GPUPipelineStatePermutationsPs<2> _psBlur9;
GPUPipelineStatePermutationsPs<2> _psBlur13;
GPUPipelineStatePermutationsPs<3> _psHalfDepth;
GPUPipelineState* _psUpscale = nullptr;
public:
/// <summary>
/// Filter mode
/// </summary>
@@ -53,7 +51,7 @@ public:
/// <param name="src">The source texture.</param>
/// <param name="dst">The destination texture.</param>
/// <param name="tmp">The temporary texture (should have the same size as destination texture).</param>
void Filter(const FilterMode mode, GPUContext* context, const int32 width, const int32 height, GPUTextureView* src, GPUTextureView* dst, GPUTextureView* tmp);
void Filter(FilterMode mode, GPUContext* context, int32 width, int32 height, GPUTextureView* src, GPUTextureView* dst, GPUTextureView* tmp);
/// <summary>
/// Performs texture filtering.
@@ -64,18 +62,26 @@ public:
/// <param name="height">The output height.</param>
/// <param name="srcDst">The source and destination texture.</param>
/// <param name="tmp">The temporary texture (should have the same size as destination texture).</param>
void Filter(const FilterMode mode, GPUContext* context, const int32 width, const int32 height, GPUTextureView* srcDst, GPUTextureView* tmp);
void Filter(FilterMode mode, GPUContext* context, int32 width, int32 height, GPUTextureView* srcDst, GPUTextureView* tmp);
/// <summary>
/// Downscales the depth buffer (to half resolution).
/// Downscales the depth buffer (to half resolution). Uses `max` operator (`min` for inverted depth) to output the furthest depths for conservative usage.
/// </summary>
/// <param name="context">The context.</param>
/// <param name="dstWidth">The width of the destination texture (in pixels).</param>
/// <param name="dstHeight">The height of the destination texture (in pixels).</param>
/// <param name="src">The source texture.</param>
/// <param name="dst">The destination texture.</param>
/// <param name="src">The source texture (has to have ShaderResource flag).</param>
/// <param name="dst">The destination texture (has to have DepthStencil or RenderTarget flag).</param>
void DownscaleDepth(GPUContext* context, int32 dstWidth, int32 dstHeight, GPUTexture* src, GPUTextureView* dst);
/// <summary>
/// Generates the Hierarchical Z-Buffer (HiZ). Uses `min` operator (`max` for inverted depth) to output the closest depths for conservative usage.
/// </summary>
/// <param name="context">The context.</param>
/// <param name="srcDepth">The source depth buffer texture (has to have ShaderResource flag).</param>
/// <param name="dstHiZ">The destination HiZ texture (has to have RenderTarget flag and per-mip views because every mip level is rendered separately).</param>
void BuildHiZ(GPUContext* context, GPUTexture* srcDepth, GPUTexture* dstHiZ);
/// <summary>
/// Upscales the texture.
/// </summary>
@@ -86,7 +92,6 @@ public:
void Upscale(GPUContext* context, const Viewport& viewport, GPUTexture* src, GPUTextureView* dst);
public:
// [RendererPass]
String ToString() const override;
bool Init() override;
@@ -94,17 +99,16 @@ public:
#if COMPILE_WITH_DEV_ENV
void OnShaderReloading(Asset* obj)
{
_psHalfDepth->ReleaseGPU();
_psUpscale->ReleaseGPU();
_psBlur5.Release();
_psBlur9.Release();
_psBlur13.Release();
_psHalfDepth.Release();
invalidateResources();
}
#endif
protected:
// [RendererPass]
bool setupResources() override;
};
@@ -443,6 +443,11 @@ String ShadersCompilation::ResolveShaderPath(StringView path)
// Hard-coded redirect to platform-specific includes
result = Globals::StartupFolder / TEXT("Source/Platforms");
}
else if (projectName.StartsWith(StringView(TEXT("FlaxThirdParty"))))
{
// Hard-coded redirect to third-party-specific includes
result = Globals::StartupFolder / TEXT("Source/ThirdParty");
}
else
{
HashSet<const ProjectInfo*> projects;
+16 -10
View File
@@ -1,6 +1,7 @@
// Copyright (c) Wojciech Figat. All rights reserved.
#include "./Flax/Common.hlsl"
#include "./Flax/Gather.hlsl"
META_CB_BEGIN(0, Data)
float2 TexelSize;
@@ -14,19 +15,24 @@ Texture2D Input : register(t0);
// Pixel Shader for depth buffer downscale (to half res)
META_PS(true, FEATURE_LEVEL_ES2)
float PS_HalfDepth(Quad_VS2PS input) : SV_Depth
{
#if CAN_USE_GATHER
float4 depths = Input.GatherRed(SamplerPointClamp, input.TexCoord);
META_PERMUTATION_1(OUTPUT_DEPTH=0)
META_PERMUTATION_1(OUTPUT_DEPTH=1)
META_PERMUTATION_1(HZB_CLOSEST=2)
float PS_HalfDepth(Quad_VS2PS input)
#if OUTPUT_DEPTH
: SV_Depth
#else
float4 depths;
depths.x = Input.SampleLevel(SamplerPointClamp, input.TexCoord + float2(0, 1) * TexelSize, 0).r;
depths.y = Input.SampleLevel(SamplerPointClamp, input.TexCoord + float2(1, 1) * TexelSize, 0).r;
depths.z = Input.SampleLevel(SamplerPointClamp, input.TexCoord + float2(1, 0) * TexelSize, 0).r;
depths.w = Input.SampleLevel(SamplerPointClamp, input.TexCoord + float2(0, 0) * TexelSize, 0).r;
: SV_Target0
#endif
{
// Load 4 depth values (2x2 quad)
float4 depths = TextureGatherRed(Input, SamplerPointClamp, input.TexCoord);
return max(depths.x, max(depths.y, max(depths.z, depths.w))) + 0.0001f;
#if HZB_CLOSEST
return min(depths.x, min(depths.y, min(depths.z, depths.w)));
#else
return max(depths.x, max(depths.y, max(depths.z, depths.w)));
#endif
}
// Pixel Shader for 5-tap gaussian blur
-3
View File
@@ -6,9 +6,6 @@
#include "./Flax/GBufferCommon.hlsl"
#include "./Flax/Quaternion.hlsl"
// Hit depth (view space) threshold to detect if sky was hit (value above it where 1.0f is default)
#define REFLECTIONS_HIT_THRESHOLD 0.9f
// Packed env probe data
struct EnvProbeData
{
+31 -25
View File
@@ -5,6 +5,9 @@
#include "./Flax/Random.hlsl"
#include "./Flax/MonteCarlo.hlsl"
#include "./Flax/GBufferCommon.hlsl"
#if SSR_USE_HZB
#include "./FlaxThirdParty/FidelityFX/ffx_sssr.h"
#endif
// 1:-1 to 0:1
float2 ClipToUv(float2 clipPos)
@@ -44,19 +47,22 @@ float RayAttenBorder(float2 pos, float value)
// Computes the reflection ray direction for the given surface sample.
// If temporal is enabled, the half-vector is importance-sampled (GGX) using a per-pixel random value offset by temporalTime, otherwise the surface normal is used directly.
// Returns: direction from the view position towards the surface, reflected around the selected half-vector.
float3 ScreenSpaceReflectionDirection(float2 uv, GBufferSample gBuffer, float3 viewPos, bool temporal = false, float temporalTime = 0.0f, float brdfBias = 0.82f)
{
    // Direction from the view position towards the shaded surface
    float3 viewWS = normalize(gBuffer.WorldPos - viewPos);

    // Per-pixel random sample with the second component pulled towards 0 by brdfBias
    float2 Xi = RandN2(uv + temporalTime);
    Xi.y = lerp(Xi.y, 0.0, brdfBias);

    // Pick the half-vector: GGX importance sample (temporal) or the plain surface normal
    float3 H = temporal ? TangentToWorld(gBuffer.Normal, ImportanceSampleGGX(Xi, gBuffer.Roughness)) : gBuffer.Normal;

    return reflect(viewWS, H.xyz);
}
// Screen Space Reflection ray tracing utility.
// If SSR_USE_HZB is defined, it uses Hierarchical Z-Buffer for tracing against screen (assumes that depthBuffer is a HiZ with full mip-chain).
// Returns: xy: hitUV, z: hitMask, where hitUV is the result UV of hit pixel, hitMask is the normalized sample weight (0 if no hit).
float3 TraceScreenSpaceReflection(float2 uv, GBufferSample gBuffer, Texture2D depthBuffer, float3 viewPos, float4x4 viewMatrix, float4x4 viewProjectionMatrix, float stepSize, float maxSamples = 20, bool temporal = false, float temporalTime = 0.0f, float worldAntiSelfOcclusionBias = 0.1f, float brdfBias = 0.82f, float drawDistance = 5000.0f, float roughnessThreshold = 0.4f, float edgeFade = 0.1f)
float3 TraceScreenSpaceReflection(
#if SSR_USE_HZB
out bool uncertainHit, uint hzbMips,
#endif
float2 uv, GBufferSample gBuffer, Texture2D depthBuffer, float3 viewPos, float4x4 viewMatrix, float4x4 viewProjectionMatrix, float stepSize, float maxSamples = 50, bool temporal = false, float temporalTime = 0.0f, float worldAntiSelfOcclusionBias = 0.1f, float brdfBias = 0.82f, float drawDistance = 5000.0f, float roughnessThreshold = 0.4f, float edgeFade = 0.1f)
{
#ifndef SSR_SKIP_INVALID_CHECK
// Reject invalid pixels
@@ -69,17 +75,19 @@ float3 TraceScreenSpaceReflection(float2 uv, GBufferSample gBuffer, Texture2D de
float3 reflectVS = normalize(reflect(gBuffer.ViewPos, normalVS));
if (gBuffer.ViewPos.z < 1.0 && reflectVS.z < 0.4)
return 0;
// Calculate ray path in UV space (z is raw depth)
float3 reflectWS = ScreenSpaceReflectionDirection(uv, gBuffer, viewPos, temporal, temporalTime, brdfBias);
#if SSR_USE_HZB
worldAntiSelfOcclusionBias *= 10.0f; // Higher bias for HZB trace to reduce artifacts
#endif
float3 startWS = gBuffer.WorldPos + gBuffer.Normal * worldAntiSelfOcclusionBias;
float3 startUV = ProjectWorldToUv(startWS, viewProjectionMatrix);
float3 endUV = ProjectWorldToUv(startWS + reflectWS, viewProjectionMatrix);
float3 rayUV = endUV - startUV;
float2 rayUVAbs = abs(rayUV.xy);
rayUV *= stepSize / max(rayUVAbs.x, rayUVAbs.y);
float3 startUv = startUV + rayUV * 2;
float3 currOffset = startUv;
float3 rayStep = rayUV * 2;
@@ -89,26 +97,30 @@ float3 TraceScreenSpaceReflection(float2 uv, GBufferSample gBuffer, Texture2D de
float numSamples = min(maxSamples, samplesToEdge.x);
rayStep *= samplesToEdge.x / numSamples;
// Calculate depth difference error
float depthDiffError = 1.3f * abs(rayStep.z);
// Ray trace
float depthDiffError = 1.3f * abs(rayStep.z);
#if SSR_USE_HZB
bool validHit = false;
uint2 depthBufferSize;
depthBuffer.GetDimensions(depthBufferSize.x, depthBufferSize.y);
float3 hit = FFX_SSSR_HierarchicalRaymarch(depthBuffer, hzbMips, depthDiffError, uncertainHit, startUV, rayUV, depthBufferSize, 0, numSamples, validHit);
if (!validHit)
return 0;
currOffset = hit;
#else
float currSampleIndex = 0;
float currSample, depthDiff;
LOOP
while (currSampleIndex < numSamples)
{
// Sample depth buffer and calculate depth difference
currSample = SAMPLE_RT(depthBuffer, currOffset.xy).r;
depthDiff = currOffset.z - currSample;
float currSample = SAMPLE_RT(depthBuffer, currOffset.xy).r;
float depthDiff = currOffset.z - currSample;
// Check intersection
if (depthDiff >= 0)
{
if (depthDiff < depthDiffError)
{
break;
}
currOffset -= rayStep;
rayStep *= 0.5;
}
@@ -117,25 +129,19 @@ float3 TraceScreenSpaceReflection(float2 uv, GBufferSample gBuffer, Texture2D de
currOffset += rayStep;
currSampleIndex++;
}
// Check if has valid result after ray tracing
if (currSampleIndex >= numSamples)
{
// All samples done but no result
return 0;
}
float2 hitUV = currOffset.xy;
return 0; // All samples done but no result
#endif
// Fade rays close to screen edge
const float fadeStart = 0.9f;
const float fadeEnd = 1.0f;
const float fadeDiffRcp = 1.0f / (fadeEnd - fadeStart);
float2 boundary = abs(hitUV - float2(0.5f, 0.5f)) * 2.0f;
float2 boundary = abs(currOffset.xy - float2(0.5f, 0.5f)) * 2.0f;
float fadeOnBorder = 1.0f - saturate((boundary.x - fadeStart) * fadeDiffRcp);
fadeOnBorder *= 1.0f - saturate((boundary.y - fadeStart) * fadeDiffRcp);
fadeOnBorder = smoothstep(0.0f, 1.0f, fadeOnBorder);
fadeOnBorder *= RayAttenBorder(hitUV, edgeFade);
fadeOnBorder *= RayAttenBorder(currOffset.xy, edgeFade);
// Fade rays on high roughness
float roughnessFade = saturate((roughnessThreshold - gBuffer.Roughness) * 20);
@@ -144,5 +150,5 @@ float3 TraceScreenSpaceReflection(float2 uv, GBufferSample gBuffer, Texture2D de
float distanceFade = saturate((drawDistance - gBuffer.ViewPos.z) / drawDistance);
// Output: xy: hitUV, z: hitMask
return float3(hitUV, fadeOnBorder * roughnessFade * distanceFade);
return float3(currOffset.xy, fadeOnBorder * roughnessFade * distanceFade);
}
+52 -23
View File
@@ -1,7 +1,14 @@
// Copyright (c) Wojciech Figat. All rights reserved.
// Skips additional check in TraceScreenSpaceReflection for material that is already done by PS_RayTracePass
#define SSR_SKIP_INVALID_CHECK 1
// Uses more-optimized Hierarchical Z-Buffer tracing rather than naive Depth Buffer tracing
#define SSR_USE_HZB 1
// Enable/disable luminance filter to reduce reflections highlights
#define SSR_REDUCE_HIGHLIGHTS 1
#include "./Flax/Common.hlsl"
#include "./Flax/LightingCommon.hlsl"
#include "./Flax/ReflectionsCommon.hlsl"
@@ -11,8 +18,7 @@
#include "./Flax/GlobalSignDistanceField.hlsl"
#include "./Flax/GI/GlobalSurfaceAtlas.hlsl"
// Enable/disable luminance filter to reduce reflections highlights
#define SSR_REDUCE_HIGHLIGHTS 1
#define SSR_USE_SDF (USE_GLOBAL_SURFACE_ATLAS && CAN_USE_GLOBAL_SURFACE_ATLAS)
META_CB_BEGIN(0, Data)
GBufferData GBuffer;
@@ -20,13 +26,13 @@ float MaxColorMiplevel;
float TraceSizeMax;
float MaxTraceSamples;
float RoughnessFade;
float2 SSRtexelSize;
float2 SSRTexelSize;
float TemporalTime;
float BRDFBias;
float WorldAntiSelfOcclusionBias;
float EdgeFadeFactor;
float TemporalResponse;
float Dummy0;
uint DepthMips;
float RayTraceStep;
float TemporalEffect;
float Intensity;
@@ -104,14 +110,31 @@ float4 PS_RayTracePass(Quad_VS2PS input) : SV_Target0
GBufferSample gBuffer = SampleGBuffer(gBufferData, input.TexCoord);
// Reject invalid pixels
BRANCH
if (gBuffer.ShadingModel == SHADING_MODEL_UNLIT || gBuffer.Roughness > RoughnessFade || gBuffer.ViewPos.z > FadeOutDistance)
return base;
// Trace depth buffer to find intersection
float3 screenHit = TraceScreenSpaceReflection(input.TexCoord, gBuffer, Depth, gBufferData.ViewPos, ViewMatrix, ViewProjectionMatrix, RayTraceStep, MaxTraceSamples, TemporalEffect, TemporalTime, WorldAntiSelfOcclusionBias, BRDFBias, FadeOutDistance, RoughnessFade, EdgeFadeFactor);
float4 result = base;
bool uncertainHit = false;
float3 screenHit = TraceScreenSpaceReflection(
#if SSR_USE_HZB
uncertainHit, DepthMips,
#endif
input.TexCoord, gBuffer, Depth, gBufferData.ViewPos, ViewMatrix, ViewProjectionMatrix, RayTraceStep, MaxTraceSamples, TemporalEffect, TemporalTime, WorldAntiSelfOcclusionBias, BRDFBias, FadeOutDistance, RoughnessFade, EdgeFadeFactor);
float4 result = base;
#if SSR_USE_SDF
if (screenHit.z > 0 && !uncertainHit) // Only use certain SSR hits when SDF tracing is enabled
#else
if (screenHit.z > 0)
#endif
{
if (uncertainHit)
{
// Jitter edges of uncertain hits (when ray goes behind the object)
screenHit.xy += RandN2(input.TexCoord + TemporalTime) * SSRTexelSize;
}
// Sample color buffer mip that matches roughness of the surface to get blurred reflections
float3 viewVector = normalize(gBufferData.ViewPos - gBuffer.WorldPos);
float NdotV = saturate(dot(gBuffer.Normal, viewVector));
float coneTangent = lerp(0.0, gBuffer.Roughness * 5 * (1.0 - BRDFBias), pow(NdotV, 1.5) * sqrt(gBuffer.Roughness));
@@ -119,21 +142,28 @@ float4 PS_RayTracePass(Quad_VS2PS input) : SV_Target0
float mip = clamp(log2(intersectionCircleRadius * TraceSizeMax), 0.0, MaxColorMiplevel);
float3 sampleColor = Texture0.SampleLevel(SamplerLinearClamp, screenHit.xy, mip).rgb;
result = float4(sampleColor, screenHit.z);
if (screenHit.z >= REFLECTIONS_HIT_THRESHOLD)
#if SSR_USE_SDF
// Skip SDF tracing if SSR hit is very certain
BRANCH
if (result.a > 0.95f)
return result;
#endif
}
// Fallback to Global SDF and Global Surface Atlas tracing
#if USE_GLOBAL_SURFACE_ATLAS && CAN_USE_GLOBAL_SURFACE_ATLAS
#if SSR_USE_SDF
// Calculate reflection direction (the same way as in TraceScreenSpaceReflection)
float3 reflectWS = ScreenSpaceReflectionDirection(input.TexCoord, gBuffer, gBufferData.ViewPos, TemporalEffect, TemporalTime, BRDFBias);
// Raytrace Global SDF
GlobalSDFTrace sdfTrace;
float maxDistance = GLOBAL_SDF_WORLD_SIZE;
sdfTrace.Init(gBuffer.WorldPos, reflectWS, 0.0f, maxDistance);
GlobalSDFHit sdfHit = RayTraceGlobalSDF(GlobalSDF, GlobalSDFTex, GlobalSDFMip, sdfTrace, 2.0f);
if (sdfHit.IsHit())
{
// Sample Global Surface Atlas
float3 hitPosition = sdfHit.GetHitPosition(sdfTrace);
float surfaceThreshold = GetGlobalSurfaceAtlasThreshold(GlobalSDF, sdfHit);
float4 surfaceAtlas = SampleGlobalSurfaceAtlas(GlobalSurfaceAtlas, GlobalSurfaceAtlasChunks, RWGlobalSurfaceAtlasCulledObjects, GlobalSurfaceAtlasObjects, GlobalSurfaceAtlasDepth, GlobalSurfaceAtlasTex, hitPosition, -reflectWS, surfaceThreshold);
@@ -159,28 +189,27 @@ float4 PS_ResolvePass(Quad_VS2PS input) : SV_Target0
static const float2 Offsets[8] =
{
float2( 0, 0),
float2( 2, -2),
float2(-2, -2),
float2( 0, 2),
float2(-2, 0),
float2( 0, -2),
float2( 2, 0),
float2( 2, 2),
float2( 1, -1),
float2(-1, -1),
float2( 0, 1),
float2(-1, 0),
float2( 0, -1),
float2( 1, 0),
float2( 1, 1),
};
float2 uv = input.TexCoord;
// Inputs:
// Texture0 - ray trace buffer (xy: HDR color, z: weight)
// Sample GBuffer
GBufferData gBufferData = GetGBufferData();
GBufferSample gBuffer = SampleGBuffer(gBufferData, uv);
GBufferSample gBuffer = SampleGBuffer(gBufferData, input.TexCoord);
BRANCH
if (gBuffer.ShadingModel == SHADING_MODEL_UNLIT)
return 0;
// Randomize it a little
float2 random = RandN2(uv + TemporalTime);
float2 random = RandN2(input.TexCoord + TemporalTime);
float2 blueNoise = random.xy * 2.0 - 1.0;
float2x2 offsetRotationMatrix = float2x2(blueNoise.x, blueNoise.y, -blueNoise.y, blueNoise.x);
@@ -189,9 +218,9 @@ float4 PS_ResolvePass(Quad_VS2PS input) : SV_Target0
UNROLL
for (int i = 0; i < RESOLVE_SAMPLES; i++)
{
float2 offsetUV = Offsets[i] * SSRtexelSize;
float2 offsetUV = Offsets[i] * SSRTexelSize;
offsetUV = mul(offsetRotationMatrix, offsetUV);
float4 value = Texture0.SampleLevel(SamplerLinearClamp, uv + offsetUV, 0);
float4 value = Texture0.SampleLevel(SamplerLinearClamp, input.TexCoord + offsetUV, 0);
#if SSR_REDUCE_HIGHLIGHTS
value.rgb /= 1 + Luminance(value.rgb);
#endif
@@ -224,8 +253,8 @@ float4 PS_TemporalPass(Quad_VS2PS input) : SV_Target0
float2 velocity = Texture2.SampleLevel(SamplerLinearClamp, uv, 0).xy;
float2 prevUV = uv - velocity;
float4 current = Texture0.SampleLevel(SamplerLinearClamp, uv, 0);
float2 du = float2(SSRtexelSize.x, 0.0);
float2 dv = float2(0.0, SSRtexelSize.y);
float2 du = float2(SSRTexelSize.x, 0.0);
float2 dv = float2(0.0, SSRTexelSize.y);
// Sample pixels around
float4 currentTopLeft = Texture0.SampleLevel(SamplerLinearClamp, uv.xy - dv - du, 0);
+22
View File
@@ -0,0 +1,22 @@
// Copyright (c) Wojciech Figat. All rights reserved.
using Flax.Build;
using Flax.Build.NativeCpp;
/// <summary>
/// Header-only third-party module for the AMD FidelityFX SDK.
/// https://github.com/GPUOpen-LibrariesAndSDKs/FidelityFX-SDK
/// </summary>
public class FidelityFX : HeaderOnlyModule
{
    /// <inheritdoc />
    public override void Init()
    {
        base.Init();

        // Licensing information shipped next to the module sources
        LicenseFilePath = "license.txt";
        LicenseType = LicenseTypes.MIT;

        // Third-party modules are merged into the engine binary
        BinaryModuleName = "FlaxEngine";
    }
}
+131
View File
@@ -0,0 +1,131 @@
/**********************************************************************
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
********************************************************************/
#ifndef FFX_SSSR
#define FFX_SSSR
#define FFX_SSSR_FLOAT_MAX 3.402823466e+38
// Performs the first ray step: moves the ray from its origin to the nearest xy border of the mip cell that contains the origin.
// Outputs the ray parameter of that border (current_t) and the matching point on the ray (position).
// floor_offset and uv_offset are supplied by the caller and select/nudge the cell border that is intersected.
void FFX_SSSR_InitialAdvanceRay(float3 origin, float3 direction, float3 inv_direction, float2 current_mip_resolution, float2 current_mip_resolution_inv, float2 floor_offset, float2 uv_offset, out float3 position, out float current_t) {
    // Ray origin expressed in texel units of the current mip
    float2 origin_texel = current_mip_resolution * origin.xy;

    // Border of the cell (the half box pointing away from the ray origin), converted back to UV space
    float2 border_uv = floor(origin_texel) + floor_offset;
    border_uv = border_uv * current_mip_resolution_inv + uv_offset;

    // Solve o + d * t = p' for t on each axis: t = (p' - o) / d
    float2 border_t = border_uv * inv_direction.xy - origin.xy * inv_direction.xy;

    // The closest of the two borders is the one the ray reaches first
    current_t = min(border_t.x, border_t.y);
    position = origin + current_t * direction;
}
// Advances the ray by one step within the current mip cell.
// Builds three boundary planes (the x and y borders of the cell and the depth surface at surface_z),
// intersects the ray with them and moves the ray to the nearest one, but only while the ray is still above the surface.
// Parameters:
//   origin, direction          - ray start and direction; position is always recomputed as origin + current_t * direction
//   inv_direction              - used as 1 / direction in the plane intersection (t = (p' - o) / d)
//   current_mip_position       - current ray position in texel units of the current mip (floored to get the cell)
//   current_mip_resolution_inv - converts the cell border from texel units back to UV space
//   floor_offset, uv_offset    - caller-supplied offsets that pick and nudge the cell border to intersect
//   surface_z                  - depth of the surface in the current cell
//   position, current_t        - in/out ray state; current_t is left unchanged when the ray is not above the surface
// Returns true when the ray crossed the cell through an xy border while above the surface (the tile was skipped),
// false when the step was limited by the depth plane or the ray was already at/below the surface.
bool FFX_SSSR_AdvanceRay(float3 origin, float3 direction, float3 inv_direction, float2 current_mip_position, float2 current_mip_resolution_inv, float2 floor_offset, float2 uv_offset, float surface_z, inout float3 position, inout float current_t) {
// Create boundary planes
float2 xy_plane = floor(current_mip_position) + floor_offset;
xy_plane = xy_plane * current_mip_resolution_inv + uv_offset;
float3 boundary_planes = float3(xy_plane, surface_z);
// Intersect ray with the half box that is pointing away from the ray origin.
// o + d * t = p' => t = (p' - o) / d
float3 t = boundary_planes * inv_direction - origin * inv_direction;
// Prevent using z plane when shooting out of the depth buffer.
// (the z intersection is replaced with FLOAT_MAX so it can never win the min() below)
#ifdef FFX_SSSR_INVERTED_DEPTH_RANGE
t.z = direction.z < 0 ? t.z : FFX_SSSR_FLOAT_MAX;
#else
t.z = direction.z > 0 ? t.z : FFX_SSSR_FLOAT_MAX;
#endif
// Choose nearest intersection with a boundary.
float t_min = min(min(t.x, t.y), t.z);
#ifdef FFX_SSSR_INVERTED_DEPTH_RANGE
// Larger z means closer to the camera.
bool above_surface = surface_z < position.z;
#else
// Smaller z means closer to the camera.
bool above_surface = surface_z > position.z;
#endif
// Decide whether we are able to advance the ray until we hit the xy boundaries or if we had to clamp it at the surface.
// We use the asuint comparison to avoid NaN / Inf logic, also we actually care about bitwise equality here to see if t_min is the t.z we fed into the min3 above.
bool skipped_tile = asuint(t_min) != asuint(t.z) && above_surface;
// Make sure to only advance the ray if we're still above the surface.
current_t = above_surface ? t_min : current_t;
// Advance ray
position = origin + current_t * direction;
return skipped_tile;
}
// Returns screen_dimensions scaled by 0.5^mip_level (the result is not rounded to whole texels).
float2 FFX_SSSR_GetMipResolution(float2 screen_dimensions, int mip_level) {
    const float mip_scale = pow(0.5, mip_level);
    return screen_dimensions * mip_scale;
}
// Traces a ray through a depth buffer mip chain: climbs to a coarser mip after each skipped tile and drops to a finer mip otherwise.
// Requires origin and direction of the ray to be in screen space [0, 1] x [0, 1]
// Parameters:
//   depthBuffer                 - depth texture read with Load() at the current mip (x channel is used as the surface depth)
//   hzbMips                     - the traversal moves to a coarser mip only while current_mip < hzbMips
//   depthDiffError              - how far the ray may be past the surface depth before the step is counted as "over the error"
//   uncertainHit                - [out] true when the hit is valid but the ray exceeded depthDiffError more than 3 times
//   origin, direction           - ray start and direction in screen space
//   screen_size                 - resolution used to derive the per-mip resolution
//   most_detailed_mip           - mip the traversal starts on; the loop ends once current_mip drops below it
//   max_traversal_intersections - upper limit of loop iterations
//   valid_hit                   - [out] result of (i <= max_traversal_intersections) after the loop
// Returns the final ray position.
float3 FFX_SSSR_HierarchicalRaymarch(Texture2D depthBuffer, uint hzbMips, float depthDiffError, out bool uncertainHit, float3 origin, float3 direction, float2 screen_size, int most_detailed_mip, uint max_traversal_intersections, out bool valid_hit) {
// Per-component reciprocal of the direction; zero components get FLOAT_MAX instead of a division by zero
const float3 inv_direction = select(direction != 0, 1.0 / direction, FFX_SSSR_FLOAT_MAX);
// Start on mip with highest detail.
int current_mip = most_detailed_mip;
// Could recompute these every iteration, but it's faster to hoist them out and update them.
float2 current_mip_resolution = FFX_SSSR_GetMipResolution(screen_size, current_mip);
float2 current_mip_resolution_inv = rcp(current_mip_resolution);
// Offset to the bounding boxes uv space to intersect the ray with the center of the next pixel.
// This means we ever so slightly over shoot into the next region.
float2 uv_offset = 0.005 * exp2(most_detailed_mip) / screen_size;
uv_offset = select(direction.xy < 0, -uv_offset, uv_offset);
// Offset applied depending on current mip resolution to move the boundary to the left/right upper/lower border depending on ray direction.
float2 floor_offset = select(direction.xy < 0, 0, 1);
// Initially advance ray to avoid immediate self intersections.
float current_t;
float3 position;
FFX_SSSR_InitialAdvanceRay(origin, direction, inv_direction, current_mip_resolution, current_mip_resolution_inv, floor_offset, uv_offset, position, current_t);
uint overDiffError = 0;
int i = 0;
// Iterate until the step budget is used up or the ray settles below the most detailed mip
while (i < max_traversal_intersections && current_mip >= most_detailed_mip) {
float2 current_mip_position = current_mip_resolution * position.xy;
float surface_z = depthBuffer.Load(int3(current_mip_position, current_mip)).x;
if (position.z - surface_z > depthDiffError) overDiffError++; // Count number of times we were under the depth by more than the allowed error
bool skipped_tile = FFX_SSSR_AdvanceRay(origin, direction, inv_direction, current_mip_position, current_mip_resolution_inv, floor_offset, uv_offset, surface_z, position, current_t);
++i;
// NOTE(review): this check lets current_mip reach hzbMips itself - confirm whether hzbMips is the mip count or the highest valid mip index
if (!skipped_tile || current_mip < hzbMips) // Never go too low depth resolution to avoid blocky artifacts
{
// Skipped tile -> go one mip coarser (half resolution), otherwise go one mip finer (double resolution)
current_mip += skipped_tile ? 1 : -1;
current_mip_resolution *= skipped_tile ? 0.5 : 2;
current_mip_resolution_inv *= skipped_tile ? 2 : 0.5;
}
}
// NOTE(review): the loop never lets i exceed max_traversal_intersections, so this looks always true - confirm this is intended
valid_hit = (i <= max_traversal_intersections);
uncertainHit = valid_hit && overDiffError > 3; // Hit is uncertain when the ray was behind the surface by more than depthDiffError more than 3 times
return position;
}
#endif //FFX_SSSR
+19
View File
@@ -0,0 +1,19 @@
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.