BRE Architecture Series Part 6 – Render Manager and Command List Executor

In this part, we are going to talk about one of the most important components in BRE: the RenderManager. It is a class that inherits from tbb::task and runs in its own thread. It has several responsibilities:

  • Resource creation
  • Pass initialization and execution
  • Camera update
  • Frames presentation and synchronization

Its implementation is the following:

RenderManager.h

#pragma once

#include <atomic>
#include <cstdint>

#include <d3d12.h>
#include <dxgi1_4.h>
#include <tbb/task.h>
#include <wrl.h>

#include <AmbientOcclusionPass\AmbientOcclusionPass.h>
#include <ApplicationSettings\ApplicationSettings.h>
#include <CommandManager\CommandListPerFrame.h>
#include <Camera/Camera.h>
#include <EnvironmentLightPass\EnvironmentLightPass.h>
#include <GeometryPass\GeometryPass.h>
#include <PostProcesspass\PostProcesspass.h>
#include <ReflectionPass\ReflectionPass.h>
#include <SkyBoxPass\SkyBoxPass.h>
#include <ShaderUtils\CBuffers.h>
#include <ToneMappingPass\ToneMappingPass.h>
#include <Timer/Timer.h>

namespace BRE {
class CommandListExecutor;
class Scene;

///
/// @brief Responsible for initializing the passes (geometry, light, skybox, etc) based on a Scene,
/// executing them every frame and presenting the resulting frame.
///
/// Steps:
/// - Use RenderManager::Create() to create and spawn an instance.
/// - When you want to terminate this task, you should call RenderManager::Terminate()
///
class RenderManager : public tbb::task {
public:
    ///
    /// @brief Creates a RenderManager
    ///
    /// This method must be called once.
    ///
    /// @param scene Scene to create the RenderManager
    /// @return Render manager
    ///
    static RenderManager& Create(Scene& scene) noexcept;

    ~RenderManager() = default;
    RenderManager(const RenderManager&) = delete;
    const RenderManager& operator=(const RenderManager&) = delete;
    RenderManager(RenderManager&&) = delete;
    RenderManager& operator=(RenderManager&&) = delete;

    ///
    /// @brief Terminate render manager
    ///
    /// Requests the render loop to stop and blocks until the task has finished.
    ///
    void Terminate() noexcept;

private:
    explicit RenderManager(Scene& scene);

    ///
    /// @brief Executes the tbb task.
    ///
    /// This method is called when tbb::task is spawned
    ///
    tbb::task* execute() final override;

    static RenderManager* sRenderManager;

    ///
    /// @brief Initialize passes
    /// @param scene Scene to initialize passes
    ///
    void InitPasses(Scene& scene) noexcept;

    ///
    /// @brief Creates frame buffers and render target views
    ///
    void CreateFrameBuffersAndRenderTargetViews() noexcept;

    ///
    /// @brief Creates depth stencil buffer and view
    ///
    void CreateDepthStencilBufferAndView() noexcept;

    ///
    /// @brief Creates intermediate color buffer, shader resource views, and render target views.
    /// @param initialState Initial state of the buffers
    /// @param resourceName Resource name
    /// @param buffer Output color buffer
    /// @param renderTargetView Output render target view
    /// @param shaderResourceView Output shader resource view
    ///
    void CreateIntermediateColorBufferAndViews(const D3D12_RESOURCE_STATES initialState,
                                               const wchar_t* resourceName,
                                               ID3D12Resource* &buffer,
                                               D3D12_CPU_DESCRIPTOR_HANDLE& renderTargetView,
                                               D3D12_GPU_DESCRIPTOR_HANDLE& shaderResourceView) noexcept;

    ///
    /// @brief Get current frame buffer
    /// @return Frame buffer
    ///
    ID3D12Resource* GetCurrentFrameBuffer() const noexcept
    {
        BRE_ASSERT(mSwapChain != nullptr);
        return mFrameBuffers[mSwapChain->GetCurrentBackBufferIndex()];
    }

    ///
    /// @brief Get render target view to the current frame buffer
    /// @return Render target view
    ///
    D3D12_CPU_DESCRIPTOR_HANDLE GetCurrentFrameBufferRenderTargetView() const noexcept
    {
        // Same precondition as GetCurrentFrameBuffer(): the swap chain is dereferenced below.
        BRE_ASSERT(mSwapChain != nullptr);
        return mFrameBufferRenderTargetViews[mSwapChain->GetCurrentBackBufferIndex()];
    }

    ///
    /// @brief Records pre pass command lists and pushes them to 
    /// the CommandListExecutor.
    /// @return The number of recorded command lists
    ///
    std::uint32_t RecordAndPushPrePassCommandLists() noexcept;

    ///
    /// @brief Records post pass command lists and pushes them to 
    /// the CommandListExecutor.
    /// @return The number of recorded command lists
    ///
    std::uint32_t RecordAndPushPostPassCommandLists() noexcept;

    ///
    /// @brief Flushes command queue
    ///
    void FlushCommandQueue() noexcept;

    ///
    /// @brief Presents current frame and continue with the next frame.
    ///
    void PresentCurrentFrameAndBeginNextFrame() noexcept;

    Microsoft::WRL::ComPtr<IDXGISwapChain3> mSwapChain{ nullptr };

    // Fences data for synchronization purposes.
    ID3D12Fence* mFence{ nullptr };
    std::uint32_t mCurrentQueuedFrameIndex{ 0U };
    std::uint64_t mFenceValueByQueuedFrameIndex[ApplicationSettings::sQueuedFrameCount]{ 0UL };
    std::uint64_t mCurrentFenceValue{ 0UL };

    // Passes
    GeometryPass mGeometryPass;
    AmbientOcclusionPass mAmbientOcclusionPass;
    EnvironmentLightPass mEnvironmentLightPass;
    ReflectionPass mReflectionPass;
    SkyBoxPass mSkyBoxPass;
    ToneMappingPass mToneMappingPass;
    PostProcessPass mPostProcessPass;

    CommandListPerFrame mPrePassCommandListPerFrame;
    CommandListPerFrame mPostPassCommandListPerFrame;

    ID3D12Resource* mFrameBuffers[ApplicationSettings::sSwapChainBufferCount]{ nullptr };
    D3D12_CPU_DESCRIPTOR_HANDLE mFrameBufferRenderTargetViews[ApplicationSettings::sSwapChainBufferCount]{ 0UL };

    ID3D12Resource* mDepthBuffer{ nullptr };
    D3D12_GPU_DESCRIPTOR_HANDLE mDepthBufferShaderResourceView{ 0UL };
    D3D12_CPU_DESCRIPTOR_HANDLE mDepthBufferRenderTargetView{ 0UL };

    // Buffers used for intermediate computations.
    // They are used as render targets (light pass) or pixel shader resources (post processing passes)
    ID3D12Resource* mIntermediateColorBuffer1{ nullptr };
    D3D12_GPU_DESCRIPTOR_HANDLE mIntermediateColorBuffer1ShaderResourceView{ 0UL };
    D3D12_CPU_DESCRIPTOR_HANDLE mIntermediateColorBuffer1RenderTargetView{ 0UL };
    ID3D12Resource* mIntermediateColorBuffer2{ nullptr };
    D3D12_GPU_DESCRIPTOR_HANDLE mIntermediateColorBuffer2ShaderResourceView{ 0UL };
    D3D12_CPU_DESCRIPTOR_HANDLE mIntermediateColorBuffer2RenderTargetView{ 0UL };

    // We cache it here, as it is used by most passes.
    FrameCBuffer mFrameCBuffer;

    Camera mCamera;
    Timer mTimer;

    // When it is true, master render thread is destroyed.
    // Atomic because it is set by the thread that calls Terminate() while the
    // render task polls it from its own thread.
    std::atomic<bool> mTerminate{ false };
};
}

RenderManager.cpp

#include "RenderManager.h"

#include <DirectXColors.h>
#include <tbb/parallel_for.h>

#include <CommandListExecutor/CommandListExecutor.h>
#include <CommandManager/CommandQueueManager.h>
#include <CommandManager/FenceManager.h>
#include <DescriptorManager\CbvSrvUavDescriptorManager.h>
#include <DescriptorManager\DepthStencilDescriptorManager.h>
#include <DescriptorManager\RenderTargetDescriptorManager.h>
#include <DirectXManager\DirectXManager.h>
#include <DXUtils\D3DFactory.h>
#include <Input/Keyboard.h>
#include <Input/Mouse.h>
#include <ResourceManager\ResourceManager.h>
#include <ResourceStateManager\ResourceStateManager.h>
#include <Scene/Scene.h>

using namespace DirectX;

namespace BRE {
namespace {
///
/// @brief Update camera and constant buffer per frame
///
/// The elapsed time is accumulated across calls and consumed in fixed steps of
/// ApplicationSettings::sSecondsPerFrame. Nothing is updated until at least one
/// full step has been accumulated.
///
/// @param elapsedFrameTime Elapsed frame time
/// @param camera Camera
/// @param frameCBuffer Constant buffer per frame
///
void UpdateCameraAndFrameCBuffer(const float elapsedFrameTime,
                                 Camera& camera,
                                 FrameCBuffer& frameCBuffer) noexcept
{
    static float elapsedFrameTimeAccumulator = 0.0f;
    elapsedFrameTimeAccumulator += elapsedFrameTime;

    while (elapsedFrameTimeAccumulator >= ApplicationSettings::sSecondsPerFrame) {
        // Amount passed to Camera::Walk() / Camera::Strafe() on every fixed step.
        static const float translationDelta = 5.0f;

        // Factor applied to the normalized mouse displacement on every fixed step.
        static const float rotationDelta = 10.0f;

        // Mouse position seen on the previous fixed step.
        static std::int32_t lastXY[]{ 0, 0 };

        // Translation multiplier used while the left shift key is held down.
        static const float sCameraMultiplier{ 10.0f };

        camera.UpdateViewMatrix();

        frameCBuffer.mEyeWorldPosition = camera.GetPosition4f();

        MathUtils::StoreTransposeMatrix(camera.GetViewMatrix(),
                                        frameCBuffer.mViewMatrix);
        MathUtils::StoreInverseTransposeMatrix(camera.GetViewMatrix(),
                                               frameCBuffer.mInverseViewMatrix);

        MathUtils::StoreTransposeMatrix(camera.GetProjectionMatrix(),
                                        frameCBuffer.mProjectionMatrix);
        MathUtils::StoreInverseTransposeMatrix(camera.GetProjectionMatrix(),
                                               frameCBuffer.mInverseProjectionMatrix);

        // Update camera based on keyboard
        const float offset = translationDelta * (Keyboard::Get().IsKeyDown(DIK_LSHIFT) ? sCameraMultiplier : 1.0f);
        if (Keyboard::Get().IsKeyDown(DIK_W)) {
            camera.Walk(offset);
        }
        if (Keyboard::Get().IsKeyDown(DIK_S)) {
            camera.Walk(-offset);
        }
        if (Keyboard::Get().IsKeyDown(DIK_A)) {
            camera.Strafe(-offset);
        }
        if (Keyboard::Get().IsKeyDown(DIK_D)) {
            camera.Strafe(offset);
        }

        // Update camera based on mouse
        const std::int32_t x{ Mouse::Get().GetX() };
        const std::int32_t y{ Mouse::Get().GetY() };
        if (Mouse::Get().IsButtonDown(Mouse::MouseButtonsLeft)) {
            // Mouse displacement since the previous step, normalized by the window size.
            const float dx = static_cast<float>(x - lastXY[0]) / ApplicationSettings::sWindowWidth;
            const float dy = static_cast<float>(y - lastXY[1]) / ApplicationSettings::sWindowHeight;

            camera.Pitch(dy * rotationDelta);
            camera.RotateY(dx * rotationDelta);
        }

        // Always remember the position, so a later drag starts from the current
        // mouse location instead of a stale one.
        lastXY[0] = x;
        lastXY[1] = y;

        elapsedFrameTimeAccumulator -= ApplicationSettings::sSecondsPerFrame;
    }
}

///
/// @brief Creates swap chain
///
/// The swap chain is created for the given window on the command queue of the
/// CommandListExecutor, and its buffers are resized to the window dimensions
/// stored in ApplicationSettings.
///
/// @param windowHandle Window handle
/// @param frameBufferFormat Format of the frame buffer
/// @param swapChain Output swap chain
///
void CreateSwapChain(const HWND windowHandle,
                     const DXGI_FORMAT frameBufferFormat,
                     Microsoft::WRL::ComPtr<IDXGISwapChain3>& swapChain) noexcept
{
    DXGI_SWAP_CHAIN_DESC1 swapChainDescriptor = {};
    swapChainDescriptor.AlphaMode = DXGI_ALPHA_MODE_UNSPECIFIED;
    swapChainDescriptor.BufferCount = ApplicationSettings::sSwapChainBufferCount;
    swapChainDescriptor.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
#ifdef V_SYNC
    swapChainDescriptor.Flags = DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT;
#else 
    swapChainDescriptor.Flags = 0U;
#endif
    swapChainDescriptor.Format = frameBufferFormat;
    swapChainDescriptor.SampleDesc.Count = 1U;
    swapChainDescriptor.Scaling = DXGI_SCALING_NONE;
    swapChainDescriptor.Stereo = false;
    swapChainDescriptor.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;

    // The factory hands back an IDXGISwapChain1. It is held in a ComPtr so that
    // its reference is released once the IDXGISwapChain3 interface was obtained.
    Microsoft::WRL::ComPtr<IDXGISwapChain1> baseSwapChain;
    BRE_CHECK_HR(DirectXManager::GetIDXGIFactory().CreateSwapChainForHwnd(&CommandListExecutor::Get().GetCommandQueue(),
                                                                          windowHandle,
                                                                          &swapChainDescriptor,
                                                                          nullptr,
                                                                          nullptr,
                                                                          baseSwapChain.GetAddressOf()));
    BRE_CHECK_HR(baseSwapChain->QueryInterface(IID_PPV_ARGS(swapChain.GetAddressOf())));

    BRE_CHECK_HR(swapChain->ResizeBuffers(ApplicationSettings::sSwapChainBufferCount,
                                          ApplicationSettings::sWindowWidth,
                                          ApplicationSettings::sWindowHeight,
                                          frameBufferFormat,
                                          swapChainDescriptor.Flags));

    // Make window association
    BRE_CHECK_HR(DirectXManager::GetIDXGIFactory().MakeWindowAssociation(windowHandle,
                                                                         DXGI_MWA_NO_WINDOW_CHANGES | DXGI_MWA_NO_ALT_ENTER | DXGI_MWA_NO_PRINT_SCREEN));

#ifdef V_SYNC
    BRE_CHECK_HR(swapChain->SetMaximumFrameLatency(ApplicationSettings::sQueuedFrameCount));
#endif
}
}

using namespace DirectX;

// The only RenderManager instance. It is set by RenderManager::Create().
RenderManager* RenderManager::sRenderManager{ nullptr };

///
/// @brief Allocates the RenderManager as a child of a new root tbb task.
///
/// Asserts that no instance was created before.
///
/// @param scene Scene forwarded to the RenderManager constructor
/// @return The created render manager
///
RenderManager&
RenderManager::Create(Scene& scene) noexcept
{
    BRE_ASSERT(sRenderManager == nullptr);

    // Empty root task that acts as parent of the render task.
    // Its reference count is 2: 1 parent task + 1 master render task.
    tbb::empty_task* const rootTask = new (tbb::task::allocate_root()) tbb::empty_task;
    rootTask->set_ref_count(2);

    RenderManager* const renderManager = new (rootTask->allocate_child()) RenderManager(scene);
    sRenderManager = renderManager;

    return *renderManager;
}

///
/// @brief Creates the fence, the frame/depth/intermediate buffers and their views,
/// configures the camera frustum, initializes the passes and spawns this task.
/// @param scene Scene that provides the geometry command list recorders and the camera
///
RenderManager::RenderManager(Scene& scene)
    : mGeometryPass(scene.GetGeometryCommandListRecorders())
    , mCamera(scene.GetCamera())
{
    mFence = &FenceManager::CreateFence(0U, D3D12_FENCE_FLAG_NONE);

    // Buffers and views, in the same order they are consumed by the passes below.
    CreateFrameBuffersAndRenderTargetViews();
    CreateDepthStencilBufferAndView();

    const D3D12_RESOURCE_STATES intermediateBufferInitialState = D3D12_RESOURCE_STATE_RENDER_TARGET;
    CreateIntermediateColorBufferAndViews(intermediateBufferInitialState,
                                          L"Intermediate Color Buffer 1",
                                          mIntermediateColorBuffer1,
                                          mIntermediateColorBuffer1RenderTargetView,
                                          mIntermediateColorBuffer1ShaderResourceView);
    CreateIntermediateColorBufferAndViews(intermediateBufferInitialState,
                                          L"Intermediate Color Buffer 2",
                                          mIntermediateColorBuffer2,
                                          mIntermediateColorBuffer2RenderTargetView,
                                          mIntermediateColorBuffer2ShaderResourceView);

    mCamera.SetFrustum(ApplicationSettings::sVerticalFieldOfView,
                       ApplicationSettings::GetAspectRatio(),
                       ApplicationSettings::sNearPlaneZ,
                       ApplicationSettings::sFarPlaneZ);

    // The depth buffer is also read by some passes, so it needs a shader resource view.
    D3D12_SHADER_RESOURCE_VIEW_DESC depthSrvDescriptor{};
    depthSrvDescriptor.Format = ApplicationSettings::sDepthStencilSRVFormat;
    depthSrvDescriptor.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
    depthSrvDescriptor.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
    depthSrvDescriptor.Texture2D.MostDetailedMip = 0;
    depthSrvDescriptor.Texture2D.MipLevels = mDepthBuffer->GetDesc().MipLevels;
    depthSrvDescriptor.Texture2D.PlaneSlice = 0;
    depthSrvDescriptor.Texture2D.ResourceMinLODClamp = 0.0f;
    mDepthBufferShaderResourceView = CbvSrvUavDescriptorManager::CreateShaderResourceView(*mDepthBuffer,
                                                                                          depthSrvDescriptor);

    InitPasses(scene);

    // Everything is ready: spawn the master render task.
    parent()->spawn(*this);
}

///
/// @brief Initializes every pass with the buffers and views it works on.
/// @param scene Scene that provides the sky box, diffuse irradiance and
/// specular pre convolved cube maps
///
void
RenderManager::InitPasses(Scene& scene) noexcept
{
    mGeometryPass.Init(mDepthBufferRenderTargetView);

    // Cube maps consumed by the environment light and sky box passes.
    ID3D12Resource* const skyBox = scene.GetSkyBoxCubeMap();
    ID3D12Resource* const diffuseIrradiance = scene.GetDiffuseIrradianceCubeMap();
    ID3D12Resource* const specularPreConvolved = scene.GetSpecularPreConvolvedCubeMap();
    BRE_ASSERT(skyBox != nullptr);
    BRE_ASSERT(diffuseIrradiance != nullptr);
    BRE_ASSERT(specularPreConvolved != nullptr);

    mReflectionPass.Init(*mDepthBuffer);

    mAmbientOcclusionPass.Init(mGeometryPass.GetGeometryBuffer(GeometryPass::NORMAL_ROUGHNESS),
                               *mDepthBuffer,
                               mGeometryPass.GetGeometryBufferShaderResourceView(GeometryPass::NORMAL_ROUGHNESS),
                               mDepthBufferShaderResourceView);

    // The environment light pass writes into the intermediate color buffer 1.
    mEnvironmentLightPass.Init(mGeometryPass.GetGeometryBuffer(GeometryPass::BASECOLOR_METALNESS),
                               mGeometryPass.GetGeometryBuffer(GeometryPass::NORMAL_ROUGHNESS),
                               *mDepthBuffer,
                               *diffuseIrradiance,
                               *specularPreConvolved,
                               mAmbientOcclusionPass.GetAmbientAccessibilityBuffer(),
                               mIntermediateColorBuffer1RenderTargetView,
                               mGeometryPass.GetGeometryBufferShaderResourceViews(),
                               mAmbientOcclusionPass.GetAmbientAccessibilityShaderResourceView(),
                               mDepthBufferShaderResourceView);

    mSkyBoxPass.Init(*skyBox,
                     *mDepthBuffer,
                     mIntermediateColorBuffer1RenderTargetView,
                     mDepthBufferRenderTargetView);

    // Tone mapping reads the intermediate color buffer 1 and writes into buffer 2,
    // which is the input of the post process pass.
    mToneMappingPass.Init(*mIntermediateColorBuffer1,
                          mIntermediateColorBuffer1ShaderResourceView,
                          *mIntermediateColorBuffer2,
                          mIntermediateColorBuffer2RenderTargetView);

    mPostProcessPass.Init(*mIntermediateColorBuffer2,
                          mIntermediateColorBuffer2ShaderResourceView);

    // All queued frames start with the same fence value.
    for (std::uint64_t& queuedFrameFenceValue : mFenceValueByQueuedFrameIndex) {
        queuedFrameFenceValue = mCurrentFenceValue;
    }
}

void
RenderManager::Terminate() noexcept
{
    mTerminate = true;
    parent()->wait_for_all();
}

///
/// @brief Render loop: runs one frame per iteration until termination is requested.
/// @return Always nullptr
///
tbb::task*
RenderManager::execute()
{
    while (mTerminate == false) {
        mTimer.Tick();
        const float deltaTimeInSeconds = mTimer.GetDeltaTimeInSeconds();
        UpdateCameraAndFrameCBuffer(deltaTimeInSeconds, mCamera, mFrameCBuffer);

        // Count every command list pushed during this frame, so we know
        // how many of them must be executed before presenting.
        CommandListExecutor::Get().ResetExecutedCommandListCount();
        std::uint32_t pushedCommandListCount = RecordAndPushPrePassCommandLists();

        pushedCommandListCount += mGeometryPass.Execute(mFrameCBuffer);
        pushedCommandListCount += mAmbientOcclusionPass.Execute(mFrameCBuffer);
        pushedCommandListCount += mEnvironmentLightPass.Execute(mFrameCBuffer);
        pushedCommandListCount += mReflectionPass.Execute(mFrameCBuffer);
        pushedCommandListCount += mSkyBoxPass.Execute(mFrameCBuffer);
        pushedCommandListCount += mToneMappingPass.Execute();
        pushedCommandListCount += mPostProcessPass.Execute(*GetCurrentFrameBuffer(),
                                                           GetCurrentFrameBufferRenderTargetView());

        pushedCommandListCount += RecordAndPushPostPassCommandLists();

        // Yield the time slice until the executor reports all of them as executed.
        while (CommandListExecutor::Get().GetExecutedCommandListCount() < pushedCommandListCount) {
            Sleep(0U);
        }

        PresentCurrentFrameAndBeginNextFrame();
    }

    // Termination was requested: stop the command list executor first,
    // then flush the command queue.
    CommandListExecutor::Get().Terminate();
    FlushCommandQueue();

    return nullptr;
}

std::uint32_t
RenderManager::RecordAndPushPrePassCommandLists() noexcept
{
    ID3D12GraphicsCommandList& commandList = mPrePassCommandListPerFrame.ResetCommandListWithNextCommandAllocator(nullptr);

    D3D12_RESOURCE_BARRIER barriers[4U];
    std::uint32_t barrierCount = 0UL;
    if (ResourceStateManager::GetResourceState(*GetCurrentFrameBuffer()) != D3D12_RESOURCE_STATE_RENDER_TARGET) {
        barriers[barrierCount] = ResourceStateManager::ChangeResourceStateAndGetBarrier(*GetCurrentFrameBuffer(),
                                                                                        D3D12_RESOURCE_STATE_RENDER_TARGET);
        ++barrierCount;
    }

    if (ResourceStateManager::GetResourceState(*mIntermediateColorBuffer1) != D3D12_RESOURCE_STATE_RENDER_TARGET) {
        barriers[barrierCount] = ResourceStateManager::ChangeResourceStateAndGetBarrier(*mIntermediateColorBuffer1,
                                                                                        D3D12_RESOURCE_STATE_RENDER_TARGET);
        ++barrierCount;
    }

    if (ResourceStateManager::GetResourceState(*mIntermediateColorBuffer2) != D3D12_RESOURCE_STATE_RENDER_TARGET) {
        barriers[barrierCount] = ResourceStateManager::ChangeResourceStateAndGetBarrier(*mIntermediateColorBuffer2,
                                                                                        D3D12_RESOURCE_STATE_RENDER_TARGET);
        ++barrierCount;
    }

    if (ResourceStateManager::GetResourceState(*mDepthBuffer) != D3D12_RESOURCE_STATE_DEPTH_WRITE) {
        barriers[barrierCount] = ResourceStateManager::ChangeResourceStateAndGetBarrier(*mDepthBuffer,
                                                                                        D3D12_RESOURCE_STATE_DEPTH_WRITE);
        ++barrierCount;
    }

    if (barrierCount > 0UL) {
        commandList.ResourceBarrier(barrierCount, barriers);
    }

    commandList.ClearRenderTargetView(GetCurrentFrameBufferRenderTargetView(),
                                      Colors::Black,
                                      0U,
                                      nullptr);

    commandList.ClearRenderTargetView(mIntermediateColorBuffer1RenderTargetView,
                                      Colors::Black,
                                      0U,
                                      nullptr);

    commandList.ClearRenderTargetView(mIntermediateColorBuffer2RenderTargetView,
                                      Colors::Black,
                                      0U,
                                      nullptr);

    commandList.ClearDepthStencilView(mDepthBufferRenderTargetView,
                                      D3D12_CLEAR_FLAG_DEPTH,
                                      1.0f,
                                      0U,
                                      0U,
                                      nullptr);

    BRE_CHECK_HR(commandList.Close());
    CommandListExecutor::Get().PushCommandList(commandList);

    return 1U;
}

std::uint32_t
RenderManager::RecordAndPushPostPassCommandLists() noexcept
{
    D3D12_RESOURCE_BARRIER barriers[4U];
    std::uint32_t barrierCount = 0UL;
    if (ResourceStateManager::GetResourceState(*GetCurrentFrameBuffer()) != D3D12_RESOURCE_STATE_PRESENT) {
        barriers[barrierCount] = ResourceStateManager::ChangeResourceStateAndGetBarrier(*GetCurrentFrameBuffer(),
                                                                                        D3D12_RESOURCE_STATE_PRESENT);
        ++barrierCount;
    }

    if (barrierCount > 0UL) {
        ID3D12GraphicsCommandList& commandList = mPostPassCommandListPerFrame.ResetCommandListWithNextCommandAllocator(nullptr);
        commandList.ResourceBarrier(barrierCount, barriers);
        BRE_CHECK_HR(commandList.Close());
        CommandListExecutor::Get().PushCommandList(commandList);

        return 1U;
    }

    return 0U;
}

///
/// @brief Creates the swap chain, gets its frame buffers and creates a render
/// target view for each of them.
///
/// Every frame buffer is registered in the ResourceStateManager with the
/// present state as its initial state.
///
void
RenderManager::CreateFrameBuffersAndRenderTargetViews() noexcept
{
    // Setup render target view
    D3D12_RENDER_TARGET_VIEW_DESC rtvDescriptor = {};
    rtvDescriptor.Format = ApplicationSettings::sFrameBufferRTFormat;
    rtvDescriptor.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;

    // Create swap chain and frame buffers
    BRE_ASSERT(mSwapChain == nullptr);
    CreateSwapChain(DirectXManager::GetWindowHandle(),
                    ApplicationSettings::sFrameBufferFormat,
                    mSwapChain);

    // Create frame buffer render target views
    for (std::uint32_t i = 0U; i < ApplicationSettings::sSwapChainBufferCount; ++i) {
        BRE_CHECK_HR(mSwapChain->GetBuffer(i, IID_PPV_ARGS(&mFrameBuffers[i])));

        RenderTargetDescriptorManager::CreateRenderTargetView(*mFrameBuffers[i],
                                                              rtvDescriptor,
                                                              &mFrameBufferRenderTargetViews[i]);

        ResourceStateManager::AddFullResourceTracking(*mFrameBuffers[i],
                                                      D3D12_RESOURCE_STATE_PRESENT);
    }
}

void
RenderManager::CreateDepthStencilBufferAndView() noexcept
{
    // Create the depth/stencil buffer and view.
    const D3D12_RESOURCE_DESC depthStencilDesc = D3DFactory::GetResourceDescriptor(ApplicationSettings::sWindowWidth,
                                                                                   ApplicationSettings::sWindowHeight,
                                                                                   ApplicationSettings::sDepthStencilFormat,
                                                                                   D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL);

    D3D12_CLEAR_VALUE clearValue = {};
    clearValue.Format = ApplicationSettings::sDepthStencilViewFormat;
    clearValue.DepthStencil.Depth = 1.0f;
    clearValue.DepthStencil.Stencil = 0U;

    const D3D12_HEAP_PROPERTIES heapProperties = D3DFactory::GetHeapProperties();
    mDepthBuffer = &ResourceManager::CreateCommittedResource(heapProperties,
                                                             D3D12_HEAP_FLAG_NONE,
                                                             depthStencilDesc,
                                                             D3D12_RESOURCE_STATE_DEPTH_WRITE,
                                                             &clearValue,
                                                             L"Depth Stencil Buffer",
                                                             ResourceManager::ResourceStateTrackingType::FULL_TRACKING);

    // Create descriptor to mip level 0 of entire resource using the format of the resource.
    D3D12_DEPTH_STENCIL_VIEW_DESC depthStencilViewDesc = {};
    depthStencilViewDesc.Format = ApplicationSettings::sDepthStencilViewFormat;
    depthStencilViewDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D;
    depthStencilViewDesc.Texture2D.MipSlice = 0;
    DepthStencilDescriptorManager::CreateDepthStencilView(*mDepthBuffer, depthStencilViewDesc, &mDepthBufferRenderTargetView);
}

void
RenderManager::CreateIntermediateColorBufferAndViews(const D3D12_RESOURCE_STATES initialState,
                                                     const wchar_t* resourceName,
                                                     ID3D12Resource* &buffer,
                                                     D3D12_CPU_DESCRIPTOR_HANDLE& renderTargetView,
                                                     D3D12_GPU_DESCRIPTOR_HANDLE& shaderResourceView) noexcept
{
    BRE_ASSERT(resourceName != nullptr);

    // Fill resource description
    const D3D12_RESOURCE_DESC resourceDescriptor = D3DFactory::GetResourceDescriptor(ApplicationSettings::sWindowWidth,
                                                                                     ApplicationSettings::sWindowHeight,
                                                                                     ApplicationSettings::sColorBufferFormat,
                                                                                     D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET);

    // Create buffer
    const D3D12_HEAP_PROPERTIES heapProperties = D3DFactory::GetHeapProperties();
    D3D12_CLEAR_VALUE clearValue = { resourceDescriptor.Format, 0.0f, 0.0f, 0.0f, 1.0f };
    buffer = &ResourceManager::CreateCommittedResource(heapProperties,
                                                       D3D12_HEAP_FLAG_NONE,
                                                       resourceDescriptor,
                                                       initialState,
                                                       &clearValue,
                                                       resourceName,
                                                       ResourceManager::ResourceStateTrackingType::FULL_TRACKING);

    // Create render target view
    D3D12_RENDER_TARGET_VIEW_DESC rtvDescriptor{};
    rtvDescriptor.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
    rtvDescriptor.Format = resourceDescriptor.Format;
    RenderTargetDescriptorManager::CreateRenderTargetView(*buffer,
                                                          rtvDescriptor,
                                                          &renderTargetView);

    // Create shader resource view
    D3D12_SHADER_RESOURCE_VIEW_DESC srvDescriptor{};
    srvDescriptor.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
    srvDescriptor.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
    srvDescriptor.Texture2D.MostDetailedMip = 0;
    srvDescriptor.Texture2D.ResourceMinLODClamp = 0.0f;
    srvDescriptor.Format = buffer->GetDesc().Format;
    srvDescriptor.Texture2D.MipLevels = buffer->GetDesc().MipLevels;
    shaderResourceView = CbvSrvUavDescriptorManager::CreateShaderResourceView(*buffer,
                                                                              srvDescriptor);
}

void
RenderManager::FlushCommandQueue() noexcept
{
    ++mCurrentFenceValue;
    CommandListExecutor::Get().SignalFenceAndWaitForCompletion(*mFence,
                                                               mCurrentFenceValue,
                                                               mCurrentFenceValue);
}

///
/// @brief Presents the current frame, stores a new fence value for it and moves
/// to the next queued frame, waiting for the fence value stored in that slot.
///
void
RenderManager::PresentCurrentFrameAndBeginNextFrame() noexcept
{
    BRE_ASSERT(mSwapChain != nullptr);

#ifdef V_SYNC
    static const HANDLE frameLatencyWaitableObj(mSwapChain->GetFrameLatencyWaitableObject());
    WaitForSingleObjectEx(frameLatencyWaitableObj, INFINITE, true);
    BRE_CHECK_HR(mSwapChain->Present(1U, 0U));
#else
    BRE_CHECK_HR(mSwapChain->Present(0U, 0U));
#endif

    // The frame just presented gets a new fence value.
    ++mCurrentFenceValue;
    mFenceValueByQueuedFrameIndex[mCurrentQueuedFrameIndex] = mCurrentFenceValue;

    // Advance to the next queued frame slot (wrapping around). The value stored
    // there belongs to the oldest frame that is still queued.
    const std::uint32_t nextQueuedFrameIndex = (mCurrentQueuedFrameIndex + 1U) % ApplicationSettings::sQueuedFrameCount;
    mCurrentQueuedFrameIndex = nextQueuedFrameIndex;
    const std::uint64_t oldestFence{ mFenceValueByQueuedFrameIndex[nextQueuedFrameIndex] };

    // Signal the new fence value and wait for the oldest one, so that we never
    // record command lists for more frames than the queued frame count.
    // NOTE(review): this relies on SignalFenceAndWaitForCompletion() signaling the
    // second argument and waiting for the third one - confirm in CommandListExecutor.
    CommandListExecutor::Get().SignalFenceAndWaitForCompletion(*mFence,
                                                               mCurrentFenceValue,
                                                               oldestFence);
}
}

To make it easier to understand, we are going to explain each task in detail and show the source code related to it.

Resource Creation

The RenderManager constructor carries out tasks such as creating the swap chain (IDXGISwapChain3), the frame buffers, the depth stencil buffer and the intermediate color buffers, as well as configuring the camera. Next, we show the source code of the different methods involved in these tasks.

// Creates the swap chain, retrieves its back buffers and creates a render
// target view for each of them. Every frame buffer is registered in
// ResourceStateManager with D3D12_RESOURCE_STATE_PRESENT as its state.
void
RenderManager::CreateFrameBuffersAndRenderTargetViews() noexcept
{
    // Setup render target view. Note that the view format
    // (sFrameBufferRTFormat) is a separate setting from the swap chain
    // format (sFrameBufferFormat) used below.
    D3D12_RENDER_TARGET_VIEW_DESC rtvDescriptor = {};
    rtvDescriptor.Format = ApplicationSettings::sFrameBufferRTFormat;
    rtvDescriptor.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;

    // Create swap chain and frame buffers. This method must run only once.
    BRE_ASSERT(mSwapChain == nullptr);
    CreateSwapChain(DirectXManager::GetWindowHandle(),
                    ApplicationSettings::sFrameBufferFormat,
                    mSwapChain);

    // Create frame buffer render target views
    for (std::uint32_t i = 0U; i < ApplicationSettings::sSwapChainBufferCount; ++i) {
        BRE_CHECK_HR(mSwapChain->GetBuffer(i, IID_PPV_ARGS(mFrameBuffers[i].GetAddressOf())));

        RenderTargetDescriptorManager::CreateRenderTargetView(*mFrameBuffers[i].Get(),
                                                              rtvDescriptor,
                                                              &mFrameBufferRenderTargetViews[i]);

        ResourceStateManager::AddResource(*mFrameBuffers[i].Get(), D3D12_RESOURCE_STATE_PRESENT);
    }
}

void
RenderManager::CreateDepthStencilBufferAndView() noexcept
{
    // Create the depth/stencil buffer and view.
    D3D12_RESOURCE_DESC depthStencilDesc = {};
    depthStencilDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
    depthStencilDesc.Alignment = 0U;
    depthStencilDesc.Width = ApplicationSettings::sWindowWidth;
    depthStencilDesc.Height = ApplicationSettings::sWindowHeight;
    depthStencilDesc.DepthOrArraySize = 1U;
    depthStencilDesc.MipLevels = 1U;
    depthStencilDesc.Format = ApplicationSettings::sDepthStencilFormat;
    depthStencilDesc.SampleDesc.Count = 1U;
    depthStencilDesc.SampleDesc.Quality = 0U;
    depthStencilDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
    depthStencilDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL;

    D3D12_CLEAR_VALUE clearValue = {};
    clearValue.Format = ApplicationSettings::sDepthStencilViewFormat;
    clearValue.DepthStencil.Depth = 1.0f;
    clearValue.DepthStencil.Stencil = 0U;

    CD3DX12_HEAP_PROPERTIES heapProps{ D3D12_HEAP_TYPE_DEFAULT };
    mDepthBuffer = &ResourceManager::CreateCommittedResource(heapProps,
                                                             D3D12_HEAP_FLAG_NONE,
                                                             depthStencilDesc,
                                                             D3D12_RESOURCE_STATE_DEPTH_WRITE,
                                                             &clearValue,
                                                             L"Depth Stencil Buffer");

    // Create descriptor to mip level 0 of entire resource using the format of the resource.
    D3D12_DEPTH_STENCIL_VIEW_DESC depthStencilViewDesc = {};
    depthStencilViewDesc.Format = ApplicationSettings::sDepthStencilViewFormat;
    depthStencilViewDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D;
    depthStencilViewDesc.Texture2D.MipSlice = 0;
    DepthStencilDescriptorManager::CreateDepthStencilView(*mDepthBuffer, depthStencilViewDesc, &mDepthBufferRenderTargetView);
}

void
RenderManager::CreateIntermediateColorBufferAndRenderTargetView(const D3D12_RESOURCE_STATES initialState,
                                                                const wchar_t* resourceName,
                                                                Microsoft::WRL::ComPtr<ID3D12Resource>& buffer,
                                                                D3D12_CPU_DESCRIPTOR_HANDLE& renderTargetView) noexcept
{
    BRE_ASSERT(resourceName != nullptr);

    // Fill resource description
    D3D12_RESOURCE_DESC resourceDescriptor = {};
    resourceDescriptor.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
    resourceDescriptor.Alignment = 0U;
    resourceDescriptor.Width = ApplicationSettings::sWindowWidth;
    resourceDescriptor.Height = ApplicationSettings::sWindowHeight;
    resourceDescriptor.DepthOrArraySize = 1U;
    resourceDescriptor.MipLevels = 0U;
    resourceDescriptor.SampleDesc.Count = 1U;
    resourceDescriptor.SampleDesc.Quality = 0U;
    resourceDescriptor.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
    resourceDescriptor.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
    resourceDescriptor.Format = ApplicationSettings::sColorBufferFormat;

    // Create buffer and render target view
    D3D12_RENDER_TARGET_VIEW_DESC rtvDescriptor{};
    rtvDescriptor.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
    rtvDescriptor.Format = resourceDescriptor.Format;
    CD3DX12_HEAP_PROPERTIES heapProperties{ D3D12_HEAP_TYPE_DEFAULT };
    D3D12_CLEAR_VALUE clearValue = { resourceDescriptor.Format, 0.0f, 0.0f, 0.0f, 1.0f };
    ID3D12Resource* resource = &ResourceManager::CreateCommittedResource(heapProperties,
                                                                         D3D12_HEAP_FLAG_NONE,
                                                                         resourceDescriptor,
                                                                         initialState,
                                                                         &clearValue,
                                                                         resourceName);
    buffer = Microsoft::WRL::ComPtr<ID3D12Resource>(resource);

    RenderTargetDescriptorManager::CreateRenderTargetView(*buffer.Get(),
                                                          rtvDescriptor,
                                                          &renderTargetView);
}

Pass initialization and execution

After resource creation, we need to initialize the different passes. Once they are initialized, we spawn the tbb::task, which calls its overridden execute() method. The RenderManager thread finishes its execution once execute() returns. Next, you can see the implementation of the initialization and execution of the passes.

// Initializes every render pass with the buffers and views it works on, and
// sets the fence value of every queued frame to the current fence value.
//
// The cube maps used by the environment light and sky box passes are taken
// from @p scene and must not be null.
void
RenderManager::InitPasses(Scene& scene) noexcept
{
    mGeometryPass.Init(mDepthBufferRenderTargetView);

    // Cube maps provided by the scene.
    ID3D12Resource* const skyBoxCubeMap = scene.GetSkyBoxCubeMap();
    ID3D12Resource* const diffuseIrradianceCubeMap = scene.GetDiffuseIrradianceCubeMap();
    ID3D12Resource* const specularPreConvolvedCubeMap = scene.GetSpecularPreConvolvedCubeMap();
    BRE_ASSERT(skyBoxCubeMap != nullptr);
    BRE_ASSERT(diffuseIrradianceCubeMap != nullptr);
    BRE_ASSERT(specularPreConvolvedCubeMap != nullptr);

    mReflectionPass.Init(*mDepthBuffer);

    // Ambient occlusion reads the normal/roughness geometry buffer and the depth buffer.
    mAmbientOcclusionPass.Init(mGeometryPass.GetGeometryBuffer(GeometryPass::NORMAL_ROUGHNESS),
                               *mDepthBuffer,
                               mGeometryPass.GetGeometryBufferShaderResourceView(GeometryPass::NORMAL_ROUGHNESS),
                               mDepthBufferShaderResourceView);

    // Environment lighting renders into intermediate color buffer 1 and
    // consumes the geometry buffers, the cube maps and the ambient
    // accessibility buffer of the ambient occlusion pass.
    mEnvironmentLightPass.Init(mGeometryPass.GetGeometryBuffer(GeometryPass::BASECOLOR_METALNESS),
                               mGeometryPass.GetGeometryBuffer(GeometryPass::NORMAL_ROUGHNESS),
                               *mDepthBuffer,
                               *diffuseIrradianceCubeMap,
                               *specularPreConvolvedCubeMap,
                               mAmbientOcclusionPass.GetAmbientAccessibilityBuffer(),
                               mIntermediateColorBuffer1RenderTargetView,
                               mGeometryPass.GetGeometryBufferShaderResourceViews(),
                               mAmbientOcclusionPass.GetAmbientAccessibilityShaderResourceView(),
                               mDepthBufferShaderResourceView);

    // The sky box is also rendered into intermediate color buffer 1.
    mSkyBoxPass.Init(*skyBoxCubeMap,
                     *mDepthBuffer,
                     mIntermediateColorBuffer1RenderTargetView,
                     mDepthBufferRenderTargetView);

    // Tone mapping reads intermediate color buffer 1 and writes buffer 2.
    mToneMappingPass.Init(*mIntermediateColorBuffer1,
                          mIntermediateColorBuffer1ShaderResourceView,
                          *mIntermediateColorBuffer2,
                          mIntermediateColorBuffer2RenderTargetView);

    // Post processing reads intermediate color buffer 2.
    mPostProcessPass.Init(*mIntermediateColorBuffer2,
                          mIntermediateColorBuffer2ShaderResourceView);

    // Initialize fence values for all frames to the same number.
    for (auto& queuedFrameFenceValue : mFenceValueByQueuedFrameIndex) {
        queuedFrameFenceValue = mCurrentFenceValue;
    }
}

// Render loop. Each iteration updates the camera and the per-frame constant
// buffer, lets every pass record and push its command lists, waits until the
// CommandListExecutor has executed all of them and then presents the frame.
// The loop ends when mTerminate is set.
tbb::task*
RenderManager::execute()
{
    for (;;) {
        if (mTerminate) {
            break;
        }

        mTimer.Tick();
        UpdateCameraAndFrameCBuffer(mTimer.GetDeltaTimeInSeconds(),
                                    mCamera,
                                    mFrameCBuffer);

        // Start counting executed command lists from zero for this frame.
        CommandListExecutor::Get().ResetExecutedCommandListCount();

        // Number of command lists pushed during this frame.
        std::uint32_t pushedCommandListCount{ RecordAndPushPrePassCommandLists() };

        pushedCommandListCount += mGeometryPass.Execute(mFrameCBuffer);
        pushedCommandListCount += mAmbientOcclusionPass.Execute(mFrameCBuffer);
        pushedCommandListCount += mEnvironmentLightPass.Execute(mFrameCBuffer);
        pushedCommandListCount += mReflectionPass.Execute(mFrameCBuffer);
        pushedCommandListCount += mSkyBoxPass.Execute(mFrameCBuffer);
        pushedCommandListCount += mToneMappingPass.Execute();
        pushedCommandListCount += mPostProcessPass.Execute(*GetCurrentFrameBuffer(),
                                                           GetCurrentFrameBufferRenderTargetView());

        pushedCommandListCount += RecordAndPushPostPassCommandLists();

        // Wait until every command list pushed above has been executed,
        // yielding the rest of the time slice while we poll.
        while (CommandListExecutor::Get().GetExecutedCommandListCount() < pushedCommandListCount) {
            Sleep(0U);
        }

        PresentCurrentFrameAndBeginNextFrame();
    }

    // We were asked to terminate: stop the command list executor and wait
    // until the fence signaled by FlushCommandQueue() completes.
    CommandListExecutor::Get().Terminate();
    FlushCommandQueue();

    return nullptr;
}

std::uint32_t
RenderManager::RecordAndPushPrePassCommandLists() noexcept
{
    ID3D12GraphicsCommandList& commandList = mPrePassCommandListPerFrame.ResetCommandListWithNextCommandAllocator(nullptr);

    D3D12_RESOURCE_BARRIER barriers[4U];
    std::uint32_t barrierCount = 0UL;
    if (ResourceStateManager::GetResourceState(*GetCurrentFrameBuffer()) != D3D12_RESOURCE_STATE_RENDER_TARGET) {
        barriers[barrierCount] = ResourceStateManager::ChangeResourceStateAndGetBarrier(*GetCurrentFrameBuffer(),
                                                                                        D3D12_RESOURCE_STATE_RENDER_TARGET);
        ++barrierCount;
    }

    if (ResourceStateManager::GetResourceState(*mIntermediateColorBuffer1) != D3D12_RESOURCE_STATE_RENDER_TARGET) {
        barriers[barrierCount] = ResourceStateManager::ChangeResourceStateAndGetBarrier(*mIntermediateColorBuffer1,
                                                                                        D3D12_RESOURCE_STATE_RENDER_TARGET);
        ++barrierCount;
    }

    if (ResourceStateManager::GetResourceState(*mIntermediateColorBuffer2) != D3D12_RESOURCE_STATE_RENDER_TARGET) {
        barriers[barrierCount] = ResourceStateManager::ChangeResourceStateAndGetBarrier(*mIntermediateColorBuffer2,
                                                                                        D3D12_RESOURCE_STATE_RENDER_TARGET);
        ++barrierCount;
    }

    if (ResourceStateManager::GetResourceState(*mDepthBuffer) != D3D12_RESOURCE_STATE_DEPTH_WRITE) {
        barriers[barrierCount] = ResourceStateManager::ChangeResourceStateAndGetBarrier(*mDepthBuffer,
                                                                                        D3D12_RESOURCE_STATE_DEPTH_WRITE);
        ++barrierCount;
    }

    if (barrierCount > 0UL) {
        commandList.ResourceBarrier(barrierCount, barriers);
    }

    commandList.ClearRenderTargetView(GetCurrentFrameBufferRenderTargetView(),
                                      Colors::Black,
                                      0U,
                                      nullptr);

    commandList.ClearRenderTargetView(mIntermediateColorBuffer1RenderTargetView,
                                      Colors::Black,
                                      0U,
                                      nullptr);

    commandList.ClearRenderTargetView(mIntermediateColorBuffer2RenderTargetView,
                                      Colors::Black,
                                      0U,
                                      nullptr);

    commandList.ClearDepthStencilView(mDepthBufferRenderTargetView,
                                      D3D12_CLEAR_FLAG_DEPTH,
                                      1.0f,
                                      0U,
                                      0U,
                                      nullptr);

    BRE_CHECK_HR(commandList.Close());
    CommandListExecutor::Get().PushCommandList(commandList);

    return 1U;
}

std::uint32_t
RenderManager::RecordAndPushPostPassCommandLists() noexcept
{
    D3D12_RESOURCE_BARRIER barriers[4U];
    std::uint32_t barrierCount = 0UL;
    if (ResourceStateManager::GetResourceState(*GetCurrentFrameBuffer()) != D3D12_RESOURCE_STATE_PRESENT) {
        barriers[barrierCount] = ResourceStateManager::ChangeResourceStateAndGetBarrier(*GetCurrentFrameBuffer(),
                                                                                        D3D12_RESOURCE_STATE_PRESENT);
        ++barrierCount;
    }

    if (barrierCount > 0UL) {
        ID3D12GraphicsCommandList& commandList = mPostPassCommandListPerFrame.ResetCommandListWithNextCommandAllocator(nullptr);
        commandList.ResourceBarrier(barrierCount, barriers);
        BRE_CHECK_HR(commandList.Close());
        CommandListExecutor::Get().PushCommandList(commandList);

        return 1U;
    }

    return 0U;
}

Camera Update

The camera is updated through the UpdateCameraAndFrameCBuffer method. In addition to updating the camera, it updates the data of the per-frame constant buffer, which is used by the shaders of the different passes. Its implementation is the following:

///
/// @brief Update camera and constant buffer per frame
///
/// The elapsed time is accumulated across calls (in a function-local static) and
/// consumed in fixed steps of ApplicationSettings::sSecondsPerFrame, so a single
/// call may run zero, one or several update steps.
///
/// @param elapsedFrameTime Elapsed frame time, in seconds
/// @param camera Camera to update from keyboard and mouse input
/// @param frameCBuffer Constant buffer per frame to fill with the camera data
///
void UpdateCameraAndFrameCBuffer(const float elapsedFrameTime,
                                 Camera& camera,
                                 FrameCBuffer& frameCBuffer) noexcept
{
    // Value passed to Camera::Walk() / Camera::Strafe() on each fixed step.
    constexpr float translationDelta = 5.0f;
    // Factor applied to the normalized mouse movement before rotating the camera.
    constexpr float rotationDelta = 10.0f;
    // Translation multiplier applied while the left shift key is down.
    constexpr float sCameraMultiplier = 10.0f;

    // State kept between calls: unconsumed time and last seen mouse position.
    static float elapsedFrameTimeAccumulator = 0.0f;
    static std::int32_t lastXY[]{ 0, 0 };

    elapsedFrameTimeAccumulator += elapsedFrameTime;

    while (elapsedFrameTimeAccumulator >= ApplicationSettings::sSecondsPerFrame) {
        // The constant buffer is filled before this step's input is applied,
        // so it holds the camera state produced by the previous step.
        camera.UpdateViewMatrix();

        frameCBuffer.mEyeWorldPosition = camera.GetPosition4f();

        MathUtils::StoreTransposeMatrix(camera.GetViewMatrix(),
                                        frameCBuffer.mViewMatrix);
        MathUtils::StoreInverseTransposeMatrix(camera.GetViewMatrix(),
                                               frameCBuffer.mInverseViewMatrix);

        MathUtils::StoreTransposeMatrix(camera.GetProjectionMatrix(),
                                        frameCBuffer.mProjectionMatrix);
        MathUtils::StoreInverseTransposeMatrix(camera.GetProjectionMatrix(),
                                               frameCBuffer.mInverseProjectionMatrix);

        // Update camera based on keyboard
        const float offset = translationDelta * (Keyboard::Get().IsKeyDown(DIK_LSHIFT) ? sCameraMultiplier : 1.0f);
        if (Keyboard::Get().IsKeyDown(DIK_W)) {
            camera.Walk(offset);
        }
        if (Keyboard::Get().IsKeyDown(DIK_S)) {
            camera.Walk(-offset);
        }
        if (Keyboard::Get().IsKeyDown(DIK_A)) {
            camera.Strafe(-offset);
        }
        if (Keyboard::Get().IsKeyDown(DIK_D)) {
            camera.Strafe(offset);
        }

        // Update camera based on mouse: rotate only while the left button is
        // down, using the movement since the previous step normalized by the
        // window size.
        const std::int32_t x{ Mouse::Get().GetX() };
        const std::int32_t y{ Mouse::Get().GetY() };
        if (Mouse::Get().IsButtonDown(Mouse::MouseButtonsLeft)) {
            const float dx = static_cast<float>(x - lastXY[0]) / ApplicationSettings::sWindowWidth;
            const float dy = static_cast<float>(y - lastXY[1]) / ApplicationSettings::sWindowHeight;

            camera.Pitch(dy * rotationDelta);
            camera.RotateY(dx * rotationDelta);
        }

        // Always remember the position so that pressing the button later
        // does not produce a jump.
        lastXY[0] = x;
        lastXY[1] = y;

        elapsedFrameTimeAccumulator -= ApplicationSettings::sSecondsPerFrame;
    }
}

Frames presentation and synchronization

For performance reasons, after we have executed all the command lists for the current frame, we should not wait until the GPU finishes before beginning the next frame (the GPU would be idle, and we should avoid that). Instead, we should have several queued frames to keep the GPU busy.

I read the following in a NVIDIA link about DirectX12 recommendations: “Don’t forget that there’s a per swap-chain limit of 3 queued frames before DXGI will start to block in Present()”

We do something similar in BRE’s architecture. If N is the number of swap chain buffers, then we have N - 1 queued frames. What happens if the CPU has already sent the command lists for all the queued frames, but the GPU has not finished executing the command lists of the first one? In that case, we must use fences to handle the situation. When all the passes finish their execution, the method PresentCurrentFrameAndBeginNextFrame() is called. Basically, this method presents the current frame and begins the next one. It is possible to reach the limit of computed frames (not yet processed by the GPU or not yet presented), in which case we must wait until one of them becomes “free”. This situation is handled with fences (ID3D12Fence). Next, we can see the implementation of the methods involved in the process described above.

// Render loop. Each iteration updates the camera and the per-frame constant
// buffer, lets every pass record and push its command lists, waits until the
// CommandListExecutor has executed all of them and then presents the frame.
// The loop ends when mTerminate is set.
tbb::task*
RenderManager::execute()
{
    for (;;) {
        if (mTerminate) {
            break;
        }

        mTimer.Tick();
        UpdateCameraAndFrameCBuffer(mTimer.GetDeltaTimeInSeconds(),
                                    mCamera,
                                    mFrameCBuffer);

        // Start counting executed command lists from zero for this frame.
        CommandListExecutor::Get().ResetExecutedCommandListCount();

        // Number of command lists pushed during this frame.
        std::uint32_t pushedCommandListCount{ RecordAndPushPrePassCommandLists() };

        pushedCommandListCount += mGeometryPass.Execute(mFrameCBuffer);
        pushedCommandListCount += mAmbientOcclusionPass.Execute(mFrameCBuffer);
        pushedCommandListCount += mEnvironmentLightPass.Execute(mFrameCBuffer);
        pushedCommandListCount += mReflectionPass.Execute(mFrameCBuffer);
        pushedCommandListCount += mSkyBoxPass.Execute(mFrameCBuffer);
        pushedCommandListCount += mToneMappingPass.Execute();
        pushedCommandListCount += mPostProcessPass.Execute(*GetCurrentFrameBuffer(),
                                                           GetCurrentFrameBufferRenderTargetView());

        pushedCommandListCount += RecordAndPushPostPassCommandLists();

        // Wait until every command list pushed above has been executed,
        // yielding the rest of the time slice while we poll.
        while (CommandListExecutor::Get().GetExecutedCommandListCount() < pushedCommandListCount) {
            Sleep(0U);
        }

        PresentCurrentFrameAndBeginNextFrame();
    }

    // We were asked to terminate: stop the command list executor and wait
    // until the fence signaled by FlushCommandQueue() completes.
    CommandListExecutor::Get().Terminate();
    FlushCommandQueue();

    return nullptr;
}

// Signals the fence with a fresh value and waits until the fence reaches
// that same value.
void
RenderManager::FlushCommandQueue() noexcept
{
    // Use a new fence value so it is distinct from every value signaled before.
    const std::uint64_t flushFenceValue = ++mCurrentFenceValue;

    CommandListExecutor::Get().SignalFenceAndWaitForCompletion(*mFence,
                                                               flushFenceValue,
                                                               flushFenceValue);
}

// Presents the current frame, records a new fence value for it and then waits
// on the fence value stored in the next queued-frame slot before returning.
void
RenderManager::PresentCurrentFrameAndBeginNextFrame() noexcept
{
    BRE_ASSERT(mSwapChain != nullptr);

    // With V_SYNC defined, block on the swap chain's frame latency waitable
    // object before presenting with a sync interval of 1.
#ifdef V_SYNC
    static const HANDLE frameLatencyWaitableObj(mSwapChain->GetFrameLatencyWaitableObject());
    WaitForSingleObjectEx(frameLatencyWaitableObj, INFINITE, true);
    BRE_CHECK_HR(mSwapChain->Present(1U, 0U));
#else
    BRE_CHECK_HR(mSwapChain->Present(0U, 0U));
#endif

    // Tag the frame we have just submitted with a fresh fence value. The fence
    // is signaled on the GPU time line, so it will not reach this value until
    // the GPU has processed every command submitted before the signal.
    ++mCurrentFenceValue;
    mFenceValueByQueuedFrameIndex[mCurrentQueuedFrameIndex] = mCurrentFenceValue;

    // Move on to the next slot of the queued-frames ring; the value stored
    // there belongs to the oldest queued frame.
    mCurrentQueuedFrameIndex = (mCurrentQueuedFrameIndex + 1U) % ApplicationSettings::sQueuedFrameCount;
    const std::uint64_t fenceValueToWaitFor{ mFenceValueByQueuedFrameIndex[mCurrentQueuedFrameIndex] };

    // If command lists were already submitted for every queued frame, we must
    // wait for the oldest one to complete before recording more command lists.
    CommandListExecutor::Get().SignalFenceAndWaitForCompletion(*mFence,
                                                               mCurrentFenceValue,
                                                               fenceValueToWaitFor);
}

CommandListExecutor

The CommandListExecutor is a class that is spawned in its own thread; its only responsibility is to receive recorded command lists from the different passes and to execute them in groups. Its implementation is the following:

CommandListExecutor.h

#pragma once

#include <atomic>
#include <d3d12.h>
#include <tbb/concurrent_queue.h>
#include <tbb/task.h>

#include <Utils\DebugUtils.h>

namespace BRE {

///
/// @brief Class responsible to execute command lists.
///
/// It checks for new command lists and executes them.
/// Steps:
/// - Use CommandListExecutor::Create() to create and spawn an instance.
/// - When you spawn it, execute() method is automatically called. You should fill the queue with
///   command lists through CommandListExecutor::AddCommandList() (or its alias PushCommandList()).
/// - When you want to terminate this task, you should call CommandListExecutor::Terminate()
///
class CommandListExecutor : public tbb::task {
public:
    ///
    /// @brief Create an instance of CommandListExecutor.
    ///
    /// This method must be called once.
    ///
    /// @param maxNumberOfCommandListsToExecute The maximum number of command lists to
    /// execute by ID3D12CommandQueue::ExecuteCommandLists(). This parameter must be
    /// greater than zero.
    ///
    static void Create(const std::uint32_t maxNumberOfCommandListsToExecute) noexcept;

    ///
    /// @brief Get CommandListExecutor.
    ///
    /// CommandListExecutor::Create() must be called before this method.
    ///
    /// @return CommandListExecutor generated with Create() method
    ///
    // Note: the declaration must not be qualified with the class name; a
    // qualified name in a member declaration is ill-formed in standard C++.
    static CommandListExecutor& Get() noexcept;

    ~CommandListExecutor() = default;
    CommandListExecutor(const CommandListExecutor&) = delete;
    const CommandListExecutor& operator=(const CommandListExecutor&) = delete;
    CommandListExecutor(CommandListExecutor&&) = delete;
    CommandListExecutor& operator=(CommandListExecutor&&) = delete;

    ///
    /// @brief Reset the counter of executed command lists.
    ///
    /// A thread safe way to know if CommandListExecutor finished processing and executing all the command lists.
    /// If you are going to execute N command lists, then you should:
    /// - Call ResetExecutedCommandListCount()
    /// - Fill the queue through AddCommandList()
    /// - Check if GetExecutedCommandListCount() is equal to N, to be sure all was executed properly (sent to GPU)
    ///
    __forceinline void ResetExecutedCommandListCount() noexcept
    {
        mExecutedCommandListCount = 0U;
    }

    ///
    /// @brief Get the number of executed command lists.
    ///
    /// @return The number of executed command lists
    ///
    __forceinline std::uint32_t GetExecutedCommandListCount() const noexcept
    {
        return mExecutedCommandListCount.load();
    }

    ///
    /// @brief Add a command list to be executed
    ///
    /// @param commandList The command list to add
    ///
    __forceinline void AddCommandList(ID3D12CommandList& commandList) noexcept
    {
        mCommandListsToExecute.push(&commandList);
    }

    ///
    /// @brief Add a command list to be executed
    ///
    /// Equivalent to AddCommandList(); provided for callers that use this name.
    ///
    /// @param commandList The command list to add
    ///
    __forceinline void PushCommandList(ID3D12CommandList& commandList) noexcept
    {
        AddCommandList(commandList);
    }

    ///
    /// @brief Get the command queue
    ///
    /// @return The command queue
    ///
    __forceinline ID3D12CommandQueue& GetCommandQueue() noexcept
    {
        BRE_ASSERT(mCommandQueue != nullptr);
        return *mCommandQueue;
    }

    ///
    /// @brief Signal a fence and wait until fence completes.
    ///
    /// @param fence The fence to be updated
    /// @param valueToSignal The value used to update @p fence
    /// @param valueToWaitFor The value that @p fence must reach before we
    /// return from this method. Commonly, this will be equal to @p valueToSignal.
    ///
    void SignalFenceAndWaitForCompletion(ID3D12Fence& fence,
                                         const std::uint64_t valueToSignal,
                                         const std::uint64_t valueToWaitFor) noexcept;

    ///
    /// @brief Executes a command list and wait until it completes
    ///
    /// @param commandList The command list to be executed
    ///
    void ExecuteCommandListAndWaitForCompletion(ID3D12CommandList& commandList) noexcept;

    ///
    /// @brief Terminates the generated CommandListExecutor.
    ///
    void Terminate() noexcept;

private:
    ///
    /// @brief CommandListExecutor constructor
    /// @param maxNumCommandLists Maximum number of command lists to execute at once
    ///
    explicit CommandListExecutor(const std::uint32_t maxNumCommandLists);

    // Called when tbb::task is spawned
    tbb::task* execute() final override;

    // The single instance generated by Create() and returned by Get().
    static CommandListExecutor* sExecutor;

    // Termination flag checked by the execute() loop. Atomic because
    // Terminate() is called from a thread other than the one running execute().
    std::atomic<bool> mTerminate{ false };

    // Number of command lists executed since the last reset. Atomic because it
    // is updated by the executor thread while other threads reset and poll it.
    std::atomic<std::uint32_t> mExecutedCommandListCount{ 0U };
    // Number of command lists popped from the queue and waiting to be executed.
    std::atomic<std::uint32_t> mPendingCommandListCount{ 0U };
    // Upper bound of command lists sent in a single ExecuteCommandLists() call.
    std::uint32_t mMaxNumberOfCommandListsToExecute{ 1U };

    ID3D12CommandQueue* mCommandQueue{ nullptr };
    // Command lists added by other threads and not yet popped by execute().
    tbb::concurrent_queue<ID3D12CommandList*> mCommandListsToExecute;
    ID3D12Fence* mFence{ nullptr };
};
}

CommandListExecutor.cpp

#include "CommandListExecutor.h"

#include <memory>

#include <CommandManager\CommandQueueManager.h>
#include <CommandManager\FenceManager.h>

namespace BRE {
// Single executor instance; stays null until Create() assigns it.
CommandListExecutor* CommandListExecutor::sExecutor{ nullptr };

void
CommandListExecutor::Create(const std::uint32_t maxNumCommandLists) noexcept
{
    BRE_ASSERT(sExecutor == nullptr);

    tbb::empty_task* parent{ new (tbb::task::allocate_root()) tbb::empty_task };

    // 1 reference for the parent + 1 reference for the child
    parent->set_ref_count(2);

    sExecutor = new (parent->allocate_child()) CommandListExecutor(maxNumCommandLists);
}

CommandListExecutor&
CommandListExecutor::Get() noexcept
{
    // Create() must have been called before the executor can be retrieved.
    BRE_ASSERT(sExecutor != nullptr);

    CommandListExecutor& executor{ *sExecutor };
    return executor;
}

CommandListExecutor::CommandListExecutor(const std::uint32_t maxNumberOfCommandListsToExecute)
    : mMaxNumberOfCommandListsToExecute(maxNumberOfCommandListsToExecute)
{
    // A batch size of zero would never submit anything.
    BRE_ASSERT(maxNumberOfCommandListsToExecute > 0U);

    // Direct command queue with default flags.
    D3D12_COMMAND_QUEUE_DESC queueDescriptor{};
    queueDescriptor.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
    queueDescriptor.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
    mCommandQueue = &CommandQueueManager::CreateCommandQueue(queueDescriptor);
    BRE_ASSERT(mCommandQueue != nullptr);

    // Fence with an initial value of 0, used by ExecuteCommandListAndWaitForCompletion().
    mFence = &FenceManager::CreateFence(0U, D3D12_FENCE_FLAG_NONE);

    // Everything is initialized: hand this task to the scheduler so execute() can run.
    parent()->spawn(*this);
}

///
/// @brief Task body: batches queued command lists and submits them to the command queue.
///
/// Loops until the termination flag is raised. On each iteration it pops up to
/// mMaxNumberOfCommandListsToExecute command lists and submits them in a single
/// ExecuteCommandLists call; when nothing is queued it yields the time slice.
///
/// @return Always nullptr
///
tbb::task*
CommandListExecutor::execute()
{
    BRE_ASSERT(mMaxNumberOfCommandListsToExecute > 0);

    // Scratch array holding the batch being assembled. Owned by a unique_ptr so
    // it is released on every exit path without a manual delete[].
    const std::unique_ptr<ID3D12CommandList*[]> pendingCommandLists{
        std::make_unique<ID3D12CommandList*[]>(mMaxNumberOfCommandListsToExecute) };

    while (mTerminate == false) {
        // Pop at most mMaxNumberOfCommandListsToExecute from command list queue
        while (mPendingCommandListCount < mMaxNumberOfCommandListsToExecute &&
               mCommandListsToExecute.try_pop(pendingCommandLists[mPendingCommandListCount])) {
            ++mPendingCommandListCount;
        }

        // Execute pending command lists (if any)
        if (mPendingCommandListCount != 0U) {
            mCommandQueue->ExecuteCommandLists(mPendingCommandListCount, pendingCommandLists.get());
            mExecutedCommandListCount += mPendingCommandListCount;
            mPendingCommandListCount = 0U;
        } else {
            // Nothing queued: give up the rest of the time slice instead of spinning.
            Sleep(0U);
        }
    }

    return nullptr;
}

void
CommandListExecutor::SignalFenceAndWaitForCompletion(ID3D12Fence& fence,
                                                     const std::uint64_t valueToSignal,
                                                     const std::uint64_t valueToWaitFor) noexcept
{
    // Sample the fence progress first, then enqueue the signal on the command queue.
    const std::uint64_t valueBeforeSignal{ fence.GetCompletedValue() };
    BRE_CHECK_HR(mCommandQueue->Signal(&fence, valueToSignal));

    // The fence had already reached the requested value: nothing to wait for.
    if (valueBeforeSignal >= valueToWaitFor) {
        return;
    }

    const HANDLE eventHandle{ CreateEventEx(nullptr, nullptr, false, EVENT_ALL_ACCESS) };
    BRE_ASSERT(eventHandle);

    // Ask the fence to fire the event once it reaches valueToWaitFor.
    BRE_CHECK_HR(fence.SetEventOnCompletion(valueToWaitFor, eventHandle));

    // Block until the event is fired, then release the event.
    WaitForSingleObject(eventHandle, INFINITE);
    CloseHandle(eventHandle);
}

void
CommandListExecutor::ExecuteCommandListAndWaitForCompletion(ID3D12CommandList& commandList) noexcept
{
    BRE_ASSERT(mCommandQueue != nullptr);
    BRE_ASSERT(mFence != nullptr);

    // Submit the single command list directly to the command queue.
    ID3D12CommandList* const singleCommandList{ &commandList };
    mCommandQueue->ExecuteCommandLists(1U, &singleCommandList);

    // Signal the next value after the fence's current completed value and wait for it.
    const std::uint64_t nextFenceValue{ mFence->GetCompletedValue() + 1U };
    SignalFenceAndWaitForCompletion(*mFence, nextFenceValue, nextFenceValue);
}

void
CommandListExecutor::Terminate() noexcept
{
    mTerminate = true;
    parent()->wait_for_all();
}
}

Future Work

  • Move the camera update to its own thread. The RenderManager should not be responsible for this.
  • Do more complicated tests with different numbers of queued frames
Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s