diff --git a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp index d27345a13b..44e79593ad 100644 --- a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp +++ b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp @@ -165,11 +165,12 @@ bool GSDevice12::CreateDevice(u32& vendor_id) // Enabling the debug layer will fail if the Graphics Tools feature is not installed. if (enable_debug_layer) { - ComPtr debug12; + ComPtr debug12; hr = D3D12GetDebugInterface(IID_PPV_ARGS(debug12.put())); if (SUCCEEDED(hr)) { debug12->EnableDebugLayer(); + debug12->SetEnableGPUBasedValidation(true); } else { @@ -1224,8 +1225,8 @@ bool GSDevice12::CheckFeatures(const u32& vendor_id) { //const bool isAMD = (vendor_id == 0x1002 || vendor_id == 0x1022); - m_features.texture_barrier = false; - m_features.multidraw_fb_copy = GSConfig.OverrideTextureBarriers != 0; + m_features.texture_barrier = GSConfig.OverrideTextureBarriers != 0; + m_features.multidraw_fb_copy = false; m_features.broken_point_sampler = false; m_features.primitive_id = true; m_features.prefer_new_textures = true; @@ -3207,7 +3208,7 @@ void GSDevice12::SetStencilRef(u8 ref) m_dirty_flags |= DIRTY_FLAG_STENCIL_REF; } -void GSDevice12::PSSetShaderResource(int i, GSTexture* sr, bool check_state) +void GSDevice12::PSSetShaderResource(int i, GSTexture* sr, bool check_state, bool feedback) { D3D12DescriptorHandle handle; if (sr) @@ -3225,7 +3226,7 @@ void GSDevice12::PSSetShaderResource(int i, GSTexture* sr, bool check_state) dtex->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); } dtex->SetUseFenceCounter(GetCurrentFenceValue()); - handle = dtex->GetSRVDescriptor(); + handle = feedback ? dtex->GetFBLDescriptor() : dtex->GetSRVDescriptor(); } else { @@ -3312,7 +3313,7 @@ void GSDevice12::UnbindTexture(GSTexture12* tex) { for (u32 i = 0; i < NUM_TOTAL_TFX_TEXTURES; i++) { - if (m_tfx_textures[i] == tex->GetSRVDescriptor()) + if (m_tfx_textures[i] == tex->GetSRVDescriptor() || m_tfx_textures[i] == tex->GetFBLDescriptor()) { m_tfx_textures[i] = m_null_texture->GetSRVDescriptor(); m_dirty_flags |= DIRTY_FLAG_TFX_TEXTURES; @@ -3826,12 +3827,24 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config) // TODO: Backport from vk. if (stencil_DATE_One) + { config.ps.date = 0; + config.alpha_second_pass.ps.date = 0; + if (!config.ps.IsFeedbackLoop()) + { + config.require_one_barrier = false; + config.require_full_barrier = false; + } + if (!config.alpha_second_pass.ps.IsFeedbackLoop()) + { + config.alpha_second_pass.require_one_barrier = false; + config.alpha_second_pass.require_full_barrier = false; + } + } GSTexture12* colclip_rt = static_cast(g_gs_device->GetColorClipTexture()); GSTexture12* draw_rt = static_cast(config.rt); GSTexture12* draw_ds = static_cast(config.ds); - GSTexture12* draw_rt_clone = nullptr; // Align the render area to 128x128, hopefully avoiding render pass restarts for small render area changes (e.g. Ratchet and Clank). const GSVector2i rtsize(config.rt ? config.rt->GetSize() : config.ds->GetSize()); @@ -3955,7 +3968,7 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config) } // we're not drawing to the RT, so we can use it as a source - if (config.require_one_barrier && !m_features.multidraw_fb_copy) + if (config.require_one_barrier && !m_features.texture_barrier) PSSetShaderResource(2, draw_rt, true); } @@ -3985,14 +3998,7 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config) m_pipeline_selector.ds = true; } - if (draw_rt && (config.require_one_barrier || (config.require_full_barrier && m_features.multidraw_fb_copy) || (config.tex && config.tex == config.rt))) - { - // Requires a copy of the RT. - // Used as "bind rt" flag when texture barrier is unsupported for tex is fb. - draw_rt_clone = static_cast(CreateTexture(rtsize.x, rtsize.y, 1, draw_rt->GetFormat(), true)); - if (!draw_rt_clone) - Console.Warning("D3D12: Failed to allocate temp texture for RT copy."); - } + const bool feedback = draw_rt && (config.require_one_barrier || (config.require_full_barrier && m_features.texture_barrier) || (config.tex && config.tex == config.rt)); OMSetRenderTargets(draw_rt, draw_ds, config.scissor); @@ -4011,8 +4017,8 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config) draw_ds ? D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE : D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_NO_ACCESS, stencil_DATE ? D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE : D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_NO_ACCESS, - stencil_DATE ? (draw_rt_clone ? D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE : - D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_DISCARD) : + stencil_DATE ? (feedback ? D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE : + D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_DISCARD) : D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_NO_ACCESS, clear_color, draw_ds ? draw_ds->GetClearDepth() : 0.0f, 1); } @@ -4040,7 +4046,7 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config) UploadHWDrawVerticesAndIndices(config); // now we can do the actual draw - SendHWDraw(pipe, config, draw_rt_clone, draw_rt, config.require_one_barrier, config.require_full_barrier, false); + SendHWDraw(pipe, config, draw_rt, feedback, config.require_one_barrier, config.require_full_barrier); // blend second pass if (config.blend_multi_pass.enable) @@ -4070,12 +4076,9 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config) pipe.cms = config.alpha_second_pass.colormask; pipe.dss = config.alpha_second_pass.depth; pipe.bs = config.blend; - SendHWDraw(pipe, config, draw_rt_clone, draw_rt, config.alpha_second_pass.require_one_barrier, config.alpha_second_pass.require_full_barrier, true); + SendHWDraw(pipe, config, draw_rt, feedback, config.alpha_second_pass.require_one_barrier, config.alpha_second_pass.require_full_barrier); } - if (draw_rt_clone) - Recycle(draw_rt_clone); - if (date_image) Recycle(date_image); @@ -4113,43 +4116,39 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config) } } -void GSDevice12::SendHWDraw(const PipelineSelector& pipe, const GSHWDrawConfig& config, GSTexture12* draw_rt_clone, GSTexture12* draw_rt, const bool one_barrier, const bool full_barrier, const bool skip_first_barrier) +void GSDevice12::SendHWDraw(const PipelineSelector& pipe, const GSHWDrawConfig& config, GSTexture12* draw_rt, const bool feedback, const bool one_barrier, const bool full_barrier) { - if (draw_rt_clone) + if (feedback) { - #ifdef PCSX2_DEVBUILD if ((one_barrier || full_barrier) && !config.ps.IsFeedbackLoop()) [[unlikely]] - Console.Warning("D3D12: Possible unnecessary copy detected."); + Console.Warning("D3D12: Possible unnecessary barrier detected."); #endif - auto CopyAndBind = [&](GSVector4i drawarea) { - EndRenderPass(); + if (one_barrier || full_barrier) + PSSetShaderResource(2, draw_rt, false, true); + if (config.tex && config.tex == config.rt) + PSSetShaderResource(0, draw_rt, false, true); - CopyRect(draw_rt, draw_rt_clone, drawarea, drawarea.left, drawarea.top); - draw_rt->TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET); - - if (one_barrier || full_barrier) - PSSetShaderResource(2, draw_rt_clone, true); - if (config.tex && config.tex == config.rt) - PSSetShaderResource(0, draw_rt_clone, true); - }; - - if (m_features.multidraw_fb_copy && full_barrier) + if (full_barrier) { + pxAssert(config.drawlist && !config.drawlist->empty()); const u32 draw_list_size = static_cast(config.drawlist->size()); const u32 indices_per_prim = config.indices_per_prim; - pxAssert(config.drawlist && !config.drawlist->empty()); - pxAssert(config.drawlist_bbox && static_cast(config.drawlist_bbox->size()) == draw_list_size); + GL_PUSH("Split the draw"); + g_perfmon.Put(GSPerfMon::Barriers, draw_list_size); for (u32 n = 0, p = 0; n < draw_list_size; n++) { const u32 count = (*config.drawlist)[n] * indices_per_prim; - GSVector4i bbox = (*config.drawlist_bbox)[n].rintersect(config.drawarea); + EndRenderPass(); + // Specify null for the after resource as both resources are used after the barrier. + // While this may also be true before the barrier, we only write using the main resource. + D3D12_RESOURCE_BARRIER barrier = {D3D12_RESOURCE_BARRIER_TYPE_ALIASING, D3D12_RESOURCE_BARRIER_FLAG_NONE}; + barrier.Aliasing = {draw_rt->GetResource(), nullptr}; + GetCommandList()->ResourceBarrier(1, &barrier); - // Copy only the part needed by the draw. - CopyAndBind(bbox); if (BindDrawPipeline(pipe)) DrawIndexedPrimitive(p, count); p += count; @@ -4158,10 +4157,16 @@ void GSDevice12::SendHWDraw(const PipelineSelector& pipe, const GSHWDrawConfig& return; } + if (one_barrier) + { + g_perfmon.Put(GSPerfMon::Barriers, 1); - // Optimization: For alpha second pass we can reuse the copy snapshot from the first pass. - if (!skip_first_barrier) - CopyAndBind(config.drawarea); + EndRenderPass(); + // Specify null for the after resource as both resources are used after the barrier. + D3D12_RESOURCE_BARRIER barrier = {D3D12_RESOURCE_BARRIER_TYPE_ALIASING, D3D12_RESOURCE_BARRIER_FLAG_NONE}; + barrier.Aliasing = {draw_rt->GetResource(), nullptr}; + GetCommandList()->ResourceBarrier(1, &barrier); + } } if (BindDrawPipeline(pipe)) diff --git a/pcsx2/GS/Renderers/DX12/GSDevice12.h b/pcsx2/GS/Renderers/DX12/GSDevice12.h index bd5af07dad..ccf6a45014 100644 --- a/pcsx2/GS/Renderers/DX12/GSDevice12.h +++ b/pcsx2/GS/Renderers/DX12/GSDevice12.h @@ -456,7 +456,7 @@ public: void IASetVertexBuffer(const void* vertex, size_t stride, size_t count); void IASetIndexBuffer(const void* index, size_t count); - void PSSetShaderResource(int i, GSTexture* sr, bool check_state); + void PSSetShaderResource(int i, GSTexture* sr, bool check_state, bool feedback = false); void PSSetSampler(GSHWDrawConfig::SamplerSelector sel); void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i& scissor); @@ -466,7 +466,7 @@ public: bool BindDrawPipeline(const PipelineSelector& p); void RenderHW(GSHWDrawConfig& config) override; - void SendHWDraw(const PipelineSelector& pipe, const GSHWDrawConfig& config, GSTexture12* draw_rt_clone, GSTexture12* draw_rt, const bool one_barrier, const bool full_barrier, const bool skip_first_barrier); + void SendHWDraw(const PipelineSelector& pipe, const GSHWDrawConfig& config, GSTexture12* draw_rt, const bool feedback, const bool one_barrier, const bool full_barrier); void UpdateHWPipelineSelector(GSHWDrawConfig& config); void UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config); diff --git a/pcsx2/GS/Renderers/DX12/GSTexture12.cpp b/pcsx2/GS/Renderers/DX12/GSTexture12.cpp index e21f749254..00a131bd80 100644 --- a/pcsx2/GS/Renderers/DX12/GSTexture12.cpp +++ b/pcsx2/GS/Renderers/DX12/GSTexture12.cpp @@ -15,14 +15,17 @@ #include "D3D12MemAlloc.h" GSTexture12::GSTexture12(Type type, Format format, int width, int height, int levels, DXGI_FORMAT dxgi_format, - wil::com_ptr_nothrow resource, wil::com_ptr_nothrow allocation, - const D3D12DescriptorHandle& srv_descriptor, const D3D12DescriptorHandle& write_descriptor, - const D3D12DescriptorHandle& uav_descriptor, WriteDescriptorType wdtype, D3D12_RESOURCE_STATES resource_state) + wil::com_ptr_nothrow resource, wil::com_ptr_nothrow resource_fbl, + wil::com_ptr_nothrow allocation, const D3D12DescriptorHandle& srv_descriptor, + const D3D12DescriptorHandle& write_descriptor, const D3D12DescriptorHandle& uav_descriptor, + const D3D12DescriptorHandle& fbl_descriptor, WriteDescriptorType wdtype, D3D12_RESOURCE_STATES resource_state) : m_resource(std::move(resource)) + , m_resource_fbl(std::move(resource_fbl)) , m_allocation(std::move(allocation)) , m_srv_descriptor(srv_descriptor) , m_write_descriptor(write_descriptor) , m_uav_descriptor(uav_descriptor) + , m_fbl_descriptor(fbl_descriptor) , m_write_descriptor_type(wdtype) , m_dxgi_format(dxgi_format) , m_resource_state(resource_state) @@ -64,8 +67,13 @@ void GSTexture12::Destroy(bool defer) if (m_uav_descriptor) dev->DeferDescriptorDestruction(dev->GetDescriptorHeapManager(), &m_uav_descriptor); + if (m_fbl_descriptor) + dev->DeferDescriptorDestruction(dev->GetDescriptorHeapManager(), &m_fbl_descriptor); + dev->DeferResourceDestruction(m_allocation.get(), m_resource.get()); + dev->DeferResourceDestruction(m_allocation.get(), m_resource_fbl.get()); m_resource.reset(); + m_resource_fbl.reset(); m_allocation.reset(); } else @@ -88,7 +96,11 @@ void GSTexture12::Destroy(bool defer) if (m_uav_descriptor) dev->GetDescriptorHeapManager().Free(&m_uav_descriptor); + if (m_fbl_descriptor) + dev->GetDescriptorHeapManager().Free(&m_fbl_descriptor); + m_resource.reset(); + m_resource_fbl.reset(); m_allocation.reset(); } @@ -135,7 +147,9 @@ std::unique_ptr GSTexture12::Create(Type type, Format format, int w // RT's tend to be larger, so we'll keep them committed for speed. pxAssert(levels == 1); allocationDesc.Flags |= D3D12MA::ALLOCATION_FLAG_COMMITTED; - desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; + allocationDesc.ExtraHeapFlags = D3D12_HEAP_FLAG_DENY_BUFFERS | D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES; + desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS; + desc.Layout = D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE; optimized_clear_value.Format = rtv_format; state = D3D12_RESOURCE_STATE_RENDER_TARGET; } @@ -167,20 +181,63 @@ std::unique_ptr GSTexture12::Create(Type type, Format format, int w desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; wil::com_ptr_nothrow resource; + wil::com_ptr_nothrow resource_fbl; wil::com_ptr_nothrow allocation; - HRESULT hr = dev->GetAllocator()->CreateResource(&allocationDesc, &desc, state, - (type == Type::RenderTarget || type == Type::DepthStencil) ? &optimized_clear_value : nullptr, allocation.put(), - IID_PPV_ARGS(resource.put())); - if (FAILED(hr)) - { - // OOM isn't fatal. - if (hr != E_OUTOFMEMORY) - Console.Error("Create texture failed: 0x%08X", hr); - return {}; + if (type == Type::RenderTarget) + { + const D3D12_RESOURCE_ALLOCATION_INFO allocInfo = dev->GetDevice()->GetResourceAllocationInfo(0, 1, &desc); + + HRESULT hr = dev->GetAllocator()->AllocateMemory(&allocationDesc, &allocInfo, allocation.put()); + if (FAILED(hr)) + { + // OOM isn't fatal. + if (hr != E_OUTOFMEMORY) + Console.Error("Allocate texture memory failed: 0x%08X", hr); + + return {}; + } + + hr = dev->GetAllocator()->CreateAliasingResource(allocation.get(), 0, &desc, state, + (type == Type::RenderTarget || type == Type::DepthStencil) ? &optimized_clear_value : nullptr, + IID_PPV_ARGS(resource.put())); + if (FAILED(hr)) + { + // OOM isn't fatal. + if (hr != E_OUTOFMEMORY) + Console.Error("Create texture resource 1 failed: 0x%08X", hr); + + return {}; + } + + hr = dev->GetAllocator()->CreateAliasingResource(allocation.get(), 0, &desc, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, + (type == Type::RenderTarget || type == Type::DepthStencil) ? &optimized_clear_value : nullptr, + IID_PPV_ARGS(resource_fbl.put())); + if (FAILED(hr)) + { + // OOM isn't fatal. + if (hr != E_OUTOFMEMORY) + Console.Error("Create texture resource 2 failed: 0x%08X", hr); + + return {}; + } + } + else + { + HRESULT hr = dev->GetAllocator()->CreateResource(&allocationDesc, &desc, state, + (type == Type::RenderTarget || type == Type::DepthStencil) ? &optimized_clear_value : nullptr, allocation.put(), + IID_PPV_ARGS(resource.put())); + if (FAILED(hr)) + { + // OOM isn't fatal. + if (hr != E_OUTOFMEMORY) + Console.Error("Create texture failed: 0x%08X", hr); + + return {}; + } } - D3D12DescriptorHandle srv_descriptor, write_descriptor, uav_descriptor; + D3D12DescriptorHandle srv_descriptor, write_descriptor, uav_descriptor, fbl_descriptor; WriteDescriptorType write_descriptor_type = WriteDescriptorType::None; if (srv_format != DXGI_FORMAT_UNKNOWN) { @@ -223,9 +280,20 @@ std::unique_ptr GSTexture12::Create(Type type, Format format, int w return {}; } + if (resource_fbl) + { + if (!CreateSRVDescriptor(resource_fbl.get(), levels, srv_format, &fbl_descriptor)) + { + dev->GetDescriptorHeapManager().Free(&uav_descriptor); + dev->GetDescriptorHeapManager().Free(&write_descriptor); + dev->GetDescriptorHeapManager().Free(&srv_descriptor); + return {}; + } + } + return std::unique_ptr( - new GSTexture12(type, format, width, height, levels, dxgi_format, std::move(resource), std::move(allocation), - srv_descriptor, write_descriptor, uav_descriptor, write_descriptor_type, state)); + new GSTexture12(type, format, width, height, levels, dxgi_format, std::move(resource), std::move(resource_fbl), std::move(allocation), + srv_descriptor, write_descriptor, uav_descriptor, fbl_descriptor, write_descriptor_type, state)); } std::unique_ptr GSTexture12::Adopt(wil::com_ptr_nothrow resource, Type type, Format format, @@ -272,8 +340,8 @@ std::unique_ptr GSTexture12::Adopt(wil::com_ptr_nothrow(new GSTexture12(type, format, static_cast(desc.Width), desc.Height, - desc.MipLevels, desc.Format, std::move(resource), {}, srv_descriptor, write_descriptor, uav_descriptor, - write_descriptor_type, resource_state)); + desc.MipLevels, desc.Format, std::move(resource), {}, {}, srv_descriptor, write_descriptor, uav_descriptor, + {}, write_descriptor_type, resource_state)); } bool GSTexture12::CreateSRVDescriptor( diff --git a/pcsx2/GS/Renderers/DX12/GSTexture12.h b/pcsx2/GS/Renderers/DX12/GSTexture12.h index 49c82d034f..7872f2fc1d 100644 --- a/pcsx2/GS/Renderers/DX12/GSTexture12.h +++ b/pcsx2/GS/Renderers/DX12/GSTexture12.h @@ -31,9 +31,11 @@ public: __fi const D3D12DescriptorHandle& GetSRVDescriptor() const { return m_srv_descriptor; } __fi const D3D12DescriptorHandle& GetWriteDescriptor() const { return m_write_descriptor; } __fi const D3D12DescriptorHandle& GetUAVDescriptor() const { return m_uav_descriptor; } + __fi const D3D12DescriptorHandle& GetFBLDescriptor() const { return m_fbl_descriptor; } __fi D3D12_RESOURCE_STATES GetResourceState() const { return m_resource_state; } __fi DXGI_FORMAT GetDXGIFormat() const { return m_dxgi_format; } __fi ID3D12Resource* GetResource() const { return m_resource.get(); } + __fi ID3D12Resource* GetFBLResource() const { return m_resource_fbl.get(); } void* GetNativeHandle() const override; @@ -68,9 +70,10 @@ private: }; GSTexture12(Type type, Format format, int width, int height, int levels, DXGI_FORMAT dxgi_format, - wil::com_ptr_nothrow resource, wil::com_ptr_nothrow allocation, - const D3D12DescriptorHandle& srv_descriptor, const D3D12DescriptorHandle& write_descriptor, - const D3D12DescriptorHandle& uav_descriptor, WriteDescriptorType wdtype, D3D12_RESOURCE_STATES resource_state); + wil::com_ptr_nothrow resource, wil::com_ptr_nothrow resource_fbl, + wil::com_ptr_nothrow allocation, const D3D12DescriptorHandle& srv_descriptor, + const D3D12DescriptorHandle& write_descriptor, const D3D12DescriptorHandle& uav_descriptor, + const D3D12DescriptorHandle& fbl_descriptor, WriteDescriptorType wdtype, D3D12_RESOURCE_STATES resource_state); static bool CreateSRVDescriptor( ID3D12Resource* resource, u32 levels, DXGI_FORMAT format, D3D12DescriptorHandle* dh); @@ -83,11 +86,13 @@ private: void CopyTextureDataForUpload(void* dst, const void* src, u32 pitch, u32 upload_pitch, u32 height) const; wil::com_ptr_nothrow m_resource; + wil::com_ptr_nothrow m_resource_fbl; wil::com_ptr_nothrow m_allocation; D3D12DescriptorHandle m_srv_descriptor = {}; D3D12DescriptorHandle m_write_descriptor = {}; D3D12DescriptorHandle m_uav_descriptor = {}; + D3D12DescriptorHandle m_fbl_descriptor = {}; WriteDescriptorType m_write_descriptor_type = WriteDescriptorType::None; DXGI_FORMAT m_dxgi_format = DXGI_FORMAT_UNKNOWN;