This commit is contained in:
TheLastRar 2025-12-15 21:56:48 -05:00 committed by GitHub
commit ab94cb0768
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 148 additions and 70 deletions

View File

@ -165,11 +165,12 @@ bool GSDevice12::CreateDevice(u32& vendor_id)
// Enabling the debug layer will fail if the Graphics Tools feature is not installed.
if (enable_debug_layer)
{
ComPtr<ID3D12Debug> debug12;
ComPtr<ID3D12Debug1> debug12;
hr = D3D12GetDebugInterface(IID_PPV_ARGS(debug12.put()));
if (SUCCEEDED(hr))
{
debug12->EnableDebugLayer();
debug12->SetEnableGPUBasedValidation(true);
}
else
{
@ -1224,8 +1225,8 @@ bool GSDevice12::CheckFeatures(const u32& vendor_id)
{
//const bool isAMD = (vendor_id == 0x1002 || vendor_id == 0x1022);
m_features.texture_barrier = false;
m_features.multidraw_fb_copy = GSConfig.OverrideTextureBarriers != 0;
m_features.texture_barrier = GSConfig.OverrideTextureBarriers != 0;
m_features.multidraw_fb_copy = false;
m_features.broken_point_sampler = false;
m_features.primitive_id = true;
m_features.prefer_new_textures = true;
@ -3207,7 +3208,7 @@ void GSDevice12::SetStencilRef(u8 ref)
m_dirty_flags |= DIRTY_FLAG_STENCIL_REF;
}
void GSDevice12::PSSetShaderResource(int i, GSTexture* sr, bool check_state)
void GSDevice12::PSSetShaderResource(int i, GSTexture* sr, bool check_state, bool feedback)
{
D3D12DescriptorHandle handle;
if (sr)
@ -3225,7 +3226,7 @@ void GSDevice12::PSSetShaderResource(int i, GSTexture* sr, bool check_state)
dtex->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
}
dtex->SetUseFenceCounter(GetCurrentFenceValue());
handle = dtex->GetSRVDescriptor();
handle = feedback ? dtex->GetFBLDescriptor() : dtex->GetSRVDescriptor();
}
else
{
@ -3312,7 +3313,7 @@ void GSDevice12::UnbindTexture(GSTexture12* tex)
{
for (u32 i = 0; i < NUM_TOTAL_TFX_TEXTURES; i++)
{
if (m_tfx_textures[i] == tex->GetSRVDescriptor())
if (m_tfx_textures[i] == tex->GetSRVDescriptor() || m_tfx_textures[i] == tex->GetFBLDescriptor())
{
m_tfx_textures[i] = m_null_texture->GetSRVDescriptor();
m_dirty_flags |= DIRTY_FLAG_TFX_TEXTURES;
@ -3826,12 +3827,24 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
// TODO: Backport from vk.
if (stencil_DATE_One)
{
config.ps.date = 0;
config.alpha_second_pass.ps.date = 0;
if (!config.ps.IsFeedbackLoop())
{
config.require_one_barrier = false;
config.require_full_barrier = false;
}
if (!config.alpha_second_pass.ps.IsFeedbackLoop())
{
config.alpha_second_pass.require_one_barrier = false;
config.alpha_second_pass.require_full_barrier = false;
}
}
GSTexture12* colclip_rt = static_cast<GSTexture12*>(g_gs_device->GetColorClipTexture());
GSTexture12* draw_rt = static_cast<GSTexture12*>(config.rt);
GSTexture12* draw_ds = static_cast<GSTexture12*>(config.ds);
GSTexture12* draw_rt_clone = nullptr;
// Align the render area to 128x128, hopefully avoiding render pass restarts for small render area changes (e.g. Ratchet and Clank).
const GSVector2i rtsize(config.rt ? config.rt->GetSize() : config.ds->GetSize());
@ -3955,7 +3968,7 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
}
// we're not drawing to the RT, so we can use it as a source
if (config.require_one_barrier && !m_features.multidraw_fb_copy)
if (config.require_one_barrier && !m_features.texture_barrier)
PSSetShaderResource(2, draw_rt, true);
}
@ -3985,14 +3998,7 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
m_pipeline_selector.ds = true;
}
if (draw_rt && (config.require_one_barrier || (config.require_full_barrier && m_features.multidraw_fb_copy) || (config.tex && config.tex == config.rt)))
{
// Requires a copy of the RT.
// Used as "bind rt" flag when texture barrier is unsupported for tex is fb.
draw_rt_clone = static_cast<GSTexture12*>(CreateTexture(rtsize.x, rtsize.y, 1, draw_rt->GetFormat(), true));
if (!draw_rt_clone)
Console.Warning("D3D12: Failed to allocate temp texture for RT copy.");
}
const bool feedback = draw_rt && (config.require_one_barrier || (config.require_full_barrier && m_features.texture_barrier) || (config.tex && config.tex == config.rt));
OMSetRenderTargets(draw_rt, draw_ds, config.scissor);
@ -4011,8 +4017,8 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
draw_ds ? D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE : D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_NO_ACCESS,
stencil_DATE ? D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE :
D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_NO_ACCESS,
stencil_DATE ? (draw_rt_clone ? D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE :
D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_DISCARD) :
stencil_DATE ? (feedback ? D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE :
D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_DISCARD) :
D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_NO_ACCESS,
clear_color, draw_ds ? draw_ds->GetClearDepth() : 0.0f, 1);
}
@ -4040,7 +4046,7 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
UploadHWDrawVerticesAndIndices(config);
// now we can do the actual draw
SendHWDraw(pipe, config, draw_rt_clone, draw_rt, config.require_one_barrier, config.require_full_barrier, false);
SendHWDraw(pipe, config, draw_rt, feedback, config.require_one_barrier, config.require_full_barrier);
// blend second pass
if (config.blend_multi_pass.enable)
@ -4070,12 +4076,9 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
pipe.cms = config.alpha_second_pass.colormask;
pipe.dss = config.alpha_second_pass.depth;
pipe.bs = config.blend;
SendHWDraw(pipe, config, draw_rt_clone, draw_rt, config.alpha_second_pass.require_one_barrier, config.alpha_second_pass.require_full_barrier, true);
SendHWDraw(pipe, config, draw_rt, feedback, config.alpha_second_pass.require_one_barrier, config.alpha_second_pass.require_full_barrier);
}
if (draw_rt_clone)
Recycle(draw_rt_clone);
if (date_image)
Recycle(date_image);
@ -4113,43 +4116,39 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
}
}
void GSDevice12::SendHWDraw(const PipelineSelector& pipe, const GSHWDrawConfig& config, GSTexture12* draw_rt_clone, GSTexture12* draw_rt, const bool one_barrier, const bool full_barrier, const bool skip_first_barrier)
void GSDevice12::SendHWDraw(const PipelineSelector& pipe, const GSHWDrawConfig& config, GSTexture12* draw_rt, const bool feedback, const bool one_barrier, const bool full_barrier)
{
if (draw_rt_clone)
if (feedback)
{
#ifdef PCSX2_DEVBUILD
if ((one_barrier || full_barrier) && !config.ps.IsFeedbackLoop()) [[unlikely]]
Console.Warning("D3D12: Possible unnecessary copy detected.");
Console.Warning("D3D12: Possible unnecessary barrier detected.");
#endif
auto CopyAndBind = [&](GSVector4i drawarea) {
EndRenderPass();
if (one_barrier || full_barrier)
PSSetShaderResource(2, draw_rt, false, true);
if (config.tex && config.tex == config.rt)
PSSetShaderResource(0, draw_rt, false, true);
CopyRect(draw_rt, draw_rt_clone, drawarea, drawarea.left, drawarea.top);
draw_rt->TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET);
if (one_barrier || full_barrier)
PSSetShaderResource(2, draw_rt_clone, true);
if (config.tex && config.tex == config.rt)
PSSetShaderResource(0, draw_rt_clone, true);
};
if (m_features.multidraw_fb_copy && full_barrier)
if (full_barrier)
{
pxAssert(config.drawlist && !config.drawlist->empty());
const u32 draw_list_size = static_cast<u32>(config.drawlist->size());
const u32 indices_per_prim = config.indices_per_prim;
pxAssert(config.drawlist && !config.drawlist->empty());
pxAssert(config.drawlist_bbox && static_cast<u32>(config.drawlist_bbox->size()) == draw_list_size);
GL_PUSH("Split the draw");
g_perfmon.Put(GSPerfMon::Barriers, draw_list_size);
for (u32 n = 0, p = 0; n < draw_list_size; n++)
{
const u32 count = (*config.drawlist)[n] * indices_per_prim;
GSVector4i bbox = (*config.drawlist_bbox)[n].rintersect(config.drawarea);
EndRenderPass();
// Specify null for the after resource as both resources are used after the barrier.
// While this may also be true before the barrier, we only write using the main resource.
D3D12_RESOURCE_BARRIER barrier = {D3D12_RESOURCE_BARRIER_TYPE_ALIASING, D3D12_RESOURCE_BARRIER_FLAG_NONE};
barrier.Aliasing = {draw_rt->GetResource(), nullptr};
GetCommandList()->ResourceBarrier(1, &barrier);
// Copy only the part needed by the draw.
CopyAndBind(bbox);
if (BindDrawPipeline(pipe))
DrawIndexedPrimitive(p, count);
p += count;
@ -4158,10 +4157,16 @@ void GSDevice12::SendHWDraw(const PipelineSelector& pipe, const GSHWDrawConfig&
return;
}
if (one_barrier)
{
g_perfmon.Put(GSPerfMon::Barriers, 1);
// Optimization: For alpha second pass we can reuse the copy snapshot from the first pass.
if (!skip_first_barrier)
CopyAndBind(config.drawarea);
EndRenderPass();
// Specify null for the after resource as both resources are used after the barrier.
D3D12_RESOURCE_BARRIER barrier = {D3D12_RESOURCE_BARRIER_TYPE_ALIASING, D3D12_RESOURCE_BARRIER_FLAG_NONE};
barrier.Aliasing = {draw_rt->GetResource(), nullptr};
GetCommandList()->ResourceBarrier(1, &barrier);
}
}
if (BindDrawPipeline(pipe))

View File

@ -456,7 +456,7 @@ public:
void IASetVertexBuffer(const void* vertex, size_t stride, size_t count);
void IASetIndexBuffer(const void* index, size_t count);
void PSSetShaderResource(int i, GSTexture* sr, bool check_state);
void PSSetShaderResource(int i, GSTexture* sr, bool check_state, bool feedback = false);
void PSSetSampler(GSHWDrawConfig::SamplerSelector sel);
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i& scissor);
@ -466,7 +466,7 @@ public:
bool BindDrawPipeline(const PipelineSelector& p);
void RenderHW(GSHWDrawConfig& config) override;
void SendHWDraw(const PipelineSelector& pipe, const GSHWDrawConfig& config, GSTexture12* draw_rt_clone, GSTexture12* draw_rt, const bool one_barrier, const bool full_barrier, const bool skip_first_barrier);
void SendHWDraw(const PipelineSelector& pipe, const GSHWDrawConfig& config, GSTexture12* draw_rt, const bool feedback, const bool one_barrier, const bool full_barrier);
void UpdateHWPipelineSelector(GSHWDrawConfig& config);
void UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config);

View File

@ -15,14 +15,17 @@
#include "D3D12MemAlloc.h"
GSTexture12::GSTexture12(Type type, Format format, int width, int height, int levels, DXGI_FORMAT dxgi_format,
wil::com_ptr_nothrow<ID3D12Resource> resource, wil::com_ptr_nothrow<D3D12MA::Allocation> allocation,
const D3D12DescriptorHandle& srv_descriptor, const D3D12DescriptorHandle& write_descriptor,
const D3D12DescriptorHandle& uav_descriptor, WriteDescriptorType wdtype, D3D12_RESOURCE_STATES resource_state)
wil::com_ptr_nothrow<ID3D12Resource> resource, wil::com_ptr_nothrow<ID3D12Resource> resource_fbl,
wil::com_ptr_nothrow<D3D12MA::Allocation> allocation, const D3D12DescriptorHandle& srv_descriptor,
const D3D12DescriptorHandle& write_descriptor, const D3D12DescriptorHandle& uav_descriptor,
const D3D12DescriptorHandle& fbl_descriptor, WriteDescriptorType wdtype, D3D12_RESOURCE_STATES resource_state)
: m_resource(std::move(resource))
, m_resource_fbl(std::move(resource_fbl))
, m_allocation(std::move(allocation))
, m_srv_descriptor(srv_descriptor)
, m_write_descriptor(write_descriptor)
, m_uav_descriptor(uav_descriptor)
, m_fbl_descriptor(fbl_descriptor)
, m_write_descriptor_type(wdtype)
, m_dxgi_format(dxgi_format)
, m_resource_state(resource_state)
@ -64,8 +67,13 @@ void GSTexture12::Destroy(bool defer)
if (m_uav_descriptor)
dev->DeferDescriptorDestruction(dev->GetDescriptorHeapManager(), &m_uav_descriptor);
if (m_fbl_descriptor)
dev->DeferDescriptorDestruction(dev->GetDescriptorHeapManager(), &m_fbl_descriptor);
dev->DeferResourceDestruction(m_allocation.get(), m_resource.get());
dev->DeferResourceDestruction(m_allocation.get(), m_resource_fbl.get());
m_resource.reset();
m_resource_fbl.reset();
m_allocation.reset();
}
else
@ -88,7 +96,11 @@ void GSTexture12::Destroy(bool defer)
if (m_uav_descriptor)
dev->GetDescriptorHeapManager().Free(&m_uav_descriptor);
if (m_fbl_descriptor)
dev->GetDescriptorHeapManager().Free(&m_fbl_descriptor);
m_resource.reset();
m_resource_fbl.reset();
m_allocation.reset();
}
@ -135,7 +147,9 @@ std::unique_ptr<GSTexture12> GSTexture12::Create(Type type, Format format, int w
// RT's tend to be larger, so we'll keep them committed for speed.
pxAssert(levels == 1);
allocationDesc.Flags |= D3D12MA::ALLOCATION_FLAG_COMMITTED;
desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
allocationDesc.ExtraHeapFlags = D3D12_HEAP_FLAG_DENY_BUFFERS | D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES;
desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS;
desc.Layout = D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE;
optimized_clear_value.Format = rtv_format;
state = D3D12_RESOURCE_STATE_RENDER_TARGET;
}
@ -167,20 +181,63 @@ std::unique_ptr<GSTexture12> GSTexture12::Create(Type type, Format format, int w
desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
wil::com_ptr_nothrow<ID3D12Resource> resource;
wil::com_ptr_nothrow<ID3D12Resource> resource_fbl;
wil::com_ptr_nothrow<D3D12MA::Allocation> allocation;
HRESULT hr = dev->GetAllocator()->CreateResource(&allocationDesc, &desc, state,
(type == Type::RenderTarget || type == Type::DepthStencil) ? &optimized_clear_value : nullptr, allocation.put(),
IID_PPV_ARGS(resource.put()));
if (FAILED(hr))
{
// OOM isn't fatal.
if (hr != E_OUTOFMEMORY)
Console.Error("Create texture failed: 0x%08X", hr);
return {};
if (type == Type::RenderTarget)
{
const D3D12_RESOURCE_ALLOCATION_INFO allocInfo = dev->GetDevice()->GetResourceAllocationInfo(0, 1, &desc);
HRESULT hr = dev->GetAllocator()->AllocateMemory(&allocationDesc, &allocInfo, allocation.put());
if (FAILED(hr))
{
// OOM isn't fatal.
if (hr != E_OUTOFMEMORY)
Console.Error("Allocate texture memory failed: 0x%08X", hr);
return {};
}
hr = dev->GetAllocator()->CreateAliasingResource(allocation.get(), 0, &desc, state,
(type == Type::RenderTarget || type == Type::DepthStencil) ? &optimized_clear_value : nullptr,
IID_PPV_ARGS(resource.put()));
if (FAILED(hr))
{
// OOM isn't fatal.
if (hr != E_OUTOFMEMORY)
Console.Error("Create texture resource 1 failed: 0x%08X", hr);
return {};
}
hr = dev->GetAllocator()->CreateAliasingResource(allocation.get(), 0, &desc, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE,
(type == Type::RenderTarget || type == Type::DepthStencil) ? &optimized_clear_value : nullptr,
IID_PPV_ARGS(resource_fbl.put()));
if (FAILED(hr))
{
// OOM isn't fatal.
if (hr != E_OUTOFMEMORY)
Console.Error("Create texture resource 2 failed: 0x%08X", hr);
return {};
}
}
else
{
HRESULT hr = dev->GetAllocator()->CreateResource(&allocationDesc, &desc, state,
(type == Type::RenderTarget || type == Type::DepthStencil) ? &optimized_clear_value : nullptr, allocation.put(),
IID_PPV_ARGS(resource.put()));
if (FAILED(hr))
{
// OOM isn't fatal.
if (hr != E_OUTOFMEMORY)
Console.Error("Create texture failed: 0x%08X", hr);
return {};
}
}
D3D12DescriptorHandle srv_descriptor, write_descriptor, uav_descriptor;
D3D12DescriptorHandle srv_descriptor, write_descriptor, uav_descriptor, fbl_descriptor;
WriteDescriptorType write_descriptor_type = WriteDescriptorType::None;
if (srv_format != DXGI_FORMAT_UNKNOWN)
{
@ -223,9 +280,20 @@ std::unique_ptr<GSTexture12> GSTexture12::Create(Type type, Format format, int w
return {};
}
if (resource_fbl)
{
if (!CreateSRVDescriptor(resource_fbl.get(), levels, srv_format, &fbl_descriptor))
{
dev->GetDescriptorHeapManager().Free(&uav_descriptor);
dev->GetDescriptorHeapManager().Free(&write_descriptor);
dev->GetDescriptorHeapManager().Free(&srv_descriptor);
return {};
}
}
return std::unique_ptr<GSTexture12>(
new GSTexture12(type, format, width, height, levels, dxgi_format, std::move(resource), std::move(allocation),
srv_descriptor, write_descriptor, uav_descriptor, write_descriptor_type, state));
new GSTexture12(type, format, width, height, levels, dxgi_format, std::move(resource), std::move(resource_fbl), std::move(allocation),
srv_descriptor, write_descriptor, uav_descriptor, fbl_descriptor, write_descriptor_type, state));
}
std::unique_ptr<GSTexture12> GSTexture12::Adopt(wil::com_ptr_nothrow<ID3D12Resource> resource, Type type, Format format,
@ -272,8 +340,8 @@ std::unique_ptr<GSTexture12> GSTexture12::Adopt(wil::com_ptr_nothrow<ID3D12Resou
}
return std::unique_ptr<GSTexture12>(new GSTexture12(type, format, static_cast<u32>(desc.Width), desc.Height,
desc.MipLevels, desc.Format, std::move(resource), {}, srv_descriptor, write_descriptor, uav_descriptor,
write_descriptor_type, resource_state));
desc.MipLevels, desc.Format, std::move(resource), {}, {}, srv_descriptor, write_descriptor, uav_descriptor,
{}, write_descriptor_type, resource_state));
}
bool GSTexture12::CreateSRVDescriptor(

View File

@ -31,9 +31,11 @@ public:
// Returns the descriptor handle stored in m_srv_descriptor (the shader-resource view of this texture).
__fi const D3D12DescriptorHandle& GetSRVDescriptor() const { return m_srv_descriptor; }
// Returns the descriptor handle stored in m_write_descriptor; its kind is recorded separately in m_write_descriptor_type.
__fi const D3D12DescriptorHandle& GetWriteDescriptor() const { return m_write_descriptor; }
// Returns the descriptor handle stored in m_uav_descriptor.
__fi const D3D12DescriptorHandle& GetUAVDescriptor() const { return m_uav_descriptor; }
// Returns the descriptor handle stored in m_fbl_descriptor: an SRV created over the
// secondary resource (m_resource_fbl). Create() only fills it in when that secondary
// resource exists, and Adopt() passes an empty handle, so callers may get a
// default-constructed handle here.
__fi const D3D12DescriptorHandle& GetFBLDescriptor() const { return m_fbl_descriptor; }
// Returns the resource state currently recorded for this texture (m_resource_state).
__fi D3D12_RESOURCE_STATES GetResourceState() const { return m_resource_state; }
// Returns the DXGI format stored in m_dxgi_format.
__fi DXGI_FORMAT GetDXGIFormat() const { return m_dxgi_format; }
// Returns a non-owning raw pointer to the primary resource; ownership stays with m_resource.
__fi ID3D12Resource* GetResource() const { return m_resource.get(); }
// Returns a non-owning raw pointer to the secondary resource held in m_resource_fbl.
// Create() only builds this resource for Type::RenderTarget and Adopt() passes none,
// so the result can be nullptr.
__fi ID3D12Resource* GetFBLResource() const { return m_resource_fbl.get(); }
void* GetNativeHandle() const override;
@ -68,9 +70,10 @@ private:
};
GSTexture12(Type type, Format format, int width, int height, int levels, DXGI_FORMAT dxgi_format,
wil::com_ptr_nothrow<ID3D12Resource> resource, wil::com_ptr_nothrow<D3D12MA::Allocation> allocation,
const D3D12DescriptorHandle& srv_descriptor, const D3D12DescriptorHandle& write_descriptor,
const D3D12DescriptorHandle& uav_descriptor, WriteDescriptorType wdtype, D3D12_RESOURCE_STATES resource_state);
wil::com_ptr_nothrow<ID3D12Resource> resource, wil::com_ptr_nothrow<ID3D12Resource> resource_fbl,
wil::com_ptr_nothrow<D3D12MA::Allocation> allocation, const D3D12DescriptorHandle& srv_descriptor,
const D3D12DescriptorHandle& write_descriptor, const D3D12DescriptorHandle& uav_descriptor,
const D3D12DescriptorHandle& fbl_descriptor, WriteDescriptorType wdtype, D3D12_RESOURCE_STATES resource_state);
static bool CreateSRVDescriptor(
ID3D12Resource* resource, u32 levels, DXGI_FORMAT format, D3D12DescriptorHandle* dh);
@ -83,11 +86,13 @@ private:
void CopyTextureDataForUpload(void* dst, const void* src, u32 pitch, u32 upload_pitch, u32 height) const;
wil::com_ptr_nothrow<ID3D12Resource> m_resource;
wil::com_ptr_nothrow<ID3D12Resource> m_resource_fbl;
wil::com_ptr_nothrow<D3D12MA::Allocation> m_allocation;
D3D12DescriptorHandle m_srv_descriptor = {};
D3D12DescriptorHandle m_write_descriptor = {};
D3D12DescriptorHandle m_uav_descriptor = {};
D3D12DescriptorHandle m_fbl_descriptor = {};
WriteDescriptorType m_write_descriptor_type = WriteDescriptorType::None;
DXGI_FORMAT m_dxgi_format = DXGI_FORMAT_UNKNOWN;