From 313b0fe3b54a03a6e8e9e48602e778f47d0cc3be Mon Sep 17 00:00:00 2001 From: lightningterror <18107717+lightningterror@users.noreply.github.com> Date: Sat, 30 Aug 2025 17:29:29 +0200 Subject: [PATCH] GS/DX: Fully implement tex is fb on dx11, extend partially on dx12. Always use tex is fb for dx11. Partial support for dx12 if prims don't overlap. Previously it didn't work on dx because we used input.t which is interpolated, instead of absolute coords when fb sampling. --- bin/resources/shaders/dx11/tfx.fx | 34 +++++++++++++------------- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 22 ++++++++--------- pcsx2/ShaderCacheVersion.h | 2 +- 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/bin/resources/shaders/dx11/tfx.fx b/bin/resources/shaders/dx11/tfx.fx index c62aee635d..c798a4c219 100644 --- a/bin/resources/shaders/dx11/tfx.fx +++ b/bin/resources/shaders/dx11/tfx.fx @@ -173,10 +173,10 @@ cbuffer cb1 float RcpScaleFactor; }; -float4 sample_c(float2 uv, float uv_w) +float4 sample_c(float2 uv, float uv_w, int2 xy) { #if PS_TEX_IS_FB == 1 - return RtTexture.Load(int3(int2(uv * WH.zw), 0)); + return RtTexture.Load(int3(int2(xy), 0)); #elif PS_REGION_RECT == 1 return Texture.Load(int3(int2(uv), 0)); #else @@ -315,26 +315,26 @@ float4 clamp_wrap_uv(float4 uv) return uv; } -float4x4 sample_4c(float4 uv, float uv_w) +float4x4 sample_4c(float4 uv, float uv_w, int2 xy) { float4x4 c; - c[0] = sample_c(uv.xy, uv_w); - c[1] = sample_c(uv.zy, uv_w); - c[2] = sample_c(uv.xw, uv_w); - c[3] = sample_c(uv.zw, uv_w); + c[0] = sample_c(uv.xy, uv_w, xy); + c[1] = sample_c(uv.zy, uv_w, xy); + c[2] = sample_c(uv.xw, uv_w, xy); + c[3] = sample_c(uv.zw, uv_w, xy); return c; } -uint4 sample_4_index(float4 uv, float uv_w) +uint4 sample_4_index(float4 uv, float uv_w, int2 xy) { float4 c; - c.x = sample_c(uv.xy, uv_w).a; - c.y = sample_c(uv.zy, uv_w).a; - c.z = sample_c(uv.xw, uv_w).a; - c.w = sample_c(uv.zw, uv_w).a; + c.x = sample_c(uv.xy, uv_w, xy).a; + c.y = sample_c(uv.zy, uv_w, 
xy).a; + c.z = sample_c(uv.xw, uv_w, xy).a; + c.w = sample_c(uv.zw, uv_w, xy).a; // Denormalize value uint4 i; @@ -606,7 +606,7 @@ float4 fetch_gXbY(int2 xy) } } -float4 sample_color(float2 st, float uv_w) +float4 sample_color(float2 st, float uv_w, int2 xy) { #if PS_TCOFFSETHACK st += TC_OffsetHack.xy; @@ -618,7 +618,7 @@ float4 sample_color(float2 st, float uv_w) if (PS_LTF == 0 && PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_REGION_RECT == 0 && PS_WMS < 2 && PS_WMT < 2) { - c[0] = sample_c(st, uv_w); + c[0] = sample_c(st, uv_w, xy); } else { @@ -642,9 +642,9 @@ float4 sample_color(float2 st, float uv_w) uv = clamp_wrap_uv(uv); #if PS_PAL_FMT != 0 - c = sample_4p(sample_4_index(uv, uv_w)); + c = sample_4p(sample_4_index(uv, uv_w, xy)); #else - c = sample_4c(uv, uv_w); + c = sample_4c(uv, uv_w, xy); #endif } @@ -769,7 +769,7 @@ float4 ps_color(PS_INPUT input) #elif PS_DEPTH_FMT > 0 float4 T = sample_depth(st_int, input.p.xy); #else - float4 T = sample_color(st, input.t.w); + float4 T = sample_color(st, input.t.w, int2(input.p.xy)); #endif if (PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && !(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE)) diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index d16a3a105c..ab4be8e396 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -6869,7 +6869,7 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c { m_conf.tex = nullptr; m_conf.ps.tex_is_fb = true; - if (m_prim_overlap == PRIM_OVERLAP_NO || !g_gs_device->Features().texture_barrier) + if (m_prim_overlap == PRIM_OVERLAP_NO || !(g_gs_device->Features().texture_barrier || g_gs_device->Features().multidraw_fb_copy)) m_conf.require_one_barrier = true; else m_conf.require_full_barrier = true; @@ -7129,21 +7129,14 @@ bool GSRendererHW::CanUseTexIsFB(const GSTextureCache::Target* rt, const GSTextu return false; } - // If it's a 
channel shuffle, tex-is-fb should be fine, even on DX. + // If it's a channel shuffle, tex-is-fb should be fine. if (m_channel_shuffle) { GL_CACHE("HW: Enabling tex-is-fb for channel shuffle."); return true; } - // No barriers -> we can't use tex-is-fb. - if (!g_gs_device->Features().texture_barrier) - { - GL_CACHE("HW: Disabling tex-is-fb due to no barriers."); - return false; - } - - // If it's a channel shuffle, tex-is-fb is always fine, except on DX. + // If it's a texture shuffle, tex-is-fb is always fine. if (m_texture_shuffle) { // We can't do tex is FB if the source and destination aren't pointing to the same bit of texture. @@ -7154,6 +7147,13 @@ bool GSRendererHW::CanUseTexIsFB(const GSTextureCache::Target* rt, const GSTextu return true; } + // No barriers -> we can't use tex-is-fb when there's overlap. + if (!(g_gs_device->Features().texture_barrier || g_gs_device->Features().multidraw_fb_copy) && m_prim_overlap != PRIM_OVERLAP_NO) + { + GL_CACHE("HW: Disabling tex-is-fb due to no barriers."); + return false; + } + static constexpr auto check_clamp = [](u32 clamp, u32 min, u32 max, s32 tmin, s32 tmax) { if (clamp == CLAMP_REGION_CLAMP) { @@ -9544,7 +9544,7 @@ void GSRendererHW::EndHLEHardwareDraw(bool force_copy_on_hazard /* = false */) { const GSDevice::FeatureSupport features = g_gs_device->Features(); - if (!force_copy_on_hazard && config.tex == config.rt && features.texture_barrier) + if (!force_copy_on_hazard && config.tex == config.rt) { // Sample RT 1:1. config.require_one_barrier = !features.framebuffer_fetch; diff --git a/pcsx2/ShaderCacheVersion.h b/pcsx2/ShaderCacheVersion.h index 9bda047cd6..125bc028aa 100644 --- a/pcsx2/ShaderCacheVersion.h +++ b/pcsx2/ShaderCacheVersion.h @@ -3,4 +3,4 @@ /// Version number for GS and other shaders. Increment whenever any of the contents of the /// shaders change, to invalidate the cache. -static constexpr u32 SHADER_CACHE_VERSION = 71; +static constexpr u32 SHADER_CACHE_VERSION = 72;