GS/VK/GL/DX12/DX11: Depth feedback loops and accurate AFAIL.

This commit is contained in:
TJnotJT 2025-11-28 16:03:55 -05:00
parent cd120c3cfd
commit f0705bf13a
18 changed files with 710 additions and 260 deletions

View File

@ -21,6 +21,18 @@
#define GS_FORWARD_PRIMID 0
#endif
#ifndef ZTST_GEQUAL
#define ZTST_GEQUAL 2
#define ZTST_GREATER 3
#endif
#ifndef AFAIL_KEEP
#define AFAIL_KEEP 0
#define AFAIL_FB_ONLY 1
#define AFAIL_ZB_ONLY 2
#define AFAIL_RGB_ONLY 3
#endif
#ifndef PS_FST
#define PS_IIP 0
#define PS_FST 0
@ -78,12 +90,16 @@
#define PS_NO_COLOR 0
#define PS_NO_COLOR1 0
#define PS_DATE 0
#define PS_TEX_IS_FB 0
#define PS_COLOR_FEEDBACK 0
#define PS_DEPTH_FEEDBACK 0
#endif
#define SW_BLEND (PS_BLEND_A || PS_BLEND_B || PS_BLEND_D)
#define SW_BLEND_NEEDS_RT (SW_BLEND && (PS_BLEND_A == 1 || PS_BLEND_B == 1 || PS_BLEND_C == 1 || PS_BLEND_D == 1))
#define SW_AD_TO_HW (PS_BLEND_C == 1 && PS_A_MASKED)
#define NEEDS_RT_FOR_AFAIL (PS_AFAIL == 3 && PS_NO_COLOR1)
#define AFAIL_NEEDS_RT (PS_AFAIL == AFAIL_ZB_ONLY || (PS_AFAIL == AFAIL_RGB_ONLY && PS_NO_COLOR1))
#define AFAIL_NEEDS_DEPTH (PS_AFAIL == AFAIL_FB_ONLY || PS_AFAIL == AFAIL_RGB_ONLY)
struct VS_INPUT
{
@ -138,7 +154,7 @@ struct PS_OUTPUT
#endif
#endif
#endif
#if PS_ZCLAMP
#if PS_ZCLAMP || (PS_DEPTH_FEEDBACK && AFAIL_NEEDS_DEPTH)
float depth : SV_Depth;
#endif
};
@ -147,6 +163,7 @@ Texture2D<float4> Texture : register(t0);
Texture2D<float4> Palette : register(t1);
Texture2D<float4> RtTexture : register(t2);
Texture2D<float> PrimMinTexture : register(t3);
Texture2D<float> DepthTexture : register(t4);
SamplerState TextureSampler : register(s0);
#ifdef DX12
@ -1017,10 +1034,27 @@ void ps_blend(inout float4 Color, inout float4 As_rgba, float2 pos_xy)
PS_OUTPUT ps_main(PS_INPUT input)
{
#if PS_DEPTH_FEEDBACK && (PS_ZTST == ZTST_GEQUAL || PS_ZTST == ZTST_GREATER)
#if PS_ZTST == ZTST_GEQUAL
if (input.p.z < DepthTexture.Load(int3(input.p.xy, 0)).r)
discard;
#elif PS_ZTST == ZTST_GREATER
if (input.p.z <= DepthTexture.Load(int3(input.p.xy, 0)).r)
discard;
#endif
#endif // PS_ZTST
float4 C = ps_color(input);
#if PS_FIXED_ONE_A
// AA (Fixed one) will output a coverage of 1.0 as alpha
C.a = 128.0f;
#endif
bool atst_pass = atst(C);
#if PS_AFAIL == 0 // KEEP or ATST off
#if PS_AFAIL == AFAIL_KEEP
if (!atst_pass)
discard;
#endif
@ -1034,14 +1068,6 @@ PS_OUTPUT ps_main(PS_INPUT input)
discard;
}
// Must be done before alpha correction
// AA (Fixed one) will output a coverage of 1.0 as alpha
if (PS_FIXED_ONE_A)
{
C.a = 128.0f;
}
float4 alpha_blend = (float4)0.0f;
if (SW_AD_TO_HW)
{
@ -1186,7 +1212,7 @@ PS_OUTPUT ps_main(PS_INPUT input)
ps_fbmask(C, input.p.xy);
#if PS_AFAIL == 3 && !PS_NO_COLOR1 // RGB_ONLY
#if (PS_AFAIL == AFAIL_RGB_ONLY) && !PS_NO_COLOR1
// Use alpha blend factor to determine whether to update A.
alpha_blend.a = float(atst_pass);
#endif
@ -1197,11 +1223,23 @@ PS_OUTPUT ps_main(PS_INPUT input)
#if !PS_NO_COLOR1
output.c1 = alpha_blend;
#endif
#if PS_AFAIL == 3 && PS_NO_COLOR1 // RGB_ONLY, no dual src blend
// Alpha test with feedback
#if (PS_AFAIL == AFAIL_FB_ONLY) && PS_DEPTH_FEEDBACK
if (!atst_pass)
input.p.z = DepthTexture.Load(int3(input.p.xy, 0)).r;
#elif (PS_AFAIL == AFAIL_ZB_ONLY) && PS_COLOR_FEEDBACK
if (!atst_pass)
output.c0 = RtTexture.Load(int3(input.p.xy, 0));
#elif (PS_AFAIL == AFAIL_RGB_ONLY)
if (!atst_pass)
{
float RTa = NEEDS_RT_FOR_AFAIL ? RtTexture.Load(int3(input.p.xy, 0)).a : 0.0f;
output.c0.a = RTa;
#if PS_COLOR_FEEDBACK && PS_NO_COLOR1 // No dual src blend
output.c0.a = RtTexture.Load(int3(input.p.xy, 0)).a;
#endif
#if PS_DEPTH_FEEDBACK
input.p.z = DepthTexture.Load(int3(input.p.xy, 0)).r;
#endif
}
#endif
@ -1211,6 +1249,8 @@ PS_OUTPUT ps_main(PS_INPUT input)
#if PS_ZCLAMP
output.depth = min(input.p.z, MaxDepthPS);
#elif PS_DEPTH_FEEDBACK && AFAIL_NEEDS_DEPTH
output.depth = input.p.z; // Output depth value for ATST pass/fail
#endif
return output;

View File

@ -11,6 +11,18 @@
#define SHUFFLE_WRITE 2
#define SHUFFLE_READWRITE 3
#ifndef ZTST_GEQUAL
#define ZTST_GEQUAL 2
#define ZTST_GREATER 3
#endif
#ifndef AFAIL_KEEP
#define AFAIL_KEEP 0
#define AFAIL_FB_ONLY 1
#define AFAIL_ZB_ONLY 2
#define AFAIL_RGB_ONLY 3
#endif
// TEX_COORD_DEBUG output the uv coordinate as color. It is useful
// to detect bad sampling due to upscaling
//#define TEX_COORD_DEBUG
@ -25,9 +37,13 @@
#define SW_AD_TO_HW (PS_BLEND_C == 1 && PS_A_MASKED)
#define PS_PRIMID_INIT (PS_DATE == 1 || PS_DATE == 2)
#define NEEDS_RT_EARLY (PS_TEX_IS_FB == 1 || PS_DATE >= 5)
#define NEEDS_RT_FOR_AFAIL (PS_AFAIL == 3 && PS_NO_COLOR1)
#define NEEDS_RT (NEEDS_RT_EARLY || NEEDS_RT_FOR_AFAIL || (!PS_PRIMID_INIT && (PS_FBMASK || SW_BLEND_NEEDS_RT || SW_AD_TO_HW)))
// The shader reads back the RT on alpha-test failure in two cases: ZB_ONLY (the whole color is
// restored) and RGB_ONLY without a second color output (only alpha is restored).
#define NEEDS_RT_FOR_AFAIL (PS_AFAIL == AFAIL_ZB_ONLY || (PS_AFAIL == AFAIL_RGB_ONLY && PS_NO_COLOR1))
#define NEEDS_DEPTH_FOR_AFAIL (PS_AFAIL == AFAIL_FB_ONLY || PS_AFAIL == AFAIL_RGB_ONLY)
#define NEEDS_RT (NEEDS_RT_EARLY || NEEDS_RT_FOR_AFAIL || (!PS_PRIMID_INIT && (PS_FBMASK || SW_BLEND_NEEDS_RT || SW_AD_TO_HW)) || PS_COLOR_FEEDBACK)
#define NEEDS_TEX (PS_TFX != 4)
#define NEEDS_DEPTH (PS_DEPTH_FEEDBACK && NEEDS_DEPTH_FOR_AFAIL)
vec4 FragCoord;
layout(std140, binding = 0) uniform cb21
{
@ -107,9 +123,10 @@ layout(binding = 2) uniform sampler2D RtSampler; // note 2 already use by the im
#if PS_DATE == 3
layout(binding = 3) uniform sampler2D img_prim_min;
#endif
// I don't remember why I set this parameter but it is surely useless
//layout(pixel_center_integer) in vec4 gl_FragCoord;
#if NEEDS_DEPTH
layout(binding = 4) uniform sampler2D DepthSampler;
#endif
vec4 sample_from_rt()
@ -119,7 +136,16 @@ vec4 sample_from_rt()
#elif HAS_FRAMEBUFFER_FETCH
return LAST_FRAG_COLOR;
#else
return texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);
return texelFetch(RtSampler, ivec2(FragCoord.xy), 0);
#endif
}
// Fetches the depth texel located under the current fragment from DepthSampler.
// DepthSampler is only declared when NEEDS_DEPTH is set; otherwise a zero vector is returned.
vec4 sample_from_depth()
{
#if NEEDS_DEPTH
	ivec2 texel = ivec2(FragCoord.xy);
	return texelFetch(DepthSampler, texel, 0);
#else
	return vec4(0.0);
#endif
}
@ -315,7 +341,7 @@ int fetch_raw_depth()
#if PS_TEX_IS_FB == 1
return int(sample_from_rt().r * multiplier);
#else
return int(texelFetch(TextureSampler, ivec2(gl_FragCoord.xy), 0).r * multiplier);
return int(texelFetch(TextureSampler, ivec2(FragCoord.xy), 0).r * multiplier);
#endif
}
@ -324,7 +350,7 @@ vec4 fetch_raw_color()
#if PS_TEX_IS_FB == 1
return sample_from_rt();
#else
return texelFetch(TextureSampler, ivec2(gl_FragCoord.xy), 0);
return texelFetch(TextureSampler, ivec2(FragCoord.xy), 0);
#endif
}
@ -724,9 +750,9 @@ void ps_dither(inout vec3 C, float As)
{
#if PS_DITHER > 0 && PS_DITHER < 3
#if PS_DITHER == 2
ivec2 fpos = ivec2(gl_FragCoord.xy);
ivec2 fpos = ivec2(FragCoord.xy);
#else
ivec2 fpos = ivec2(gl_FragCoord.xy * RcpScaleFactor);
ivec2 fpos = ivec2(FragCoord.xy * RcpScaleFactor);
#endif
float value = DitherMatrix[fpos.y&3][fpos.x&3];
@ -969,9 +995,21 @@ float As = As_rgba.a;
void ps_main()
{
FragCoord = gl_FragCoord;
#if NEEDS_DEPTH && (PS_ZTST == ZTST_GEQUAL || PS_ZTST == ZTST_GREATER)
#if PS_ZTST == ZTST_GEQUAL
if (FragCoord.z < sample_from_depth().r)
discard;
#elif PS_ZTST == ZTST_GREATER
if (FragCoord.z <= sample_from_depth().r)
discard;
#endif
#endif // PS_ZTST
#if PS_SCANMSK & 2
// fail depth test on prohibited lines
if ((int(gl_FragCoord.y) & 1) == (PS_SCANMSK & 1))
if ((int(FragCoord.y) & 1) == (PS_SCANMSK & 1))
discard;
#endif
@ -1007,7 +1045,7 @@ void ps_main()
#endif
#if PS_DATE == 3
int stencil_ceil = int(texelFetch(img_prim_min, ivec2(gl_FragCoord.xy), 0).r);
int stencil_ceil = int(texelFetch(img_prim_min, ivec2(FragCoord.xy), 0).r);
// Note gl_PrimitiveID == stencil_ceil will be the primitive that will update
// the bad alpha value so we must keep it.
@ -1017,18 +1055,17 @@ void ps_main()
#endif
vec4 C = ps_color();
bool atst_pass = atst(C);
#if PS_AFAIL == 0 // KEEP or ATST off
if (!atst_pass)
discard;
#if PS_FIXED_ONE_A
// AA (Fixed one) will output a coverage of 1.0 as alpha
C.a = 128.0f;
#endif
// Must be done before alpha correction
bool atst_pass = atst(C);
// AA (Fixed one) will output a coverage of 1.0 as alpha
#if PS_FIXED_ONE_A
C.a = 128.0f;
#if PS_AFAIL == AFAIL_KEEP
if (!atst_pass)
discard;
#endif
#if SW_AD_TO_HW
@ -1066,7 +1103,6 @@ void ps_main()
ps_blend(C, alpha_blend);
#if PS_SHUFFLE
#if !PS_READ16_SRC && !PS_SHUFFLE_SAME && !(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE)
uvec4 denorm_c_after = uvec4(C);
@ -1118,32 +1154,54 @@ void ps_main()
ps_fbmask(C);
#if PS_AFAIL == 3 && !PS_NO_COLOR1 // RGB_ONLY
#if PS_AFAIL == AFAIL_RGB_ONLY && !PS_NO_COLOR1 // RGB_ONLY with the second color output (SV_Target1) available
// Use alpha blend factor to determine whether to update A.
alpha_blend.a = float(atst_pass);
#endif
#if !PS_NO_COLOR
#if PS_RTA_CORRECTION
SV_Target0.a = C.a / 128.0f;
C.a = C.a / 128.0f;
#else
SV_Target0.a = C.a / 255.0f;
C.a = C.a / 255.0f;
#endif
#if PS_COLCLIP_HW == 1
SV_Target0.rgb = vec3(C.rgb / 65535.0f);
C.rgb = vec3(C.rgb / 65535.0f);
#else
SV_Target0.rgb = C.rgb / 255.0f;
C.rgb = C.rgb / 255.0f;
#endif
#if PS_AFAIL == 3 && PS_NO_COLOR1 // RGB_ONLY, no dual src blend
// Alpha test with feedback
#if (PS_AFAIL == AFAIL_FB_ONLY) && NEEDS_DEPTH
if (!atst_pass)
SV_Target0.a = sample_from_rt().a;
FragCoord.z = sample_from_depth().r;
#elif (PS_AFAIL == AFAIL_ZB_ONLY) && NEEDS_RT
if (!atst_pass)
C = sample_from_rt();
#elif (PS_AFAIL == AFAIL_RGB_ONLY)
if (!atst_pass)
{
#if NEEDS_RT && PS_NO_COLOR1 // No dual src blend
C.a = sample_from_rt().a;
#endif
#if NEEDS_DEPTH
FragCoord.z = sample_from_depth().r;
#endif
}
#endif
// Warning: do not write SV_Target0 until the end since the value might be needed for
// FB fetch in sample_from_rt().
SV_Target0 = C;
#if !PS_NO_COLOR1
SV_Target1 = alpha_blend;
#endif
#endif
#if PS_ZCLAMP
gl_FragDepth = min(gl_FragCoord.z, MaxDepthPS);
gl_FragDepth = min(FragCoord.z, MaxDepthPS);
#elif NEEDS_DEPTH && NEEDS_DEPTH_FOR_AFAIL
	// FragCoord.z may have been replaced with the sampled depth when the alpha test failed,
	// so it has to be written out explicitly.
	gl_FragDepth = FragCoord.z; // Output depth value for ATST pass/fail
#endif
}

View File

@ -245,6 +245,18 @@ void main()
#define GS_LINE 0
#endif
#ifndef ZTST_GEQUAL
#define ZTST_GEQUAL 2
#define ZTST_GREATER 3
#endif
#ifndef AFAIL_KEEP
#define AFAIL_KEEP 0
#define AFAIL_FB_ONLY 1
#define AFAIL_ZB_ONLY 2
#define AFAIL_RGB_ONLY 3
#endif
#ifndef PS_FST
#define PS_FST 0
#define PS_WMS 0
@ -288,19 +300,31 @@ void main()
#define PS_DITHER 0
#define PS_DITHER_ADJUST 0
#define PS_ZCLAMP 0
#define PS_FEEDBACK_LOOP 0
#define PS_SCANMSK 0
#define PS_AUTOMATIC_LOD 0
#define PS_MANUAL_LOD 0
#define PS_TEX_IS_FB 0
#define PS_NO_COLOR 0
#define PS_NO_COLOR1 0
#define PS_DATE 0
#define PS_TEX_IS_FB 0
#define PS_COLOR_FEEDBACK 0
#define PS_DEPTH_FEEDBACK 0
#endif
#define SW_BLEND (PS_BLEND_A || PS_BLEND_B || PS_BLEND_D)
#define SW_BLEND_NEEDS_RT (SW_BLEND && (PS_BLEND_A == 1 || PS_BLEND_B == 1 || PS_BLEND_C == 1 || PS_BLEND_D == 1))
#define SW_AD_TO_HW (PS_BLEND_C == 1 && PS_A_MASKED)
#define AFAIL_NEEDS_RT (PS_AFAIL == 3 && PS_NO_COLOR1)
#define AFAIL_NEEDS_RT (PS_AFAIL == AFAIL_ZB_ONLY || (PS_AFAIL == AFAIL_RGB_ONLY && PS_NO_COLOR1))
#define AFAIL_NEEDS_DEPTH (PS_AFAIL == AFAIL_FB_ONLY || PS_AFAIL == AFAIL_RGB_ONLY)
#define PS_FEEDBACK_LOOP_IS_NEEDED (PS_TEX_IS_FB == 1 || AFAIL_NEEDS_RT || PS_FBMASK || SW_BLEND_NEEDS_RT || SW_AD_TO_HW || (PS_DATE >= 5))
#define PS_FEEDBACK_LOOP_IS_NEEDED_RT (PS_TEX_IS_FB == 1 || AFAIL_NEEDS_RT || PS_FBMASK || SW_BLEND_NEEDS_RT || SW_AD_TO_HW || (PS_DATE >= 5) || PS_COLOR_FEEDBACK)
#define PS_FEEDBACK_LOOP_IS_NEEDED_DEPTH (PS_DEPTH_FEEDBACK && AFAIL_NEEDS_DEPTH)
#define NEEDS_TEX (PS_TFX != 4)
vec4 FragCoord;
layout(std140, set = 0, binding = 1) uniform cb1
{
vec3 FogColor;
@ -345,13 +369,30 @@ layout(set = 1, binding = 0) uniform sampler2D Texture;
layout(set = 1, binding = 1) uniform texture2D Palette;
#endif
#if PS_FEEDBACK_LOOP_IS_NEEDED
#if PS_FEEDBACK_LOOP_IS_NEEDED_RT || PS_FEEDBACK_LOOP_IS_NEEDED_DEPTH
#if defined(DISABLE_TEXTURE_BARRIER) || defined(HAS_FEEDBACK_LOOP_LAYOUT)
layout(set = 1, binding = 2) uniform texture2D RtSampler;
vec4 sample_from_rt() { return texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0); }
#if PS_FEEDBACK_LOOP_IS_NEEDED_RT
layout(set = 1, binding = 2) uniform texture2D RtSampler;
vec4 sample_from_rt() { return texelFetch(RtSampler, ivec2(FragCoord.xy), 0); }
#endif
#if PS_FEEDBACK_LOOP_IS_NEEDED_DEPTH
layout(set = 1, binding = 4) uniform texture2D DepthSampler;
vec4 sample_from_depth() { return texelFetch(DepthSampler, ivec2(FragCoord.xy), 0); }
#endif
#else
layout(input_attachment_index = 0, set = 1, binding = 2) uniform subpassInput RtSampler;
vec4 sample_from_rt() { return subpassLoad(RtSampler); }
// Must consider each case separately since the input attachment indices must be consecutive.
#if PS_FEEDBACK_LOOP_IS_NEEDED_RT && PS_FEEDBACK_LOOP_IS_NEEDED_DEPTH
layout(input_attachment_index = 0, set = 1, binding = 2) uniform subpassInput RtSampler;
layout(input_attachment_index = 1, set = 1, binding = 4) uniform subpassInput DepthSampler;
vec4 sample_from_rt() { return subpassLoad(RtSampler); }
vec4 sample_from_depth() { return subpassLoad(DepthSampler); }
#elif PS_FEEDBACK_LOOP_IS_NEEDED_RT
layout(input_attachment_index = 0, set = 1, binding = 2) uniform subpassInput RtSampler;
vec4 sample_from_rt() { return subpassLoad(RtSampler); }
#elif PS_FEEDBACK_LOOP_IS_NEEDED_DEPTH
layout(input_attachment_index = 0, set = 1, binding = 4) uniform subpassInput DepthSampler;
vec4 sample_from_depth() { return subpassLoad(DepthSampler); }
#endif
#endif
#endif
@ -925,19 +966,19 @@ vec4 ps_color()
#if !NEEDS_TEX
vec4 T = vec4(0.0f);
#elif PS_CHANNEL_FETCH == 1
vec4 T = fetch_red(ivec2(gl_FragCoord.xy));
vec4 T = fetch_red(ivec2(FragCoord.xy));
#elif PS_CHANNEL_FETCH == 2
vec4 T = fetch_green(ivec2(gl_FragCoord.xy));
vec4 T = fetch_green(ivec2(FragCoord.xy));
#elif PS_CHANNEL_FETCH == 3
vec4 T = fetch_blue(ivec2(gl_FragCoord.xy));
vec4 T = fetch_blue(ivec2(FragCoord.xy));
#elif PS_CHANNEL_FETCH == 4
vec4 T = fetch_alpha(ivec2(gl_FragCoord.xy));
vec4 T = fetch_alpha(ivec2(FragCoord.xy));
#elif PS_CHANNEL_FETCH == 5
vec4 T = fetch_rgb(ivec2(gl_FragCoord.xy));
vec4 T = fetch_rgb(ivec2(FragCoord.xy));
#elif PS_CHANNEL_FETCH == 6
vec4 T = fetch_gXbY(ivec2(gl_FragCoord.xy));
vec4 T = fetch_gXbY(ivec2(FragCoord.xy));
#elif PS_DEPTH_FMT > 0
vec4 T = sample_depth(st_int, ivec2(gl_FragCoord.xy));
vec4 T = sample_depth(st_int, ivec2(FragCoord.xy));
#else
vec4 T = sample_color(st);
#endif
@ -969,7 +1010,6 @@ vec4 ps_color()
void ps_fbmask(inout vec4 C)
{
#if PS_FBMASK
#if PS_COLCLIP_HW == 1
vec4 RT = trunc(sample_from_rt() * 65535.0f);
#else
@ -985,9 +1025,9 @@ void ps_dither(inout vec3 C, float As)
ivec2 fpos;
#if PS_DITHER == 2
fpos = ivec2(gl_FragCoord.xy);
fpos = ivec2(FragCoord.xy);
#else
fpos = ivec2(gl_FragCoord.xy * RcpScaleFactor);
fpos = ivec2(FragCoord.xy * RcpScaleFactor);
#endif
float value = DitherMatrix[fpos.y & 3][fpos.x & 3];
@ -1065,7 +1105,7 @@ void ps_blend(inout vec4 Color, inout vec4 As_rgba)
As_rgba.rgb = vec3(1.0f);
#endif
#if PS_FEEDBACK_LOOP_IS_NEEDED
#if PS_FEEDBACK_LOOP_IS_NEEDED_RT
vec4 RT = sample_from_rt();
#else
// Not used, but we define it to make the selection below simpler.
@ -1078,7 +1118,7 @@ void ps_blend(inout vec4 Color, inout vec4 As_rgba)
float Ad = trunc(RT.a * 255.0f + 0.1f) / 128.0f;
#endif
#if PS_SHUFFLE && PS_FEEDBACK_LOOP_IS_NEEDED
#if PS_SHUFFLE && PS_FEEDBACK_LOOP_IS_NEEDED_RT
uvec4 denorm_rt = uvec4(RT);
#if (PS_PROCESS_BA & SHUFFLE_WRITE)
RT.r = float((denorm_rt.b << 3) & 0xF8u);
@ -1230,9 +1270,21 @@ void ps_blend(inout vec4 Color, inout vec4 As_rgba)
void main()
{
FragCoord = gl_FragCoord;
#if PS_FEEDBACK_LOOP_IS_NEEDED_DEPTH && (PS_ZTST == ZTST_GEQUAL || PS_ZTST == ZTST_GREATER)
#if PS_ZTST == ZTST_GEQUAL
if (FragCoord.z < sample_from_depth().r)
discard;
#elif PS_ZTST == ZTST_GREATER
if (FragCoord.z <= sample_from_depth().r)
discard;
#endif
#endif // PS_ZTST
#if PS_SCANMSK & 2
// fail depth test on prohibited lines
if ((int(gl_FragCoord.y) & 1) == (PS_SCANMSK & 1))
if ((int(FragCoord.y) & 1) == (PS_SCANMSK & 1))
discard;
#endif
#if PS_DATE >= 5
@ -1267,7 +1319,7 @@ void main()
#endif // PS_DATE >= 5
#if PS_DATE == 3
int stencil_ceil = int(texelFetch(PrimMinTexture, ivec2(gl_FragCoord.xy), 0).r);
int stencil_ceil = int(texelFetch(PrimMinTexture, ivec2(FragCoord.xy), 0).r);
// Note gl_PrimitiveID == stencil_ceil will be the primitive that will update
// the bad alpha value so we must keep it.
@ -1277,18 +1329,17 @@ void main()
#endif
vec4 C = ps_color();
bool atst_pass = atst(C);
#if PS_AFAIL == 0 // KEEP or ATST off
if (!atst_pass)
discard;
#if PS_FIXED_ONE_A
// AA (Fixed one) will output a coverage of 1.0 as alpha
C.a = 128.0f;
#endif
// Must be done before alpha correction
bool atst_pass = atst(C);
// AA (Fixed one) will output a coverage of 1.0 as alpha
#if PS_FIXED_ONE_A
C.a = 128.0f;
#if PS_AFAIL == AFAIL_KEEP // KEEP: fragments that fail the alpha test are discarded
if (!atst_pass)
discard;
#endif
#if SW_AD_TO_HW
@ -1327,7 +1378,7 @@ void main()
#else
ps_blend(C, alpha_blend);
#if PS_SHUFFLE
#if PS_SHUFFLE
#if !PS_READ16_SRC && !PS_SHUFFLE_SAME && !(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE)
uvec4 denorm_c_after = uvec4(C);
#if (PS_PROCESS_BA & SHUFFLE_READ)
@ -1375,7 +1426,7 @@ void main()
ps_fbmask(C);
#if PS_AFAIL == 3 && !PS_NO_COLOR1 // RGB_ONLY
#if (PS_AFAIL == AFAIL_RGB_ONLY) && !PS_NO_COLOR1
// Use alpha blend factor to determine whether to update A.
alpha_blend.a = float(atst_pass);
#endif
@ -1394,16 +1445,32 @@ void main()
#if !PS_NO_COLOR1
o_col1 = alpha_blend;
#endif
#if PS_AFAIL == 3 && PS_NO_COLOR1 // RGB_ONLY, no dual src blend
// Alpha test with feedback
#if (PS_AFAIL == AFAIL_FB_ONLY) && PS_FEEDBACK_LOOP_IS_NEEDED_DEPTH
if (!atst_pass)
FragCoord.z = sample_from_depth().r;
#elif (PS_AFAIL == AFAIL_ZB_ONLY) && PS_FEEDBACK_LOOP_IS_NEEDED_RT
if (!atst_pass)
o_col0 = sample_from_rt();
#elif (PS_AFAIL == AFAIL_RGB_ONLY)
if (!atst_pass)
{
#if PS_FEEDBACK_LOOP_IS_NEEDED_RT && PS_NO_COLOR1 // No dual src blend
o_col0.a = sample_from_rt().a;
#endif
#if PS_FEEDBACK_LOOP_IS_NEEDED_DEPTH
FragCoord.z = sample_from_depth().r;
#endif
}
#endif
#endif
#if PS_ZCLAMP
gl_FragDepth = min(gl_FragCoord.z, MaxDepthPS);
gl_FragDepth = min(FragCoord.z, MaxDepthPS);
#elif PS_FEEDBACK_LOOP_IS_NEEDED_DEPTH && AFAIL_NEEDS_DEPTH
gl_FragDepth = FragCoord.z; // Output depth value for ATST pass/fail
#endif
#endif // PS_DATE
}

View File

@ -757,6 +757,7 @@ struct Pcsx2Config
PreloadFrameWithGSData : 1,
Mipmap : 1,
HWMipmap : 1,
HWAFAILFeedback : 1,
ManualUserHacks : 1,
UserHacks_AlignSpriteX : 1,
UserHacks_CPUFBConversion : 1,

View File

@ -775,7 +775,6 @@ REG64_(GIFReg, TEST)
REG_END2
__forceinline bool DoFirstPass() const { return !ATE || ATST != ATST_NEVER; } // not all pixels fail automatically
__forceinline bool DoSecondPass() const { return ATE && ATST != ATST_ALWAYS && AFAIL != AFAIL_KEEP; } // pixels may fail, write fb/z
__forceinline bool NoSecondPass() const { return ATE && ATST != ATST_ALWAYS && AFAIL == AFAIL_KEEP; } // pixels may fail, no output
__forceinline u32 GetAFAIL(u32 fpsm) const { return (AFAIL == AFAIL_RGB_ONLY && (fpsm & 0xF) != 0) ? static_cast<u32>(AFAIL_FB_ONLY) : AFAIL; } // FB Only when not 32bit Framebuffer
REG_END2

View File

@ -431,6 +431,8 @@ const char* GSState::GetFlushReasonString(GSFlushReason reason)
return "VSYNC";
case GSFlushReason::GSREOPEN:
return "GS REOPEN";
case GSFlushReason::VERTEXCOUNT:
return "VERTEX COUNT";
case GSFlushReason::UNKNOWN:
default:
return "UNKNOWN";

View File

@ -354,6 +354,7 @@ struct alignas(16) GSHWDrawConfig
u32 date : 3;
u32 atst : 3;
u32 afail : 2;
u32 ztst : 2;
// Color sampling
u32 fst : 1; // Investigate to do it on the VS
u32 tfx : 3;
@ -414,6 +415,10 @@ struct alignas(16) GSHWDrawConfig
// Scan mask
u32 scanmsk : 2;
// Feedback
u32 color_feedback : 1;
u32 depth_feedback : 1;
};
struct
@ -428,11 +433,16 @@ struct alignas(16) GSHWDrawConfig
__fi bool operator!=(const PSSelector& rhs) const { return (key_lo != rhs.key_lo || key_hi != rhs.key_hi); }
/// Lexicographic ordering on (key_lo, key_hi): key_hi only decides when key_lo is equal,
/// so that a < b and b < a can never both hold.
__fi bool operator<(const PSSelector& rhs) const { return (key_lo < rhs.key_lo || (key_lo == rhs.key_lo && key_hi < rhs.key_hi)); }
__fi bool IsFeedbackLoop() const
__fi bool IsFeedbackLoopRT() const
{
const u32 sw_blend_bits = blend_a | blend_b | blend_d;
const bool sw_blend_needs_rt = (sw_blend_bits != 0 && ((sw_blend_bits | blend_c) & 1u)) || ((a_masked & blend_c) != 0);
return channel_fb || tex_is_fb || fbmask || (date >= 5) || sw_blend_needs_rt;
// True when any selector bit that requires an RT feedback loop is set.
return color_feedback || channel_fb || tex_is_fb || fbmask || (date >= 5) || sw_blend_needs_rt;
}
/// Reports whether the depth_feedback bit of this selector is set.
__fi bool IsFeedbackLoopDepth() const
{
	return (depth_feedback != 0);
}
/// Disables color output from the pixel shader, this is done when all channels are masked.

View File

@ -14,6 +14,7 @@
#include "common/Error.h"
#include "common/Path.h"
#include "common/StringUtil.h"
#include "common/ScopedGuard.h"
#include "imgui.h"
#include "IconsFontAwesome6.h"
@ -1766,6 +1767,9 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant
sm.AddMacro("PS_TEX_IS_FB", sel.tex_is_fb);
sm.AddMacro("PS_NO_COLOR", sel.no_color);
sm.AddMacro("PS_NO_COLOR1", sel.no_color1);
sm.AddMacro("PS_ZTST", sel.ztst);
sm.AddMacro("PS_COLOR_FEEDBACK", sel.color_feedback);
sm.AddMacro("PS_DEPTH_FEEDBACK", sel.depth_feedback);
wil::com_ptr_nothrow<ID3D11PixelShader> ps = m_shader_cache.GetPixelShader(m_dev.get(), m_tfx_source, sm.GetPtr(), "ps_main");
i = m_ps.try_emplace(sel, std::move(ps)).first;
@ -2583,6 +2587,18 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
{
const GSVector2i rtsize = (config.rt ? config.rt : config.ds)->GetSize();
GSTexture* colclip_rt = g_gs_device->GetColorClipTexture();
GSTexture* draw_rt_clone = nullptr;
GSTexture* draw_ds_clone = nullptr;
GSTexture* primid_texture = nullptr;
ScopedGuard recycle_temp_textures([&]() {
if (draw_rt_clone)
Recycle(draw_rt_clone);
if (draw_ds_clone)
Recycle(draw_ds_clone);
if (primid_texture)
Recycle(primid_texture);
});
if (colclip_rt)
{
@ -2627,7 +2643,6 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
// Destination Alpha Setup
const bool multidraw_fb_copy = m_features.multidraw_fb_copy && (config.require_one_barrier || config.require_full_barrier);
GSTexture* primid_texture = nullptr;
if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::PrimIDTracking)
{
primid_texture = CreateRenderTarget(rtsize.x, rtsize.y, GSTexture::Format::PrimID, false);
@ -2689,7 +2704,7 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
// Depth testing and sampling, bind resource as dsv read only and srv at the same time without the need of a copy.
ID3D11DepthStencilView* read_only_dsv = nullptr;
if (config.tex && config.tex == config.ds)
if (config.ds && (config.tex == config.ds|| config.ps.IsFeedbackLoopDepth()) && !config.depth.zwe)
read_only_dsv = static_cast<GSTexture11*>(config.ds)->ReadOnlyDepthStencilView();
// Should be called before changing local srv state.
@ -2742,8 +2757,6 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
draw_ds = m_state.cached_dsv;
}
GSTexture* draw_rt_clone = nullptr;
if (draw_rt && (config.require_one_barrier || (config.require_full_barrier && m_features.multidraw_fb_copy) || (config.tex && config.tex == config.rt)))
{
// Requires a copy of the RT.
@ -2754,6 +2767,16 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
Console.Warning("D3D11: Failed to allocate temp texture for RT copy.");
}
if (draw_ds && (config.require_one_barrier || (config.require_full_barrier && m_features.multidraw_fb_copy)) &&
	config.ps.IsFeedbackLoopDepth())
{
	// Requires a copy of the DS: the pixel shader samples depth through the clone.
	// SendHWDraw skips the depth copy/bind when the clone is null, so report allocation failure here.
	draw_ds_clone = CreateTexture(rtsize.x, rtsize.y, 1, draw_ds->GetFormat(), true);
	if (!draw_ds_clone)
		Console.Warning("D3D11: Failed to allocate temp texture for DS copy.");
}
OMSetRenderTargets(draw_rt, draw_ds, &config.scissor, read_only_dsv);
SetupOM(config.depth, OMBlendSelector(config.colormask, config.blend), config.blend.constant);
@ -2761,7 +2784,8 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::StencilOne && multidraw_fb_copy)
m_ctx->ClearDepthStencilView(*static_cast<GSTexture11*>(draw_ds), D3D11_CLEAR_STENCIL, 0.0f, 1);
SendHWDraw(config, draw_rt_clone, draw_rt, config.require_one_barrier, config.require_full_barrier, false);
SendHWDraw(config, draw_rt_clone, draw_rt, draw_ds_clone, draw_ds,
config.require_one_barrier, config.require_full_barrier, false);
if (config.blend_multi_pass.enable)
{
@ -2787,15 +2811,10 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
}
SetupOM(config.alpha_second_pass.depth, OMBlendSelector(config.alpha_second_pass.colormask, config.blend), config.blend.constant);
SendHWDraw(config, draw_rt_clone, draw_rt, config.alpha_second_pass.require_one_barrier, config.alpha_second_pass.require_full_barrier, true);
SendHWDraw(config, draw_rt_clone, draw_rt, draw_ds_clone, draw_ds,
config.alpha_second_pass.require_one_barrier, config.alpha_second_pass.require_full_barrier, true);
}
if (draw_rt_clone)
Recycle(draw_rt_clone);
if (primid_texture)
Recycle(primid_texture);
if (colclip_rt)
{
config.colclip_update_area = config.colclip_update_area.runion(config.drawarea);
@ -2814,19 +2833,29 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
}
}
void GSDevice11::SendHWDraw(const GSHWDrawConfig& config, GSTexture* draw_rt_clone, GSTexture* draw_rt, const bool one_barrier, const bool full_barrier, const bool skip_first_barrier)
void GSDevice11::SendHWDraw(const GSHWDrawConfig& config,
GSTexture* draw_rt_clone, GSTexture* draw_rt, GSTexture* draw_ds_clone, GSTexture* draw_ds,
const bool one_barrier, const bool full_barrier, const bool skip_first_barrier)
{
if (draw_rt_clone)
if (draw_rt_clone || draw_ds_clone)
{
#ifdef PCSX2_DEVBUILD
if ((one_barrier || full_barrier) && !config.ps.IsFeedbackLoop()) [[unlikely]]
if ((one_barrier || full_barrier) && !(config.ps.IsFeedbackLoopRT() || config.ps.IsFeedbackLoopDepth())) [[unlikely]]
Console.Warning("D3D11: Possible unnecessary copy detected.");
#endif
auto CopyAndBind = [&](GSVector4i drawarea) {
CopyRect(draw_rt, draw_rt_clone, drawarea, drawarea.left, drawarea.top);
if (draw_rt_clone)
CopyRect(draw_rt, draw_rt_clone, drawarea, drawarea.left, drawarea.top);
if (draw_ds_clone)
CopyRect(draw_ds, draw_ds_clone, drawarea, drawarea.left, drawarea.top);
if (one_barrier || full_barrier)
PSSetShaderResource(2, draw_rt_clone);
{
if (draw_rt_clone)
PSSetShaderResource(2, draw_rt_clone);
if (draw_ds_clone)
PSSetShaderResource(4, draw_ds_clone);
}
if (config.tex && config.tex == config.rt)
PSSetShaderResource(0, draw_rt_clone);
};

View File

@ -83,7 +83,7 @@ public:
private:
enum : u32
{
MAX_TEXTURES = 4,
MAX_TEXTURES = 5,
MAX_SAMPLERS = 1,
VERTEX_BUFFER_SIZE = 32 * 1024 * 1024,
INDEX_BUFFER_SIZE = 16 * 1024 * 1024,
@ -345,7 +345,9 @@ public:
void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, u8 afix);
void RenderHW(GSHWDrawConfig& config) override;
void SendHWDraw(const GSHWDrawConfig& config, GSTexture* draw_rt_clone, GSTexture* draw_rt, const bool one_barrier, const bool full_barrier, const bool skip_first_barrier);
void SendHWDraw(const GSHWDrawConfig& config,
GSTexture* draw_rt_clone, GSTexture* draw_rt, GSTexture* draw_ds_clone, GSTexture* draw_ds,
const bool one_barrier, const bool full_barrier, const bool skip_first_barrier);
void ClearSamplerCache() override;

View File

@ -2305,9 +2305,9 @@ bool GSDevice12::GetTextureGroupDescriptors(
}
D3D12_CPU_DESCRIPTOR_HANDLE dst_handle = *gpu_handle;
D3D12_CPU_DESCRIPTOR_HANDLE src_handles[NUM_TFX_TEXTURES];
UINT src_sizes[NUM_TFX_TEXTURES];
pxAssert(count <= NUM_TFX_TEXTURES);
D3D12_CPU_DESCRIPTOR_HANDLE src_handles[NUM_TOTAL_TFX_TEXTURES];
UINT src_sizes[NUM_TOTAL_TFX_TEXTURES];
pxAssert(count <= NUM_TOTAL_TFX_TEXTURES);
for (u32 i = 0; i < count; i++)
{
src_handles[i] = cpu_handles[i];
@ -2415,9 +2415,10 @@ bool GSDevice12::CreateRootSignatures()
rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL);
rsb.AddCBVParameter(1, D3D12_SHADER_VISIBILITY_PIXEL);
rsb.AddSRVParameter(0, D3D12_SHADER_VISIBILITY_VERTEX);
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 2, D3D12_SHADER_VISIBILITY_PIXEL);
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 2, D3D12_SHADER_VISIBILITY_PIXEL); // Source / Palette
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, NUM_TFX_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL);
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 2, 2, D3D12_SHADER_VISIBILITY_PIXEL);
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 2, 2, D3D12_SHADER_VISIBILITY_PIXEL); // RT / PrimID
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 4, 1, D3D12_SHADER_VISIBILITY_PIXEL); // Depth
if (!(m_tfx_root_signature = rsb.Create()))
return false;
D3D12::SetObjectName(m_tfx_root_signature.get(), "TFX root signature");
@ -2922,6 +2923,9 @@ const ID3DBlob* GSDevice12::GetTFXPixelShader(const GSHWDrawConfig::PSSelector&
sm.AddMacro("PS_TEX_IS_FB", sel.tex_is_fb);
sm.AddMacro("PS_NO_COLOR", sel.no_color);
sm.AddMacro("PS_NO_COLOR1", sel.no_color1);
sm.AddMacro("PS_ZTST", sel.ztst);
sm.AddMacro("PS_COLOR_FEEDBACK", sel.color_feedback);
sm.AddMacro("PS_DEPTH_FEEDBACK", sel.depth_feedback);
ComPtr<ID3DBlob> ps(m_shader_cache.GetPixelShader(m_tfx_source, sm.GetPtr(), "ps_main"));
it = m_tfx_pixel_shaders.emplace(sel, std::move(ps)).first;
@ -3118,6 +3122,7 @@ void GSDevice12::ExecuteCommandListAndRestartRenderPass(bool wait_for_completion
const bool was_in_render_pass = m_in_render_pass;
EndRenderPass();
ExecuteCommandList(GetWaitType(wait_for_completion, GSConfig.HWSpinCPUForReadbacks));
InvalidateCachedState();
@ -3155,6 +3160,7 @@ void GSDevice12::InvalidateCachedState()
m_tfx_textures_handle_gpu.Clear();
m_tfx_samplers_handle_gpu.Clear();
m_tfx_rt_textures_handle_gpu.Clear();
m_tfx_depth_textures_handle_gpu.Clear();
}
void GSDevice12::SetVertexBuffer(D3D12_GPU_VIRTUAL_ADDRESS buffer, size_t size, size_t stride)
@ -3236,7 +3242,11 @@ void GSDevice12::PSSetShaderResource(int i, GSTexture* sr, bool check_state)
return;
m_tfx_textures[i] = handle;
m_dirty_flags |= (i < 2) ? DIRTY_FLAG_TFX_TEXTURES : DIRTY_FLAG_TFX_RT_TEXTURES;
m_dirty_flags |=
(i < 2) ? DIRTY_FLAG_TFX_TEXTURES :
(i < 4) ? DIRTY_FLAG_TFX_RT_TEXTURES :
(i < 5) ? DIRTY_FLAG_TFX_DEPTH_TEXTURES :
0;
}
void GSDevice12::PSSetSampler(GSHWDrawConfig::SamplerSelector sel)
@ -3642,6 +3652,17 @@ bool GSDevice12::ApplyTFXState(bool already_execed)
flags |= DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2;
}
if (flags & DIRTY_FLAG_TFX_DEPTH_TEXTURES)
{
if (!GetTextureGroupDescriptors(&m_tfx_depth_textures_handle_gpu, m_tfx_textures.data() + 4, 1))
{
ExecuteCommandListAndRestartRenderPass(false, "Ran out of TFX depth descriptor descriptor groups");
return ApplyTFXState(true);
}
flags |= DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_3;
}
ID3D12GraphicsCommandList* cmdlist = GetCommandList();
if (m_current_root_signature != RootSignature::TFX)
@ -3649,7 +3670,8 @@ bool GSDevice12::ApplyTFXState(bool already_execed)
m_current_root_signature = RootSignature::TFX;
flags |= DIRTY_FLAG_VS_CONSTANT_BUFFER_BINDING | DIRTY_FLAG_PS_CONSTANT_BUFFER_BINDING |
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE | DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE |
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2 | DIRTY_FLAG_PIPELINE;
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2 | DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_3 |
DIRTY_FLAG_PIPELINE;
cmdlist->SetGraphicsRootSignature(m_tfx_root_signature.get());
}
@ -3668,6 +3690,8 @@ bool GSDevice12::ApplyTFXState(bool already_execed)
cmdlist->SetGraphicsRootDescriptorTable(TFX_ROOT_SIGNATURE_PARAM_PS_SAMPLERS, m_tfx_samplers_handle_gpu);
if (flags & DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2)
cmdlist->SetGraphicsRootDescriptorTable(TFX_ROOT_SIGNATURE_PARAM_PS_RT_TEXTURES, m_tfx_rt_textures_handle_gpu);
if (flags & DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_3)
cmdlist->SetGraphicsRootDescriptorTable(TFX_ROOT_SIGNATURE_PARAM_PS_DEPTH_TEXTURES, m_tfx_depth_textures_handle_gpu);
ApplyBaseState(flags, cmdlist);
return true;
@ -3832,6 +3856,17 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
GSTexture12* draw_rt = static_cast<GSTexture12*>(config.rt);
GSTexture12* draw_ds = static_cast<GSTexture12*>(config.ds);
GSTexture12* draw_rt_clone = nullptr;
GSTexture12* draw_ds_clone = nullptr;
GSTexture12* date_image = nullptr;
ScopedGuard recycle_temp_textures([&]() {
if (draw_rt_clone)
Recycle(draw_rt_clone);
if (draw_ds_clone)
Recycle(draw_ds_clone);
if (date_image)
Recycle(date_image);
});
// Align the render area to 128x128, hopefully avoiding render pass restarts for small render area changes (e.g. Ratchet and Clank).
const GSVector2i rtsize(config.rt ? config.rt->GetSize() : config.ds->GetSize());
@ -3897,7 +3932,7 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
SetBlendConstants(config.blend.constant);
// Depth testing and sampling, bind resource as dsv read only and srv at the same time without the need of a copy.
if (config.tex && config.tex == config.ds)
if (config.ds && (config.ds == config.tex || config.ps.IsFeedbackLoopDepth()) && !config.depth.zwe)
{
EndRenderPass();
@ -3906,7 +3941,6 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
}
// Primitive ID tracking DATE setup.
GSTexture12* date_image = nullptr;
if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::PrimIDTracking)
{
GSTexture* backup_rt = config.rt;
@ -3994,6 +4028,16 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
Console.Warning("D3D12: Failed to allocate temp texture for RT copy.");
}
// Depth feedback: the pixel shader needs to read the depth buffer it is rendering to.
// When a barrier is requested for a depth feedback draw, allocate a temporary texture that
// SendHWDraw() fills with a copy of the DS and binds as a shader resource.
if (draw_ds && (config.require_one_barrier || (config.require_full_barrier && m_features.multidraw_fb_copy)) &&
	config.ps.IsFeedbackLoopDepth())
{
	// Requires a copy of the DS.
	// A non-null draw_ds_clone also acts as the "copy and bind DS" flag inside SendHWDraw().
	draw_ds_clone = static_cast<GSTexture12*>(CreateTexture(rtsize.x, rtsize.y, 1, draw_ds->GetFormat(), true));

	// Check the DS clone that was just requested (not the RT clone) so an allocation failure is
	// reported here, and no spurious warning is printed when there simply is no RT clone.
	if (!draw_ds_clone)
		Console.Warning("D3D12: Failed to allocate temp texture for DS copy.");
}
OMSetRenderTargets(draw_rt, draw_ds, config.scissor);
// Begin render pass if new target or out of the area.
@ -4040,7 +4084,8 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
UploadHWDrawVerticesAndIndices(config);
// now we can do the actual draw
SendHWDraw(pipe, config, draw_rt_clone, draw_rt, config.require_one_barrier, config.require_full_barrier, false);
SendHWDraw(pipe, config, draw_rt_clone, draw_rt, draw_ds_clone, draw_ds,
config.require_one_barrier, config.require_full_barrier, false);
// blend second pass
if (config.blend_multi_pass.enable)
@ -4070,15 +4115,10 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
pipe.cms = config.alpha_second_pass.colormask;
pipe.dss = config.alpha_second_pass.depth;
pipe.bs = config.blend;
SendHWDraw(pipe, config, draw_rt_clone, draw_rt, config.alpha_second_pass.require_one_barrier, config.alpha_second_pass.require_full_barrier, true);
SendHWDraw(pipe, config, draw_rt_clone, draw_rt, draw_ds_clone, draw_ds,
config.alpha_second_pass.require_one_barrier, config.alpha_second_pass.require_full_barrier, true);
}
if (draw_rt_clone)
Recycle(draw_rt_clone);
if (date_image)
Recycle(date_image);
// now blit the colclip texture back to the original target
if (colclip_rt)
{
@ -4113,23 +4153,38 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
}
}
void GSDevice12::SendHWDraw(const PipelineSelector& pipe, const GSHWDrawConfig& config, GSTexture12* draw_rt_clone, GSTexture12* draw_rt, const bool one_barrier, const bool full_barrier, const bool skip_first_barrier)
void GSDevice12::SendHWDraw(const PipelineSelector& pipe, const GSHWDrawConfig& config,
GSTexture12* draw_rt_clone, GSTexture12* draw_rt,
GSTexture12* draw_ds_clone, GSTexture12* draw_ds,
const bool one_barrier, const bool full_barrier, const bool skip_first_barrier)
{
if (draw_rt_clone)
if (draw_rt_clone || draw_ds_clone)
{
#ifdef PCSX2_DEVBUILD
if ((one_barrier || full_barrier) && !config.ps.IsFeedbackLoop()) [[unlikely]]
if ((one_barrier || full_barrier) && !(config.ps.IsFeedbackLoopRT() || config.ps.IsFeedbackLoopDepth())) [[unlikely]]
Console.Warning("D3D12: Possible unnecessary copy detected.");
#endif
auto CopyAndBind = [&](GSVector4i drawarea) {
EndRenderPass();
if (draw_rt_clone)
{
CopyRect(draw_rt, draw_rt_clone, drawarea, drawarea.left, drawarea.top);
draw_rt->TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET);
}
CopyRect(draw_rt, draw_rt_clone, drawarea, drawarea.left, drawarea.top);
draw_rt->TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET);
if (draw_ds_clone)
{
CopyRect(draw_ds, draw_ds_clone, drawarea, drawarea.left, drawarea.top);
draw_ds->TransitionToState(D3D12_RESOURCE_STATE_DEPTH_WRITE);
}
if (one_barrier || full_barrier)
PSSetShaderResource(2, draw_rt_clone, true);
{
if (draw_rt_clone)
PSSetShaderResource(2, draw_rt_clone, true);
if (draw_ds_clone)
PSSetShaderResource(4, draw_ds_clone, true);
}
if (config.tex && config.tex == config.rt)
PSSetShaderResource(0, draw_rt_clone, true);
};
@ -4158,7 +4213,6 @@ void GSDevice12::SendHWDraw(const PipelineSelector& pipe, const GSHWDrawConfig&
return;
}
// Optimization: For alpha second pass we can reuse the copy snapshot from the first pass.
if (!skip_first_barrier)
CopyAndBind(config.drawarea);
@ -4182,7 +4236,7 @@ void GSDevice12::UpdateHWPipelineSelector(GSHWDrawConfig& config)
m_pipeline_selector.ds = config.ds != nullptr;
}
void GSDevice12::UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config)
void GSDevice12::UploadHWDrawVerticesAndIndices(GSHWDrawConfig& config)
{
IASetVertexBuffer(config.verts, sizeof(GSVertex), config.nverts);

View File

@ -256,7 +256,8 @@ public:
NUM_TFX_CONSTANT_BUFFERS = 2,
NUM_TFX_TEXTURES = 2,
NUM_TFX_RT_TEXTURES = 2,
NUM_TOTAL_TFX_TEXTURES = NUM_TFX_TEXTURES + NUM_TFX_RT_TEXTURES,
NUM_TFX_DEPTH_TEXTURES = 1,
NUM_TOTAL_TFX_TEXTURES = NUM_TFX_TEXTURES + NUM_TFX_RT_TEXTURES + NUM_TFX_DEPTH_TEXTURES,
NUM_TFX_SAMPLERS = 1,
NUM_UTILITY_TEXTURES = 1,
NUM_UTILITY_SAMPLERS = 1,
@ -273,6 +274,7 @@ public:
TFX_ROOT_SIGNATURE_PARAM_PS_TEXTURES = 3,
TFX_ROOT_SIGNATURE_PARAM_PS_SAMPLERS = 4,
TFX_ROOT_SIGNATURE_PARAM_PS_RT_TEXTURES = 5,
TFX_ROOT_SIGNATURE_PARAM_PS_DEPTH_TEXTURES = 6,
UTILITY_ROOT_SIGNATURE_PARAM_PUSH_CONSTANTS = 0,
UTILITY_ROOT_SIGNATURE_PARAM_PS_TEXTURES = 1,
@ -466,10 +468,13 @@ public:
bool BindDrawPipeline(const PipelineSelector& p);
void RenderHW(GSHWDrawConfig& config) override;
void SendHWDraw(const PipelineSelector& pipe, const GSHWDrawConfig& config, GSTexture12* draw_rt_clone, GSTexture12* draw_rt, const bool one_barrier, const bool full_barrier, const bool skip_first_barrier);
void SendHWDraw(const PipelineSelector& pipe, const GSHWDrawConfig& config,
GSTexture12* draw_rt_clone, GSTexture12* draw_rt,
GSTexture12* draw_ds_clone, GSTexture12* draw_ds,
const bool one_barrier, const bool full_barrier, const bool skip_first_barrier);
void UpdateHWPipelineSelector(GSHWDrawConfig& config);
void UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config);
void UploadHWDrawVerticesAndIndices(GSHWDrawConfig& config);
public:
/// Ends any render pass, executes the command buffer, and invalidates cached state.
@ -527,33 +532,35 @@ private:
DIRTY_FLAG_TFX_TEXTURES = (1 << 2),
DIRTY_FLAG_TFX_SAMPLERS = (1 << 3),
DIRTY_FLAG_TFX_RT_TEXTURES = (1 << 4),
DIRTY_FLAG_TFX_DEPTH_TEXTURES = (1 << 5),
DIRTY_FLAG_VS_CONSTANT_BUFFER_BINDING = (1 << 5),
DIRTY_FLAG_PS_CONSTANT_BUFFER_BINDING = (1 << 6),
DIRTY_FLAG_VS_VERTEX_BUFFER_BINDING = (1 << 7),
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE = (1 << 8),
DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE = (1 << 9),
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2 = (1 << 10),
DIRTY_FLAG_VS_CONSTANT_BUFFER_BINDING = (1 << 6),
DIRTY_FLAG_PS_CONSTANT_BUFFER_BINDING = (1 << 7),
DIRTY_FLAG_VS_VERTEX_BUFFER_BINDING = (1 << 8),
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE = (1 << 9),
DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE = (1 << 10),
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2 = (1 << 11),
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_3 = (1 << 12),
DIRTY_FLAG_VERTEX_BUFFER = (1 << 11),
DIRTY_FLAG_INDEX_BUFFER = (1 << 12),
DIRTY_FLAG_PRIMITIVE_TOPOLOGY = (1 << 13),
DIRTY_FLAG_VIEWPORT = (1 << 14),
DIRTY_FLAG_SCISSOR = (1 << 15),
DIRTY_FLAG_RENDER_TARGET = (1 << 16),
DIRTY_FLAG_PIPELINE = (1 << 17),
DIRTY_FLAG_BLEND_CONSTANTS = (1 << 18),
DIRTY_FLAG_STENCIL_REF = (1 << 19),
DIRTY_FLAG_VERTEX_BUFFER = (1 << 13),
DIRTY_FLAG_INDEX_BUFFER = (1 << 14),
DIRTY_FLAG_PRIMITIVE_TOPOLOGY = (1 << 15),
DIRTY_FLAG_VIEWPORT = (1 << 16),
DIRTY_FLAG_SCISSOR = (1 << 17),
DIRTY_FLAG_RENDER_TARGET = (1 << 18),
DIRTY_FLAG_PIPELINE = (1 << 19),
DIRTY_FLAG_BLEND_CONSTANTS = (1 << 20),
DIRTY_FLAG_STENCIL_REF = (1 << 21),
DIRTY_BASE_STATE = DIRTY_FLAG_VS_CONSTANT_BUFFER_BINDING | DIRTY_FLAG_PS_CONSTANT_BUFFER_BINDING |
DIRTY_FLAG_VS_VERTEX_BUFFER_BINDING | DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE |
DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE | DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2 |
DIRTY_FLAG_VERTEX_BUFFER | DIRTY_FLAG_INDEX_BUFFER | DIRTY_FLAG_PRIMITIVE_TOPOLOGY |
DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR | DIRTY_FLAG_RENDER_TARGET | DIRTY_FLAG_PIPELINE |
DIRTY_FLAG_BLEND_CONSTANTS | DIRTY_FLAG_STENCIL_REF,
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_3 | DIRTY_FLAG_VERTEX_BUFFER | DIRTY_FLAG_INDEX_BUFFER |
DIRTY_FLAG_PRIMITIVE_TOPOLOGY | DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR | DIRTY_FLAG_RENDER_TARGET |
DIRTY_FLAG_PIPELINE | DIRTY_FLAG_BLEND_CONSTANTS | DIRTY_FLAG_STENCIL_REF,
DIRTY_TFX_STATE =
DIRTY_BASE_STATE | DIRTY_FLAG_TFX_TEXTURES | DIRTY_FLAG_TFX_SAMPLERS | DIRTY_FLAG_TFX_RT_TEXTURES,
DIRTY_TFX_STATE = DIRTY_BASE_STATE | DIRTY_FLAG_TFX_TEXTURES | DIRTY_FLAG_TFX_SAMPLERS |
DIRTY_FLAG_TFX_RT_TEXTURES | DIRTY_FLAG_TFX_DEPTH_TEXTURES,
DIRTY_UTILITY_STATE = DIRTY_BASE_STATE,
DIRTY_CONSTANT_BUFFER_STATE = DIRTY_FLAG_VS_CONSTANT_BUFFER | DIRTY_FLAG_PS_CONSTANT_BUFFER,
};
@ -594,6 +601,7 @@ private:
D3D12DescriptorHandle m_tfx_textures_handle_gpu;
D3D12DescriptorHandle m_tfx_samplers_handle_gpu;
D3D12DescriptorHandle m_tfx_rt_textures_handle_gpu;
D3D12DescriptorHandle m_tfx_depth_textures_handle_gpu;
D3D12DescriptorHandle m_utility_texture_cpu;
D3D12DescriptorHandle m_utility_texture_gpu;

View File

@ -5555,7 +5555,7 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool
// Hitman suffers from this, not sure on the exact scenario at the moment, but we need the barrier.
if (NeedsBlending() && m_context->ALPHA.IsCdInBlend())
{
// Needed to enable IsFeedbackLoop.
// Needed to enable IsFeedbackLoopRT.
m_conf.ps.channel_fb = 1;
// Assume no overlap when it's a channel shuffle, no need for full barriers.
m_conf.require_one_barrier = true;
@ -7716,12 +7716,48 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
rt->m_alpha_max = rt_new_alpha_max;
rt->m_alpha_min = rt_new_alpha_min;
}
// Alpha test afail configuration
// Warning must be done after EmulateZbuffer
// Depth test is always true so it can be executed in 2 passes (no order required) unlike color.
// The idea is to compute first the color which is independent of the alpha test. And then do a 2nd
// pass to handle the depth based on the alpha test.
const bool ate_first_pass = m_cached_ctx.TEST.DoFirstPass();
bool ate_first_pass = m_cached_ctx.TEST.DoFirstPass();
bool ate_second_pass = m_cached_ctx.TEST.DoSecondPass();
// Check if we should force a feedback loop for AFAIL.
// Only considered when the alpha test would otherwise need two passes, the user enabled
// HWAFAILFeedback, and the backend can read back its own targets (texture barriers or
// multidraw framebuffer copies).
if (ate_first_pass && ate_second_pass && GSConfig.HWAFAILFeedback &&
	(features.texture_barrier || features.multidraw_fb_copy))
{
	// An AFAIL mode is only "possible" when the channel it keeps writing on failure is actually writable.
	const bool possible_zb_only = (m_cached_ctx.TEST.AFAIL == AFAIL_ZB_ONLY) && m_conf.depth.zwe;
	const bool possible_rgb_only = (m_cached_ctx.TEST.AFAIL == AFAIL_RGB_ONLY) && rt && m_conf.colormask.wa;
	const bool possible_fb_only = (m_cached_ctx.TEST.AFAIL == AFAIL_FB_ONLY) && rt && m_conf.colormask.wrgba;

	// ZB_ONLY and RGB_ONLY need the current color read back; FB_ONLY and RGB_ONLY need the current depth.
	const bool afail_needs_rt = possible_zb_only || possible_rgb_only;
	const bool afail_needs_depth = possible_fb_only || possible_rgb_only;

	if (afail_needs_rt)
	{
		m_conf.ps.color_feedback = rt && m_conf.colormask.wrgba;
		// AFAIL is resolved in the shader, so the second alpha pass is dropped.
		ate_second_pass = false;
		m_conf.ps.afail = m_cached_ctx.TEST.AFAIL;
		// One barrier is enough when primitives do not overlap; otherwise a full barrier is requested.
		m_conf.require_one_barrier |= (m_prim_overlap == PRIM_OVERLAP_NO);
		m_conf.require_full_barrier |= (m_prim_overlap != PRIM_OVERLAP_NO);
	}

	if (afail_needs_depth)
	{
		m_conf.ps.depth_feedback = m_conf.depth.zwe && !m_cached_ctx.ZBUF.ZMSK;
		ate_second_pass = false;
		m_conf.ps.afail = m_cached_ctx.TEST.AFAIL;
		m_conf.require_one_barrier |= (m_prim_overlap == PRIM_OVERLAP_NO);
		m_conf.require_full_barrier |= (m_prim_overlap != PRIM_OVERLAP_NO);

		// ZTE must gate BOTH comparisons. Without the parentheses, && binds tighter than ||, so
		// ZTST_GREATER would switch to the shader depth test even with depth testing disabled.
		if (m_cached_ctx.TEST.ZTE &&
			(m_cached_ctx.TEST.ZTST == ZTST_GEQUAL || m_cached_ctx.TEST.ZTST == ZTST_GREATER))
		{
			// Enable SW depth testing and disable HW depth testing.
			m_conf.ps.ztst = m_cached_ctx.TEST.ZTST;
			m_conf.depth.ztst = ZTST_ALWAYS;
		}
	}
}
bool ate_RGBA_then_Z = false;
bool ate_RGB_then_Z = false;
GL_INS("HW: %sAlpha Test, ATST=%s, AFAIL=%s", (ate_first_pass && ate_second_pass) ? "Complex" : "",
@ -7993,8 +8029,6 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
m_conf.cb_ps.FogColor_AREF = fc.blend32<8>(m_conf.cb_ps.FogColor_AREF);
}
// Update RT scaled alpha flag, nothing's going to read it anymore.
if (rt)
{
@ -8010,9 +8044,12 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
if (m_conf.require_one_barrier || m_conf.require_full_barrier)
pxAssert(!m_conf.blend.enable);
// Barriers aren't needed with fbfetch.
m_conf.require_one_barrier = false;
m_conf.require_full_barrier = false;
if (!m_conf.ps.IsFeedbackLoopDepth())
{
// Barriers aren't needed with fbfetch for color feedback only.
m_conf.require_one_barrier = false;
m_conf.require_full_barrier = false;
}
}
// Multi-pass algorithms shouldn't be needed with full barrier and backends may not handle this correctly
pxAssert(!m_conf.require_full_barrier || !m_conf.ps.colclip_hw);
@ -8030,6 +8067,13 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
m_conf.require_full_barrier = false;
}
// With a full barrier on a backend that supports texture barriers or multidraw framebuffer copies,
// compute the draw list and point the draw config at m_drawlist / m_drawlist_bbox.
// NOTE(review): rt is dereferenced unconditionally here. Confirm that require_full_barrier can never be
// set for a draw without a render target (e.g. a depth-only draw using depth feedback) - TODO confirm.
if (m_conf.require_full_barrier && (g_gs_device->Features().texture_barrier || g_gs_device->Features().multidraw_fb_copy))
{
ComputeDrawlistGetSize(rt->m_scale);
m_conf.drawlist = &m_drawlist;
m_conf.drawlist_bbox = &m_drawlist_bbox;
}
// rs
const GSVector4i hacked_scissor = m_channel_shuffle ? GSVector4i::cxpr(0, 0, 1024, 1024) : m_context->scissor.in;
const GSVector4i scissor(GSVector4i(GSVector4(rtscale) * GSVector4(hacked_scissor)).rintersect(GSVector4i::loadh(rtsize)));
@ -8100,7 +8144,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
{
m_conf.alpha_second_pass.ps.DisableColorOutput();
}
if (m_conf.alpha_second_pass.ps.IsFeedbackLoop())
if (m_conf.alpha_second_pass.ps.IsFeedbackLoopRT())
{
m_conf.alpha_second_pass.require_one_barrier = m_conf.require_one_barrier;
m_conf.alpha_second_pass.require_full_barrier = m_conf.require_full_barrier;
@ -8124,14 +8168,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
m_conf.cb_ps.FogColor_AREF.a = m_conf.alpha_second_pass.ps_aref;
m_conf.alpha_second_pass.enable = false;
}
if (m_conf.require_full_barrier && (g_gs_device->Features().texture_barrier || g_gs_device->Features().multidraw_fb_copy))
{
ComputeDrawlistGetSize(rt->m_scale);
m_conf.drawlist = &m_drawlist;
m_conf.drawlist_bbox = &m_drawlist_bbox;
}
if (!m_channel_shuffle_width)
g_gs_device->RenderHW(m_conf);
else

View File

@ -1396,6 +1396,9 @@ std::string GSDeviceOGL::GetPSSource(const PSSelector& sel)
+ fmt::format("#define PS_SCANMSK {}\n", sel.scanmsk)
+ fmt::format("#define PS_NO_COLOR {}\n", sel.no_color)
+ fmt::format("#define PS_NO_COLOR1 {}\n", sel.no_color1)
+ fmt::format("#define PS_ZTST {}\n", sel.ztst)
+ fmt::format("#define PS_COLOR_FEEDBACK {}\n", sel.color_feedback)
+ fmt::format("#define PS_DEPTH_FEEDBACK {}\n", sel.depth_feedback)
;
std::string src = GenGlslHeader("ps_main", GL_FRAGMENT_SHADER, macro);
@ -2554,6 +2557,8 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
PSSetShaderResource(2, draw_rt_clone);
else if (config.require_one_barrier || config.require_full_barrier)
PSSetShaderResource(2, colclip_rt ? colclip_rt : config.rt);
if ((config.require_one_barrier || config.require_full_barrier) && config.ps.IsFeedbackLoopDepth())
PSSetShaderResource(4, config.ds);
SetupSampler(config.sampler);
@ -2583,7 +2588,7 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
// On Nvidia, 2 seems to not pick up the data written by 1 unless we add a second barrier.
// Pretty sure GL is supposed to guarantee that the blend unit is coherent with previous pixel write out, so calling this a bug.
if (m_bugs.broken_blend_coherency)
rt_hazard_barrier |= (psel.ps.IsFeedbackLoop() || psel.ps.blend_c == 1) && GLState::rt == config.rt;
rt_hazard_barrier |= (psel.ps.IsFeedbackLoopRT() || psel.ps.blend_c == 1) && GLState::rt == config.rt;
if (config.require_one_barrier || !m_features.texture_barrier)
rt_hazard_barrier = false; // Already in place or not available
@ -2671,7 +2676,7 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
OMSetRenderTargets(draw_rt, draw_ds, &config.scissor);
OMSetColorMaskState(config.colormask);
SetupOM(config.depth);
// Clear stencil as close as possible to the RT bind, to avoid framebuffer swaps.
if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::StencilOne && m_features.texture_barrier)
{
@ -2761,7 +2766,7 @@ void GSDeviceOGL::SendHWDraw(const GSHWDrawConfig& config, bool one_barrier, boo
}
#ifdef PCSX2_DEVBUILD
if ((one_barrier || full_barrier) && !config.ps.IsFeedbackLoop()) [[unlikely]]
if ((one_barrier || full_barrier) && !(config.ps.IsFeedbackLoopRT() || config.ps.IsFeedbackLoopDepth())) [[unlikely]]
Console.Warning("OpenGL: Possible unnecessary barrier detected.");
#endif

View File

@ -1501,10 +1501,10 @@ VkRenderPass GSDeviceVK::CreateCachedRenderPass(RenderPassCacheKey key)
VkAttachmentReference* color_reference_ptr = nullptr;
VkAttachmentReference depth_reference;
VkAttachmentReference* depth_reference_ptr = nullptr;
VkAttachmentReference input_reference;
VkAttachmentReference* input_reference_ptr = nullptr;
VkSubpassDependency subpass_dependency;
VkSubpassDependency* subpass_dependency_ptr = nullptr;
std::array<VkAttachmentReference, 2> input_reference;
u32 num_subpass_inputs = 0;
std::array<VkSubpassDependency, 2> subpass_dependency;
u32 num_subpass_dependencies = 0;
std::array<VkAttachmentDescription, 2> attachments;
u32 num_attachments = 0;
if (key.color_format != VK_FORMAT_UNDEFINED)
@ -1524,26 +1524,26 @@ VkRenderPass GSDeviceVK::CreateCachedRenderPass(RenderPassCacheKey key)
{
if (!UseFeedbackLoopLayout())
{
input_reference.attachment = num_attachments;
input_reference.layout = layout;
input_reference_ptr = &input_reference;
input_reference[num_subpass_inputs].attachment = num_attachments;
input_reference[num_subpass_inputs].layout = layout;
num_subpass_inputs++;
}
if (!m_features.framebuffer_fetch)
{
// don't need the framebuffer-local dependency when we have rasterization order attachment access
subpass_dependency.srcSubpass = 0;
subpass_dependency.dstSubpass = 0;
subpass_dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
subpass_dependency.dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
subpass_dependency.srcAccessMask =
subpass_dependency[num_subpass_dependencies].srcSubpass = 0;
subpass_dependency[num_subpass_dependencies].dstSubpass = 0;
subpass_dependency[num_subpass_dependencies].srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
subpass_dependency[num_subpass_dependencies].dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
subpass_dependency[num_subpass_dependencies].srcAccessMask =
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
subpass_dependency.dstAccessMask =
subpass_dependency[num_subpass_dependencies].dstAccessMask =
UseFeedbackLoopLayout() ? VK_ACCESS_SHADER_READ_BIT : VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
subpass_dependency.dependencyFlags =
subpass_dependency[num_subpass_dependencies].dependencyFlags =
UseFeedbackLoopLayout() ? (VK_DEPENDENCY_BY_REGION_BIT | VK_DEPENDENCY_FEEDBACK_LOOP_BIT_EXT) :
VK_DEPENDENCY_BY_REGION_BIT;
subpass_dependency_ptr = &subpass_dependency;
num_subpass_dependencies++;
}
}
@ -1562,6 +1562,35 @@ VkRenderPass GSDeviceVK::CreateCachedRenderPass(RenderPassCacheKey key)
depth_reference.attachment = num_attachments;
depth_reference.layout = layout;
depth_reference_ptr = &depth_reference;
// The depth attachment is also read by the fragment shader. This mirrors the color feedback setup
// earlier in this function, using the depth/stencil stage and access masks instead of the color ones.
if (key.depth_sampling)
{
// Without the feedback loop layout, the depth attachment is exposed as a subpass input attachment.
if (!UseFeedbackLoopLayout())
{
input_reference[num_subpass_inputs].attachment = num_attachments;
input_reference[num_subpass_inputs].layout = layout;
num_subpass_inputs++;
}
if (!m_features.framebuffer_fetch)
{
// don't need the framebuffer-local dependency when we have rasterization order attachment access
// Self-dependency (subpass 0 -> 0): depth/stencil attachment accesses in the early/late fragment
// test stages must be visible to later fragment shader reads.
subpass_dependency[num_subpass_dependencies].srcSubpass = 0;
subpass_dependency[num_subpass_dependencies].dstSubpass = 0;
subpass_dependency[num_subpass_dependencies].srcStageMask =
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
subpass_dependency[num_subpass_dependencies].dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
subpass_dependency[num_subpass_dependencies].srcAccessMask =
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
// The read is a sampled/shader read with the feedback loop layout, an input attachment read otherwise.
subpass_dependency[num_subpass_dependencies].dstAccessMask =
UseFeedbackLoopLayout() ? VK_ACCESS_SHADER_READ_BIT : VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
subpass_dependency[num_subpass_dependencies].dependencyFlags =
UseFeedbackLoopLayout() ? (VK_DEPENDENCY_BY_REGION_BIT | VK_DEPENDENCY_FEEDBACK_LOOP_BIT_EXT) :
VK_DEPENDENCY_BY_REGION_BIT;
num_subpass_dependencies++;
}
}
num_attachments++;
}
@ -1569,11 +1598,11 @@ VkRenderPass GSDeviceVK::CreateCachedRenderPass(RenderPassCacheKey key)
(key.color_feedback_loop && m_optional_extensions.vk_ext_rasterization_order_attachment_access) ?
VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_COLOR_ACCESS_BIT_EXT :
0;
const VkSubpassDescription subpass = {subpass_flags, VK_PIPELINE_BIND_POINT_GRAPHICS, input_reference_ptr ? 1u : 0u,
input_reference_ptr ? input_reference_ptr : nullptr, color_reference_ptr ? 1u : 0u,
const VkSubpassDescription subpass = {subpass_flags, VK_PIPELINE_BIND_POINT_GRAPHICS, num_subpass_inputs,
num_subpass_inputs ? input_reference.data() : nullptr, color_reference_ptr ? 1u : 0u,
color_reference_ptr ? color_reference_ptr : nullptr, nullptr, depth_reference_ptr, 0, nullptr};
const VkRenderPassCreateInfo pass_info = {VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, nullptr, 0u, num_attachments,
attachments.data(), 1u, &subpass, subpass_dependency_ptr ? 1u : 0u, subpass_dependency_ptr};
attachments.data(), 1u, &subpass, num_subpass_dependencies, num_subpass_dependencies ? subpass_dependency.data() : nullptr};
VkRenderPass pass;
const VkResult res = vkCreateRenderPass(m_device, &pass_info, nullptr, &pass);
@ -3379,12 +3408,15 @@ void GSDeviceVK::OMSetRenderTargets(
if (vkRt)
{
m_current_framebuffer =
vkRt->GetLinkedFramebuffer(vkDs, (feedback_loop & FeedbackLoopFlag_ReadAndWriteRT) != 0);
vkRt->GetLinkedFramebuffer(vkDs,
(feedback_loop & FeedbackLoopFlag_ReadAndWriteRT) != 0,
(feedback_loop & FeedbackLoopFlag_ReadAndWriteDepth) != 0);
}
else
{
pxAssert(!(feedback_loop & FeedbackLoopFlag_ReadAndWriteRT));
m_current_framebuffer = vkDs->GetLinkedFramebuffer(nullptr, false);
m_current_framebuffer = vkDs->GetLinkedFramebuffer(
nullptr, false, (feedback_loop & FeedbackLoopFlag_ReadAndWriteDepth) != 0);
}
}
else if (InRenderPass())
@ -3494,7 +3526,21 @@ void GSDeviceVK::OMSetRenderTargets(
if (vkDs)
{
// need to update descriptors to reflect the new layout
if (feedback_loop & FeedbackLoopFlag_ReadDS)
if (feedback_loop & FeedbackLoopFlag_ReadAndWriteDepth)
{
// NVIDIA drivers appear to return random garbage when sampling the RT via a feedback loop, if the load op for
// the render pass is CLEAR. Using vkCmdClearAttachments() doesn't work, so we have to clear the image instead.
// Note: DS feedback loop was added later - we will assume that the same issue is relevant.
if (vkDs->GetState() == GSTexture::State::Cleared && IsDeviceNVIDIA())
vkDs->CommitClear();
if (vkDs->GetLayout() != GSTextureVK::Layout::FeedbackLoop)
{
m_dirty_flags |= (DIRTY_FLAG_TFX_TEXTURE_0 << TFX_TEXTURE_DEPTH);
vkDs->TransitionToLayout(GSTextureVK::Layout::FeedbackLoop);
}
}
else if (feedback_loop & FeedbackLoopFlag_ReadDepth)
{
if (vkDs->GetLayout() != GSTextureVK::Layout::FeedbackLoop)
{
@ -3743,9 +3789,13 @@ bool GSDeviceVK::CreatePipelineLayouts()
dslb.AddBinding(TFX_TEXTURE_PALETTE, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
dslb.AddBinding(TFX_TEXTURE_RT,
(m_features.texture_barrier && !UseFeedbackLoopLayout()) ? VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT :
VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
1, VK_SHADER_STAGE_FRAGMENT_BIT);
dslb.AddBinding(TFX_TEXTURE_PRIMID, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
dslb.AddBinding(TFX_TEXTURE_DEPTH,
(m_features.texture_barrier && !UseFeedbackLoopLayout()) ? VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT :
VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
1, VK_SHADER_STAGE_FRAGMENT_BIT);
if ((m_tfx_texture_ds_layout = dslb.Create(dev)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(dev, m_tfx_texture_ds_layout, "TFX texture descriptor layout");
@ -4744,6 +4794,9 @@ VkShaderModule GSDeviceVK::GetTFXFragmentShader(const GSHWDrawConfig::PSSelector
AddMacro(ss, "PS_TEX_IS_FB", sel.tex_is_fb);
AddMacro(ss, "PS_NO_COLOR", sel.no_color);
AddMacro(ss, "PS_NO_COLOR1", sel.no_color1);
AddMacro(ss, "PS_ZTST", sel.ztst);
AddMacro(ss, "PS_COLOR_FEEDBACK", sel.color_feedback);
AddMacro(ss, "PS_DEPTH_FEEDBACK", sel.depth_feedback);
ss << m_tfx_source;
VkShaderModule mod = g_vulkan_shader_cache->GetFragmentShader(ss.str());
@ -5341,11 +5394,15 @@ bool GSDeviceVK::ApplyTFXState(bool already_execed)
m_current_pipeline_layout = PipelineLayout::TFX;
flags |= DIRTY_FLAG_TFX_UBO | DIRTY_FLAG_TFX_TEXTURES;
// Clear out the RT binding if feedback loop isn't on, because it'll be in the wrong state and make
// Clear out the RT/DS binding if feedback loop isn't on, because it'll be in the wrong state and make
// the validation layer cranky. Not a big deal since we need to write it anyway.
const GSTextureVK::Layout rt_tex_layout = m_tfx_textures[TFX_TEXTURE_RT]->GetLayout();
if (rt_tex_layout != GSTextureVK::Layout::FeedbackLoop && rt_tex_layout != GSTextureVK::Layout::ShaderReadOnly)
m_tfx_textures[TFX_TEXTURE_RT] = m_null_texture.get();
std::array<TFX_TEXTURES, 2> texture_types = { TFX_TEXTURE_RT, TFX_TEXTURE_DEPTH };
for (u32 texture_type : texture_types)
{
const GSTextureVK::Layout tex_layout = m_tfx_textures[texture_type]->GetLayout();
if (tex_layout != GSTextureVK::Layout::FeedbackLoop && tex_layout != GSTextureVK::Layout::ShaderReadOnly)
m_tfx_textures[texture_type] = m_null_texture.get();
}
}
if (flags & DIRTY_FLAG_TFX_UBO)
@ -5386,6 +5443,19 @@ bool GSDeviceVK::ApplyTFXState(bool already_execed)
dsub.AddImageDescriptorWrite(VK_NULL_HANDLE, TFX_TEXTURE_PRIMID,
m_tfx_textures[TFX_TEXTURE_PRIMID]->GetView(), m_tfx_textures[TFX_TEXTURE_PRIMID]->GetVkLayout());
}
// Write the descriptor for the depth feedback texture slot when it is dirty.
if (flags & DIRTY_FLAG_TFX_TEXTURE_DEPTH)
{
// Same condition that selects the descriptor type for TFX_TEXTURE_DEPTH in CreatePipelineLayouts():
// texture barriers without the feedback loop layout use an input attachment in GENERAL layout.
if (m_features.texture_barrier && !UseFeedbackLoopLayout())
{
dsub.AddInputAttachmentDescriptorWrite(
VK_NULL_HANDLE, TFX_TEXTURE_DEPTH, m_tfx_textures[TFX_TEXTURE_DEPTH]->GetView(), VK_IMAGE_LAYOUT_GENERAL);
}
else
{
// Otherwise it is written as a regular image descriptor using the texture's current Vulkan layout.
dsub.AddImageDescriptorWrite(VK_NULL_HANDLE, TFX_TEXTURE_DEPTH, m_tfx_textures[TFX_TEXTURE_DEPTH]->GetView(),
m_tfx_textures[TFX_TEXTURE_DEPTH]->GetVkLayout());
}
}
dsub.PushUpdate(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_tfx_pipeline_layout, TFX_DESCRIPTOR_SET_TEXTURES);
}
@ -5545,7 +5615,6 @@ GSTextureVK* GSDeviceVK::SetupPrimitiveTrackingDATE(GSHWDrawConfig& config)
void GSDeviceVK::RenderHW(GSHWDrawConfig& config)
{
const GSVector2i rtsize(config.rt ? config.rt->GetSize() : config.ds->GetSize());
GSTextureVK* draw_rt = static_cast<GSTextureVK*>(config.rt);
GSTextureVK* draw_ds = static_cast<GSTextureVK*>(config.ds);
@ -5597,8 +5666,12 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config)
UpdateHWPipelineSelector(config, pipe);
// If we don't have a barrier but the texture was drawn to last draw, end the pass to insert a barrier.
if (InRenderPass() && !pipe.IsRTFeedbackLoop() && (config.tex == m_current_render_target || config.tex == m_current_depth_target))
EndRenderPass();
// End the current render pass when the sampled texture is the bound color target (and the pipeline is
// not an RT feedback loop) or the bound depth target (and the pipeline is not a depth feedback loop).
if (InRenderPass() &&
	((!pipe.IsRTFeedbackLoop() && config.tex == m_current_render_target) ||
		(!pipe.IsDepthFeedbackLoop() && config.tex == m_current_depth_target)))
{
	EndRenderPass();
}
// now blit the colclip texture back to the original target
if (colclip_rt)
@ -5781,20 +5854,31 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config)
// Despite the layout changing enforcing the execution dependency between previous draws and the first
// input attachment read, it still wants the region/fragment-local barrier...
const bool skip_first_barrier =
(draw_rt && draw_rt->GetLayout() != GSTextureVK::Layout::FeedbackLoop && !pipe.ps.colclip_hw && !IsDeviceAMD());
bool skip_first_barrier = !pipe.ps.colclip_hw && !IsDeviceAMD();
if (draw_rt)
skip_first_barrier = skip_first_barrier && draw_rt->GetLayout() != GSTextureVK::Layout::FeedbackLoop;
if (draw_ds)
skip_first_barrier = skip_first_barrier && draw_ds->GetLayout() != GSTextureVK::Layout::FeedbackLoop;
OMSetRenderTargets(draw_rt, draw_ds, config.scissor, static_cast<FeedbackLoopFlag>(pipe.feedback_loop_flags));
if (pipe.IsRTFeedbackLoop())
{
pxAssertMsg(m_features.texture_barrier, "Texture barriers enabled");
PSSetShaderResource(2, draw_rt, false);
PSSetShaderResource(TFX_TEXTURE_RT, draw_rt, false);
// If this is the first draw to the target as a feedback loop, make sure we re-generate the texture descriptor.
// Otherwise, we might have a previous descriptor left over, that has the RT in a different state.
m_dirty_flags |= (skip_first_barrier ? static_cast<u32>(DIRTY_FLAG_TFX_TEXTURE_RT) : 0);
}
if (pipe.IsDepthFeedbackLoop())
{
pxAssertMsg(m_features.texture_barrier, "Texture barriers enabled");
// Bind the depth target itself as the depth feedback texture.
PSSetShaderResource(TFX_TEXTURE_DEPTH, draw_ds, false);
// If this is the first draw to the depth target as a feedback loop, make sure we re-generate the texture descriptor.
// Otherwise, we might have a previous descriptor left over, that has the DS in a different state.
m_dirty_flags |= (skip_first_barrier ? static_cast<u32>(DIRTY_FLAG_TFX_TEXTURE_DEPTH) : 0);
}
// Begin render pass if new target or out of the area.
if (!InRenderPass())
{
@ -5868,7 +5952,8 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config)
// now we can do the actual draw
if (BindDrawPipeline(pipe))
SendHWDraw(config, draw_rt, config.require_one_barrier, config.require_full_barrier, skip_first_barrier);
SendHWDraw(config, pipe.IsRTFeedbackLoop() ? draw_rt : nullptr, pipe.IsDepthFeedbackLoop() ? draw_ds : nullptr,
config.require_one_barrier, config.require_full_barrier, skip_first_barrier);
// blend second pass
if (config.blend_multi_pass.enable)
@ -5903,8 +5988,8 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config)
pipe.bs = config.blend;
if (BindDrawPipeline(pipe))
{
SendHWDraw(config, draw_rt, config.alpha_second_pass.require_one_barrier,
config.alpha_second_pass.require_full_barrier, false);
SendHWDraw(config, pipe.IsRTFeedbackLoop() ? draw_rt : nullptr, pipe.IsDepthFeedbackLoop() ? draw_ds : nullptr,
config.alpha_second_pass.require_one_barrier, config.alpha_second_pass.require_full_barrier, false);
}
}
@ -5981,19 +6066,25 @@ void GSDeviceVK::UpdateHWPipelineSelector(GSHWDrawConfig& config, PipelineSelect
pipe.rt = config.rt != nullptr;
pipe.ds = config.ds != nullptr;
pipe.line_width = config.line_expand;
pipe.feedback_loop_flags =
(m_features.texture_barrier &&
(config.ps.IsFeedbackLoop() || config.require_one_barrier || config.require_full_barrier)) ?
FeedbackLoopFlag_ReadAndWriteRT :
FeedbackLoopFlag_None;
pipe.feedback_loop_flags |=
(config.tex && config.tex == config.ds) ? FeedbackLoopFlag_ReadDS : FeedbackLoopFlag_None;
pipe.feedback_loop_flags = FeedbackLoopFlag_None;
if (m_features.texture_barrier && (config.require_one_barrier || config.require_full_barrier))
{
if (config.ps.IsFeedbackLoopRT())
pipe.feedback_loop_flags |= FeedbackLoopFlag_ReadAndWriteRT;
if (config.ps.IsFeedbackLoopDepth())
pipe.feedback_loop_flags |= FeedbackLoopFlag_ReadAndWriteDepth;
}
if (!(pipe.feedback_loop_flags & FeedbackLoopFlag_ReadAndWriteDepth))
{
pipe.feedback_loop_flags |= (config.tex && config.tex == config.ds) ? FeedbackLoopFlag_ReadDepth : FeedbackLoopFlag_None;
}
// enable point size in the vertex shader if we're rendering points regardless of upscaling.
pipe.vs.point_size |= (config.topology == GSHWDrawConfig::Topology::Point);
}
void GSDeviceVK::UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config)
void GSDeviceVK::UploadHWDrawVerticesAndIndices(GSHWDrawConfig& config)
{
IASetVertexBuffer(config.verts, sizeof(GSVertex), config.nverts, GetVertexAlignment(config.vs.expand));
m_vertex.start *= GetExpansionFactor(config.vs.expand);
@ -6010,7 +6101,7 @@ void GSDeviceVK::UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config)
}
}
VkImageMemoryBarrier GSDeviceVK::GetColorBufferBarrier(GSTextureVK* rt) const
VkImageMemoryBarrier GSDeviceVK::GetColorBufferFeedbackBarrier(GSTextureVK* rt) const
{
const VkImageLayout layout =
UseFeedbackLoopLayout() ? VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT : VK_IMAGE_LAYOUT_GENERAL;
@ -6021,13 +6112,25 @@ VkImageMemoryBarrier GSDeviceVK::GetColorBufferBarrier(GSTextureVK* rt) const
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, rt->GetImage(), {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u}};
}
VkDependencyFlags GSDeviceVK::GetColorBufferBarrierFlags() const
/// Builds the image memory barrier used between draws when the depth buffer is
/// both written as an attachment and read back by the fragment shader.
///
/// The barrier performs no layout transition (old and new layout are the same):
/// the feedback-loop layout is used when UseFeedbackLoopLayout() is true,
/// otherwise VK_IMAGE_LAYOUT_GENERAL. Source access is depth/stencil attachment
/// read+write; destination access is a shader read with the feedback-loop layout
/// and an input attachment read without it. Only mip level 0 / array layer 0 of
/// `ds` is covered, and no queue family ownership transfer takes place.
///
/// NOTE(review): the aspect mask always names both depth and stencil; confirm the
/// image format of `ds` always has a stencil aspect, since the format is not
/// visible from here.
VkImageMemoryBarrier GSDeviceVK::GetDepthStencilBufferFeedbackBarrier(GSTextureVK* ds) const
{
	const bool use_feedback_layout = UseFeedbackLoopLayout();

	VkImageMemoryBarrier barrier = {};
	barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
	barrier.pNext = nullptr;

	// Attachment accesses from previous draws must be visible to the shader-side read.
	barrier.srcAccessMask =
		VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
	barrier.dstAccessMask = use_feedback_layout ? VK_ACCESS_SHADER_READ_BIT : VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;

	// Same layout on both sides: this is a pure memory dependency.
	barrier.oldLayout =
		use_feedback_layout ? VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT : VK_IMAGE_LAYOUT_GENERAL;
	barrier.newLayout = barrier.oldLayout;

	barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	barrier.image = ds->GetImage();

	barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
	barrier.subresourceRange.baseMipLevel = 0u;
	barrier.subresourceRange.levelCount = 1u;
	barrier.subresourceRange.baseArrayLayer = 0u;
	barrier.subresourceRange.layerCount = 1u;
	return barrier;
}
/// Returns the dependency flags to pass with feedback barriers: always
/// by-region, plus the feedback-loop dependency bit when
/// UseFeedbackLoopLayout() is true.
VkDependencyFlags GSDeviceVK::GetFeedbackBarrierDependencyFlags() const
{
	VkDependencyFlags flags = VK_DEPENDENCY_BY_REGION_BIT;
	if (UseFeedbackLoopLayout())
		flags |= VK_DEPENDENCY_FEEDBACK_LOOP_BIT_EXT;
	return flags;
}
void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt,
void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, GSTextureVK* draw_ds,
bool one_barrier, bool full_barrier, bool skip_first_barrier)
{
if (!m_features.texture_barrier) [[unlikely]]
@ -6037,21 +6140,52 @@ void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt,
}
#ifdef PCSX2_DEVBUILD
if ((one_barrier || full_barrier) && !m_pipeline_selector.ps.IsFeedbackLoop()) [[unlikely]]
if ((one_barrier || full_barrier) && !(m_pipeline_selector.ps.IsFeedbackLoopRT() || m_pipeline_selector.ps.IsFeedbackLoopDepth())) [[unlikely]]
Console.Warning("VK: Possible unnecessary barrier detected.");
#endif
const VkDependencyFlags barrier_flags = GetColorBufferBarrierFlags();
VkDependencyFlags barrier_flags = GetFeedbackBarrierDependencyFlags();
std::array<VkImageMemoryBarrier, 2> barriers;
u32 n_barriers = 0;
if (full_barrier || one_barrier)
{
if (draw_rt)
{
barriers[0] = GetColorBufferFeedbackBarrier(draw_rt);
n_barriers++;
}
if (draw_ds)
{
barriers[1] = GetDepthStencilBufferFeedbackBarrier(draw_ds);
n_barriers++;
}
}
const auto IssueBarriers = [&]() {
if (draw_rt)
{
vkCmdPipelineBarrier(GetCurrentCommandBuffer(),
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, barrier_flags, 0, nullptr, 0, nullptr, 1, &barriers[0]);
}
if (draw_ds)
{
vkCmdPipelineBarrier(GetCurrentCommandBuffer(),
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, barrier_flags, 0, nullptr, 0, nullptr, 1, &barriers[1]);
}
};
if (full_barrier)
{
pxAssert(config.drawlist && !config.drawlist->empty());
const VkImageMemoryBarrier barrier = GetColorBufferBarrier(draw_rt);
const u32 indices_per_prim = config.indices_per_prim;
const u32 draw_list_size = static_cast<u32>(config.drawlist->size());
GL_PUSH("Split the draw");
g_perfmon.Put(
GSPerfMon::Barriers, static_cast<u32>(draw_list_size) - static_cast<u32>(skip_first_barrier));
g_perfmon.Put(GSPerfMon::Barriers,
n_barriers * (draw_list_size - static_cast<u32>(skip_first_barrier)));
u32 p = 0;
u32 n = 0;
@ -6066,8 +6200,7 @@ void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt,
for (; n < draw_list_size; n++)
{
vkCmdPipelineBarrier(GetCurrentCommandBuffer(), VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, barrier_flags, 0, nullptr, 0, nullptr, 1, &barrier);
IssueBarriers();
const u32 count = (*config.drawlist)[n] * indices_per_prim;
DrawIndexedPrimitive(p, count);
@ -6079,11 +6212,8 @@ void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt,
if (one_barrier && !skip_first_barrier)
{
g_perfmon.Put(GSPerfMon::Barriers, 1);
const VkImageMemoryBarrier barrier = GetColorBufferBarrier(draw_rt);
vkCmdPipelineBarrier(GetCurrentCommandBuffer(), VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, barrier_flags, 0, nullptr, 0, nullptr, 1, &barrier);
g_perfmon.Put(GSPerfMon::Barriers, n_barriers);
IssueBarriers();
}
DrawIndexedPrimitive();

View File

@ -293,7 +293,8 @@ public:
{
FeedbackLoopFlag_None = 0,
FeedbackLoopFlag_ReadAndWriteRT = 1,
FeedbackLoopFlag_ReadDS = 2,
FeedbackLoopFlag_ReadDepth = 2,
FeedbackLoopFlag_ReadAndWriteDepth = 4,
};
struct alignas(8) PipelineSelector
@ -308,7 +309,7 @@ public:
u32 rt : 1;
u32 ds : 1;
u32 line_width : 1;
u32 feedback_loop_flags : 2;
u32 feedback_loop_flags : 3;
};
u32 key;
@ -326,7 +327,8 @@ public:
__fi PipelineSelector() { std::memset(this, 0, sizeof(*this)); }
__fi bool IsRTFeedbackLoop() const { return ((feedback_loop_flags & FeedbackLoopFlag_ReadAndWriteRT) != 0); }
__fi bool IsTestingAndSamplingDepth() const { return ((feedback_loop_flags & FeedbackLoopFlag_ReadDS) != 0); }
__fi bool IsDepthFeedbackLoop() const { return ((feedback_loop_flags & FeedbackLoopFlag_ReadAndWriteDepth) != 0); }
__fi bool IsTestingAndSamplingDepth() const { return ((feedback_loop_flags & (FeedbackLoopFlag_ReadDepth | FeedbackLoopFlag_ReadAndWriteDepth)) != 0); }
};
static_assert(sizeof(PipelineSelector) == 24, "Pipeline selector is 24 bytes");
@ -357,10 +359,11 @@ public:
};
// Identifiers for the textures used by the TFX (hardware draw) path, ending with a
// count sentinel.
// NOTE(review): the order presumably has to match the shader-side texture bindings
// and the per-texture DIRTY_FLAG_TFX_TEXTURE_* bits (which are shifted by these
// values) - confirm before reordering or inserting entries.
// NOTE(review): this listing appears to show both the pre-change and post-change
// form of the first enumerator; only one of them can exist in the real header.
enum TFX_TEXTURES : u32
{
TFX_TEXTURE_TEXTURE,
TFX_TEXTURE_TEXTURE = 0,
TFX_TEXTURE_PALETTE,
TFX_TEXTURE_RT,
TFX_TEXTURE_PRIMID,
// Added alongside depth feedback support.
TFX_TEXTURE_DEPTH,
// Number of entries above; not a texture itself.
NUM_TFX_TEXTURES
};
@ -568,10 +571,11 @@ public:
void RenderHW(GSHWDrawConfig& config) override;
void UpdateHWPipelineSelector(GSHWDrawConfig& config, PipelineSelector& pipe);
void UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config);
VkImageMemoryBarrier GetColorBufferBarrier(GSTextureVK* rt) const;
VkDependencyFlags GetColorBufferBarrierFlags() const;
void SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt,
void UploadHWDrawVerticesAndIndices(GSHWDrawConfig& config);
VkImageMemoryBarrier GetColorBufferFeedbackBarrier(GSTextureVK* rt) const;
VkImageMemoryBarrier GetDepthStencilBufferFeedbackBarrier(GSTextureVK* ds) const;
VkDependencyFlags GetFeedbackBarrierDependencyFlags() const;
void SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, GSTextureVK* draw_ds,
bool one_barrier, bool full_barrier, bool skip_first_barrier);
//////////////////////////////////////////////////////////////////////////
@ -621,25 +625,27 @@ public:
private:
enum DIRTY_FLAG : u32
{
DIRTY_FLAG_TFX_TEXTURE_0 = (1 << 0), // 0, 1, 2, 3
DIRTY_FLAG_TFX_UBO = (1 << 4),
DIRTY_FLAG_UTILITY_TEXTURE = (1 << 5),
DIRTY_FLAG_BLEND_CONSTANTS = (1 << 6),
DIRTY_FLAG_LINE_WIDTH = (1 << 7),
DIRTY_FLAG_INDEX_BUFFER = (1 << 8),
DIRTY_FLAG_VIEWPORT = (1 << 9),
DIRTY_FLAG_SCISSOR = (1 << 10),
DIRTY_FLAG_PIPELINE = (1 << 11),
DIRTY_FLAG_VS_CONSTANT_BUFFER = (1 << 12),
DIRTY_FLAG_PS_CONSTANT_BUFFER = (1 << 13),
DIRTY_FLAG_TFX_TEXTURE_0 = (1 << 0), // 0, 1, 2, 3, 4
DIRTY_FLAG_TFX_UBO = (1 << 5),
DIRTY_FLAG_UTILITY_TEXTURE = (1 << 6),
DIRTY_FLAG_BLEND_CONSTANTS = (1 << 7),
DIRTY_FLAG_LINE_WIDTH = (1 << 8),
DIRTY_FLAG_INDEX_BUFFER = (1 << 9),
DIRTY_FLAG_VIEWPORT = (1 << 10),
DIRTY_FLAG_SCISSOR = (1 << 11),
DIRTY_FLAG_PIPELINE = (1 << 12),
DIRTY_FLAG_VS_CONSTANT_BUFFER = (1 << 13),
DIRTY_FLAG_PS_CONSTANT_BUFFER = (1 << 14),
DIRTY_FLAG_TFX_TEXTURE_TEX = (DIRTY_FLAG_TFX_TEXTURE_0 << 0),
DIRTY_FLAG_TFX_TEXTURE_PALETTE = (DIRTY_FLAG_TFX_TEXTURE_0 << 1),
DIRTY_FLAG_TFX_TEXTURE_RT = (DIRTY_FLAG_TFX_TEXTURE_0 << 2),
DIRTY_FLAG_TFX_TEXTURE_PRIMID = (DIRTY_FLAG_TFX_TEXTURE_0 << 3),
DIRTY_FLAG_TFX_TEXTURE_DEPTH = (DIRTY_FLAG_TFX_TEXTURE_0 << 4),
DIRTY_FLAG_TFX_TEXTURES = DIRTY_FLAG_TFX_TEXTURE_TEX | DIRTY_FLAG_TFX_TEXTURE_PALETTE |
DIRTY_FLAG_TFX_TEXTURE_RT | DIRTY_FLAG_TFX_TEXTURE_PRIMID,
DIRTY_FLAG_TFX_TEXTURE_RT | DIRTY_FLAG_TFX_TEXTURE_PRIMID |
DIRTY_FLAG_TFX_TEXTURE_DEPTH,
DIRTY_BASE_STATE = DIRTY_FLAG_INDEX_BUFFER | DIRTY_FLAG_PIPELINE | DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR |
DIRTY_FLAG_BLEND_CONSTANTS | DIRTY_FLAG_LINE_WIDTH,

View File

@ -114,7 +114,7 @@ std::unique_ptr<GSTextureVK> GSTextureVK::Create(Type type, Format format, int w
VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT |
(GSDeviceVK::GetInstance()->UseFeedbackLoopLayout() ? VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT
: 0);
: VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT);
vci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
}
break;
@ -198,7 +198,7 @@ void GSTextureVK::Destroy(bool defer)
if (m_type == Type::RenderTarget || m_type == Type::DepthStencil)
{
for (const auto& [other_tex, fb, feedback] : m_framebuffers)
for (const auto& [other_tex, fb, feedback_color, feedback_depth] : m_framebuffers)
{
if (other_tex)
{
@ -738,16 +738,16 @@ void GSTextureVK::TransitionSubresourcesToLayout(
// Returns the framebuffer for this texture on its own, by delegating to
// GetLinkedFramebuffer() with no linked depth texture (nullptr).
// `feedback_loop` is forwarded as the colour feedback-loop flag; in the
// three-argument form the depth feedback-loop flag is always false.
// NOTE(review): this listing appears to show both the pre-change (two-argument)
// and post-change (three-argument) call; only one can exist in the real source.
VkFramebuffer GSTextureVK::GetFramebuffer(bool feedback_loop)
{
return GetLinkedFramebuffer(nullptr, feedback_loop);
return GetLinkedFramebuffer(nullptr, feedback_loop, false);
}
VkFramebuffer GSTextureVK::GetLinkedFramebuffer(GSTextureVK* depth_texture, bool feedback_loop)
VkFramebuffer GSTextureVK::GetLinkedFramebuffer(GSTextureVK* depth_texture, bool feedback_loop_color, bool feedback_loop_depth)
{
pxAssertRel(m_type != Type::Texture, "Texture is a render target");
for (const auto& [other_tex, fb, other_feedback_loop] : m_framebuffers)
for (const auto& [other_tex, fb, other_feedback_loop_color, other_feedback_loop_depth] : m_framebuffers)
{
if (other_tex == depth_texture && other_feedback_loop == feedback_loop)
if (other_tex == depth_texture && other_feedback_loop_color == feedback_loop_color && other_feedback_loop_depth == feedback_loop_depth)
return fb;
}
@ -756,7 +756,7 @@ VkFramebuffer GSTextureVK::GetLinkedFramebuffer(GSTextureVK* depth_texture, bool
(m_type != GSTexture::Type::DepthStencil) ? (depth_texture ? depth_texture->m_vk_format : VK_FORMAT_UNDEFINED) :
m_vk_format,
VK_ATTACHMENT_LOAD_OP_LOAD, VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_LOAD,
VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_STORE_OP_DONT_CARE, feedback_loop);
VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_STORE_OP_DONT_CARE, feedback_loop_color, feedback_loop_depth);
if (!rp)
return VK_NULL_HANDLE;
@ -771,9 +771,9 @@ VkFramebuffer GSTextureVK::GetLinkedFramebuffer(GSTextureVK* depth_texture, bool
if (!fb)
return VK_NULL_HANDLE;
m_framebuffers.emplace_back(depth_texture, fb, feedback_loop);
m_framebuffers.emplace_back(depth_texture, fb, feedback_loop_color, feedback_loop_depth);
if (depth_texture)
depth_texture->m_framebuffers.emplace_back(this, fb, feedback_loop);
depth_texture->m_framebuffers.emplace_back(this, fb, feedback_loop_color, feedback_loop_depth);
return fb;
}

View File

@ -73,7 +73,7 @@ public:
/// Framebuffers are lazily allocated.
VkFramebuffer GetFramebuffer(bool feedback_loop);
VkFramebuffer GetLinkedFramebuffer(GSTextureVK* depth_texture, bool feedback_loop);
VkFramebuffer GetLinkedFramebuffer(GSTextureVK* depth_texture, bool feedback_loop_color, bool feedback_loop_depth);
// Call when the texture is bound to the pipeline, or read from in a copy.
__fi void SetUseFenceCounter(u64 counter) { m_use_fence_counter = counter; }
@ -103,7 +103,7 @@ private:
// linked framebuffer is combined with depth texture
// list of color textures this depth texture is linked to or vice versa
std::vector<std::tuple<GSTextureVK*, VkFramebuffer, bool>> m_framebuffers;
std::vector<std::tuple<GSTextureVK*, VkFramebuffer, bool, bool>> m_framebuffers;
};
class GSDownloadTextureVK final : public GSDownloadTexture

View File

@ -751,6 +751,7 @@ Pcsx2Config::GSOptions::GSOptions()
PreloadFrameWithGSData = false;
Mipmap = true;
HWMipmap = true;
HWAFAILFeedback = false;
ManualUserHacks = false;
UserHacks_AlignSpriteX = false;
@ -1021,6 +1022,7 @@ void Pcsx2Config::GSOptions::LoadSave(SettingsWrapper& wrap)
SettingsWrapEntryEx(UpscaleMultiplier, "upscale_multiplier");
SettingsWrapBitBoolEx(HWMipmap, "hw_mipmap");
SettingsWrapBitBoolEx(HWAFAILFeedback, "HWAFAILFeedback");
SettingsWrapIntEnumEx(AccurateBlendingUnit, "accurate_blending_unit");
SettingsWrapIntEnumEx(TextureFiltering, "filter");
SettingsWrapIntEnumEx(TexturePreloading, "texture_preloading");