mirror of
https://github.com/PCSX2/pcsx2.git
synced 2025-12-16 04:08:48 +00:00
GS/VK/GL/DX12/DX11: Depth feedback loops and accurate AFAIL.
This commit is contained in:
parent
cd120c3cfd
commit
f0705bf13a
@ -21,6 +21,18 @@
|
||||
#define GS_FORWARD_PRIMID 0
|
||||
#endif
|
||||
|
||||
#ifndef ZTST_GEQUAL
|
||||
#define ZTST_GEQUAL 2
|
||||
#define ZTST_GREATER 3
|
||||
#endif
|
||||
|
||||
#ifndef AFAIL_KEEP
|
||||
#define AFAIL_KEEP 0
|
||||
#define AFAIL_FB_ONLY 1
|
||||
#define AFAIL_ZB_ONLY 2
|
||||
#define AFAIL_RGB_ONLY 3
|
||||
#endif
|
||||
|
||||
#ifndef PS_FST
|
||||
#define PS_IIP 0
|
||||
#define PS_FST 0
|
||||
@ -78,12 +90,16 @@
|
||||
#define PS_NO_COLOR 0
|
||||
#define PS_NO_COLOR1 0
|
||||
#define PS_DATE 0
|
||||
#define PS_TEX_IS_FB 0
|
||||
#define PS_COLOR_FEEDBACK 0
|
||||
#define PS_DEPTH_FEEDBACK 0
|
||||
#endif
|
||||
|
||||
#define SW_BLEND (PS_BLEND_A || PS_BLEND_B || PS_BLEND_D)
|
||||
#define SW_BLEND_NEEDS_RT (SW_BLEND && (PS_BLEND_A == 1 || PS_BLEND_B == 1 || PS_BLEND_C == 1 || PS_BLEND_D == 1))
|
||||
#define SW_AD_TO_HW (PS_BLEND_C == 1 && PS_A_MASKED)
|
||||
#define NEEDS_RT_FOR_AFAIL (PS_AFAIL == 3 && PS_NO_COLOR1)
|
||||
#define AFAIL_NEEDS_RT (PS_AFAIL == AFAIL_ZB_ONLY || (PS_AFAIL == AFAIL_RGB_ONLY && PS_NO_COLOR1))
|
||||
#define AFAIL_NEEDS_DEPTH (PS_AFAIL == AFAIL_FB_ONLY || PS_AFAIL == AFAIL_RGB_ONLY)
|
||||
|
||||
struct VS_INPUT
|
||||
{
|
||||
@ -138,7 +154,7 @@ struct PS_OUTPUT
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#if PS_ZCLAMP
|
||||
#if PS_ZCLAMP || (PS_DEPTH_FEEDBACK && AFAIL_NEEDS_DEPTH)
|
||||
float depth : SV_Depth;
|
||||
#endif
|
||||
};
|
||||
@ -147,6 +163,7 @@ Texture2D<float4> Texture : register(t0);
|
||||
Texture2D<float4> Palette : register(t1);
|
||||
Texture2D<float4> RtTexture : register(t2);
|
||||
Texture2D<float> PrimMinTexture : register(t3);
|
||||
Texture2D<float> DepthTexture : register(t4);
|
||||
SamplerState TextureSampler : register(s0);
|
||||
|
||||
#ifdef DX12
|
||||
@ -1017,10 +1034,27 @@ void ps_blend(inout float4 Color, inout float4 As_rgba, float2 pos_xy)
|
||||
|
||||
PS_OUTPUT ps_main(PS_INPUT input)
|
||||
{
|
||||
|
||||
#if PS_DEPTH_FEEDBACK && (PS_ZTST == ZTST_GEQUAL || PS_ZTST == ZTST_GREATER)
|
||||
#if PS_ZTST == ZTST_GEQUAL
|
||||
if (input.p.z < DepthTexture.Load(int3(input.p.xy, 0)).r)
|
||||
discard;
|
||||
#elif PS_ZTST == ZTST_GREATER
|
||||
if (input.p.z <= DepthTexture.Load(int3(input.p.xy, 0)).r)
|
||||
discard;
|
||||
#endif
|
||||
#endif // PS_ZTST
|
||||
|
||||
float4 C = ps_color(input);
|
||||
|
||||
#if PS_FIXED_ONE_A
|
||||
// AA (Fixed one) will output a coverage of 1.0 as alpha
|
||||
C.a = 128.0f;
|
||||
#endif
|
||||
|
||||
bool atst_pass = atst(C);
|
||||
|
||||
#if PS_AFAIL == 0 // KEEP or ATST off
|
||||
#if PS_AFAIL == AFAIL_KEEP
|
||||
if (!atst_pass)
|
||||
discard;
|
||||
#endif
|
||||
@ -1034,14 +1068,6 @@ PS_OUTPUT ps_main(PS_INPUT input)
|
||||
discard;
|
||||
}
|
||||
|
||||
// Must be done before alpha correction
|
||||
|
||||
// AA (Fixed one) will output a coverage of 1.0 as alpha
|
||||
if (PS_FIXED_ONE_A)
|
||||
{
|
||||
C.a = 128.0f;
|
||||
}
|
||||
|
||||
float4 alpha_blend = (float4)0.0f;
|
||||
if (SW_AD_TO_HW)
|
||||
{
|
||||
@ -1186,7 +1212,7 @@ PS_OUTPUT ps_main(PS_INPUT input)
|
||||
|
||||
ps_fbmask(C, input.p.xy);
|
||||
|
||||
#if PS_AFAIL == 3 && !PS_NO_COLOR1 // RGB_ONLY
|
||||
#if (PS_AFAIL == AFAIL_RGB_ONLY) && !PS_NO_COLOR1
|
||||
// Use alpha blend factor to determine whether to update A.
|
||||
alpha_blend.a = float(atst_pass);
|
||||
#endif
|
||||
@ -1197,11 +1223,23 @@ PS_OUTPUT ps_main(PS_INPUT input)
|
||||
#if !PS_NO_COLOR1
|
||||
output.c1 = alpha_blend;
|
||||
#endif
|
||||
#if PS_AFAIL == 3 && PS_NO_COLOR1 // RGB_ONLY, no dual src blend
|
||||
|
||||
// Alpha test with feedback
|
||||
#if (PS_AFAIL == AFAIL_FB_ONLY) && PS_DEPTH_FEEDBACK
|
||||
if (!atst_pass)
|
||||
input.p.z = DepthTexture.Load(int3(input.p.xy, 0)).r;
|
||||
#elif (PS_AFAIL == AFAIL_ZB_ONLY) && PS_COLOR_FEEDBACK
|
||||
if (!atst_pass)
|
||||
output.c0 = RtTexture.Load(int3(input.p.xy, 0));
|
||||
#elif (PS_AFAIL == AFAIL_RGB_ONLY)
|
||||
if (!atst_pass)
|
||||
{
|
||||
float RTa = NEEDS_RT_FOR_AFAIL ? RtTexture.Load(int3(input.p.xy, 0)).a : 0.0f;
|
||||
output.c0.a = RTa;
|
||||
#if PS_COLOR_FEEDBACK && PS_NO_COLOR1 // No dual src blend
|
||||
output.c0.a = RtTexture.Load(int3(input.p.xy, 0)).a;
|
||||
#endif
|
||||
#if PS_DEPTH_FEEDBACK
|
||||
input.p.z = DepthTexture.Load(int3(input.p.xy, 0)).r;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -1211,6 +1249,8 @@ PS_OUTPUT ps_main(PS_INPUT input)
|
||||
|
||||
#if PS_ZCLAMP
|
||||
output.depth = min(input.p.z, MaxDepthPS);
|
||||
#elif PS_DEPTH_FEEDBACK && AFAIL_NEEDS_DEPTH
|
||||
output.depth = input.p.z; // Output depth value for ATST pass/fail
|
||||
#endif
|
||||
|
||||
return output;
|
||||
|
||||
@ -11,6 +11,18 @@
|
||||
#define SHUFFLE_WRITE 2
|
||||
#define SHUFFLE_READWRITE 3
|
||||
|
||||
#ifndef ZTST_GEQUAL
|
||||
#define ZTST_GEQUAL 2
|
||||
#define ZTST_GREATER 3
|
||||
#endif
|
||||
|
||||
#ifndef AFAIL_KEEP
|
||||
#define AFAIL_KEEP 0
|
||||
#define AFAIL_FB_ONLY 1
|
||||
#define AFAIL_ZB_ONLY 2
|
||||
#define AFAIL_RGB_ONLY 3
|
||||
#endif
|
||||
|
||||
// TEX_COORD_DEBUG output the uv coordinate as color. It is useful
|
||||
// to detect bad sampling due to upscaling
|
||||
//#define TEX_COORD_DEBUG
|
||||
@ -25,9 +37,13 @@
|
||||
#define SW_AD_TO_HW (PS_BLEND_C == 1 && PS_A_MASKED)
|
||||
#define PS_PRIMID_INIT (PS_DATE == 1 || PS_DATE == 2)
|
||||
#define NEEDS_RT_EARLY (PS_TEX_IS_FB == 1 || PS_DATE >= 5)
|
||||
#define NEEDS_RT_FOR_AFAIL (PS_AFAIL == 3 && PS_NO_COLOR1)
|
||||
#define NEEDS_RT (NEEDS_RT_EARLY || NEEDS_RT_FOR_AFAIL || (!PS_PRIMID_INIT && (PS_FBMASK || SW_BLEND_NEEDS_RT || SW_AD_TO_HW)))
|
||||
// True when the alpha-fail mode needs to read back the render target: ZB_ONLY always, RGB_ONLY only when the second color output is disabled (PS_NO_COLOR1).
// Uses AFAIL_ZB_ONLY (defined with the other AFAIL_* constants in this shader); PS_ZB_ONLY is not a defined macro.
#define NEEDS_RT_FOR_AFAIL (PS_AFAIL == AFAIL_ZB_ONLY || (PS_AFAIL == AFAIL_RGB_ONLY && PS_NO_COLOR1))
|
||||
#define NEEDS_DEPTH_FOR_AFAIL (PS_AFAIL == AFAIL_FB_ONLY || PS_AFAIL == AFAIL_RGB_ONLY)
|
||||
#define NEEDS_RT (NEEDS_RT_EARLY || NEEDS_RT_FOR_AFAIL || (!PS_PRIMID_INIT && (PS_FBMASK || SW_BLEND_NEEDS_RT || SW_AD_TO_HW)) || PS_COLOR_FEEDBACK)
|
||||
#define NEEDS_TEX (PS_TFX != 4)
|
||||
#define NEEDS_DEPTH (PS_DEPTH_FEEDBACK && NEEDS_DEPTH_FOR_AFAIL)
|
||||
|
||||
vec4 FragCoord;
|
||||
|
||||
layout(std140, binding = 0) uniform cb21
|
||||
{
|
||||
@ -107,9 +123,10 @@ layout(binding = 2) uniform sampler2D RtSampler; // note 2 already use by the im
|
||||
|
||||
#if PS_DATE == 3
|
||||
layout(binding = 3) uniform sampler2D img_prim_min;
|
||||
#endif
|
||||
|
||||
// I don't remember why I set this parameter but it is surely useless
|
||||
//layout(pixel_center_integer) in vec4 gl_FragCoord;
|
||||
#if NEEDS_DEPTH
|
||||
layout(binding = 4) uniform sampler2D DepthSampler;
|
||||
#endif
|
||||
|
||||
vec4 sample_from_rt()
|
||||
@ -119,7 +136,16 @@ vec4 sample_from_rt()
|
||||
#elif HAS_FRAMEBUFFER_FETCH
|
||||
return LAST_FRAG_COLOR;
|
||||
#else
|
||||
return texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);
|
||||
return texelFetch(RtSampler, ivec2(FragCoord.xy), 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
vec4 sample_from_depth()
|
||||
{
|
||||
#if !NEEDS_DEPTH
|
||||
return vec4(0.0);
|
||||
#else
|
||||
return texelFetch(DepthSampler, ivec2(FragCoord.xy), 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -315,7 +341,7 @@ int fetch_raw_depth()
|
||||
#if PS_TEX_IS_FB == 1
|
||||
return int(sample_from_rt().r * multiplier);
|
||||
#else
|
||||
return int(texelFetch(TextureSampler, ivec2(gl_FragCoord.xy), 0).r * multiplier);
|
||||
return int(texelFetch(TextureSampler, ivec2(FragCoord.xy), 0).r * multiplier);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -324,7 +350,7 @@ vec4 fetch_raw_color()
|
||||
#if PS_TEX_IS_FB == 1
|
||||
return sample_from_rt();
|
||||
#else
|
||||
return texelFetch(TextureSampler, ivec2(gl_FragCoord.xy), 0);
|
||||
return texelFetch(TextureSampler, ivec2(FragCoord.xy), 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -724,9 +750,9 @@ void ps_dither(inout vec3 C, float As)
|
||||
{
|
||||
#if PS_DITHER > 0 && PS_DITHER < 3
|
||||
#if PS_DITHER == 2
|
||||
ivec2 fpos = ivec2(gl_FragCoord.xy);
|
||||
ivec2 fpos = ivec2(FragCoord.xy);
|
||||
#else
|
||||
ivec2 fpos = ivec2(gl_FragCoord.xy * RcpScaleFactor);
|
||||
ivec2 fpos = ivec2(FragCoord.xy * RcpScaleFactor);
|
||||
#endif
|
||||
float value = DitherMatrix[fpos.y&3][fpos.x&3];
|
||||
|
||||
@ -969,9 +995,21 @@ float As = As_rgba.a;
|
||||
|
||||
void ps_main()
|
||||
{
|
||||
FragCoord = gl_FragCoord;
|
||||
|
||||
#if NEEDS_DEPTH && (PS_ZTST == ZTST_GEQUAL || PS_ZTST == ZTST_GREATER)
|
||||
#if PS_ZTST == ZTST_GEQUAL
|
||||
if (FragCoord.z < sample_from_depth().r)
|
||||
discard;
|
||||
#elif PS_ZTST == ZTST_GREATER
|
||||
if (FragCoord.z <= sample_from_depth().r)
|
||||
discard;
|
||||
#endif
|
||||
#endif // PS_ZTST
|
||||
|
||||
#if PS_SCANMSK & 2
|
||||
// fail depth test on prohibited lines
|
||||
if ((int(gl_FragCoord.y) & 1) == (PS_SCANMSK & 1))
|
||||
if ((int(FragCoord.y) & 1) == (PS_SCANMSK & 1))
|
||||
discard;
|
||||
#endif
|
||||
|
||||
@ -1007,7 +1045,7 @@ void ps_main()
|
||||
#endif
|
||||
|
||||
#if PS_DATE == 3
|
||||
int stencil_ceil = int(texelFetch(img_prim_min, ivec2(gl_FragCoord.xy), 0).r);
|
||||
int stencil_ceil = int(texelFetch(img_prim_min, ivec2(FragCoord.xy), 0).r);
|
||||
// Note gl_PrimitiveID == stencil_ceil will be the primitive that will update
|
||||
// the bad alpha value so we must keep it.
|
||||
|
||||
@ -1017,18 +1055,17 @@ void ps_main()
|
||||
#endif
|
||||
|
||||
vec4 C = ps_color();
|
||||
bool atst_pass = atst(C);
|
||||
|
||||
#if PS_AFAIL == 0 // KEEP or ATST off
|
||||
if (!atst_pass)
|
||||
discard;
|
||||
#if PS_FIXED_ONE_A
|
||||
// AA (Fixed one) will output a coverage of 1.0 as alpha
|
||||
C.a = 128.0f;
|
||||
#endif
|
||||
|
||||
// Must be done before alpha correction
|
||||
bool atst_pass = atst(C);
|
||||
|
||||
// AA (Fixed one) will output a coverage of 1.0 as alpha
|
||||
#if PS_FIXED_ONE_A
|
||||
C.a = 128.0f;
|
||||
#if PS_AFAIL == AFAIL_KEEP
|
||||
if (!atst_pass)
|
||||
discard;
|
||||
#endif
|
||||
|
||||
#if SW_AD_TO_HW
|
||||
@ -1066,7 +1103,6 @@ void ps_main()
|
||||
|
||||
ps_blend(C, alpha_blend);
|
||||
|
||||
|
||||
#if PS_SHUFFLE
|
||||
#if !PS_READ16_SRC && !PS_SHUFFLE_SAME && !(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE)
|
||||
uvec4 denorm_c_after = uvec4(C);
|
||||
@ -1118,32 +1154,54 @@ void ps_main()
|
||||
|
||||
ps_fbmask(C);
|
||||
|
||||
#if PS_AFAIL == 3 && !PS_NO_COLOR1 // RGB_ONLY
|
||||
// RGB_ONLY alpha-fail with the second color output available. The constant is AFAIL_RGB_ONLY; AFAIL_RGB is not defined in this shader.
#if (PS_AFAIL == AFAIL_RGB_ONLY) && !PS_NO_COLOR1
|
||||
// Use alpha blend factor to determine whether to update A.
|
||||
alpha_blend.a = float(atst_pass);
|
||||
#endif
|
||||
|
||||
#if !PS_NO_COLOR
|
||||
#if PS_RTA_CORRECTION
|
||||
SV_Target0.a = C.a / 128.0f;
|
||||
C.a = C.a / 128.0f;
|
||||
#else
|
||||
SV_Target0.a = C.a / 255.0f;
|
||||
C.a = C.a / 255.0f;
|
||||
#endif
|
||||
#if PS_COLCLIP_HW == 1
|
||||
SV_Target0.rgb = vec3(C.rgb / 65535.0f);
|
||||
C.rgb = vec3(C.rgb / 65535.0f);
|
||||
#else
|
||||
SV_Target0.rgb = C.rgb / 255.0f;
|
||||
C.rgb = C.rgb / 255.0f;
|
||||
#endif
|
||||
#if PS_AFAIL == 3 && PS_NO_COLOR1 // RGB_ONLY, no dual src blend
|
||||
|
||||
// Alpha test with feedback
|
||||
#if (PS_AFAIL == AFAIL_FB_ONLY) && NEEDS_DEPTH
|
||||
if (!atst_pass)
|
||||
SV_Target0.a = sample_from_rt().a;
|
||||
FragCoord.z = sample_from_depth().r;
|
||||
#elif (PS_AFAIL == AFAIL_ZB_ONLY) && NEEDS_RT
|
||||
if (!atst_pass)
|
||||
C = sample_from_rt();
|
||||
#elif (PS_AFAIL == AFAIL_RGB_ONLY)
|
||||
if (!atst_pass)
|
||||
{
|
||||
#if NEEDS_RT && PS_NO_COLOR1 // No dual src blend
|
||||
C.a = sample_from_rt().a;
|
||||
#endif
|
||||
#if NEEDS_DEPTH
|
||||
FragCoord.z = sample_from_depth().r;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
// Warning: do not write SV_Target0 until the end since the value might be needed for
|
||||
// FB fetch in sample_from_rt().
|
||||
SV_Target0 = C;
|
||||
|
||||
#if !PS_NO_COLOR1
|
||||
SV_Target1 = alpha_blend;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if PS_ZCLAMP
|
||||
gl_FragDepth = min(gl_FragCoord.z, MaxDepthPS);
|
||||
gl_FragDepth = min(FragCoord.z, MaxDepthPS);
|
||||
// This shader names the AFAIL depth condition NEEDS_DEPTH_FOR_AFAIL; AFAIL_NEEDS_DEPTH is not defined here, so the branch below could never be selected as intended.
#elif NEEDS_DEPTH && NEEDS_DEPTH_FOR_AFAIL
|
||||
gl_FragDepth = FragCoord.z; // Output depth value for ATST pass/fail
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -245,6 +245,18 @@ void main()
|
||||
#define GS_LINE 0
|
||||
#endif
|
||||
|
||||
#ifndef ZTST_GEQUAL
|
||||
#define ZTST_GEQUAL 2
|
||||
#define ZTST_GREATER 3
|
||||
#endif
|
||||
|
||||
#ifndef AFAIL_KEEP
|
||||
#define AFAIL_KEEP 0
|
||||
#define AFAIL_FB_ONLY 1
|
||||
#define AFAIL_ZB_ONLY 2
|
||||
#define AFAIL_RGB_ONLY 3
|
||||
#endif
|
||||
|
||||
#ifndef PS_FST
|
||||
#define PS_FST 0
|
||||
#define PS_WMS 0
|
||||
@ -288,19 +300,31 @@ void main()
|
||||
#define PS_DITHER 0
|
||||
#define PS_DITHER_ADJUST 0
|
||||
#define PS_ZCLAMP 0
|
||||
#define PS_FEEDBACK_LOOP 0
|
||||
#define PS_SCANMSK 0
|
||||
#define PS_AUTOMATIC_LOD 0
|
||||
#define PS_MANUAL_LOD 0
|
||||
#define PS_TEX_IS_FB 0
|
||||
#define PS_NO_COLOR 0
|
||||
#define PS_NO_COLOR1 0
|
||||
#define PS_DATE 0
|
||||
#define PS_TEX_IS_FB 0
|
||||
#define PS_COLOR_FEEDBACK 0
|
||||
#define PS_DEPTH_FEEDBACK 0
|
||||
#endif
|
||||
|
||||
#define SW_BLEND (PS_BLEND_A || PS_BLEND_B || PS_BLEND_D)
|
||||
#define SW_BLEND_NEEDS_RT (SW_BLEND && (PS_BLEND_A == 1 || PS_BLEND_B == 1 || PS_BLEND_C == 1 || PS_BLEND_D == 1))
|
||||
#define SW_AD_TO_HW (PS_BLEND_C == 1 && PS_A_MASKED)
|
||||
#define AFAIL_NEEDS_RT (PS_AFAIL == 3 && PS_NO_COLOR1)
|
||||
#define AFAIL_NEEDS_RT (PS_AFAIL == AFAIL_ZB_ONLY || (PS_AFAIL == AFAIL_RGB_ONLY && PS_NO_COLOR1))
|
||||
#define AFAIL_NEEDS_DEPTH (PS_AFAIL == AFAIL_FB_ONLY || PS_AFAIL == AFAIL_RGB_ONLY)
|
||||
|
||||
#define PS_FEEDBACK_LOOP_IS_NEEDED (PS_TEX_IS_FB == 1 || AFAIL_NEEDS_RT || PS_FBMASK || SW_BLEND_NEEDS_RT || SW_AD_TO_HW || (PS_DATE >= 5))
|
||||
#define PS_FEEDBACK_LOOP_IS_NEEDED_RT (PS_TEX_IS_FB == 1 || AFAIL_NEEDS_RT || PS_FBMASK || SW_BLEND_NEEDS_RT || SW_AD_TO_HW || (PS_DATE >= 5) || PS_COLOR_FEEDBACK)
|
||||
#define PS_FEEDBACK_LOOP_IS_NEEDED_DEPTH (PS_DEPTH_FEEDBACK && AFAIL_NEEDS_DEPTH)
|
||||
|
||||
#define NEEDS_TEX (PS_TFX != 4)
|
||||
|
||||
vec4 FragCoord;
|
||||
|
||||
layout(std140, set = 0, binding = 1) uniform cb1
|
||||
{
|
||||
vec3 FogColor;
|
||||
@ -345,13 +369,30 @@ layout(set = 1, binding = 0) uniform sampler2D Texture;
|
||||
layout(set = 1, binding = 1) uniform texture2D Palette;
|
||||
#endif
|
||||
|
||||
#if PS_FEEDBACK_LOOP_IS_NEEDED
|
||||
#if PS_FEEDBACK_LOOP_IS_NEEDED_RT || PS_FEEDBACK_LOOP_IS_NEEDED_DEPTH
|
||||
#if defined(DISABLE_TEXTURE_BARRIER) || defined(HAS_FEEDBACK_LOOP_LAYOUT)
|
||||
layout(set = 1, binding = 2) uniform texture2D RtSampler;
|
||||
vec4 sample_from_rt() { return texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0); }
|
||||
#if PS_FEEDBACK_LOOP_IS_NEEDED_RT
|
||||
layout(set = 1, binding = 2) uniform texture2D RtSampler;
|
||||
vec4 sample_from_rt() { return texelFetch(RtSampler, ivec2(FragCoord.xy), 0); }
|
||||
#endif
|
||||
#if PS_FEEDBACK_LOOP_IS_NEEDED_DEPTH
|
||||
layout(set = 1, binding = 4) uniform texture2D DepthSampler;
|
||||
vec4 sample_from_depth() { return texelFetch(DepthSampler, ivec2(FragCoord.xy), 0); }
|
||||
#endif
|
||||
#else
|
||||
layout(input_attachment_index = 0, set = 1, binding = 2) uniform subpassInput RtSampler;
|
||||
vec4 sample_from_rt() { return subpassLoad(RtSampler); }
|
||||
// Must consider each case separately since the input attachment indices must be consecutive.
|
||||
#if PS_FEEDBACK_LOOP_IS_NEEDED_RT && PS_FEEDBACK_LOOP_IS_NEEDED_DEPTH
|
||||
layout(input_attachment_index = 0, set = 1, binding = 2) uniform subpassInput RtSampler;
|
||||
layout(input_attachment_index = 1, set = 1, binding = 4) uniform subpassInput DepthSampler;
|
||||
vec4 sample_from_rt() { return subpassLoad(RtSampler); }
|
||||
vec4 sample_from_depth() { return subpassLoad(DepthSampler); }
|
||||
#elif PS_FEEDBACK_LOOP_IS_NEEDED_RT
|
||||
layout(input_attachment_index = 0, set = 1, binding = 2) uniform subpassInput RtSampler;
|
||||
vec4 sample_from_rt() { return subpassLoad(RtSampler); }
|
||||
#elif PS_FEEDBACK_LOOP_IS_NEEDED_DEPTH
|
||||
layout(input_attachment_index = 0, set = 1, binding = 4) uniform subpassInput DepthSampler;
|
||||
vec4 sample_from_depth() { return subpassLoad(DepthSampler); }
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@ -925,19 +966,19 @@ vec4 ps_color()
|
||||
#if !NEEDS_TEX
|
||||
vec4 T = vec4(0.0f);
|
||||
#elif PS_CHANNEL_FETCH == 1
|
||||
vec4 T = fetch_red(ivec2(gl_FragCoord.xy));
|
||||
vec4 T = fetch_red(ivec2(FragCoord.xy));
|
||||
#elif PS_CHANNEL_FETCH == 2
|
||||
vec4 T = fetch_green(ivec2(gl_FragCoord.xy));
|
||||
vec4 T = fetch_green(ivec2(FragCoord.xy));
|
||||
#elif PS_CHANNEL_FETCH == 3
|
||||
vec4 T = fetch_blue(ivec2(gl_FragCoord.xy));
|
||||
vec4 T = fetch_blue(ivec2(FragCoord.xy));
|
||||
#elif PS_CHANNEL_FETCH == 4
|
||||
vec4 T = fetch_alpha(ivec2(gl_FragCoord.xy));
|
||||
vec4 T = fetch_alpha(ivec2(FragCoord.xy));
|
||||
#elif PS_CHANNEL_FETCH == 5
|
||||
vec4 T = fetch_rgb(ivec2(gl_FragCoord.xy));
|
||||
vec4 T = fetch_rgb(ivec2(FragCoord.xy));
|
||||
#elif PS_CHANNEL_FETCH == 6
|
||||
vec4 T = fetch_gXbY(ivec2(gl_FragCoord.xy));
|
||||
vec4 T = fetch_gXbY(ivec2(FragCoord.xy));
|
||||
#elif PS_DEPTH_FMT > 0
|
||||
vec4 T = sample_depth(st_int, ivec2(gl_FragCoord.xy));
|
||||
vec4 T = sample_depth(st_int, ivec2(FragCoord.xy));
|
||||
#else
|
||||
vec4 T = sample_color(st);
|
||||
#endif
|
||||
@ -969,7 +1010,6 @@ vec4 ps_color()
|
||||
void ps_fbmask(inout vec4 C)
|
||||
{
|
||||
#if PS_FBMASK
|
||||
|
||||
#if PS_COLCLIP_HW == 1
|
||||
vec4 RT = trunc(sample_from_rt() * 65535.0f);
|
||||
#else
|
||||
@ -985,9 +1025,9 @@ void ps_dither(inout vec3 C, float As)
|
||||
ivec2 fpos;
|
||||
|
||||
#if PS_DITHER == 2
|
||||
fpos = ivec2(gl_FragCoord.xy);
|
||||
fpos = ivec2(FragCoord.xy);
|
||||
#else
|
||||
fpos = ivec2(gl_FragCoord.xy * RcpScaleFactor);
|
||||
fpos = ivec2(FragCoord.xy * RcpScaleFactor);
|
||||
#endif
|
||||
|
||||
float value = DitherMatrix[fpos.y & 3][fpos.x & 3];
|
||||
@ -1065,7 +1105,7 @@ void ps_blend(inout vec4 Color, inout vec4 As_rgba)
|
||||
As_rgba.rgb = vec3(1.0f);
|
||||
#endif
|
||||
|
||||
#if PS_FEEDBACK_LOOP_IS_NEEDED
|
||||
#if PS_FEEDBACK_LOOP_IS_NEEDED_RT
|
||||
vec4 RT = sample_from_rt();
|
||||
#else
|
||||
// Not used, but we define it to make the selection below simpler.
|
||||
@ -1078,7 +1118,7 @@ void ps_blend(inout vec4 Color, inout vec4 As_rgba)
|
||||
float Ad = trunc(RT.a * 255.0f + 0.1f) / 128.0f;
|
||||
#endif
|
||||
|
||||
#if PS_SHUFFLE && PS_FEEDBACK_LOOP_IS_NEEDED
|
||||
#if PS_SHUFFLE && PS_FEEDBACK_LOOP_IS_NEEDED_RT
|
||||
uvec4 denorm_rt = uvec4(RT);
|
||||
#if (PS_PROCESS_BA & SHUFFLE_WRITE)
|
||||
RT.r = float((denorm_rt.b << 3) & 0xF8u);
|
||||
@ -1230,9 +1270,21 @@ void ps_blend(inout vec4 Color, inout vec4 As_rgba)
|
||||
|
||||
void main()
|
||||
{
|
||||
FragCoord = gl_FragCoord;
|
||||
|
||||
#if PS_FEEDBACK_LOOP_IS_NEEDED_DEPTH && (PS_ZTST == ZTST_GEQUAL || PS_ZTST == ZTST_GREATER)
|
||||
#if PS_ZTST == ZTST_GEQUAL
|
||||
if (FragCoord.z < sample_from_depth().r)
|
||||
discard;
|
||||
#elif PS_ZTST == ZTST_GREATER
|
||||
if (FragCoord.z <= sample_from_depth().r)
|
||||
discard;
|
||||
#endif
|
||||
#endif // PS_ZTST
|
||||
|
||||
#if PS_SCANMSK & 2
|
||||
// fail depth test on prohibited lines
|
||||
if ((int(gl_FragCoord.y) & 1) == (PS_SCANMSK & 1))
|
||||
if ((int(FragCoord.y) & 1) == (PS_SCANMSK & 1))
|
||||
discard;
|
||||
#endif
|
||||
#if PS_DATE >= 5
|
||||
@ -1267,7 +1319,7 @@ void main()
|
||||
#endif // PS_DATE >= 5
|
||||
|
||||
#if PS_DATE == 3
|
||||
int stencil_ceil = int(texelFetch(PrimMinTexture, ivec2(gl_FragCoord.xy), 0).r);
|
||||
int stencil_ceil = int(texelFetch(PrimMinTexture, ivec2(FragCoord.xy), 0).r);
|
||||
// Note gl_PrimitiveID == stencil_ceil will be the primitive that will update
|
||||
// the bad alpha value so we must keep it.
|
||||
|
||||
@ -1277,18 +1329,17 @@ void main()
|
||||
#endif
|
||||
|
||||
vec4 C = ps_color();
|
||||
bool atst_pass = atst(C);
|
||||
|
||||
#if PS_AFAIL == 0 // KEEP or ATST off
|
||||
if (!atst_pass)
|
||||
discard;
|
||||
#if PS_FIXED_ONE_A
|
||||
// AA (Fixed one) will output a coverage of 1.0 as alpha
|
||||
C.a = 128.0f;
|
||||
#endif
|
||||
|
||||
// Must be done before alpha correction
|
||||
bool atst_pass = atst(C);
|
||||
|
||||
// AA (Fixed one) will output a coverage of 1.0 as alpha
|
||||
#if PS_FIXED_ONE_A
|
||||
C.a = 128.0f;
|
||||
// Discard on alpha-test failure only in KEEP mode. PS_AFAIL must be compared against the AFAIL_* constants; ATST_KEEP is not defined in this shader.
#if PS_AFAIL == AFAIL_KEEP
|
||||
if (!atst_pass)
|
||||
discard;
|
||||
#endif
|
||||
|
||||
#if SW_AD_TO_HW
|
||||
@ -1327,7 +1378,7 @@ void main()
|
||||
#else
|
||||
ps_blend(C, alpha_blend);
|
||||
|
||||
#if PS_SHUFFLE
|
||||
#if PS_SHUFFLE
|
||||
#if !PS_READ16_SRC && !PS_SHUFFLE_SAME && !(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE)
|
||||
uvec4 denorm_c_after = uvec4(C);
|
||||
#if (PS_PROCESS_BA & SHUFFLE_READ)
|
||||
@ -1375,7 +1426,7 @@ void main()
|
||||
|
||||
ps_fbmask(C);
|
||||
|
||||
#if PS_AFAIL == 3 && !PS_NO_COLOR1 // RGB_ONLY
|
||||
#if (PS_AFAIL == AFAIL_RGB_ONLY) && !PS_NO_COLOR1
|
||||
// Use alpha blend factor to determine whether to update A.
|
||||
alpha_blend.a = float(atst_pass);
|
||||
#endif
|
||||
@ -1394,16 +1445,32 @@ void main()
|
||||
#if !PS_NO_COLOR1
|
||||
o_col1 = alpha_blend;
|
||||
#endif
|
||||
#if PS_AFAIL == 3 && PS_NO_COLOR1 // RGB_ONLY, no dual src blend
|
||||
|
||||
// Alpha test with feedback
|
||||
#if (PS_AFAIL == AFAIL_FB_ONLY) && PS_FEEDBACK_LOOP_IS_NEEDED_DEPTH
|
||||
if (!atst_pass)
|
||||
FragCoord.z = sample_from_depth().r;
|
||||
#elif (PS_AFAIL == AFAIL_ZB_ONLY) && PS_FEEDBACK_LOOP_IS_NEEDED_RT
|
||||
if (!atst_pass)
|
||||
o_col0 = sample_from_rt();
|
||||
#elif (PS_AFAIL == AFAIL_RGB_ONLY)
|
||||
if (!atst_pass)
|
||||
{
|
||||
#if PS_FEEDBACK_LOOP_IS_NEEDED_RT && PS_NO_COLOR1 // No dual src blend
|
||||
o_col0.a = sample_from_rt().a;
|
||||
#endif
|
||||
#if PS_FEEDBACK_LOOP_IS_NEEDED_DEPTH
|
||||
FragCoord.z = sample_from_depth().r;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if PS_ZCLAMP
|
||||
gl_FragDepth = min(gl_FragCoord.z, MaxDepthPS);
|
||||
gl_FragDepth = min(FragCoord.z, MaxDepthPS);
|
||||
#elif PS_FEEDBACK_LOOP_IS_NEEDED_DEPTH && AFAIL_NEEDS_DEPTH
|
||||
gl_FragDepth = FragCoord.z; // Output depth value for ATST pass/fail
|
||||
#endif
|
||||
|
||||
#endif // PS_DATE
|
||||
}
|
||||
|
||||
|
||||
@ -757,6 +757,7 @@ struct Pcsx2Config
|
||||
PreloadFrameWithGSData : 1,
|
||||
Mipmap : 1,
|
||||
HWMipmap : 1,
|
||||
HWAFAILFeedback : 1,
|
||||
ManualUserHacks : 1,
|
||||
UserHacks_AlignSpriteX : 1,
|
||||
UserHacks_CPUFBConversion : 1,
|
||||
|
||||
@ -775,7 +775,6 @@ REG64_(GIFReg, TEST)
|
||||
REG_END2
|
||||
__forceinline bool DoFirstPass() const { return !ATE || ATST != ATST_NEVER; } // not all pixels fail automatically
|
||||
__forceinline bool DoSecondPass() const { return ATE && ATST != ATST_ALWAYS && AFAIL != AFAIL_KEEP; } // pixels may fail, write fb/z
|
||||
__forceinline bool NoSecondPass() const { return ATE && ATST != ATST_ALWAYS && AFAIL == AFAIL_KEEP; } // pixels may fail, no output
|
||||
__forceinline u32 GetAFAIL(u32 fpsm) const { return (AFAIL == AFAIL_RGB_ONLY && (fpsm & 0xF) != 0) ? static_cast<u32>(AFAIL_FB_ONLY) : AFAIL; } // FB Only when not 32bit Framebuffer
|
||||
REG_END2
|
||||
|
||||
|
||||
@ -431,6 +431,8 @@ const char* GSState::GetFlushReasonString(GSFlushReason reason)
|
||||
return "VSYNC";
|
||||
case GSFlushReason::GSREOPEN:
|
||||
return "GS REOPEN";
|
||||
case GSFlushReason::VERTEXCOUNT:
|
||||
return "VERTEX COUNT";
|
||||
case GSFlushReason::UNKNOWN:
|
||||
default:
|
||||
return "UNKNOWN";
|
||||
|
||||
@ -354,6 +354,7 @@ struct alignas(16) GSHWDrawConfig
|
||||
u32 date : 3;
|
||||
u32 atst : 3;
|
||||
u32 afail : 2;
|
||||
u32 ztst : 2;
|
||||
// Color sampling
|
||||
u32 fst : 1; // Investigate to do it on the VS
|
||||
u32 tfx : 3;
|
||||
@ -414,6 +415,10 @@ struct alignas(16) GSHWDrawConfig
|
||||
|
||||
// Scan mask
|
||||
u32 scanmsk : 2;
|
||||
|
||||
// Feedback
|
||||
u32 color_feedback : 1;
|
||||
u32 depth_feedback : 1;
|
||||
};
|
||||
|
||||
struct
|
||||
@ -428,11 +433,16 @@ struct alignas(16) GSHWDrawConfig
|
||||
__fi bool operator!=(const PSSelector& rhs) const { return (key_lo != rhs.key_lo || key_hi != rhs.key_hi); }
|
||||
// Lexicographic order on (key_lo, key_hi). Comparing the two halves independently with || is not a strict weak ordering:
// two selectors could each compare "less" than the other, which breaks ordered containers and sorting.
__fi bool operator<(const PSSelector& rhs) const { return (key_lo < rhs.key_lo) || (key_lo == rhs.key_lo && key_hi < rhs.key_hi); }
|
||||
|
||||
__fi bool IsFeedbackLoop() const
|
||||
__fi bool IsFeedbackLoopRT() const
|
||||
{
|
||||
const u32 sw_blend_bits = blend_a | blend_b | blend_d;
|
||||
const bool sw_blend_needs_rt = (sw_blend_bits != 0 && ((sw_blend_bits | blend_c) & 1u)) || ((a_masked & blend_c) != 0);
|
||||
return channel_fb || tex_is_fb || fbmask || (date >= 5) || sw_blend_needs_rt;
|
||||
// True if the explicit color_feedback bit or any of the other selector conditions listed here is set.
return color_feedback || channel_fb || tex_is_fb || fbmask || (date >= 5) || sw_blend_needs_rt;
|
||||
}
|
||||
|
||||
__fi bool IsFeedbackLoopDepth() const
|
||||
{
|
||||
return depth_feedback;
|
||||
}
|
||||
|
||||
/// Disables color output from the pixel shader, this is done when all channels are masked.
|
||||
|
||||
@ -14,6 +14,7 @@
|
||||
#include "common/Error.h"
|
||||
#include "common/Path.h"
|
||||
#include "common/StringUtil.h"
|
||||
#include "common/ScopedGuard.h"
|
||||
|
||||
#include "imgui.h"
|
||||
#include "IconsFontAwesome6.h"
|
||||
@ -1766,6 +1767,9 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant
|
||||
sm.AddMacro("PS_TEX_IS_FB", sel.tex_is_fb);
|
||||
sm.AddMacro("PS_NO_COLOR", sel.no_color);
|
||||
sm.AddMacro("PS_NO_COLOR1", sel.no_color1);
|
||||
sm.AddMacro("PS_ZTST", sel.ztst);
|
||||
sm.AddMacro("PS_COLOR_FEEDBACK", sel.color_feedback);
|
||||
sm.AddMacro("PS_DEPTH_FEEDBACK", sel.depth_feedback);
|
||||
|
||||
wil::com_ptr_nothrow<ID3D11PixelShader> ps = m_shader_cache.GetPixelShader(m_dev.get(), m_tfx_source, sm.GetPtr(), "ps_main");
|
||||
i = m_ps.try_emplace(sel, std::move(ps)).first;
|
||||
@ -2583,6 +2587,18 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
|
||||
{
|
||||
const GSVector2i rtsize = (config.rt ? config.rt : config.ds)->GetSize();
|
||||
GSTexture* colclip_rt = g_gs_device->GetColorClipTexture();
|
||||
GSTexture* draw_rt_clone = nullptr;
|
||||
GSTexture* draw_ds_clone = nullptr;
|
||||
GSTexture* primid_texture = nullptr;
|
||||
|
||||
ScopedGuard recycle_temp_textures([&]() {
|
||||
if (draw_rt_clone)
|
||||
Recycle(draw_rt_clone);
|
||||
if (draw_ds_clone)
|
||||
Recycle(draw_ds_clone);
|
||||
if (primid_texture)
|
||||
Recycle(primid_texture);
|
||||
});
|
||||
|
||||
if (colclip_rt)
|
||||
{
|
||||
@ -2627,7 +2643,6 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
|
||||
|
||||
// Destination Alpha Setup
|
||||
const bool multidraw_fb_copy = m_features.multidraw_fb_copy && (config.require_one_barrier || config.require_full_barrier);
|
||||
GSTexture* primid_texture = nullptr;
|
||||
if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::PrimIDTracking)
|
||||
{
|
||||
primid_texture = CreateRenderTarget(rtsize.x, rtsize.y, GSTexture::Format::PrimID, false);
|
||||
@ -2689,7 +2704,7 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
|
||||
|
||||
// Depth testing and sampling, bind resource as dsv read only and srv at the same time without the need of a copy.
|
||||
ID3D11DepthStencilView* read_only_dsv = nullptr;
|
||||
if (config.tex && config.tex == config.ds)
|
||||
if (config.ds && (config.tex == config.ds|| config.ps.IsFeedbackLoopDepth()) && !config.depth.zwe)
|
||||
read_only_dsv = static_cast<GSTexture11*>(config.ds)->ReadOnlyDepthStencilView();
|
||||
|
||||
// Should be called before changing local srv state.
|
||||
@ -2742,8 +2757,6 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
|
||||
draw_ds = m_state.cached_dsv;
|
||||
}
|
||||
|
||||
GSTexture* draw_rt_clone = nullptr;
|
||||
|
||||
if (draw_rt && (config.require_one_barrier || (config.require_full_barrier && m_features.multidraw_fb_copy) || (config.tex && config.tex == config.rt)))
|
||||
{
|
||||
// Requires a copy of the RT.
|
||||
@ -2754,6 +2767,16 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
|
||||
Console.Warning("D3D11: Failed to allocate temp texture for RT copy.");
|
||||
}
|
||||
|
||||
if (draw_ds && (config.require_one_barrier || (config.require_full_barrier && m_features.multidraw_fb_copy)) &&
|
||||
config.ps.IsFeedbackLoopDepth())
|
||||
{
|
||||
// Requires a copy of the DS.
|
||||
// Used as "bind ds" flag when texture barrier is unsupported for tex is fb.
|
||||
draw_ds_clone = CreateTexture(rtsize.x, rtsize.y, 1, draw_ds->GetFormat(), true);
|
||||
// Check the depth clone that was just requested (not the RT clone), so a failed DS allocation is actually reported.
if (!draw_ds_clone)
|
||||
Console.Warning("D3D11: Failed to allocate temp texture for DS copy.");
|
||||
}
|
||||
|
||||
OMSetRenderTargets(draw_rt, draw_ds, &config.scissor, read_only_dsv);
|
||||
SetupOM(config.depth, OMBlendSelector(config.colormask, config.blend), config.blend.constant);
|
||||
|
||||
@ -2761,7 +2784,8 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
|
||||
if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::StencilOne && multidraw_fb_copy)
|
||||
m_ctx->ClearDepthStencilView(*static_cast<GSTexture11*>(draw_ds), D3D11_CLEAR_STENCIL, 0.0f, 1);
|
||||
|
||||
SendHWDraw(config, draw_rt_clone, draw_rt, config.require_one_barrier, config.require_full_barrier, false);
|
||||
SendHWDraw(config, draw_rt_clone, draw_rt, draw_ds_clone, draw_ds,
|
||||
config.require_one_barrier, config.require_full_barrier, false);
|
||||
|
||||
if (config.blend_multi_pass.enable)
|
||||
{
|
||||
@ -2787,15 +2811,10 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
|
||||
}
|
||||
|
||||
SetupOM(config.alpha_second_pass.depth, OMBlendSelector(config.alpha_second_pass.colormask, config.blend), config.blend.constant);
|
||||
SendHWDraw(config, draw_rt_clone, draw_rt, config.alpha_second_pass.require_one_barrier, config.alpha_second_pass.require_full_barrier, true);
|
||||
SendHWDraw(config, draw_rt_clone, draw_rt, draw_ds_clone, draw_ds,
|
||||
config.alpha_second_pass.require_one_barrier, config.alpha_second_pass.require_full_barrier, true);
|
||||
}
|
||||
|
||||
if (draw_rt_clone)
|
||||
Recycle(draw_rt_clone);
|
||||
|
||||
if (primid_texture)
|
||||
Recycle(primid_texture);
|
||||
|
||||
if (colclip_rt)
|
||||
{
|
||||
config.colclip_update_area = config.colclip_update_area.runion(config.drawarea);
|
||||
@ -2814,19 +2833,29 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
|
||||
}
|
||||
}
|
||||
|
||||
void GSDevice11::SendHWDraw(const GSHWDrawConfig& config, GSTexture* draw_rt_clone, GSTexture* draw_rt, const bool one_barrier, const bool full_barrier, const bool skip_first_barrier)
|
||||
void GSDevice11::SendHWDraw(const GSHWDrawConfig& config,
|
||||
GSTexture* draw_rt_clone, GSTexture* draw_rt, GSTexture* draw_ds_clone, GSTexture* draw_ds,
|
||||
const bool one_barrier, const bool full_barrier, const bool skip_first_barrier)
|
||||
{
|
||||
if (draw_rt_clone)
|
||||
if (draw_rt_clone || draw_ds_clone)
|
||||
{
|
||||
#ifdef PCSX2_DEVBUILD
|
||||
if ((one_barrier || full_barrier) && !config.ps.IsFeedbackLoop()) [[unlikely]]
|
||||
if ((one_barrier || full_barrier) && !(config.ps.IsFeedbackLoopRT() || config.ps.IsFeedbackLoopDepth())) [[unlikely]]
|
||||
Console.Warning("D3D11: Possible unnecessary copy detected.");
|
||||
#endif
|
||||
|
||||
auto CopyAndBind = [&](GSVector4i drawarea) {
|
||||
CopyRect(draw_rt, draw_rt_clone, drawarea, drawarea.left, drawarea.top);
|
||||
if (draw_rt_clone)
|
||||
CopyRect(draw_rt, draw_rt_clone, drawarea, drawarea.left, drawarea.top);
|
||||
if (draw_ds_clone)
|
||||
CopyRect(draw_ds, draw_ds_clone, drawarea, drawarea.left, drawarea.top);
|
||||
if (one_barrier || full_barrier)
|
||||
PSSetShaderResource(2, draw_rt_clone);
|
||||
{
|
||||
if (draw_rt_clone)
|
||||
PSSetShaderResource(2, draw_rt_clone);
|
||||
if (draw_ds_clone)
|
||||
PSSetShaderResource(4, draw_ds_clone);
|
||||
}
|
||||
if (config.tex && config.tex == config.rt)
|
||||
PSSetShaderResource(0, draw_rt_clone);
|
||||
};
|
||||
|
||||
@ -83,7 +83,7 @@ public:
|
||||
private:
|
||||
enum : u32
|
||||
{
|
||||
MAX_TEXTURES = 4,
|
||||
MAX_TEXTURES = 5,
|
||||
MAX_SAMPLERS = 1,
|
||||
VERTEX_BUFFER_SIZE = 32 * 1024 * 1024,
|
||||
INDEX_BUFFER_SIZE = 16 * 1024 * 1024,
|
||||
@ -345,7 +345,9 @@ public:
|
||||
void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, u8 afix);
|
||||
|
||||
void RenderHW(GSHWDrawConfig& config) override;
|
||||
void SendHWDraw(const GSHWDrawConfig& config, GSTexture* draw_rt_clone, GSTexture* draw_rt, const bool one_barrier, const bool full_barrier, const bool skip_first_barrier);
|
||||
void SendHWDraw(const GSHWDrawConfig& config,
|
||||
GSTexture* draw_rt_clone, GSTexture* draw_rt, GSTexture* draw_ds_clone, GSTexture* draw_ds,
|
||||
const bool one_barrier, const bool full_barrier, const bool skip_first_barrier);
|
||||
|
||||
void ClearSamplerCache() override;
|
||||
|
||||
|
||||
@ -2305,9 +2305,9 @@ bool GSDevice12::GetTextureGroupDescriptors(
|
||||
}
|
||||
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE dst_handle = *gpu_handle;
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE src_handles[NUM_TFX_TEXTURES];
|
||||
UINT src_sizes[NUM_TFX_TEXTURES];
|
||||
pxAssert(count <= NUM_TFX_TEXTURES);
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE src_handles[NUM_TOTAL_TFX_TEXTURES];
|
||||
UINT src_sizes[NUM_TOTAL_TFX_TEXTURES];
|
||||
pxAssert(count <= NUM_TOTAL_TFX_TEXTURES);
|
||||
for (u32 i = 0; i < count; i++)
|
||||
{
|
||||
src_handles[i] = cpu_handles[i];
|
||||
@ -2415,9 +2415,10 @@ bool GSDevice12::CreateRootSignatures()
|
||||
rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL);
|
||||
rsb.AddCBVParameter(1, D3D12_SHADER_VISIBILITY_PIXEL);
|
||||
rsb.AddSRVParameter(0, D3D12_SHADER_VISIBILITY_VERTEX);
|
||||
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 2, D3D12_SHADER_VISIBILITY_PIXEL);
|
||||
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 2, D3D12_SHADER_VISIBILITY_PIXEL); // Source / Palette
|
||||
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, NUM_TFX_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL);
|
||||
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 2, 2, D3D12_SHADER_VISIBILITY_PIXEL);
|
||||
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 2, 2, D3D12_SHADER_VISIBILITY_PIXEL); // RT / PrimID
|
||||
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 4, 1, D3D12_SHADER_VISIBILITY_PIXEL); // Depth
|
||||
if (!(m_tfx_root_signature = rsb.Create()))
|
||||
return false;
|
||||
D3D12::SetObjectName(m_tfx_root_signature.get(), "TFX root signature");
|
||||
@ -2922,6 +2923,9 @@ const ID3DBlob* GSDevice12::GetTFXPixelShader(const GSHWDrawConfig::PSSelector&
|
||||
sm.AddMacro("PS_TEX_IS_FB", sel.tex_is_fb);
|
||||
sm.AddMacro("PS_NO_COLOR", sel.no_color);
|
||||
sm.AddMacro("PS_NO_COLOR1", sel.no_color1);
|
||||
sm.AddMacro("PS_ZTST", sel.ztst);
|
||||
sm.AddMacro("PS_COLOR_FEEDBACK", sel.color_feedback);
|
||||
sm.AddMacro("PS_DEPTH_FEEDBACK", sel.depth_feedback);
|
||||
|
||||
ComPtr<ID3DBlob> ps(m_shader_cache.GetPixelShader(m_tfx_source, sm.GetPtr(), "ps_main"));
|
||||
it = m_tfx_pixel_shaders.emplace(sel, std::move(ps)).first;
|
||||
@ -3118,6 +3122,7 @@ void GSDevice12::ExecuteCommandListAndRestartRenderPass(bool wait_for_completion
|
||||
|
||||
const bool was_in_render_pass = m_in_render_pass;
|
||||
EndRenderPass();
|
||||
|
||||
ExecuteCommandList(GetWaitType(wait_for_completion, GSConfig.HWSpinCPUForReadbacks));
|
||||
InvalidateCachedState();
|
||||
|
||||
@ -3155,6 +3160,7 @@ void GSDevice12::InvalidateCachedState()
|
||||
m_tfx_textures_handle_gpu.Clear();
|
||||
m_tfx_samplers_handle_gpu.Clear();
|
||||
m_tfx_rt_textures_handle_gpu.Clear();
|
||||
m_tfx_depth_textures_handle_gpu.Clear();
|
||||
}
|
||||
|
||||
void GSDevice12::SetVertexBuffer(D3D12_GPU_VIRTUAL_ADDRESS buffer, size_t size, size_t stride)
|
||||
@ -3236,7 +3242,11 @@ void GSDevice12::PSSetShaderResource(int i, GSTexture* sr, bool check_state)
|
||||
return;
|
||||
|
||||
m_tfx_textures[i] = handle;
|
||||
m_dirty_flags |= (i < 2) ? DIRTY_FLAG_TFX_TEXTURES : DIRTY_FLAG_TFX_RT_TEXTURES;
|
||||
m_dirty_flags |=
|
||||
(i < 2) ? DIRTY_FLAG_TFX_TEXTURES :
|
||||
(i < 4) ? DIRTY_FLAG_TFX_RT_TEXTURES :
|
||||
(i < 5) ? DIRTY_FLAG_TFX_DEPTH_TEXTURES :
|
||||
0;
|
||||
}
|
||||
|
||||
void GSDevice12::PSSetSampler(GSHWDrawConfig::SamplerSelector sel)
|
||||
@ -3642,6 +3652,17 @@ bool GSDevice12::ApplyTFXState(bool already_execed)
|
||||
flags |= DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2;
|
||||
}
|
||||
|
||||
if (flags & DIRTY_FLAG_TFX_DEPTH_TEXTURES)
|
||||
{
|
||||
if (!GetTextureGroupDescriptors(&m_tfx_depth_textures_handle_gpu, m_tfx_textures.data() + 4, 1))
|
||||
{
|
||||
ExecuteCommandListAndRestartRenderPass(false, "Ran out of TFX depth descriptor descriptor groups");
|
||||
return ApplyTFXState(true);
|
||||
}
|
||||
|
||||
flags |= DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_3;
|
||||
}
|
||||
|
||||
ID3D12GraphicsCommandList* cmdlist = GetCommandList();
|
||||
|
||||
if (m_current_root_signature != RootSignature::TFX)
|
||||
@ -3649,7 +3670,8 @@ bool GSDevice12::ApplyTFXState(bool already_execed)
|
||||
m_current_root_signature = RootSignature::TFX;
|
||||
flags |= DIRTY_FLAG_VS_CONSTANT_BUFFER_BINDING | DIRTY_FLAG_PS_CONSTANT_BUFFER_BINDING |
|
||||
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE | DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE |
|
||||
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2 | DIRTY_FLAG_PIPELINE;
|
||||
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2 | DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_3 |
|
||||
DIRTY_FLAG_PIPELINE;
|
||||
cmdlist->SetGraphicsRootSignature(m_tfx_root_signature.get());
|
||||
}
|
||||
|
||||
@ -3668,6 +3690,8 @@ bool GSDevice12::ApplyTFXState(bool already_execed)
|
||||
cmdlist->SetGraphicsRootDescriptorTable(TFX_ROOT_SIGNATURE_PARAM_PS_SAMPLERS, m_tfx_samplers_handle_gpu);
|
||||
if (flags & DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2)
|
||||
cmdlist->SetGraphicsRootDescriptorTable(TFX_ROOT_SIGNATURE_PARAM_PS_RT_TEXTURES, m_tfx_rt_textures_handle_gpu);
|
||||
if (flags & DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_3)
|
||||
cmdlist->SetGraphicsRootDescriptorTable(TFX_ROOT_SIGNATURE_PARAM_PS_DEPTH_TEXTURES, m_tfx_depth_textures_handle_gpu);
|
||||
|
||||
ApplyBaseState(flags, cmdlist);
|
||||
return true;
|
||||
@ -3832,6 +3856,17 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
|
||||
GSTexture12* draw_rt = static_cast<GSTexture12*>(config.rt);
|
||||
GSTexture12* draw_ds = static_cast<GSTexture12*>(config.ds);
|
||||
GSTexture12* draw_rt_clone = nullptr;
|
||||
GSTexture12* draw_ds_clone = nullptr;
|
||||
GSTexture12* date_image = nullptr;
|
||||
|
||||
ScopedGuard recycle_temp_textures([&]() {
|
||||
if (draw_rt_clone)
|
||||
Recycle(draw_rt_clone);
|
||||
if (draw_ds_clone)
|
||||
Recycle(draw_ds_clone);
|
||||
if (date_image)
|
||||
Recycle(date_image);
|
||||
});
|
||||
|
||||
// Align the render area to 128x128, hopefully avoiding render pass restarts for small render area changes (e.g. Ratchet and Clank).
|
||||
const GSVector2i rtsize(config.rt ? config.rt->GetSize() : config.ds->GetSize());
|
||||
@ -3897,7 +3932,7 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
|
||||
SetBlendConstants(config.blend.constant);
|
||||
|
||||
// Depth testing and sampling, bind resource as dsv read only and srv at the same time without the need of a copy.
|
||||
if (config.tex && config.tex == config.ds)
|
||||
if (config.ds && (config.ds == config.tex || config.ps.IsFeedbackLoopDepth()) && !config.depth.zwe)
|
||||
{
|
||||
EndRenderPass();
|
||||
|
||||
@ -3906,7 +3941,6 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
|
||||
}
|
||||
|
||||
// Primitive ID tracking DATE setup.
|
||||
GSTexture12* date_image = nullptr;
|
||||
if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::PrimIDTracking)
|
||||
{
|
||||
GSTexture* backup_rt = config.rt;
|
||||
@ -3994,6 +4028,16 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
|
||||
Console.Warning("D3D12: Failed to allocate temp texture for RT copy.");
|
||||
}
|
||||
|
||||
// Depth feedback loop that cannot read the bound depth buffer directly: allocate a temporary
// texture that will hold a snapshot of the DS for the pixel shader to sample.
if (draw_ds && (config.require_one_barrier || (config.require_full_barrier && m_features.multidraw_fb_copy)) &&
	config.ps.IsFeedbackLoopDepth())
{
	// Requires a copy of the DS.
	// Used as "bind ds" flag when texture barrier is unsupported for tex is fb.
	draw_ds_clone = static_cast<GSTexture12*>(CreateTexture(rtsize.x, rtsize.y, 1, draw_ds->GetFormat(), true));

	// Validate the texture that was just requested (the DS clone, not the RT clone), so the
	// warning is emitted exactly when the depth copy could not be allocated.
	if (!draw_ds_clone)
		Console.Warning("D3D12: Failed to allocate temp texture for DS copy.");
}
|
||||
|
||||
OMSetRenderTargets(draw_rt, draw_ds, config.scissor);
|
||||
|
||||
// Begin render pass if new target or out of the area.
|
||||
@ -4040,7 +4084,8 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
|
||||
UploadHWDrawVerticesAndIndices(config);
|
||||
|
||||
// now we can do the actual draw
|
||||
SendHWDraw(pipe, config, draw_rt_clone, draw_rt, config.require_one_barrier, config.require_full_barrier, false);
|
||||
SendHWDraw(pipe, config, draw_rt_clone, draw_rt, draw_ds_clone, draw_ds,
|
||||
config.require_one_barrier, config.require_full_barrier, false);
|
||||
|
||||
// blend second pass
|
||||
if (config.blend_multi_pass.enable)
|
||||
@ -4070,15 +4115,10 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
|
||||
pipe.cms = config.alpha_second_pass.colormask;
|
||||
pipe.dss = config.alpha_second_pass.depth;
|
||||
pipe.bs = config.blend;
|
||||
SendHWDraw(pipe, config, draw_rt_clone, draw_rt, config.alpha_second_pass.require_one_barrier, config.alpha_second_pass.require_full_barrier, true);
|
||||
SendHWDraw(pipe, config, draw_rt_clone, draw_rt, draw_ds_clone, draw_ds,
|
||||
config.alpha_second_pass.require_one_barrier, config.alpha_second_pass.require_full_barrier, true);
|
||||
}
|
||||
|
||||
if (draw_rt_clone)
|
||||
Recycle(draw_rt_clone);
|
||||
|
||||
if (date_image)
|
||||
Recycle(date_image);
|
||||
|
||||
// now blit the colclip texture back to the original target
|
||||
if (colclip_rt)
|
||||
{
|
||||
@ -4113,23 +4153,38 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
|
||||
}
|
||||
}
|
||||
|
||||
void GSDevice12::SendHWDraw(const PipelineSelector& pipe, const GSHWDrawConfig& config, GSTexture12* draw_rt_clone, GSTexture12* draw_rt, const bool one_barrier, const bool full_barrier, const bool skip_first_barrier)
|
||||
void GSDevice12::SendHWDraw(const PipelineSelector& pipe, const GSHWDrawConfig& config,
|
||||
GSTexture12* draw_rt_clone, GSTexture12* draw_rt,
|
||||
GSTexture12* draw_ds_clone, GSTexture12* draw_ds,
|
||||
const bool one_barrier, const bool full_barrier, const bool skip_first_barrier)
|
||||
{
|
||||
if (draw_rt_clone)
|
||||
if (draw_rt_clone || draw_ds_clone)
|
||||
{
|
||||
|
||||
#ifdef PCSX2_DEVBUILD
|
||||
if ((one_barrier || full_barrier) && !config.ps.IsFeedbackLoop()) [[unlikely]]
|
||||
if ((one_barrier || full_barrier) && !(config.ps.IsFeedbackLoopRT() || config.ps.IsFeedbackLoopDepth())) [[unlikely]]
|
||||
Console.Warning("D3D12: Possible unnecessary copy detected.");
|
||||
#endif
|
||||
auto CopyAndBind = [&](GSVector4i drawarea) {
|
||||
EndRenderPass();
|
||||
if (draw_rt_clone)
|
||||
{
|
||||
CopyRect(draw_rt, draw_rt_clone, drawarea, drawarea.left, drawarea.top);
|
||||
draw_rt->TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET);
|
||||
}
|
||||
|
||||
CopyRect(draw_rt, draw_rt_clone, drawarea, drawarea.left, drawarea.top);
|
||||
draw_rt->TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET);
|
||||
if (draw_ds_clone)
|
||||
{
|
||||
CopyRect(draw_ds, draw_ds_clone, drawarea, drawarea.left, drawarea.top);
|
||||
draw_ds->TransitionToState(D3D12_RESOURCE_STATE_DEPTH_WRITE);
|
||||
}
|
||||
|
||||
if (one_barrier || full_barrier)
|
||||
PSSetShaderResource(2, draw_rt_clone, true);
|
||||
{
|
||||
if (draw_rt_clone)
|
||||
PSSetShaderResource(2, draw_rt_clone, true);
|
||||
if (draw_ds_clone)
|
||||
PSSetShaderResource(4, draw_ds_clone, true);
|
||||
}
|
||||
if (config.tex && config.tex == config.rt)
|
||||
PSSetShaderResource(0, draw_rt_clone, true);
|
||||
};
|
||||
@ -4158,7 +4213,6 @@ void GSDevice12::SendHWDraw(const PipelineSelector& pipe, const GSHWDrawConfig&
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
// Optimization: For alpha second pass we can reuse the copy snapshot from the first pass.
|
||||
if (!skip_first_barrier)
|
||||
CopyAndBind(config.drawarea);
|
||||
@ -4182,7 +4236,7 @@ void GSDevice12::UpdateHWPipelineSelector(GSHWDrawConfig& config)
|
||||
m_pipeline_selector.ds = config.ds != nullptr;
|
||||
}
|
||||
|
||||
void GSDevice12::UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config)
|
||||
void GSDevice12::UploadHWDrawVerticesAndIndices(GSHWDrawConfig& config)
|
||||
{
|
||||
IASetVertexBuffer(config.verts, sizeof(GSVertex), config.nverts);
|
||||
|
||||
|
||||
@ -256,7 +256,8 @@ public:
|
||||
NUM_TFX_CONSTANT_BUFFERS = 2,
|
||||
NUM_TFX_TEXTURES = 2,
|
||||
NUM_TFX_RT_TEXTURES = 2,
|
||||
NUM_TOTAL_TFX_TEXTURES = NUM_TFX_TEXTURES + NUM_TFX_RT_TEXTURES,
|
||||
NUM_TFX_DEPTH_TEXTURES = 1,
|
||||
NUM_TOTAL_TFX_TEXTURES = NUM_TFX_TEXTURES + NUM_TFX_RT_TEXTURES + NUM_TFX_DEPTH_TEXTURES,
|
||||
NUM_TFX_SAMPLERS = 1,
|
||||
NUM_UTILITY_TEXTURES = 1,
|
||||
NUM_UTILITY_SAMPLERS = 1,
|
||||
@ -273,6 +274,7 @@ public:
|
||||
TFX_ROOT_SIGNATURE_PARAM_PS_TEXTURES = 3,
|
||||
TFX_ROOT_SIGNATURE_PARAM_PS_SAMPLERS = 4,
|
||||
TFX_ROOT_SIGNATURE_PARAM_PS_RT_TEXTURES = 5,
|
||||
TFX_ROOT_SIGNATURE_PARAM_PS_DEPTH_TEXTURES = 6,
|
||||
|
||||
UTILITY_ROOT_SIGNATURE_PARAM_PUSH_CONSTANTS = 0,
|
||||
UTILITY_ROOT_SIGNATURE_PARAM_PS_TEXTURES = 1,
|
||||
@ -466,10 +468,13 @@ public:
|
||||
bool BindDrawPipeline(const PipelineSelector& p);
|
||||
|
||||
void RenderHW(GSHWDrawConfig& config) override;
|
||||
void SendHWDraw(const PipelineSelector& pipe, const GSHWDrawConfig& config, GSTexture12* draw_rt_clone, GSTexture12* draw_rt, const bool one_barrier, const bool full_barrier, const bool skip_first_barrier);
|
||||
void SendHWDraw(const PipelineSelector& pipe, const GSHWDrawConfig& config,
|
||||
GSTexture12* draw_rt_clone, GSTexture12* draw_rt,
|
||||
GSTexture12* draw_ds_clone, GSTexture12* draw_ds,
|
||||
const bool one_barrier, const bool full_barrier, const bool skip_first_barrier);
|
||||
|
||||
void UpdateHWPipelineSelector(GSHWDrawConfig& config);
|
||||
void UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config);
|
||||
void UploadHWDrawVerticesAndIndices(GSHWDrawConfig& config);
|
||||
|
||||
public:
|
||||
/// Ends any render pass, executes the command buffer, and invalidates cached state.
|
||||
@ -527,33 +532,35 @@ private:
|
||||
DIRTY_FLAG_TFX_TEXTURES = (1 << 2),
|
||||
DIRTY_FLAG_TFX_SAMPLERS = (1 << 3),
|
||||
DIRTY_FLAG_TFX_RT_TEXTURES = (1 << 4),
|
||||
DIRTY_FLAG_TFX_DEPTH_TEXTURES = (1 << 5),
|
||||
|
||||
DIRTY_FLAG_VS_CONSTANT_BUFFER_BINDING = (1 << 5),
|
||||
DIRTY_FLAG_PS_CONSTANT_BUFFER_BINDING = (1 << 6),
|
||||
DIRTY_FLAG_VS_VERTEX_BUFFER_BINDING = (1 << 7),
|
||||
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE = (1 << 8),
|
||||
DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE = (1 << 9),
|
||||
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2 = (1 << 10),
|
||||
DIRTY_FLAG_VS_CONSTANT_BUFFER_BINDING = (1 << 6),
|
||||
DIRTY_FLAG_PS_CONSTANT_BUFFER_BINDING = (1 << 7),
|
||||
DIRTY_FLAG_VS_VERTEX_BUFFER_BINDING = (1 << 8),
|
||||
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE = (1 << 9),
|
||||
DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE = (1 << 10),
|
||||
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2 = (1 << 11),
|
||||
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_3 = (1 << 12),
|
||||
|
||||
DIRTY_FLAG_VERTEX_BUFFER = (1 << 11),
|
||||
DIRTY_FLAG_INDEX_BUFFER = (1 << 12),
|
||||
DIRTY_FLAG_PRIMITIVE_TOPOLOGY = (1 << 13),
|
||||
DIRTY_FLAG_VIEWPORT = (1 << 14),
|
||||
DIRTY_FLAG_SCISSOR = (1 << 15),
|
||||
DIRTY_FLAG_RENDER_TARGET = (1 << 16),
|
||||
DIRTY_FLAG_PIPELINE = (1 << 17),
|
||||
DIRTY_FLAG_BLEND_CONSTANTS = (1 << 18),
|
||||
DIRTY_FLAG_STENCIL_REF = (1 << 19),
|
||||
DIRTY_FLAG_VERTEX_BUFFER = (1 << 13),
|
||||
DIRTY_FLAG_INDEX_BUFFER = (1 << 14),
|
||||
DIRTY_FLAG_PRIMITIVE_TOPOLOGY = (1 << 15),
|
||||
DIRTY_FLAG_VIEWPORT = (1 << 16),
|
||||
DIRTY_FLAG_SCISSOR = (1 << 17),
|
||||
DIRTY_FLAG_RENDER_TARGET = (1 << 18),
|
||||
DIRTY_FLAG_PIPELINE = (1 << 19),
|
||||
DIRTY_FLAG_BLEND_CONSTANTS = (1 << 20),
|
||||
DIRTY_FLAG_STENCIL_REF = (1 << 21),
|
||||
|
||||
DIRTY_BASE_STATE = DIRTY_FLAG_VS_CONSTANT_BUFFER_BINDING | DIRTY_FLAG_PS_CONSTANT_BUFFER_BINDING |
|
||||
DIRTY_FLAG_VS_VERTEX_BUFFER_BINDING | DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE |
|
||||
DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE | DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2 |
|
||||
DIRTY_FLAG_VERTEX_BUFFER | DIRTY_FLAG_INDEX_BUFFER | DIRTY_FLAG_PRIMITIVE_TOPOLOGY |
|
||||
DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR | DIRTY_FLAG_RENDER_TARGET | DIRTY_FLAG_PIPELINE |
|
||||
DIRTY_FLAG_BLEND_CONSTANTS | DIRTY_FLAG_STENCIL_REF,
|
||||
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_3 | DIRTY_FLAG_VERTEX_BUFFER | DIRTY_FLAG_INDEX_BUFFER |
|
||||
DIRTY_FLAG_PRIMITIVE_TOPOLOGY | DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR | DIRTY_FLAG_RENDER_TARGET |
|
||||
DIRTY_FLAG_PIPELINE | DIRTY_FLAG_BLEND_CONSTANTS | DIRTY_FLAG_STENCIL_REF,
|
||||
|
||||
DIRTY_TFX_STATE =
|
||||
DIRTY_BASE_STATE | DIRTY_FLAG_TFX_TEXTURES | DIRTY_FLAG_TFX_SAMPLERS | DIRTY_FLAG_TFX_RT_TEXTURES,
|
||||
DIRTY_TFX_STATE = DIRTY_BASE_STATE | DIRTY_FLAG_TFX_TEXTURES | DIRTY_FLAG_TFX_SAMPLERS |
|
||||
DIRTY_FLAG_TFX_RT_TEXTURES | DIRTY_FLAG_TFX_DEPTH_TEXTURES,
|
||||
DIRTY_UTILITY_STATE = DIRTY_BASE_STATE,
|
||||
DIRTY_CONSTANT_BUFFER_STATE = DIRTY_FLAG_VS_CONSTANT_BUFFER | DIRTY_FLAG_PS_CONSTANT_BUFFER,
|
||||
};
|
||||
@ -594,6 +601,7 @@ private:
|
||||
D3D12DescriptorHandle m_tfx_textures_handle_gpu;
|
||||
D3D12DescriptorHandle m_tfx_samplers_handle_gpu;
|
||||
D3D12DescriptorHandle m_tfx_rt_textures_handle_gpu;
|
||||
D3D12DescriptorHandle m_tfx_depth_textures_handle_gpu;
|
||||
|
||||
D3D12DescriptorHandle m_utility_texture_cpu;
|
||||
D3D12DescriptorHandle m_utility_texture_gpu;
|
||||
|
||||
@ -5555,7 +5555,7 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool
|
||||
// Hitman suffers from this, not sure on the exact scenario at the moment, but we need the barrier.
|
||||
if (NeedsBlending() && m_context->ALPHA.IsCdInBlend())
|
||||
{
|
||||
// Needed to enable IsFeedbackLoop.
|
||||
// Needed to enable IsFeedbackLoopRT.
|
||||
m_conf.ps.channel_fb = 1;
|
||||
// Assume no overlap when it's a channel shuffle, no need for full barriers.
|
||||
m_conf.require_one_barrier = true;
|
||||
@ -7716,12 +7716,48 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
|
||||
rt->m_alpha_max = rt_new_alpha_max;
|
||||
rt->m_alpha_min = rt_new_alpha_min;
|
||||
}
|
||||
|
||||
// Alpha test afail configuration
|
||||
// Warning must be done after EmulateZbuffer
|
||||
// Depth test is always true so it can be executed in 2 passes (no order required) unlike color.
|
||||
// The idea is to compute first the color which is independent of the alpha test. And then do a 2nd
|
||||
// pass to handle the depth based on the alpha test.
|
||||
const bool ate_first_pass = m_cached_ctx.TEST.DoFirstPass();
|
||||
bool ate_first_pass = m_cached_ctx.TEST.DoFirstPass();
|
||||
bool ate_second_pass = m_cached_ctx.TEST.DoSecondPass();
|
||||
|
||||
// Check if we should force a feedback loop for AFAIL
|
||||
if (ate_first_pass && ate_second_pass && GSConfig.HWAFAILFeedback &&
|
||||
(features.texture_barrier || features.multidraw_fb_copy))
|
||||
{
|
||||
const bool possible_zb_only = (m_cached_ctx.TEST.AFAIL == AFAIL_ZB_ONLY) && m_conf.depth.zwe;
|
||||
const bool possible_rgb_only = (m_cached_ctx.TEST.AFAIL == AFAIL_RGB_ONLY) && rt && m_conf.colormask.wa;
|
||||
const bool possible_fb_only = (m_cached_ctx.TEST.AFAIL == AFAIL_FB_ONLY) && rt && m_conf.colormask.wrgba;
|
||||
|
||||
const bool afail_needs_rt = possible_zb_only || possible_rgb_only;
|
||||
const bool afail_needs_depth = possible_fb_only || possible_rgb_only;
|
||||
|
||||
if (afail_needs_rt)
|
||||
{
|
||||
m_conf.ps.color_feedback = rt && m_conf.colormask.wrgba;
|
||||
ate_second_pass = false;
|
||||
m_conf.ps.afail = m_cached_ctx.TEST.AFAIL;
|
||||
m_conf.require_one_barrier |= (m_prim_overlap == PRIM_OVERLAP_NO);
|
||||
m_conf.require_full_barrier |= (m_prim_overlap != PRIM_OVERLAP_NO);
|
||||
}
|
||||
|
||||
if (afail_needs_depth)
|
||||
{
|
||||
m_conf.ps.depth_feedback = m_conf.depth.zwe && !m_cached_ctx.ZBUF.ZMSK;
|
||||
ate_second_pass = false;
|
||||
m_conf.ps.afail = m_cached_ctx.TEST.AFAIL;
|
||||
m_conf.require_one_barrier |= (m_prim_overlap == PRIM_OVERLAP_NO);
|
||||
m_conf.require_full_barrier |= (m_prim_overlap != PRIM_OVERLAP_NO);
|
||||
if (m_cached_ctx.TEST.ZTE && m_cached_ctx.TEST.ZTST == ZTST_GEQUAL || m_cached_ctx.TEST.ZTST == ZTST_GREATER)
|
||||
{
|
||||
// Enable SW depth testing and disable HW depth testing.
|
||||
m_conf.ps.ztst = m_cached_ctx.TEST.ZTST;
|
||||
m_conf.depth.ztst = ZTST_ALWAYS;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool ate_RGBA_then_Z = false;
|
||||
bool ate_RGB_then_Z = false;
|
||||
GL_INS("HW: %sAlpha Test, ATST=%s, AFAIL=%s", (ate_first_pass && ate_second_pass) ? "Complex" : "",
|
||||
@ -7993,8 +8029,6 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
|
||||
m_conf.cb_ps.FogColor_AREF = fc.blend32<8>(m_conf.cb_ps.FogColor_AREF);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Update RT scaled alpha flag, nothing's going to read it anymore.
|
||||
if (rt)
|
||||
{
|
||||
@ -8010,9 +8044,12 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
|
||||
if (m_conf.require_one_barrier || m_conf.require_full_barrier)
|
||||
pxAssert(!m_conf.blend.enable);
|
||||
|
||||
// Barriers aren't needed with fbfetch.
|
||||
m_conf.require_one_barrier = false;
|
||||
m_conf.require_full_barrier = false;
|
||||
if (!m_conf.ps.IsFeedbackLoopDepth())
|
||||
{
|
||||
// Barriers aren't needed with fbfetch for color feedback only.
|
||||
m_conf.require_one_barrier = false;
|
||||
m_conf.require_full_barrier = false;
|
||||
}
|
||||
}
|
||||
// Multi-pass algorithms shouldn't be needed with full barrier and backends may not handle this correctly
|
||||
pxAssert(!m_conf.require_full_barrier || !m_conf.ps.colclip_hw);
|
||||
@ -8030,6 +8067,13 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
|
||||
m_conf.require_full_barrier = false;
|
||||
}
|
||||
|
||||
if (m_conf.require_full_barrier && (g_gs_device->Features().texture_barrier || g_gs_device->Features().multidraw_fb_copy))
|
||||
{
|
||||
ComputeDrawlistGetSize(rt->m_scale);
|
||||
m_conf.drawlist = &m_drawlist;
|
||||
m_conf.drawlist_bbox = &m_drawlist_bbox;
|
||||
}
|
||||
|
||||
// rs
|
||||
const GSVector4i hacked_scissor = m_channel_shuffle ? GSVector4i::cxpr(0, 0, 1024, 1024) : m_context->scissor.in;
|
||||
const GSVector4i scissor(GSVector4i(GSVector4(rtscale) * GSVector4(hacked_scissor)).rintersect(GSVector4i::loadh(rtsize)));
|
||||
@ -8100,7 +8144,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
|
||||
{
|
||||
m_conf.alpha_second_pass.ps.DisableColorOutput();
|
||||
}
|
||||
if (m_conf.alpha_second_pass.ps.IsFeedbackLoop())
|
||||
if (m_conf.alpha_second_pass.ps.IsFeedbackLoopRT())
|
||||
{
|
||||
m_conf.alpha_second_pass.require_one_barrier = m_conf.require_one_barrier;
|
||||
m_conf.alpha_second_pass.require_full_barrier = m_conf.require_full_barrier;
|
||||
@ -8124,14 +8168,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
|
||||
m_conf.cb_ps.FogColor_AREF.a = m_conf.alpha_second_pass.ps_aref;
|
||||
m_conf.alpha_second_pass.enable = false;
|
||||
}
|
||||
|
||||
if (m_conf.require_full_barrier && (g_gs_device->Features().texture_barrier || g_gs_device->Features().multidraw_fb_copy))
|
||||
{
|
||||
ComputeDrawlistGetSize(rt->m_scale);
|
||||
m_conf.drawlist = &m_drawlist;
|
||||
m_conf.drawlist_bbox = &m_drawlist_bbox;
|
||||
}
|
||||
|
||||
|
||||
if (!m_channel_shuffle_width)
|
||||
g_gs_device->RenderHW(m_conf);
|
||||
else
|
||||
|
||||
@ -1396,6 +1396,9 @@ std::string GSDeviceOGL::GetPSSource(const PSSelector& sel)
|
||||
+ fmt::format("#define PS_SCANMSK {}\n", sel.scanmsk)
|
||||
+ fmt::format("#define PS_NO_COLOR {}\n", sel.no_color)
|
||||
+ fmt::format("#define PS_NO_COLOR1 {}\n", sel.no_color1)
|
||||
+ fmt::format("#define PS_ZTST {}\n", sel.ztst)
|
||||
+ fmt::format("#define PS_COLOR_FEEDBACK {}\n", sel.color_feedback)
|
||||
+ fmt::format("#define PS_DEPTH_FEEDBACK {}\n", sel.depth_feedback)
|
||||
;
|
||||
|
||||
std::string src = GenGlslHeader("ps_main", GL_FRAGMENT_SHADER, macro);
|
||||
@ -2554,6 +2557,8 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
|
||||
PSSetShaderResource(2, draw_rt_clone);
|
||||
else if (config.require_one_barrier || config.require_full_barrier)
|
||||
PSSetShaderResource(2, colclip_rt ? colclip_rt : config.rt);
|
||||
if ((config.require_one_barrier || config.require_full_barrier) && config.ps.IsFeedbackLoopDepth())
|
||||
PSSetShaderResource(4, config.ds);
|
||||
|
||||
SetupSampler(config.sampler);
|
||||
|
||||
@ -2583,7 +2588,7 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
|
||||
// On Nvidia, 2 seems to not pick up the data written by 1 unless we add a second barrier.
|
||||
// Pretty sure GL is supposed to guarantee that the blend unit is coherent with previous pixel write out, so calling this a bug.
|
||||
if (m_bugs.broken_blend_coherency)
|
||||
rt_hazard_barrier |= (psel.ps.IsFeedbackLoop() || psel.ps.blend_c == 1) && GLState::rt == config.rt;
|
||||
rt_hazard_barrier |= (psel.ps.IsFeedbackLoopRT() || psel.ps.blend_c == 1) && GLState::rt == config.rt;
|
||||
if (config.require_one_barrier || !m_features.texture_barrier)
|
||||
rt_hazard_barrier = false; // Already in place or not available
|
||||
|
||||
@ -2671,7 +2676,7 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
|
||||
OMSetRenderTargets(draw_rt, draw_ds, &config.scissor);
|
||||
OMSetColorMaskState(config.colormask);
|
||||
SetupOM(config.depth);
|
||||
|
||||
|
||||
// Clear stencil as close as possible to the RT bind, to avoid framebuffer swaps.
|
||||
if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::StencilOne && m_features.texture_barrier)
|
||||
{
|
||||
@ -2761,7 +2766,7 @@ void GSDeviceOGL::SendHWDraw(const GSHWDrawConfig& config, bool one_barrier, boo
|
||||
}
|
||||
|
||||
#ifdef PCSX2_DEVBUILD
|
||||
if ((one_barrier || full_barrier) && !config.ps.IsFeedbackLoop()) [[unlikely]]
|
||||
if ((one_barrier || full_barrier) && !(config.ps.IsFeedbackLoopRT() || config.ps.IsFeedbackLoopDepth())) [[unlikely]]
|
||||
Console.Warning("OpenGL: Possible unnecessary barrier detected.");
|
||||
#endif
|
||||
|
||||
|
||||
@ -1501,10 +1501,10 @@ VkRenderPass GSDeviceVK::CreateCachedRenderPass(RenderPassCacheKey key)
|
||||
VkAttachmentReference* color_reference_ptr = nullptr;
|
||||
VkAttachmentReference depth_reference;
|
||||
VkAttachmentReference* depth_reference_ptr = nullptr;
|
||||
VkAttachmentReference input_reference;
|
||||
VkAttachmentReference* input_reference_ptr = nullptr;
|
||||
VkSubpassDependency subpass_dependency;
|
||||
VkSubpassDependency* subpass_dependency_ptr = nullptr;
|
||||
std::array<VkAttachmentReference, 2> input_reference;
|
||||
u32 num_subpass_inputs = 0;
|
||||
std::array<VkSubpassDependency, 2> subpass_dependency;
|
||||
u32 num_subpass_dependencies = 0;
|
||||
std::array<VkAttachmentDescription, 2> attachments;
|
||||
u32 num_attachments = 0;
|
||||
if (key.color_format != VK_FORMAT_UNDEFINED)
|
||||
@ -1524,26 +1524,26 @@ VkRenderPass GSDeviceVK::CreateCachedRenderPass(RenderPassCacheKey key)
|
||||
{
|
||||
if (!UseFeedbackLoopLayout())
|
||||
{
|
||||
input_reference.attachment = num_attachments;
|
||||
input_reference.layout = layout;
|
||||
input_reference_ptr = &input_reference;
|
||||
input_reference[num_subpass_inputs].attachment = num_attachments;
|
||||
input_reference[num_subpass_inputs].layout = layout;
|
||||
num_subpass_inputs++;
|
||||
}
|
||||
|
||||
if (!m_features.framebuffer_fetch)
|
||||
{
|
||||
// don't need the framebuffer-local dependency when we have rasterization order attachment access
|
||||
subpass_dependency.srcSubpass = 0;
|
||||
subpass_dependency.dstSubpass = 0;
|
||||
subpass_dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
|
||||
subpass_dependency.dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
|
||||
subpass_dependency.srcAccessMask =
|
||||
subpass_dependency[num_subpass_dependencies].srcSubpass = 0;
|
||||
subpass_dependency[num_subpass_dependencies].dstSubpass = 0;
|
||||
subpass_dependency[num_subpass_dependencies].srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
|
||||
subpass_dependency[num_subpass_dependencies].dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
|
||||
subpass_dependency[num_subpass_dependencies].srcAccessMask =
|
||||
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
|
||||
subpass_dependency.dstAccessMask =
|
||||
subpass_dependency[num_subpass_dependencies].dstAccessMask =
|
||||
UseFeedbackLoopLayout() ? VK_ACCESS_SHADER_READ_BIT : VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
|
||||
subpass_dependency.dependencyFlags =
|
||||
subpass_dependency[num_subpass_dependencies].dependencyFlags =
|
||||
UseFeedbackLoopLayout() ? (VK_DEPENDENCY_BY_REGION_BIT | VK_DEPENDENCY_FEEDBACK_LOOP_BIT_EXT) :
|
||||
VK_DEPENDENCY_BY_REGION_BIT;
|
||||
subpass_dependency_ptr = &subpass_dependency;
|
||||
num_subpass_dependencies++;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1562,6 +1562,35 @@ VkRenderPass GSDeviceVK::CreateCachedRenderPass(RenderPassCacheKey key)
|
||||
depth_reference.attachment = num_attachments;
|
||||
depth_reference.layout = layout;
|
||||
depth_reference_ptr = &depth_reference;
|
||||
|
||||
if (key.depth_sampling)
|
||||
{
|
||||
if (!UseFeedbackLoopLayout())
|
||||
{
|
||||
input_reference[num_subpass_inputs].attachment = num_attachments;
|
||||
input_reference[num_subpass_inputs].layout = layout;
|
||||
num_subpass_inputs++;
|
||||
}
|
||||
|
||||
if (!m_features.framebuffer_fetch)
|
||||
{
|
||||
// don't need the framebuffer-local dependency when we have rasterization order attachment access
|
||||
subpass_dependency[num_subpass_dependencies].srcSubpass = 0;
|
||||
subpass_dependency[num_subpass_dependencies].dstSubpass = 0;
|
||||
subpass_dependency[num_subpass_dependencies].srcStageMask =
|
||||
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
|
||||
subpass_dependency[num_subpass_dependencies].dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
|
||||
subpass_dependency[num_subpass_dependencies].srcAccessMask =
|
||||
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
|
||||
subpass_dependency[num_subpass_dependencies].dstAccessMask =
|
||||
UseFeedbackLoopLayout() ? VK_ACCESS_SHADER_READ_BIT : VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
|
||||
subpass_dependency[num_subpass_dependencies].dependencyFlags =
|
||||
UseFeedbackLoopLayout() ? (VK_DEPENDENCY_BY_REGION_BIT | VK_DEPENDENCY_FEEDBACK_LOOP_BIT_EXT) :
|
||||
VK_DEPENDENCY_BY_REGION_BIT;
|
||||
num_subpass_dependencies++;
|
||||
}
|
||||
}
|
||||
|
||||
num_attachments++;
|
||||
}
|
||||
|
||||
@ -1569,11 +1598,11 @@ VkRenderPass GSDeviceVK::CreateCachedRenderPass(RenderPassCacheKey key)
|
||||
(key.color_feedback_loop && m_optional_extensions.vk_ext_rasterization_order_attachment_access) ?
|
||||
VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_COLOR_ACCESS_BIT_EXT :
|
||||
0;
|
||||
const VkSubpassDescription subpass = {subpass_flags, VK_PIPELINE_BIND_POINT_GRAPHICS, input_reference_ptr ? 1u : 0u,
|
||||
input_reference_ptr ? input_reference_ptr : nullptr, color_reference_ptr ? 1u : 0u,
|
||||
const VkSubpassDescription subpass = {subpass_flags, VK_PIPELINE_BIND_POINT_GRAPHICS, num_subpass_inputs,
|
||||
num_subpass_inputs ? input_reference.data() : nullptr, color_reference_ptr ? 1u : 0u,
|
||||
color_reference_ptr ? color_reference_ptr : nullptr, nullptr, depth_reference_ptr, 0, nullptr};
|
||||
const VkRenderPassCreateInfo pass_info = {VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, nullptr, 0u, num_attachments,
|
||||
attachments.data(), 1u, &subpass, subpass_dependency_ptr ? 1u : 0u, subpass_dependency_ptr};
|
||||
attachments.data(), 1u, &subpass, num_subpass_dependencies, num_subpass_dependencies ? subpass_dependency.data() : nullptr};
|
||||
|
||||
VkRenderPass pass;
|
||||
const VkResult res = vkCreateRenderPass(m_device, &pass_info, nullptr, &pass);
|
||||
@ -3379,12 +3408,15 @@ void GSDeviceVK::OMSetRenderTargets(
|
||||
if (vkRt)
|
||||
{
|
||||
m_current_framebuffer =
|
||||
vkRt->GetLinkedFramebuffer(vkDs, (feedback_loop & FeedbackLoopFlag_ReadAndWriteRT) != 0);
|
||||
vkRt->GetLinkedFramebuffer(vkDs,
|
||||
(feedback_loop & FeedbackLoopFlag_ReadAndWriteRT) != 0,
|
||||
(feedback_loop & FeedbackLoopFlag_ReadAndWriteDepth) != 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
pxAssert(!(feedback_loop & FeedbackLoopFlag_ReadAndWriteRT));
|
||||
m_current_framebuffer = vkDs->GetLinkedFramebuffer(nullptr, false);
|
||||
m_current_framebuffer = vkDs->GetLinkedFramebuffer(
|
||||
nullptr, false, (feedback_loop & FeedbackLoopFlag_ReadAndWriteDepth) != 0);
|
||||
}
|
||||
}
|
||||
else if (InRenderPass())
|
||||
@ -3494,7 +3526,21 @@ void GSDeviceVK::OMSetRenderTargets(
|
||||
if (vkDs)
|
||||
{
|
||||
// need to update descriptors to reflect the new layout
|
||||
if (feedback_loop & FeedbackLoopFlag_ReadDS)
|
||||
if (feedback_loop & FeedbackLoopFlag_ReadAndWriteDepth)
|
||||
{
|
||||
// NVIDIA drivers appear to return random garbage when sampling the RT via a feedback loop, if the load op for
|
||||
// the render pass is CLEAR. Using vkCmdClearAttachments() doesn't work, so we have to clear the image instead.
|
||||
// Note: DS feedback loop was added later - we will assume that the same issue is relevant.
|
||||
if (vkDs->GetState() == GSTexture::State::Cleared && IsDeviceNVIDIA())
|
||||
vkDs->CommitClear();
|
||||
|
||||
if (vkDs->GetLayout() != GSTextureVK::Layout::FeedbackLoop)
|
||||
{
|
||||
m_dirty_flags |= (DIRTY_FLAG_TFX_TEXTURE_0 << TFX_TEXTURE_DEPTH);
|
||||
vkDs->TransitionToLayout(GSTextureVK::Layout::FeedbackLoop);
|
||||
}
|
||||
}
|
||||
else if (feedback_loop & FeedbackLoopFlag_ReadDepth)
|
||||
{
|
||||
if (vkDs->GetLayout() != GSTextureVK::Layout::FeedbackLoop)
|
||||
{
|
||||
@ -3743,9 +3789,13 @@ bool GSDeviceVK::CreatePipelineLayouts()
|
||||
dslb.AddBinding(TFX_TEXTURE_PALETTE, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
|
||||
dslb.AddBinding(TFX_TEXTURE_RT,
|
||||
(m_features.texture_barrier && !UseFeedbackLoopLayout()) ? VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT :
|
||||
VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
|
||||
VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
|
||||
1, VK_SHADER_STAGE_FRAGMENT_BIT);
|
||||
dslb.AddBinding(TFX_TEXTURE_PRIMID, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
|
||||
dslb.AddBinding(TFX_TEXTURE_DEPTH,
|
||||
(m_features.texture_barrier && !UseFeedbackLoopLayout()) ? VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT :
|
||||
VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
|
||||
1, VK_SHADER_STAGE_FRAGMENT_BIT);
|
||||
if ((m_tfx_texture_ds_layout = dslb.Create(dev)) == VK_NULL_HANDLE)
|
||||
return false;
|
||||
Vulkan::SetObjectName(dev, m_tfx_texture_ds_layout, "TFX texture descriptor layout");
|
||||
@ -4744,6 +4794,9 @@ VkShaderModule GSDeviceVK::GetTFXFragmentShader(const GSHWDrawConfig::PSSelector
|
||||
AddMacro(ss, "PS_TEX_IS_FB", sel.tex_is_fb);
|
||||
AddMacro(ss, "PS_NO_COLOR", sel.no_color);
|
||||
AddMacro(ss, "PS_NO_COLOR1", sel.no_color1);
|
||||
AddMacro(ss, "PS_ZTST", sel.ztst);
|
||||
AddMacro(ss, "PS_COLOR_FEEDBACK", sel.color_feedback);
|
||||
AddMacro(ss, "PS_DEPTH_FEEDBACK", sel.depth_feedback);
|
||||
ss << m_tfx_source;
|
||||
|
||||
VkShaderModule mod = g_vulkan_shader_cache->GetFragmentShader(ss.str());
|
||||
@ -5341,11 +5394,15 @@ bool GSDeviceVK::ApplyTFXState(bool already_execed)
|
||||
m_current_pipeline_layout = PipelineLayout::TFX;
|
||||
flags |= DIRTY_FLAG_TFX_UBO | DIRTY_FLAG_TFX_TEXTURES;
|
||||
|
||||
// Clear out the RT binding if feedback loop isn't on, because it'll be in the wrong state and make
|
||||
// Clear out the RT/DS binding if feedback loop isn't on, because it'll be in the wrong state and make
|
||||
// the validation layer cranky. Not a big deal since we need to write it anyway.
|
||||
const GSTextureVK::Layout rt_tex_layout = m_tfx_textures[TFX_TEXTURE_RT]->GetLayout();
|
||||
if (rt_tex_layout != GSTextureVK::Layout::FeedbackLoop && rt_tex_layout != GSTextureVK::Layout::ShaderReadOnly)
|
||||
m_tfx_textures[TFX_TEXTURE_RT] = m_null_texture.get();
|
||||
std::array<TFX_TEXTURES, 2> texture_types = { TFX_TEXTURE_RT, TFX_TEXTURE_DEPTH };
|
||||
for (u32 texture_type : texture_types)
|
||||
{
|
||||
const GSTextureVK::Layout tex_layout = m_tfx_textures[texture_type]->GetLayout();
|
||||
if (tex_layout != GSTextureVK::Layout::FeedbackLoop && tex_layout != GSTextureVK::Layout::ShaderReadOnly)
|
||||
m_tfx_textures[texture_type] = m_null_texture.get();
|
||||
}
|
||||
}
|
||||
|
||||
if (flags & DIRTY_FLAG_TFX_UBO)
|
||||
@ -5386,6 +5443,19 @@ bool GSDeviceVK::ApplyTFXState(bool already_execed)
|
||||
dsub.AddImageDescriptorWrite(VK_NULL_HANDLE, TFX_TEXTURE_PRIMID,
|
||||
m_tfx_textures[TFX_TEXTURE_PRIMID]->GetView(), m_tfx_textures[TFX_TEXTURE_PRIMID]->GetVkLayout());
|
||||
}
|
||||
if (flags & DIRTY_FLAG_TFX_TEXTURE_DEPTH)
|
||||
{
|
||||
if (m_features.texture_barrier && !UseFeedbackLoopLayout())
|
||||
{
|
||||
dsub.AddInputAttachmentDescriptorWrite(
|
||||
VK_NULL_HANDLE, TFX_TEXTURE_DEPTH, m_tfx_textures[TFX_TEXTURE_DEPTH]->GetView(), VK_IMAGE_LAYOUT_GENERAL);
|
||||
}
|
||||
else
|
||||
{
|
||||
dsub.AddImageDescriptorWrite(VK_NULL_HANDLE, TFX_TEXTURE_DEPTH, m_tfx_textures[TFX_TEXTURE_DEPTH]->GetView(),
|
||||
m_tfx_textures[TFX_TEXTURE_DEPTH]->GetVkLayout());
|
||||
}
|
||||
}
|
||||
|
||||
dsub.PushUpdate(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_tfx_pipeline_layout, TFX_DESCRIPTOR_SET_TEXTURES);
|
||||
}
|
||||
@ -5545,7 +5615,6 @@ GSTextureVK* GSDeviceVK::SetupPrimitiveTrackingDATE(GSHWDrawConfig& config)
|
||||
|
||||
void GSDeviceVK::RenderHW(GSHWDrawConfig& config)
|
||||
{
|
||||
|
||||
const GSVector2i rtsize(config.rt ? config.rt->GetSize() : config.ds->GetSize());
|
||||
GSTextureVK* draw_rt = static_cast<GSTextureVK*>(config.rt);
|
||||
GSTextureVK* draw_ds = static_cast<GSTextureVK*>(config.ds);
|
||||
@ -5597,8 +5666,12 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config)
|
||||
UpdateHWPipelineSelector(config, pipe);
|
||||
|
||||
// If we don't have a barrier but the texture was drawn to last draw, end the pass to insert a barrier.
|
||||
if (InRenderPass() && !pipe.IsRTFeedbackLoop() && (config.tex == m_current_render_target || config.tex == m_current_depth_target))
|
||||
EndRenderPass();
|
||||
if (InRenderPass())
|
||||
{
|
||||
if ((!pipe.IsRTFeedbackLoop() && config.tex == m_current_render_target) ||
|
||||
(!pipe.IsDepthFeedbackLoop() && config.tex == m_current_depth_target))
|
||||
EndRenderPass();
|
||||
}
|
||||
|
||||
// now blit the colclip texture back to the original target
|
||||
if (colclip_rt)
|
||||
@ -5781,20 +5854,31 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config)
|
||||
// Despite the layout changing enforcing the execution dependency between previous draws and the first
|
||||
// input attachment read, it still wants the region/fragment-local barrier...
|
||||
|
||||
const bool skip_first_barrier =
|
||||
(draw_rt && draw_rt->GetLayout() != GSTextureVK::Layout::FeedbackLoop && !pipe.ps.colclip_hw && !IsDeviceAMD());
|
||||
bool skip_first_barrier = !pipe.ps.colclip_hw && !IsDeviceAMD();
|
||||
if (draw_rt)
|
||||
skip_first_barrier = skip_first_barrier && draw_rt->GetLayout() != GSTextureVK::Layout::FeedbackLoop;
|
||||
if (draw_ds)
|
||||
skip_first_barrier = skip_first_barrier && draw_ds->GetLayout() != GSTextureVK::Layout::FeedbackLoop;
|
||||
|
||||
OMSetRenderTargets(draw_rt, draw_ds, config.scissor, static_cast<FeedbackLoopFlag>(pipe.feedback_loop_flags));
|
||||
if (pipe.IsRTFeedbackLoop())
|
||||
{
|
||||
pxAssertMsg(m_features.texture_barrier, "Texture barriers enabled");
|
||||
PSSetShaderResource(2, draw_rt, false);
|
||||
PSSetShaderResource(TFX_TEXTURE_RT, draw_rt, false);
|
||||
|
||||
// If this is the first draw to the target as a feedback loop, make sure we re-generate the texture descriptor.
|
||||
// Otherwise, we might have a previous descriptor left over, that has the RT in a different state.
|
||||
m_dirty_flags |= (skip_first_barrier ? static_cast<u32>(DIRTY_FLAG_TFX_TEXTURE_RT) : 0);
|
||||
}
|
||||
if (pipe.IsDepthFeedbackLoop())
|
||||
{
|
||||
pxAssertMsg(m_features.texture_barrier, "Texture barriers enabled");
|
||||
PSSetShaderResource(TFX_TEXTURE_DEPTH, draw_ds, false);
|
||||
|
||||
// If this is the first draw to the target as a feedback loop, make sure we re-generate the texture descriptor.
|
||||
// Otherwise, we might have a previous descriptor left over, that has the depth target in a different state.
|
||||
m_dirty_flags |= (skip_first_barrier ? static_cast<u32>(DIRTY_FLAG_TFX_TEXTURE_DEPTH) : 0);
|
||||
}
|
||||
// Begin render pass if new target or out of the area.
|
||||
if (!InRenderPass())
|
||||
{
|
||||
@ -5868,7 +5952,8 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config)
|
||||
|
||||
// now we can do the actual draw
|
||||
if (BindDrawPipeline(pipe))
|
||||
SendHWDraw(config, draw_rt, config.require_one_barrier, config.require_full_barrier, skip_first_barrier);
|
||||
SendHWDraw(config, pipe.IsRTFeedbackLoop() ? draw_rt : nullptr, pipe.IsDepthFeedbackLoop() ? draw_ds : nullptr,
|
||||
config.require_one_barrier, config.require_full_barrier, skip_first_barrier);
|
||||
|
||||
// blend second pass
|
||||
if (config.blend_multi_pass.enable)
|
||||
@ -5903,8 +5988,8 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config)
|
||||
pipe.bs = config.blend;
|
||||
if (BindDrawPipeline(pipe))
|
||||
{
|
||||
SendHWDraw(config, draw_rt, config.alpha_second_pass.require_one_barrier,
|
||||
config.alpha_second_pass.require_full_barrier, false);
|
||||
SendHWDraw(config, pipe.IsRTFeedbackLoop() ? draw_rt : nullptr, pipe.IsDepthFeedbackLoop() ? draw_ds : nullptr,
|
||||
config.alpha_second_pass.require_one_barrier, config.alpha_second_pass.require_full_barrier, false);
|
||||
}
|
||||
}
|
||||
|
||||
@ -5981,19 +6066,25 @@ void GSDeviceVK::UpdateHWPipelineSelector(GSHWDrawConfig& config, PipelineSelect
|
||||
pipe.rt = config.rt != nullptr;
|
||||
pipe.ds = config.ds != nullptr;
|
||||
pipe.line_width = config.line_expand;
|
||||
pipe.feedback_loop_flags =
|
||||
(m_features.texture_barrier &&
|
||||
(config.ps.IsFeedbackLoop() || config.require_one_barrier || config.require_full_barrier)) ?
|
||||
FeedbackLoopFlag_ReadAndWriteRT :
|
||||
FeedbackLoopFlag_None;
|
||||
pipe.feedback_loop_flags |=
|
||||
(config.tex && config.tex == config.ds) ? FeedbackLoopFlag_ReadDS : FeedbackLoopFlag_None;
|
||||
pipe.feedback_loop_flags = FeedbackLoopFlag_None;
|
||||
if (m_features.texture_barrier && (config.require_one_barrier || config.require_full_barrier))
|
||||
{
|
||||
if (config.ps.IsFeedbackLoopRT())
|
||||
pipe.feedback_loop_flags |= FeedbackLoopFlag_ReadAndWriteRT;
|
||||
|
||||
if (config.ps.IsFeedbackLoopDepth())
|
||||
pipe.feedback_loop_flags |= FeedbackLoopFlag_ReadAndWriteDepth;
|
||||
}
|
||||
if (!(pipe.feedback_loop_flags & FeedbackLoopFlag_ReadAndWriteDepth))
|
||||
{
|
||||
pipe.feedback_loop_flags |= (config.tex && config.tex == config.ds) ? FeedbackLoopFlag_ReadDepth : FeedbackLoopFlag_None;
|
||||
}
|
||||
|
||||
// enable point size in the vertex shader if we're rendering points regardless of upscaling.
|
||||
pipe.vs.point_size |= (config.topology == GSHWDrawConfig::Topology::Point);
|
||||
}
|
||||
|
||||
void GSDeviceVK::UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config)
|
||||
void GSDeviceVK::UploadHWDrawVerticesAndIndices(GSHWDrawConfig& config)
|
||||
{
|
||||
IASetVertexBuffer(config.verts, sizeof(GSVertex), config.nverts, GetVertexAlignment(config.vs.expand));
|
||||
m_vertex.start *= GetExpansionFactor(config.vs.expand);
|
||||
@ -6010,7 +6101,7 @@ void GSDeviceVK::UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config)
|
||||
}
|
||||
}
|
||||
|
||||
VkImageMemoryBarrier GSDeviceVK::GetColorBufferBarrier(GSTextureVK* rt) const
|
||||
VkImageMemoryBarrier GSDeviceVK::GetColorBufferFeedbackBarrier(GSTextureVK* rt) const
|
||||
{
|
||||
const VkImageLayout layout =
|
||||
UseFeedbackLoopLayout() ? VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT : VK_IMAGE_LAYOUT_GENERAL;
|
||||
@ -6021,13 +6112,25 @@ VkImageMemoryBarrier GSDeviceVK::GetColorBufferBarrier(GSTextureVK* rt) const
|
||||
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, rt->GetImage(), {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u}};
|
||||
}
|
||||
|
||||
VkDependencyFlags GSDeviceVK::GetColorBufferBarrierFlags() const
|
||||
/// Builds the image memory barrier used between draws that both write to and read from
/// the depth/stencil buffer `ds` (depth feedback loop).
///
/// The barrier does not change the image layout (old and new layout are the same); it only
/// makes prior depth/stencil attachment reads/writes visible to the subsequent shader-side read.
/// @param ds Depth/stencil texture whose image the barrier applies to.
/// @return A fully populated VkImageMemoryBarrier covering mip 0 / layer 0 of the image.
VkImageMemoryBarrier GSDeviceVK::GetDepthStencilBufferFeedbackBarrier(GSTextureVK* ds) const
{
	// With the feedback loop layout the attachment is read as a regular shader resource,
	// otherwise it is read as an input attachment while the image sits in GENERAL.
	const bool feedback_layout = UseFeedbackLoopLayout();
	const VkImageLayout image_layout =
		feedback_layout ? VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT : VK_IMAGE_LAYOUT_GENERAL;

	VkImageMemoryBarrier result = {};
	result.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
	result.pNext = nullptr;
	result.srcAccessMask =
		VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
	result.dstAccessMask = feedback_layout ? VK_ACCESS_SHADER_READ_BIT : VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
	result.oldLayout = image_layout;
	result.newLayout = image_layout;
	result.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	result.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	result.image = ds->GetImage();
	result.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
	result.subresourceRange.baseMipLevel = 0u;
	result.subresourceRange.levelCount = 1u;
	result.subresourceRange.baseArrayLayer = 0u;
	result.subresourceRange.layerCount = 1u;
	return result;
}
|
||||
|
||||
/// Returns the dependency flags to pass to vkCmdPipelineBarrier() for feedback-loop barriers.
///
/// The barrier is always by-region; the feedback-loop bit is added only when the
/// feedback loop image layout is in use.
VkDependencyFlags GSDeviceVK::GetFeedbackBarrierDependencyFlags() const
{
	VkDependencyFlags dependency_flags = VK_DEPENDENCY_BY_REGION_BIT;
	if (UseFeedbackLoopLayout())
		dependency_flags |= VK_DEPENDENCY_FEEDBACK_LOOP_BIT_EXT;

	return dependency_flags;
}
|
||||
|
||||
void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt,
|
||||
void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, GSTextureVK* draw_ds,
|
||||
bool one_barrier, bool full_barrier, bool skip_first_barrier)
|
||||
{
|
||||
if (!m_features.texture_barrier) [[unlikely]]
|
||||
@ -6037,21 +6140,52 @@ void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt,
|
||||
}
|
||||
|
||||
#ifdef PCSX2_DEVBUILD
|
||||
if ((one_barrier || full_barrier) && !m_pipeline_selector.ps.IsFeedbackLoop()) [[unlikely]]
|
||||
if ((one_barrier || full_barrier) && !(m_pipeline_selector.ps.IsFeedbackLoopRT() || m_pipeline_selector.ps.IsFeedbackLoopDepth())) [[unlikely]]
|
||||
Console.Warning("VK: Possible unnecessary barrier detected.");
|
||||
#endif
|
||||
const VkDependencyFlags barrier_flags = GetColorBufferBarrierFlags();
|
||||
VkDependencyFlags barrier_flags = GetFeedbackBarrierDependencyFlags();
|
||||
|
||||
std::array<VkImageMemoryBarrier, 2> barriers;
|
||||
u32 n_barriers = 0;
|
||||
if (full_barrier || one_barrier)
|
||||
{
|
||||
if (draw_rt)
|
||||
{
|
||||
barriers[0] = GetColorBufferFeedbackBarrier(draw_rt);
|
||||
n_barriers++;
|
||||
}
|
||||
if (draw_ds)
|
||||
{
|
||||
barriers[1] = GetDepthStencilBufferFeedbackBarrier(draw_ds);
|
||||
n_barriers++;
|
||||
}
|
||||
}
|
||||
|
||||
const auto IssueBarriers = [&]() {
|
||||
if (draw_rt)
|
||||
{
|
||||
vkCmdPipelineBarrier(GetCurrentCommandBuffer(),
|
||||
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
|
||||
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, barrier_flags, 0, nullptr, 0, nullptr, 1, &barriers[0]);
|
||||
}
|
||||
if (draw_ds)
|
||||
{
|
||||
vkCmdPipelineBarrier(GetCurrentCommandBuffer(),
|
||||
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
|
||||
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, barrier_flags, 0, nullptr, 0, nullptr, 1, &barriers[1]);
|
||||
}
|
||||
};
|
||||
|
||||
if (full_barrier)
|
||||
{
|
||||
pxAssert(config.drawlist && !config.drawlist->empty());
|
||||
|
||||
const VkImageMemoryBarrier barrier = GetColorBufferBarrier(draw_rt);
|
||||
const u32 indices_per_prim = config.indices_per_prim;
|
||||
const u32 draw_list_size = static_cast<u32>(config.drawlist->size());
|
||||
|
||||
GL_PUSH("Split the draw");
|
||||
g_perfmon.Put(
|
||||
GSPerfMon::Barriers, static_cast<u32>(draw_list_size) - static_cast<u32>(skip_first_barrier));
|
||||
g_perfmon.Put(GSPerfMon::Barriers,
|
||||
n_barriers * (draw_list_size - static_cast<u32>(skip_first_barrier)));
|
||||
|
||||
u32 p = 0;
|
||||
u32 n = 0;
|
||||
@ -6066,8 +6200,7 @@ void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt,
|
||||
|
||||
for (; n < draw_list_size; n++)
|
||||
{
|
||||
vkCmdPipelineBarrier(GetCurrentCommandBuffer(), VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
|
||||
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, barrier_flags, 0, nullptr, 0, nullptr, 1, &barrier);
|
||||
IssueBarriers();
|
||||
|
||||
const u32 count = (*config.drawlist)[n] * indices_per_prim;
|
||||
DrawIndexedPrimitive(p, count);
|
||||
@ -6079,11 +6212,8 @@ void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt,
|
||||
|
||||
if (one_barrier && !skip_first_barrier)
|
||||
{
|
||||
g_perfmon.Put(GSPerfMon::Barriers, 1);
|
||||
|
||||
const VkImageMemoryBarrier barrier = GetColorBufferBarrier(draw_rt);
|
||||
vkCmdPipelineBarrier(GetCurrentCommandBuffer(), VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
|
||||
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, barrier_flags, 0, nullptr, 0, nullptr, 1, &barrier);
|
||||
g_perfmon.Put(GSPerfMon::Barriers, n_barriers);
|
||||
IssueBarriers();
|
||||
}
|
||||
|
||||
DrawIndexedPrimitive();
|
||||
|
||||
@ -293,7 +293,8 @@ public:
|
||||
{
|
||||
FeedbackLoopFlag_None = 0,
|
||||
FeedbackLoopFlag_ReadAndWriteRT = 1,
|
||||
FeedbackLoopFlag_ReadDS = 2,
|
||||
FeedbackLoopFlag_ReadDepth = 2,
|
||||
FeedbackLoopFlag_ReadAndWriteDepth = 4,
|
||||
};
|
||||
|
||||
struct alignas(8) PipelineSelector
|
||||
@ -308,7 +309,7 @@ public:
|
||||
u32 rt : 1;
|
||||
u32 ds : 1;
|
||||
u32 line_width : 1;
|
||||
u32 feedback_loop_flags : 2;
|
||||
u32 feedback_loop_flags : 3;
|
||||
};
|
||||
|
||||
u32 key;
|
||||
@ -326,7 +327,8 @@ public:
|
||||
__fi PipelineSelector() { std::memset(this, 0, sizeof(*this)); }
|
||||
|
||||
__fi bool IsRTFeedbackLoop() const { return ((feedback_loop_flags & FeedbackLoopFlag_ReadAndWriteRT) != 0); }
|
||||
__fi bool IsTestingAndSamplingDepth() const { return ((feedback_loop_flags & FeedbackLoopFlag_ReadDS) != 0); }
|
||||
__fi bool IsDepthFeedbackLoop() const { return ((feedback_loop_flags & FeedbackLoopFlag_ReadAndWriteDepth) != 0); }
|
||||
__fi bool IsTestingAndSamplingDepth() const { return ((feedback_loop_flags & (FeedbackLoopFlag_ReadDepth | FeedbackLoopFlag_ReadAndWriteDepth)) != 0); }
|
||||
};
|
||||
static_assert(sizeof(PipelineSelector) == 24, "Pipeline selector is 24 bytes");
|
||||
|
||||
@ -357,10 +359,11 @@ public:
|
||||
};
|
||||
enum TFX_TEXTURES : u32
|
||||
{
|
||||
TFX_TEXTURE_TEXTURE,
|
||||
TFX_TEXTURE_TEXTURE = 0,
|
||||
TFX_TEXTURE_PALETTE,
|
||||
TFX_TEXTURE_RT,
|
||||
TFX_TEXTURE_PRIMID,
|
||||
TFX_TEXTURE_DEPTH,
|
||||
|
||||
NUM_TFX_TEXTURES
|
||||
};
|
||||
@ -568,10 +571,11 @@ public:
|
||||
|
||||
void RenderHW(GSHWDrawConfig& config) override;
|
||||
void UpdateHWPipelineSelector(GSHWDrawConfig& config, PipelineSelector& pipe);
|
||||
void UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config);
|
||||
VkImageMemoryBarrier GetColorBufferBarrier(GSTextureVK* rt) const;
|
||||
VkDependencyFlags GetColorBufferBarrierFlags() const;
|
||||
void SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt,
|
||||
void UploadHWDrawVerticesAndIndices(GSHWDrawConfig& config);
|
||||
VkImageMemoryBarrier GetColorBufferFeedbackBarrier(GSTextureVK* rt) const;
|
||||
VkImageMemoryBarrier GetDepthStencilBufferFeedbackBarrier(GSTextureVK* ds) const;
|
||||
VkDependencyFlags GetFeedbackBarrierDependencyFlags() const;
|
||||
void SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, GSTextureVK* draw_ds,
|
||||
bool one_barrier, bool full_barrier, bool skip_first_barrier);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
@ -621,25 +625,27 @@ public:
|
||||
private:
|
||||
enum DIRTY_FLAG : u32
|
||||
{
|
||||
DIRTY_FLAG_TFX_TEXTURE_0 = (1 << 0), // 0, 1, 2, 3
|
||||
DIRTY_FLAG_TFX_UBO = (1 << 4),
|
||||
DIRTY_FLAG_UTILITY_TEXTURE = (1 << 5),
|
||||
DIRTY_FLAG_BLEND_CONSTANTS = (1 << 6),
|
||||
DIRTY_FLAG_LINE_WIDTH = (1 << 7),
|
||||
DIRTY_FLAG_INDEX_BUFFER = (1 << 8),
|
||||
DIRTY_FLAG_VIEWPORT = (1 << 9),
|
||||
DIRTY_FLAG_SCISSOR = (1 << 10),
|
||||
DIRTY_FLAG_PIPELINE = (1 << 11),
|
||||
DIRTY_FLAG_VS_CONSTANT_BUFFER = (1 << 12),
|
||||
DIRTY_FLAG_PS_CONSTANT_BUFFER = (1 << 13),
|
||||
DIRTY_FLAG_TFX_TEXTURE_0 = (1 << 0), // 0, 1, 2, 3, 4
|
||||
DIRTY_FLAG_TFX_UBO = (1 << 5),
|
||||
DIRTY_FLAG_UTILITY_TEXTURE = (1 << 6),
|
||||
DIRTY_FLAG_BLEND_CONSTANTS = (1 << 7),
|
||||
DIRTY_FLAG_LINE_WIDTH = (1 << 8),
|
||||
DIRTY_FLAG_INDEX_BUFFER = (1 << 9),
|
||||
DIRTY_FLAG_VIEWPORT = (1 << 10),
|
||||
DIRTY_FLAG_SCISSOR = (1 << 11),
|
||||
DIRTY_FLAG_PIPELINE = (1 << 12),
|
||||
DIRTY_FLAG_VS_CONSTANT_BUFFER = (1 << 13),
|
||||
DIRTY_FLAG_PS_CONSTANT_BUFFER = (1 << 14),
|
||||
|
||||
DIRTY_FLAG_TFX_TEXTURE_TEX = (DIRTY_FLAG_TFX_TEXTURE_0 << 0),
|
||||
DIRTY_FLAG_TFX_TEXTURE_PALETTE = (DIRTY_FLAG_TFX_TEXTURE_0 << 1),
|
||||
DIRTY_FLAG_TFX_TEXTURE_RT = (DIRTY_FLAG_TFX_TEXTURE_0 << 2),
|
||||
DIRTY_FLAG_TFX_TEXTURE_PRIMID = (DIRTY_FLAG_TFX_TEXTURE_0 << 3),
|
||||
DIRTY_FLAG_TFX_TEXTURE_DEPTH = (DIRTY_FLAG_TFX_TEXTURE_0 << 4),
|
||||
|
||||
DIRTY_FLAG_TFX_TEXTURES = DIRTY_FLAG_TFX_TEXTURE_TEX | DIRTY_FLAG_TFX_TEXTURE_PALETTE |
|
||||
DIRTY_FLAG_TFX_TEXTURE_RT | DIRTY_FLAG_TFX_TEXTURE_PRIMID,
|
||||
DIRTY_FLAG_TFX_TEXTURE_RT | DIRTY_FLAG_TFX_TEXTURE_PRIMID |
|
||||
DIRTY_FLAG_TFX_TEXTURE_DEPTH,
|
||||
|
||||
DIRTY_BASE_STATE = DIRTY_FLAG_INDEX_BUFFER | DIRTY_FLAG_PIPELINE | DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR |
|
||||
DIRTY_FLAG_BLEND_CONSTANTS | DIRTY_FLAG_LINE_WIDTH,
|
||||
|
||||
@ -114,7 +114,7 @@ std::unique_ptr<GSTextureVK> GSTextureVK::Create(Type type, Format format, int w
|
||||
VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
|
||||
VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT |
|
||||
(GSDeviceVK::GetInstance()->UseFeedbackLoopLayout() ? VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT
|
||||
: 0);
|
||||
: VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT);
|
||||
vci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
|
||||
}
|
||||
break;
|
||||
@ -198,7 +198,7 @@ void GSTextureVK::Destroy(bool defer)
|
||||
|
||||
if (m_type == Type::RenderTarget || m_type == Type::DepthStencil)
|
||||
{
|
||||
for (const auto& [other_tex, fb, feedback] : m_framebuffers)
|
||||
for (const auto& [other_tex, fb, feedback_color, feedback_depth] : m_framebuffers)
|
||||
{
|
||||
if (other_tex)
|
||||
{
|
||||
@ -738,16 +738,16 @@ void GSTextureVK::TransitionSubresourcesToLayout(
|
||||
|
||||
VkFramebuffer GSTextureVK::GetFramebuffer(bool feedback_loop)
|
||||
{
|
||||
return GetLinkedFramebuffer(nullptr, feedback_loop);
|
||||
return GetLinkedFramebuffer(nullptr, feedback_loop, false);
|
||||
}
|
||||
|
||||
VkFramebuffer GSTextureVK::GetLinkedFramebuffer(GSTextureVK* depth_texture, bool feedback_loop)
|
||||
VkFramebuffer GSTextureVK::GetLinkedFramebuffer(GSTextureVK* depth_texture, bool feedback_loop_color, bool feedback_loop_depth)
|
||||
{
|
||||
pxAssertRel(m_type != Type::Texture, "Texture is a render target");
|
||||
|
||||
for (const auto& [other_tex, fb, other_feedback_loop] : m_framebuffers)
|
||||
for (const auto& [other_tex, fb, other_feedback_loop_color, other_feedback_loop_depth] : m_framebuffers)
|
||||
{
|
||||
if (other_tex == depth_texture && other_feedback_loop == feedback_loop)
|
||||
if (other_tex == depth_texture && other_feedback_loop_color == feedback_loop_color && other_feedback_loop_depth == feedback_loop_depth)
|
||||
return fb;
|
||||
}
|
||||
|
||||
@ -756,7 +756,7 @@ VkFramebuffer GSTextureVK::GetLinkedFramebuffer(GSTextureVK* depth_texture, bool
|
||||
(m_type != GSTexture::Type::DepthStencil) ? (depth_texture ? depth_texture->m_vk_format : VK_FORMAT_UNDEFINED) :
|
||||
m_vk_format,
|
||||
VK_ATTACHMENT_LOAD_OP_LOAD, VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_LOAD,
|
||||
VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_STORE_OP_DONT_CARE, feedback_loop);
|
||||
VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_STORE_OP_DONT_CARE, feedback_loop_color, feedback_loop_depth);
|
||||
if (!rp)
|
||||
return VK_NULL_HANDLE;
|
||||
|
||||
@ -771,9 +771,9 @@ VkFramebuffer GSTextureVK::GetLinkedFramebuffer(GSTextureVK* depth_texture, bool
|
||||
if (!fb)
|
||||
return VK_NULL_HANDLE;
|
||||
|
||||
m_framebuffers.emplace_back(depth_texture, fb, feedback_loop);
|
||||
m_framebuffers.emplace_back(depth_texture, fb, feedback_loop_color, feedback_loop_depth);
|
||||
if (depth_texture)
|
||||
depth_texture->m_framebuffers.emplace_back(this, fb, feedback_loop);
|
||||
depth_texture->m_framebuffers.emplace_back(this, fb, feedback_loop_color, feedback_loop_depth);
|
||||
return fb;
|
||||
}
|
||||
|
||||
|
||||
@ -73,7 +73,7 @@ public:
|
||||
/// Framebuffers are lazily allocated.
|
||||
VkFramebuffer GetFramebuffer(bool feedback_loop);
|
||||
|
||||
VkFramebuffer GetLinkedFramebuffer(GSTextureVK* depth_texture, bool feedback_loop);
|
||||
VkFramebuffer GetLinkedFramebuffer(GSTextureVK* depth_texture, bool feedback_loop_color, bool feedback_loop_depth);
|
||||
|
||||
// Call when the texture is bound to the pipeline, or read from in a copy.
|
||||
__fi void SetUseFenceCounter(u64 counter) { m_use_fence_counter = counter; }
|
||||
@ -103,7 +103,7 @@ private:
|
||||
|
||||
// linked framebuffer is combined with depth texture
|
||||
// list of color textures this depth texture is linked to or vice versa
|
||||
std::vector<std::tuple<GSTextureVK*, VkFramebuffer, bool>> m_framebuffers;
|
||||
std::vector<std::tuple<GSTextureVK*, VkFramebuffer, bool, bool>> m_framebuffers;
|
||||
};
|
||||
|
||||
class GSDownloadTextureVK final : public GSDownloadTexture
|
||||
|
||||
@ -751,6 +751,7 @@ Pcsx2Config::GSOptions::GSOptions()
|
||||
PreloadFrameWithGSData = false;
|
||||
Mipmap = true;
|
||||
HWMipmap = true;
|
||||
HWAFAILFeedback = false;
|
||||
|
||||
ManualUserHacks = false;
|
||||
UserHacks_AlignSpriteX = false;
|
||||
@ -1021,6 +1022,7 @@ void Pcsx2Config::GSOptions::LoadSave(SettingsWrapper& wrap)
|
||||
SettingsWrapEntryEx(UpscaleMultiplier, "upscale_multiplier");
|
||||
|
||||
SettingsWrapBitBoolEx(HWMipmap, "hw_mipmap");
|
||||
SettingsWrapBitBoolEx(HWAFAILFeedback, "HWAFAILFeedback");
|
||||
SettingsWrapIntEnumEx(AccurateBlendingUnit, "accurate_blending_unit");
|
||||
SettingsWrapIntEnumEx(TextureFiltering, "filter");
|
||||
SettingsWrapIntEnumEx(TexturePreloading, "texture_preloading");
|
||||
|
||||
Loading…
Reference in New Issue
Block a user