mirror of
https://github.com/PCSX2/pcsx2.git
synced 2025-12-16 04:08:48 +00:00
Merge cbd4a9c92f into cf4412ecbe
This commit is contained in:
commit
e6aef219cc
@ -1,6 +1,9 @@
|
|||||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||||
// SPDX-License-Identifier: GPL-3.0+
|
// SPDX-License-Identifier: GPL-3.0+
|
||||||
|
|
||||||
|
#define ACCURATE_LINES 1
|
||||||
|
#define ACCURATE_TRIANGLES 2
|
||||||
|
|
||||||
#define FMT_32 0
|
#define FMT_32 0
|
||||||
#define FMT_24 1
|
#define FMT_24 1
|
||||||
#define FMT_16 2
|
#define FMT_16 2
|
||||||
@ -21,6 +24,11 @@
|
|||||||
#define GS_FORWARD_PRIMID 0
|
#define GS_FORWARD_PRIMID 0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef ZTST_GEQUAL
|
||||||
|
#define ZTST_GEQUAL 2
|
||||||
|
#define ZTST_GREATER 3
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef PS_FST
|
#ifndef PS_FST
|
||||||
#define PS_IIP 0
|
#define PS_IIP 0
|
||||||
#define PS_FST 0
|
#define PS_FST 0
|
||||||
@ -84,6 +92,7 @@
|
|||||||
#define SW_BLEND_NEEDS_RT (SW_BLEND && (PS_BLEND_A == 1 || PS_BLEND_B == 1 || PS_BLEND_C == 1 || PS_BLEND_D == 1))
|
#define SW_BLEND_NEEDS_RT (SW_BLEND && (PS_BLEND_A == 1 || PS_BLEND_B == 1 || PS_BLEND_C == 1 || PS_BLEND_D == 1))
|
||||||
#define SW_AD_TO_HW (PS_BLEND_C == 1 && PS_A_MASKED)
|
#define SW_AD_TO_HW (PS_BLEND_C == 1 && PS_A_MASKED)
|
||||||
#define NEEDS_RT_FOR_AFAIL (PS_AFAIL == 3 && PS_NO_COLOR1)
|
#define NEEDS_RT_FOR_AFAIL (PS_AFAIL == 3 && PS_NO_COLOR1)
|
||||||
|
#define NEEDS_DEPTH ((PS_ACCURATE_PRIMS == ACCURATE_TRIANGLES) && PS_ACCURATE_PRIMS_AA && PS_ZCLAMP)
|
||||||
|
|
||||||
struct VS_INPUT
|
struct VS_INPUT
|
||||||
{
|
{
|
||||||
@ -94,6 +103,9 @@ struct VS_INPUT
|
|||||||
uint z : POSITION1;
|
uint z : POSITION1;
|
||||||
uint2 uv : TEXCOORD2;
|
uint2 uv : TEXCOORD2;
|
||||||
float4 f : COLOR1;
|
float4 f : COLOR1;
|
||||||
|
#ifdef VS_ACCURATE_PRIMS
|
||||||
|
uint vertex_id : SV_VertexID;
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
struct VS_OUTPUT
|
struct VS_OUTPUT
|
||||||
@ -107,6 +119,12 @@ struct VS_OUTPUT
|
|||||||
#else
|
#else
|
||||||
nointerpolation float4 c : COLOR0;
|
nointerpolation float4 c : COLOR0;
|
||||||
#endif
|
#endif
|
||||||
|
#if VS_ACCURATE_PRIMS
|
||||||
|
nointerpolation uint accurate_prims_index : TEXCOORD3;
|
||||||
|
#if VS_ACCURATE_PRIMS == ACCURATE_TRIANGLES
|
||||||
|
nointerpolation uint accurate_triangles_interior : TEXCOORD4;
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
struct PS_INPUT
|
struct PS_INPUT
|
||||||
@ -122,6 +140,38 @@ struct PS_INPUT
|
|||||||
#if (PS_DATE >= 1 && PS_DATE <= 3) || GS_FORWARD_PRIMID
|
#if (PS_DATE >= 1 && PS_DATE <= 3) || GS_FORWARD_PRIMID
|
||||||
uint primid : SV_PrimitiveID;
|
uint primid : SV_PrimitiveID;
|
||||||
#endif
|
#endif
|
||||||
|
#if PS_ACCURATE_PRIMS
|
||||||
|
nointerpolation uint accurate_prims_index : TEXCOORD3;
|
||||||
|
#if PS_ACCURATE_PRIMS == ACCURATE_TRIANGLES
|
||||||
|
nointerpolation uint accurate_triangles_interior : TEXCOORD4;
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
// Per-line / per-triangle-edge record read from the accurate_prims_data structured buffer.
// Members with suffix 0 / 1 belong to the first / second endpoint of the line or edge.
// The number in each trailing comment is the member's byte offset; keep the order and padding
// unchanged so the 208-byte layout noted at the end stays valid.
struct AccuratePrimsEdgeData
|
||||||
|
{
|
||||||
|
// Endpoint attributes that the pixel shader blends manually (see InterpolateAttributesManual)
|
||||||
|
float4 t_float0; // 0: blended into input.t
|
||||||
|
float4 t_float1; // 16
|
||||||
|
float4 t_int0; // 32: blended into input.ti
|
||||||
|
float4 t_int1; // 48
|
||||||
|
float4 c0; // 64: blended into input.c, then clamped to [0, 255]
|
||||||
|
float4 c1; // 80
|
||||||
|
float4 p0; // 96: only .z is read by the shader code visible here
|
||||||
|
float4 p1; // 112
|
||||||
|
int4 edge0; // 128: dotted with (x, y, 1, 0) of the pixel; pixel is discarded when the result is <= 0
|
||||||
|
int4 edge1; // 144: second edge test, same rule as edge0
|
||||||
|
int2 xy0; // 160: endpoint 0 position, compared against 16 * pixel coordinates (4-bit fixed point)
|
||||||
|
int2 xy1; // 168: endpoint 1 position, same units as xy0
|
||||||
|
uint step_x; // 176: non-zero selects X as the major axis, zero selects Y
|
||||||
|
uint draw0; // 180: zero discards the pixel at endpoint 0's rounded major coordinate (lines)
|
||||||
|
uint draw1; // 184: zero discards the pixel at endpoint 1's rounded major coordinate (lines)
|
||||||
|
uint top_left; // 188: read by HandleAccurateTrianglesEdge when the edge hits a pixel exactly
|
||||||
|
uint side; // 192: read by HandleAccurateTrianglesEdge to choose which neighbouring pixel is covered
|
||||||
|
uint _pad0; // 196: padding
|
||||||
|
uint _pad1; // 200: padding
|
||||||
|
uint _pad2; // 204: padding
|
||||||
|
// Total 208 bytes
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef PIXEL_SHADER
|
#ifdef PIXEL_SHADER
|
||||||
@ -147,6 +197,8 @@ Texture2D<float4> Texture : register(t0);
|
|||||||
Texture2D<float4> Palette : register(t1);
|
Texture2D<float4> Palette : register(t1);
|
||||||
Texture2D<float4> RtTexture : register(t2);
|
Texture2D<float4> RtTexture : register(t2);
|
||||||
Texture2D<float> PrimMinTexture : register(t3);
|
Texture2D<float> PrimMinTexture : register(t3);
|
||||||
|
Texture2D<float> DepthTexture : register(t4);
|
||||||
|
StructuredBuffer<AccuratePrimsEdgeData> accurate_prims_data : register(t5);
|
||||||
SamplerState TextureSampler : register(s0);
|
SamplerState TextureSampler : register(s0);
|
||||||
|
|
||||||
#ifdef DX12
|
#ifdef DX12
|
||||||
@ -172,6 +224,12 @@ cbuffer cb1
|
|||||||
float4x4 DitherMatrix;
|
float4x4 DitherMatrix;
|
||||||
float ScaledScaleFactor;
|
float ScaledScaleFactor;
|
||||||
float RcpScaleFactor;
|
float RcpScaleFactor;
|
||||||
|
uint _pad0;
|
||||||
|
uint _pad1;
|
||||||
|
uint accurate_prims_base_index;
|
||||||
|
uint _pad2;
|
||||||
|
uint _pad3;
|
||||||
|
uint _pad4;
|
||||||
};
|
};
|
||||||
|
|
||||||
float4 sample_c(float2 uv, float uv_w, int2 xy)
|
float4 sample_c(float2 uv, float uv_w, int2 xy)
|
||||||
@ -1015,9 +1073,242 @@ void ps_blend(inout float4 Color, inout float4 As_rgba, float2 pos_xy)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if PS_ACCURATE_PRIMS
|
||||||
|
// Interpolate vertex attributes over a line/edge manually.
|
||||||
|
// Blends the two endpoint attribute sets of a line/edge for the current pixel.
//   data    - record holding the attributes of endpoint 0 and endpoint 1.
//   weight0 - integer weight applied to endpoint 0.
//   weight1 - integer weight applied to endpoint 1.
//   input   - receives the blended t, ti, c and p.z; nothing else is written.
// Each attribute is (weight1 * a1 + weight0 * a0) / (weight0 + weight1), except that
// components which are equal at both endpoints are copied from endpoint 1 unblended.
// NOTE(review): there is no guard here against weight0 + weight1 == 0.
void InterpolateAttributesManual(AccuratePrimsEdgeData data, int weight0, int weight1, inout PS_INPUT input)
{
	float w0 = float(weight0);
	float w1 = float(weight1);
	float w_sum = float(weight0 + weight1);

	// Float texture coordinates; the z component is kept inside [0, 1].
	float4 t_blend = (w1 * data.t_float1 + w0 * data.t_float0) / w_sum;
	input.t = lerp(t_blend, data.t_float1, float4(data.t_float1 == data.t_float0));
	input.t.z = clamp(input.t.z, 0.0f, 1.0f);

	// Integer texture coordinates (carried as floats).
	float4 ti_blend = (w1 * data.t_int1 + w0 * data.t_int0) / w_sum;
	input.ti = lerp(ti_blend, data.t_int1, float4(data.t_int1 == data.t_int0));

	// Colour, kept inside [0, 255].
	float4 c_blend = (w1 * data.c1 + w0 * data.c0) / w_sum;
	input.c = clamp(lerp(c_blend, data.c1, float4(data.c1 == data.c0)), 0.0f, 255.0f);

	// Depth, kept inside [0, 1].
	float z = (w1 * data.p1.z + w0 * data.p0.z) / w_sum;
	if (data.p1.z == data.p0.z)
		z = data.p1.z;
	input.p.z = clamp(z, 0.0f, 1.0f);
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if PS_ACCURATE_PRIMS == ACCURATE_LINES
|
||||||
|
// Decides whether the current pixel belongs to the line described by this primitive's
// AccuratePrimsEdgeData record, discards it if not, and otherwise outputs its coverage
// and manually interpolated attributes.
//   input          - pixel shader input; p.xy is read, t/ti/c/p.z are overwritten.
//   alpha_coverage - 128 without AA; with PS_ACCURATE_PRIMS_AA a value in [0, 127].
void HandleAccurateLines(inout PS_INPUT input, out float alpha_coverage)
{
	AccuratePrimsEdgeData data = accurate_prims_data[accurate_prims_base_index + input.accurate_prims_index];

	// Everything below works in 4-bit fixed point (16 sub-pixels per pixel).
	// floor() drops the half-pixel centre offset of the pixel position.
	int2 end0 = data.xy0;
	int2 end1 = data.xy1;
	int2 pix = 16 * int2(floor(input.p.xy));

	// Reorder each pair as (major, minor) so the rest of the function is axis-agnostic.
	if (!bool(data.step_x))
	{
		end0 = end0.yx;
		end1 = end1.yx;
		pix = pix.yx;
	}

	int span = end1.x - end0.x; // Signed length along the major axis.
	int span_scaled = 16 * span;

	// Endpoints rounded to the nearest pixel along the major axis.
	int snap0 = (end0.x + 8) & ~0xF;
	int snap1 = (end1.x + 8) & ~0xF;

	// Reject pixels beyond the line's extent, and endpoint pixels flagged as not drawn.
	bool outside = pix.x < min(snap0, snap1) || pix.x > max(snap0, snap1);
	bool skipped_end = (pix.x == snap0 && !bool(data.draw0)) || (pix.x == snap1 && !bool(data.draw1));
	if (outside || skipped_end)
		discard;

	int weight0 = end1.x - pix.x;
	int weight1 = pix.x - end0.x;

	// Minor coordinate of the line at this major position, still multiplied by span.
	int minor_line = weight1 * end1.y + weight0 * end0.y;

#if PS_ACCURATE_PRIMS_AA
	// The line touches two neighbouring pixels along the minor axis; split the coverage between them.
	int minor_lo = (minor_line / span) & ~0xF;
	int minor_hi = minor_lo + 16;
	int cover_hi = minor_line - span * minor_lo;
	int cover_lo = span_scaled - cover_hi;

	int alpha_i = 0; // Initialised so the discard path does not trigger a compiler warning.
	if (pix.y == minor_lo)
		alpha_i = cover_lo;
	else if (pix.y == minor_hi)
		alpha_i = cover_hi;
	else
		discard;

	// AA output alpha must never exceed 127.
	alpha_coverage = floor(clamp(128.0f * float(alpha_i) / float(span_scaled), 0.0f, 127.0f));
#else
	// Without AA only the pixel nearest to the line is kept.
	int minor_expected = ((2 * minor_line + span_scaled) / (2 * span)) & ~0xF;
	if (pix.y != minor_expected)
		discard;
	alpha_coverage = 128.0f;
#endif

	InterpolateAttributesManual(data, weight0, weight1, input);
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if PS_ACCURATE_PRIMS == ACCURATE_TRIANGLES
|
||||||
|
// Decides whether the current pixel lies on the triangle edge described by this primitive's
// AccuratePrimsEdgeData record, discards it if not, and otherwise outputs its coverage
// and manually interpolated attributes.
//   input          - pixel shader input; p.xy is read, t/ti/c/p.z are overwritten.
//   alpha_coverage - 128 without AA; with PS_ACCURATE_PRIMS_AA a value in [0, 127].
void HandleAccurateTrianglesEdge(inout PS_INPUT input, out float alpha_coverage)
{
	AccuratePrimsEdgeData data = accurate_prims_data[accurate_prims_base_index + input.accurate_prims_index];

	bool side = bool(data.side);
	bool top_left = bool(data.top_left);

	// Everything below works in 4-bit fixed point (16 sub-pixels per pixel).
	// floor() drops the half-pixel centre offset of the pixel position.
	int2 frag = 16 * int2(floor(input.p.xy));

	// Reorder each pair as (major, minor); 'frag' stays in (x, y) order for the edge tests.
	int2 end0 = data.xy0;
	int2 end1 = data.xy1;
	int2 pix = frag;
	if (!bool(data.step_x))
	{
		end0 = end0.yx;
		end1 = end1.yx;
		pix = pix.yx;
	}

	int span = end1.x - end0.x; // Signed length along the major axis.
	int span_scaled = 16 * span;

	// Endpoints rounded to the nearest pixel along the major axis.
	int snap0 = (end0.x + 8) & ~0xF;
	int snap1 = (end1.x + 8) & ~0xF;

	// Discard if outside edge range.
	// Note: this is not exactly what the SW rasterizer does.
	// See the note in GSRasterizer::DrawEdgeTriangle() about the asymmetry in X and Y bounds checking.
	if (pix.x < min(snap0, snap1) || pix.x > max(snap0, snap1))
		discard;

	// The pixel must be strictly on the positive side of both other edges.
	int4 probe = int4(frag, 1, 0);
	if (dot(data.edge0, probe) <= 0 || dot(data.edge1, probe) <= 0)
		discard;

	int weight0 = end1.x - pix.x;
	int weight1 = pix.x - end0.x;

	// Minor coordinate of the edge at this major position, still multiplied by span.
	int minor_line = weight1 * end1.y + weight0 * end0.y;
	int minor_exact = minor_line / span;
	int minor_lo = minor_exact & ~0xF;
	int minor_hi = minor_lo + 16;
	int cover_hi = minor_line - span * minor_lo;
	int cover_lo = span_scaled - cover_hi;

	// Select the single pixel this edge is allowed to touch and its coverage.
	int alpha_i;
	int minor_target;
	if ((minor_exact & 0xF) != 0)
	{
		// Edge passes between two pixels: 'side' picks which of them is covered.
		minor_target = side ? minor_lo : minor_hi;
		alpha_i = side ? cover_lo : cover_hi;
	}
	else if (top_left)
	{
		// Edge hits a pixel exactly and is a top-left edge: move one pixel over with zero coverage.
		minor_target = minor_exact + (side ? -16 : 16);
		alpha_i = 0;
	}
	else
	{
		// Edge hits a pixel exactly: that pixel gets full coverage.
		minor_target = minor_exact;
		alpha_i = span_scaled;
	}

	if (pix.y != minor_target)
		discard;

#if PS_ACCURATE_PRIMS_AA
	// AA output alpha must never exceed 127.
	alpha_coverage = floor(clamp(128.0f * float(alpha_i) / float(span_scaled), 0.0f, 127.0f));
#else
	alpha_coverage = 128.0f;
#endif

	InterpolateAttributesManual(data, weight0, weight1, input);
}
|
||||||
|
#endif
|
||||||
|
|
||||||
PS_OUTPUT ps_main(PS_INPUT input)
|
PS_OUTPUT ps_main(PS_INPUT input)
|
||||||
{
|
{
|
||||||
|
#if PS_ACCURATE_PRIMS
|
||||||
|
float alpha_coverage;
|
||||||
|
#if PS_ACCURATE_PRIMS == ACCURATE_LINES
|
||||||
|
HandleAccurateLines(input, alpha_coverage);
|
||||||
|
#elif PS_ACCURATE_PRIMS == ACCURATE_TRIANGLES
|
||||||
|
if (bool(input.accurate_triangles_interior))
|
||||||
|
{
|
||||||
|
alpha_coverage = 128.0f;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
HandleAccurateTrianglesEdge(input, alpha_coverage);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#endif // PS_ACCURATE_PRIMS
|
||||||
|
|
||||||
|
#if NEEDS_DEPTH
|
||||||
|
float current_depth = DepthTexture.Load(int3(floor(input.p.xy), 0)).r;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if PS_ZCLAMP && (PS_ZTST == ZTST_GEQUAL || PS_ZTST == ZTST_GREATER)
|
||||||
|
#if PS_ZTST == ZTST_GEQUAL
|
||||||
|
if (input.p.z < current_depth)
|
||||||
|
discard;
|
||||||
|
#elif PS_ZTST == ZTST_GREATER
|
||||||
|
if (input.p.z <= current_depth)
|
||||||
|
discard;
|
||||||
|
#endif
|
||||||
|
#endif // PS_ZTST
|
||||||
|
|
||||||
float4 C = ps_color(input);
|
float4 C = ps_color(input);
|
||||||
|
|
||||||
|
#if PS_FIXED_ONE_A
|
||||||
|
// AA (Fixed one) will output a coverage of 1.0 as alpha
|
||||||
|
C.a = 128.0f;
|
||||||
|
#elif PS_ACCURATE_PRIMS_AA
|
||||||
|
// AA: coverage is computed in alpha_coverage
|
||||||
|
#if PS_ACCURATE_PRIMS_AA_ABE
|
||||||
|
if (floor(C.a) == 128.0f) // According to manual & hardware tests the coverage is only used if the fragment alpha is 128.
|
||||||
|
C.a = alpha_coverage;
|
||||||
|
#else
|
||||||
|
C.a = alpha_coverage;
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
bool atst_pass = atst(C);
|
bool atst_pass = atst(C);
|
||||||
|
|
||||||
#if PS_AFAIL == 0 // KEEP or ATST off
|
#if PS_AFAIL == 0 // KEEP or ATST off
|
||||||
@ -1034,14 +1325,6 @@ PS_OUTPUT ps_main(PS_INPUT input)
|
|||||||
discard;
|
discard;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Must be done before alpha correction
|
|
||||||
|
|
||||||
// AA (Fixed one) will output a coverage of 1.0 as alpha
|
|
||||||
if (PS_FIXED_ONE_A)
|
|
||||||
{
|
|
||||||
C.a = 128.0f;
|
|
||||||
}
|
|
||||||
|
|
||||||
float4 alpha_blend = (float4)0.0f;
|
float4 alpha_blend = (float4)0.0f;
|
||||||
if (SW_AD_TO_HW)
|
if (SW_AD_TO_HW)
|
||||||
{
|
{
|
||||||
@ -1210,7 +1493,14 @@ PS_OUTPUT ps_main(PS_INPUT input)
|
|||||||
#endif // PS_DATE != 1/2
|
#endif // PS_DATE != 1/2
|
||||||
|
|
||||||
#if PS_ZCLAMP
|
#if PS_ZCLAMP
|
||||||
output.depth = min(input.p.z, MaxDepthPS);
|
#if PS_ACCURATE_PRIMS == ACCURATE_TRIANGLES
|
||||||
|
if (bool(input.accurate_triangles_interior))
|
||||||
|
output.depth = min(input.p.z, MaxDepthPS);
|
||||||
|
else
|
||||||
|
output.depth = current_depth; // No depth update for triangle edges.
|
||||||
|
#else
|
||||||
|
output.depth = min(input.p.z, MaxDepthPS);
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return output;
|
return output;
|
||||||
@ -1236,7 +1526,9 @@ cbuffer cb0
|
|||||||
float2 TextureOffset;
|
float2 TextureOffset;
|
||||||
float2 PointSize;
|
float2 PointSize;
|
||||||
uint MaxDepth;
|
uint MaxDepth;
|
||||||
uint BaseVertex; // Only used in DX11.
|
uint pad_cb0;
|
||||||
|
uint BaseVertex;
|
||||||
|
uint pad_cb0_2;
|
||||||
};
|
};
|
||||||
|
|
||||||
VS_OUTPUT vs_main(VS_INPUT input)
|
VS_OUTPUT vs_main(VS_INPUT input)
|
||||||
@ -1256,6 +1548,28 @@ VS_OUTPUT vs_main(VS_INPUT input)
|
|||||||
output.p.xy = output.p.xy * float2(VertexScale.x, -VertexScale.y) - float2(VertexOffset.x, -VertexOffset.y);
|
output.p.xy = output.p.xy * float2(VertexScale.x, -VertexScale.y) - float2(VertexOffset.x, -VertexOffset.y);
|
||||||
output.p.z *= exp2(-32.0f); // integer->float depth
|
output.p.z *= exp2(-32.0f); // integer->float depth
|
||||||
|
|
||||||
|
#if VS_ACCURATE_PRIMS == ACCURATE_LINES
|
||||||
|
output.accurate_prims_index = input.vertex_id / 6;
|
||||||
|
output.t = 0.0f;
|
||||||
|
output.ti = 0.0f;
|
||||||
|
output.c = 0.0f;
|
||||||
|
return output; // Don't send line vertex attributes - they are interpolated manually in the pixel shader.
|
||||||
|
#elif VS_ACCURATE_PRIMS == ACCURATE_TRIANGLES
|
||||||
|
uint prim_id = input.vertex_id / 21;
|
||||||
|
output.accurate_triangles_interior = uint((input.vertex_id - 21 * prim_id) < 3); // First 3 vertices in each group of 21 is interior.
|
||||||
|
if (!bool(output.accurate_triangles_interior))
|
||||||
|
{
|
||||||
|
uint edge = (input.vertex_id - 21 * prim_id - 3) / 6; // Each group of 6 vertices after first 3 is one edge.
|
||||||
|
output.accurate_prims_index = 3 * prim_id + edge;
|
||||||
|
output.t = 0.0f;
|
||||||
|
output.ti = 0.0f;
|
||||||
|
output.c = 0.0f;
|
||||||
|
return output; // Don't send edge vertex attributes - they are interpolated manually in the fragment shader.
|
||||||
|
}
|
||||||
|
output.accurate_prims_index = 0;
|
||||||
|
// Send the interior vertex attributes for fixed function interpolation.
|
||||||
|
#endif
|
||||||
|
|
||||||
if(VS_TME)
|
if(VS_TME)
|
||||||
{
|
{
|
||||||
float2 uv = input.uv - TextureOffset;
|
float2 uv = input.uv - TextureOffset;
|
||||||
|
|||||||
@ -3,6 +3,9 @@
|
|||||||
|
|
||||||
//#version 420 // Keep it for text editor detection
|
//#version 420 // Keep it for text editor detection
|
||||||
|
|
||||||
|
#define ACCURATE_LINES 1
|
||||||
|
#define ACCURATE_TRIANGLES 2
|
||||||
|
|
||||||
#define FMT_32 0
|
#define FMT_32 0
|
||||||
#define FMT_24 1
|
#define FMT_24 1
|
||||||
#define FMT_16 2
|
#define FMT_16 2
|
||||||
@ -11,6 +14,11 @@
|
|||||||
#define SHUFFLE_WRITE 2
|
#define SHUFFLE_WRITE 2
|
||||||
#define SHUFFLE_READWRITE 3
|
#define SHUFFLE_READWRITE 3
|
||||||
|
|
||||||
|
#ifndef ZTST_GEQUAL
|
||||||
|
#define ZTST_GEQUAL 2
|
||||||
|
#define ZTST_GREATER 3
|
||||||
|
#endif
|
||||||
|
|
||||||
// TEX_COORD_DEBUG output the uv coordinate as color. It is useful
|
// TEX_COORD_DEBUG output the uv coordinate as color. It is useful
|
||||||
// to detect bad sampling due to upscaling
|
// to detect bad sampling due to upscaling
|
||||||
//#define TEX_COORD_DEBUG
|
//#define TEX_COORD_DEBUG
|
||||||
@ -28,6 +36,9 @@
|
|||||||
#define NEEDS_RT_FOR_AFAIL (PS_AFAIL == 3 && PS_NO_COLOR1)
|
#define NEEDS_RT_FOR_AFAIL (PS_AFAIL == 3 && PS_NO_COLOR1)
|
||||||
#define NEEDS_RT (NEEDS_RT_EARLY || NEEDS_RT_FOR_AFAIL || (!PS_PRIMID_INIT && (PS_FBMASK || SW_BLEND_NEEDS_RT || SW_AD_TO_HW)))
|
#define NEEDS_RT (NEEDS_RT_EARLY || NEEDS_RT_FOR_AFAIL || (!PS_PRIMID_INIT && (PS_FBMASK || SW_BLEND_NEEDS_RT || SW_AD_TO_HW)))
|
||||||
#define NEEDS_TEX (PS_TFX != 4)
|
#define NEEDS_TEX (PS_TFX != 4)
|
||||||
|
#define NEEDS_DEPTH ((PS_ACCURATE_PRIMS == ACCURATE_TRIANGLES) && PS_ACCURATE_PRIMS_AA && PS_ZCLAMP)
|
||||||
|
|
||||||
|
vec4 FragCoord;
|
||||||
|
|
||||||
layout(std140, binding = 0) uniform cb21
|
layout(std140, binding = 0) uniform cb21
|
||||||
{
|
{
|
||||||
@ -57,8 +68,71 @@ layout(std140, binding = 0) uniform cb21
|
|||||||
|
|
||||||
float ScaledScaleFactor;
|
float ScaledScaleFactor;
|
||||||
float RcpScaleFactor;
|
float RcpScaleFactor;
|
||||||
|
uint _pad0;
|
||||||
|
uint _pad1;
|
||||||
|
|
||||||
|
uint accurate_prims_base_index;
|
||||||
|
uint _pad2;
|
||||||
|
uint _pad3;
|
||||||
|
uint _pad4;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#if PS_ACCURATE_PRIMS
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
vec4 t_float;
|
||||||
|
vec4 t_int;
|
||||||
|
vec4 c;
|
||||||
|
} PSin;
|
||||||
|
|
||||||
|
in SHADER
|
||||||
|
{
|
||||||
|
vec4 t_float;
|
||||||
|
vec4 t_int;
|
||||||
|
|
||||||
|
#if PS_IIP != 0
|
||||||
|
vec4 c;
|
||||||
|
#else
|
||||||
|
flat vec4 c;
|
||||||
|
#endif
|
||||||
|
} PSinReal;
|
||||||
|
|
||||||
|
flat in uint accurate_prims_index;
|
||||||
|
#if PS_ACCURATE_PRIMS == ACCURATE_TRIANGLES
|
||||||
|
flat in uint accurate_triangles_interior;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Per-line / per-triangle-edge record stored in the accurate_prims_data buffer.
// Members with suffix 0 / 1 belong to the first / second endpoint of the line or edge.
// The number in each trailing comment is the member's byte offset; keep the order and padding
// unchanged so the 208-byte layout noted at the end stays valid.
struct AccuratePrimsEdgeData
|
||||||
|
{
|
||||||
|
// Endpoint attributes that the fragment shader blends manually (see InterpolateAttributesManual)
|
||||||
|
vec4 t_float0; // 0: blended into PSin.t_float
|
||||||
|
vec4 t_float1; // 16
|
||||||
|
vec4 t_int0; // 32: blended into PSin.t_int
|
||||||
|
vec4 t_int1; // 48
|
||||||
|
vec4 c0; // 64: blended into PSin.c, then clamped to [0, 255]
|
||||||
|
vec4 c1; // 80
|
||||||
|
vec4 p0; // 96: only .z is read by the shader code visible here
|
||||||
|
vec4 p1; // 112
|
||||||
|
ivec4 edge0; // 128: dotted with (x, y, 1, 0) of the pixel; fragment is discarded when the result is <= 0
|
||||||
|
ivec4 edge1; // 144: second edge test, same rule as edge0
|
||||||
|
ivec2 xy0; // 160: endpoint 0 position, compared against 16 * pixel coordinates (4-bit fixed point)
|
||||||
|
ivec2 xy1; // 168: endpoint 1 position, same units as xy0
|
||||||
|
uint step_x; // 176: non-zero selects X as the major axis, zero selects Y
|
||||||
|
uint draw0; // 180: zero discards the fragment at endpoint 0's rounded major coordinate (lines)
|
||||||
|
uint draw1; // 184: zero discards the fragment at endpoint 1's rounded major coordinate (lines)
|
||||||
|
uint top_left; // 188: read by HandleAccurateTrianglesEdge when the edge hits a pixel exactly
|
||||||
|
uint side; // 192: read by HandleAccurateTrianglesEdge to choose which neighbouring pixel is covered
|
||||||
|
uint _pad0; // 196: padding
|
||||||
|
uint _pad1; // 200: padding
|
||||||
|
uint _pad2; // 204: padding
|
||||||
|
// Total 208 bytes
|
||||||
|
};
|
||||||
|
|
||||||
|
// Buffer block (std140 layout, binding point 3) holding one AccuratePrimsEdgeData record per line/edge.
// The shader indexes it with accurate_prims_base_index + accurate_prims_index.
layout (std140, binding = 3) buffer AccuratePrimsEdgeDataBuffer {
|
||||||
|
AccuratePrimsEdgeData accurate_prims_data[]; // Unsized: the length comes from the bound buffer range.
|
||||||
|
};
|
||||||
|
|
||||||
|
#else
|
||||||
in SHADER
|
in SHADER
|
||||||
{
|
{
|
||||||
vec4 t_float;
|
vec4 t_float;
|
||||||
@ -70,6 +144,7 @@ in SHADER
|
|||||||
flat vec4 c;
|
flat vec4 c;
|
||||||
#endif
|
#endif
|
||||||
} PSin;
|
} PSin;
|
||||||
|
#endif
|
||||||
|
|
||||||
#define TARGET_0_QUALIFIER out
|
#define TARGET_0_QUALIFIER out
|
||||||
|
|
||||||
@ -107,9 +182,10 @@ layout(binding = 2) uniform sampler2D RtSampler; // note 2 already use by the im
|
|||||||
|
|
||||||
#if PS_DATE == 3
|
#if PS_DATE == 3
|
||||||
layout(binding = 3) uniform sampler2D img_prim_min;
|
layout(binding = 3) uniform sampler2D img_prim_min;
|
||||||
|
#endif
|
||||||
|
|
||||||
// I don't remember why I set this parameter but it is surely useless
|
#if NEEDS_DEPTH
|
||||||
//layout(pixel_center_integer) in vec4 gl_FragCoord;
|
layout(binding = 4) uniform sampler2D DepthSampler;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
vec4 sample_from_rt()
|
vec4 sample_from_rt()
|
||||||
@ -119,7 +195,16 @@ vec4 sample_from_rt()
|
|||||||
#elif HAS_FRAMEBUFFER_FETCH
|
#elif HAS_FRAMEBUFFER_FETCH
|
||||||
return LAST_FRAG_COLOR;
|
return LAST_FRAG_COLOR;
|
||||||
#else
|
#else
|
||||||
return texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);
|
return texelFetch(RtSampler, ivec2(FragCoord.xy), 0);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fetches the texel of DepthSampler under the current fragment (mip level 0).
// When the shader variant does not need depth (NEEDS_DEPTH is 0) no sampler is
// declared and a zero vector is returned instead.
vec4 sample_from_depth()
{
#if NEEDS_DEPTH
	return texelFetch(DepthSampler, ivec2(FragCoord.xy), 0);
#else
	return vec4(0.0);
#endif
}
|
||||||
|
|
||||||
@ -315,7 +400,7 @@ int fetch_raw_depth()
|
|||||||
#if PS_TEX_IS_FB == 1
|
#if PS_TEX_IS_FB == 1
|
||||||
return int(sample_from_rt().r * multiplier);
|
return int(sample_from_rt().r * multiplier);
|
||||||
#else
|
#else
|
||||||
return int(texelFetch(TextureSampler, ivec2(gl_FragCoord.xy), 0).r * multiplier);
|
return int(texelFetch(TextureSampler, ivec2(FragCoord.xy), 0).r * multiplier);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -324,7 +409,7 @@ vec4 fetch_raw_color()
|
|||||||
#if PS_TEX_IS_FB == 1
|
#if PS_TEX_IS_FB == 1
|
||||||
return sample_from_rt();
|
return sample_from_rt();
|
||||||
#else
|
#else
|
||||||
return texelFetch(TextureSampler, ivec2(gl_FragCoord.xy), 0);
|
return texelFetch(TextureSampler, ivec2(FragCoord.xy), 0);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -724,9 +809,9 @@ void ps_dither(inout vec3 C, float As)
|
|||||||
{
|
{
|
||||||
#if PS_DITHER > 0 && PS_DITHER < 3
|
#if PS_DITHER > 0 && PS_DITHER < 3
|
||||||
#if PS_DITHER == 2
|
#if PS_DITHER == 2
|
||||||
ivec2 fpos = ivec2(gl_FragCoord.xy);
|
ivec2 fpos = ivec2(FragCoord.xy);
|
||||||
#else
|
#else
|
||||||
ivec2 fpos = ivec2(gl_FragCoord.xy * RcpScaleFactor);
|
ivec2 fpos = ivec2(FragCoord.xy * RcpScaleFactor);
|
||||||
#endif
|
#endif
|
||||||
float value = DitherMatrix[fpos.y&3][fpos.x&3];
|
float value = DitherMatrix[fpos.y&3][fpos.x&3];
|
||||||
|
|
||||||
@ -967,11 +1052,233 @@ float As = As_rgba.a;
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if PS_ACCURATE_PRIMS
|
||||||
|
// Interpolate vertex attributes over a line/edge manually.
|
||||||
|
// Blends the two endpoint attribute sets of a line/edge for the current fragment.
//   data    - record holding the attributes of endpoint 0 and endpoint 1.
//   weight0 - integer weight applied to endpoint 0.
//   weight1 - integer weight applied to endpoint 1.
// Results are written to the globals PSin (t_float, t_int, c) and FragCoord.z.
// Each attribute is (weight1 * a1 + weight0 * a0) / (weight0 + weight1), except that
// components which are equal at both endpoints are copied from endpoint 1 unblended.
// NOTE(review): there is no guard here against weight0 + weight1 == 0.
void InterpolateAttributesManual(AccuratePrimsEdgeData data, int weight0, int weight1)
{
	float w0 = float(weight0);
	float w1 = float(weight1);
	float w_sum = float(weight0 + weight1);

	// Float texture coordinates; the z component is kept inside [0, 1].
	vec4 t_blend = (w1 * data.t_float1 + w0 * data.t_float0) / w_sum;
	PSin.t_float = mix(t_blend, data.t_float1, equal(data.t_float1, data.t_float0));
	PSin.t_float.z = clamp(PSin.t_float.z, 0.0f, 1.0f);

	// Integer texture coordinates (carried as floats).
	vec4 ti_blend = (w1 * data.t_int1 + w0 * data.t_int0) / w_sum;
	PSin.t_int = mix(ti_blend, data.t_int1, equal(data.t_int1, data.t_int0));

	// Colour, kept inside [0, 255].
	vec4 c_blend = (w1 * data.c1 + w0 * data.c0) / w_sum;
	PSin.c = clamp(mix(c_blend, data.c1, equal(data.c1, data.c0)), 0.0f, 255.0f);

	// Depth, kept inside [0, 1].
	float z = (w1 * data.p1.z + w0 * data.p0.z) / w_sum;
	if (data.p1.z == data.p0.z)
		z = data.p1.z;
	FragCoord.z = clamp(z, 0.0f, 1.0f);
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if PS_ACCURATE_PRIMS == ACCURATE_LINES
|
||||||
|
// Decides whether the current fragment belongs to the line described by this primitive's
// AccuratePrimsEdgeData record, discards it if not, and otherwise outputs its coverage
// and manually interpolated attributes (written to PSin / FragCoord.z).
//   alpha_coverage - 128 without AA; with PS_ACCURATE_PRIMS_AA a value in [0, 127].
void HandleAccurateLines(out float alpha_coverage)
{
	AccuratePrimsEdgeData data = accurate_prims_data[accurate_prims_base_index + accurate_prims_index];

	// Everything below works in 4-bit fixed point (16 sub-pixels per pixel).
	// floor() drops the half-pixel centre offset of the fragment position.
	ivec2 end0 = data.xy0;
	ivec2 end1 = data.xy1;
	ivec2 pix = 16 * ivec2(floor(FragCoord.xy));

	// Reorder each pair as (major, minor) so the rest of the function is axis-agnostic.
	if (!bool(data.step_x))
	{
		end0 = end0.yx;
		end1 = end1.yx;
		pix = pix.yx;
	}

	int span = end1.x - end0.x; // Signed length along the major axis.
	int span_scaled = 16 * span;

	// Endpoints rounded to the nearest pixel along the major axis.
	int snap0 = (end0.x + 8) & ~0xF;
	int snap1 = (end1.x + 8) & ~0xF;

	// Reject fragments beyond the line's extent, and endpoint pixels flagged as not drawn.
	bool outside = pix.x < min(snap0, snap1) || pix.x > max(snap0, snap1);
	bool skipped_end = (pix.x == snap0 && !bool(data.draw0)) || (pix.x == snap1 && !bool(data.draw1));
	if (outside || skipped_end)
		discard;

	int weight0 = end1.x - pix.x;
	int weight1 = pix.x - end0.x;

	// Minor coordinate of the line at this major position, still multiplied by span.
	int minor_line = weight1 * end1.y + weight0 * end0.y;

#if PS_ACCURATE_PRIMS_AA
	// The line touches two neighbouring pixels along the minor axis; split the coverage between them.
	int minor_lo = (minor_line / span) & ~0xF;
	int minor_hi = minor_lo + 16;
	int cover_hi = minor_line - span * minor_lo;
	int cover_lo = span_scaled - cover_hi;

	int alpha_i;
	if (pix.y == minor_lo)
		alpha_i = cover_lo;
	else if (pix.y == minor_hi)
		alpha_i = cover_hi;
	else
		discard;

	// AA output alpha must never exceed 127.
	alpha_coverage = floor(clamp(128.0f * float(alpha_i) / float(span_scaled), 0.0f, 127.0f));
#else
	// Without AA only the pixel nearest to the line is kept.
	int minor_expected = ((2 * minor_line + span_scaled) / (2 * span)) & ~0xF;
	if (pix.y != minor_expected)
		discard;
	alpha_coverage = 128.0f;
#endif

	InterpolateAttributesManual(data, weight0, weight1);
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if PS_ACCURATE_PRIMS == ACCURATE_TRIANGLES
|
||||||
|
// Decides whether the current fragment lies on the triangle edge described by this primitive's
// AccuratePrimsEdgeData record, discards it if not, and otherwise outputs its coverage
// and manually interpolated attributes (written to PSin / FragCoord.z).
//   alpha_coverage - 128 without AA; with PS_ACCURATE_PRIMS_AA a value in [0, 127].
void HandleAccurateTrianglesEdge(out float alpha_coverage)
{
	AccuratePrimsEdgeData data = accurate_prims_data[accurate_prims_base_index + accurate_prims_index];

	ivec2 xy0 = data.xy0;
	ivec2 xy1 = data.xy1;
	ivec2 dxy = xy1 - xy0;
	// Endpoints rounded to the nearest pixel (still in fixed point).
	ivec2 xy0_i = (xy0 + 8) & ~0xF;
	ivec2 xy1_i = (xy1 + 8) & ~0xF;
	bool step_x = bool(data.step_x);
	bool side = bool(data.side);
	bool top_left = bool(data.top_left);

	// 4-bit fixed point: 16 subpixels per pixel
	ivec2 xy_i = 16 * ivec2(floor(FragCoord.xy)); // floor() drops the half-integer pixel center.

	// Determine major/minor axes
	int major0 = step_x ? xy0.x : xy0.y;
	int major1 = step_x ? xy1.x : xy1.y;
	int minor0 = step_x ? xy0.y : xy0.x;
	int minor1 = step_x ? xy1.y : xy1.x;
	int major_i = step_x ? xy_i.x : xy_i.y;
	int minor_i = step_x ? xy_i.y : xy_i.x;
	int d_major = step_x ? dxy.x : dxy.y;
	int d_major_scaled = 16 * d_major;

	int major0_i = step_x ? xy0_i.x : xy0_i.y;
	int major1_i = step_x ? xy1_i.x : xy1_i.y;

	// Discard if outside edge range.
	// Note: this is not exactly what the SW rasterizer does.
	// See the note in GSRasterizer::DrawEdgeTriangle() about the asymmetry in X and Y bounds checking.
	if (major_i < min(major0_i, major1_i) ||
		major_i > max(major0_i, major1_i))
		discard;

	// Discard if on wrong side of other edges
	// NOTE(review): GLSL specifies dot() for floating-point vectors only, so these integer
	// operands are presumably converted to float implicitly, whereas the HLSL version of this
	// shader performs an integer dot product - confirm the precision is sufficient.
	if (dot(data.edge0, ivec4(xy_i, 1, 0)) <= 0 ||
		dot(data.edge1, ivec4(xy_i, 1, 0)) <= 0)
		discard;

	int weight0 = major1 - major_i;
	int weight1 = major_i - major0;

	// Compute minor axis line in fixed-point
	int minor_line = weight1 * minor1 + weight0 * minor0;
	int minor_i_expected = minor_line / d_major;
	int minor_i_expected_0 = minor_i_expected & ~0xF;
	int minor_i_expected_1 = minor_i_expected_0 + 16;
	int alpha_i_0 = d_major_scaled - (minor_line - d_major * minor_i_expected_0);
	int alpha_i_1 = d_major_scaled - alpha_i_0;

	// Proper fixed-point AA rounding
	int alpha_i;
	if ((minor_i_expected & 0xF) == 0)
	{
		// On a pixel center: full coverage, unless this is a top-left edge, in which case
		// the neighbouring pixel selected by 'side' is used with zero coverage.
		alpha_i = top_left ? 0 : d_major_scaled;
		minor_i_expected += top_left ? (side ? -16 : 16) : 0;
	}
	else if (side)
	{
		minor_i_expected = minor_i_expected_0;
		alpha_i = alpha_i_0;
	}
	else
	{
		minor_i_expected = minor_i_expected_1;
		alpha_i = alpha_i_1;
	}
	if (minor_i != minor_i_expected)
		discard;

#if PS_ACCURATE_PRIMS_AA
	// Make sure that the output alpha is always <= 127 for AA.
	alpha_coverage = floor(clamp(128.0f * float(alpha_i) / float(d_major_scaled), 0.0f, 127.0f));
#else
	alpha_coverage = 128.0f;
#endif

	// Interpolate attributes
	InterpolateAttributesManual(data, weight0, weight1);
}
|
||||||
|
#endif
|
||||||
|
|
||||||
void ps_main()
|
void ps_main()
|
||||||
{
|
{
|
||||||
|
FragCoord = gl_FragCoord;
|
||||||
|
|
||||||
|
#if PS_ACCURATE_PRIMS
|
||||||
|
float alpha_coverage;
|
||||||
|
#if PS_ACCURATE_PRIMS == ACCURATE_LINES
|
||||||
|
HandleAccurateLines(alpha_coverage);
|
||||||
|
#elif PS_ACCURATE_PRIMS == ACCURATE_TRIANGLES
|
||||||
|
if (bool(accurate_triangles_interior))
|
||||||
|
{
|
||||||
|
alpha_coverage = 128.0f;
|
||||||
|
PSin.t_float = PSinReal.t_float;
|
||||||
|
PSin.t_int = PSinReal.t_int;
|
||||||
|
PSin.c = PSinReal.c;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
HandleAccurateTrianglesEdge(alpha_coverage);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#endif // PS_ACCURATE_PRIMS
|
||||||
|
|
||||||
|
#if NEEDS_DEPTH
|
||||||
|
float current_depth = sample_from_depth().r;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if PS_ZCLAMP && (PS_ZTST == ZTST_GEQUAL || PS_ZTST == ZTST_GREATER)
|
||||||
|
#if PS_ZTST == ZTST_GEQUAL
|
||||||
|
if (FragCoord.z < current_depth)
|
||||||
|
discard;
|
||||||
|
#elif PS_ZTST == ZTST_GREATER
|
||||||
|
if (FragCoord.z <= current_depth)
|
||||||
|
discard;
|
||||||
|
#endif
|
||||||
|
#endif // PS_ZTST
|
||||||
|
|
||||||
#if PS_SCANMSK & 2
|
#if PS_SCANMSK & 2
|
||||||
// fail depth test on prohibited lines
|
// fail depth test on prohibited lines
|
||||||
if ((int(gl_FragCoord.y) & 1) == (PS_SCANMSK & 1))
|
if ((int(FragCoord.y) & 1) == (PS_SCANMSK & 1))
|
||||||
discard;
|
discard;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -1007,7 +1314,7 @@ void ps_main()
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if PS_DATE == 3
|
#if PS_DATE == 3
|
||||||
int stencil_ceil = int(texelFetch(img_prim_min, ivec2(gl_FragCoord.xy), 0).r);
|
int stencil_ceil = int(texelFetch(img_prim_min, ivec2(FragCoord.xy), 0).r);
|
||||||
// Note gl_PrimitiveID == stencil_ceil will be the primitive that will update
|
// Note gl_PrimitiveID == stencil_ceil will be the primitive that will update
|
||||||
// the bad alpha value so we must keep it.
|
// the bad alpha value so we must keep it.
|
||||||
|
|
||||||
@ -1017,6 +1324,20 @@ void ps_main()
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
vec4 C = ps_color();
|
vec4 C = ps_color();
|
||||||
|
|
||||||
|
#if PS_FIXED_ONE_A
|
||||||
|
// AA (Fixed one) will output a coverage of 1.0 as alpha
|
||||||
|
C.a = 128.0f;
|
||||||
|
#elif PS_ACCURATE_PRIMS_AA
|
||||||
|
// AA: coverage is computed in alpha_coverage
|
||||||
|
#if PS_ACCURATE_PRIMS_AA_ABE
|
||||||
|
if (floor(C.a) == 128.0f) // According to manual & hardware tests the coverage is only used if the fragment alpha is 128.
|
||||||
|
C.a = alpha_coverage;
|
||||||
|
#else
|
||||||
|
C.a = alpha_coverage;
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
bool atst_pass = atst(C);
|
bool atst_pass = atst(C);
|
||||||
|
|
||||||
#if PS_AFAIL == 0 // KEEP or ATST off
|
#if PS_AFAIL == 0 // KEEP or ATST off
|
||||||
@ -1024,13 +1345,6 @@ void ps_main()
|
|||||||
discard;
|
discard;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Must be done before alpha correction
|
|
||||||
|
|
||||||
// AA (Fixed one) will output a coverage of 1.0 as alpha
|
|
||||||
#if PS_FIXED_ONE_A
|
|
||||||
C.a = 128.0f;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if SW_AD_TO_HW
|
#if SW_AD_TO_HW
|
||||||
#if PS_RTA_CORRECTION
|
#if PS_RTA_CORRECTION
|
||||||
vec4 RT = trunc(sample_from_rt() * 128.0f + 0.1f);
|
vec4 RT = trunc(sample_from_rt() * 128.0f + 0.1f);
|
||||||
@ -1144,6 +1458,13 @@ void ps_main()
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if PS_ZCLAMP
|
#if PS_ZCLAMP
|
||||||
gl_FragDepth = min(gl_FragCoord.z, MaxDepthPS);
|
#if PS_ACCURATE_PRIMS == ACCURATE_TRIANGLES
|
||||||
|
if (bool(accurate_triangles_interior))
|
||||||
|
gl_FragDepth = min(FragCoord.z, MaxDepthPS);
|
||||||
|
else
|
||||||
|
gl_FragDepth = current_depth; // No depth update for triangle edges.
|
||||||
|
#else
|
||||||
|
gl_FragDepth = min(FragCoord.z, MaxDepthPS);
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|||||||
@ -3,6 +3,16 @@
|
|||||||
|
|
||||||
//#version 420 // Keep it for text editor detection
|
//#version 420 // Keep it for text editor detection
|
||||||
|
|
||||||
|
#define ACCURATE_LINES 1
|
||||||
|
#define ACCURATE_TRIANGLES 2
|
||||||
|
|
||||||
|
#if VS_ACCURATE_PRIMS
|
||||||
|
flat out uint accurate_prims_index;
|
||||||
|
#if VS_ACCURATE_PRIMS == ACCURATE_TRIANGLES
|
||||||
|
flat out uint accurate_triangles_interior;
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
layout(std140, binding = 1) uniform cb20
|
layout(std140, binding = 1) uniform cb20
|
||||||
{
|
{
|
||||||
vec2 VertexScale;
|
vec2 VertexScale;
|
||||||
@ -14,6 +24,8 @@ layout(std140, binding = 1) uniform cb20
|
|||||||
vec2 PointSize;
|
vec2 PointSize;
|
||||||
uint MaxDepth;
|
uint MaxDepth;
|
||||||
uint pad_cb20;
|
uint pad_cb20;
|
||||||
|
uint BaseVertex;
|
||||||
|
uint pad_cb20_2;
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef VERTEX_SHADER
|
#ifdef VERTEX_SHADER
|
||||||
@ -75,6 +87,28 @@ void vs_main()
|
|||||||
gl_Position.z = float(z) * exp_min32;
|
gl_Position.z = float(z) * exp_min32;
|
||||||
gl_Position.w = 1.0f;
|
gl_Position.w = 1.0f;
|
||||||
|
|
||||||
|
#if VS_ACCURATE_PRIMS == ACCURATE_LINES
|
||||||
|
accurate_prims_index = (gl_VertexID - BaseVertex) / 6;
|
||||||
|
VSout.t_float = vec4(0.0f);
|
||||||
|
VSout.t_int = vec4(0.0f);
|
||||||
|
VSout.c = vec4(0.0f);
|
||||||
|
return; // Don't send line vertex attributes - they are interpolated manually in the fragment shader.
|
||||||
|
#elif VS_ACCURATE_PRIMS == ACCURATE_TRIANGLES
|
||||||
|
uint vertex_id = gl_VertexID - BaseVertex;
|
||||||
|
uint prim_id = vertex_id / 21;
|
||||||
|
accurate_triangles_interior = uint((vertex_id - 21 * prim_id) < 3); // First 3 vertices in each group of 21 is interior.
|
||||||
|
if (!bool(accurate_triangles_interior))
|
||||||
|
{
|
||||||
|
uint edge = (vertex_id - 21 * prim_id - 3) / 6; // Each group of 6 vertices after first 3 is one edge.
|
||||||
|
accurate_prims_index = 3 * prim_id + edge;
|
||||||
|
VSout.t_float = vec4(0.0f);
|
||||||
|
VSout.t_int = vec4(0.0f);
|
||||||
|
VSout.c = vec4(0.0f);
|
||||||
|
return; // Don't send edge vertex attributes - they are interpolated manually in the fragment shader.
|
||||||
|
}
|
||||||
|
// Send the interior vertex attributes for fixed function interpolation.
|
||||||
|
#endif
|
||||||
|
|
||||||
texture_coord();
|
texture_coord();
|
||||||
|
|
||||||
VSout.c = i_c;
|
VSout.c = i_c;
|
||||||
|
|||||||
@ -1,12 +1,23 @@
|
|||||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||||
// SPDX-License-Identifier: GPL-3.0+
|
// SPDX-License-Identifier: GPL-3.0+
|
||||||
|
|
||||||
|
#define ACCURATE_LINES 1
|
||||||
|
#define ACCURATE_TRIANGLES 2
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
// Vertex Shader
|
// Vertex Shader
|
||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
|
||||||
#if defined(VERTEX_SHADER)
|
#if defined(VERTEX_SHADER)
|
||||||
|
|
||||||
|
#if VS_ACCURATE_PRIMS
|
||||||
|
layout(location = 7) flat out uint accurate_prims_index;
|
||||||
|
#if VS_ACCURATE_PRIMS == ACCURATE_TRIANGLES
|
||||||
|
layout(location = 8) flat out uint accurate_triangles_interior;
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
layout(std140, set = 0, binding = 0) uniform cb0
|
layout(std140, set = 0, binding = 0) uniform cb0
|
||||||
{
|
{
|
||||||
vec2 VertexScale;
|
vec2 VertexScale;
|
||||||
@ -16,6 +27,8 @@ layout(std140, set = 0, binding = 0) uniform cb0
|
|||||||
vec2 PointSize;
|
vec2 PointSize;
|
||||||
uint MaxDepth;
|
uint MaxDepth;
|
||||||
uint pad_cb0;
|
uint pad_cb0;
|
||||||
|
uint BaseVertex;
|
||||||
|
uint pad_cb0_2;
|
||||||
};
|
};
|
||||||
|
|
||||||
layout(location = 0) out VSOutput
|
layout(location = 0) out VSOutput
|
||||||
@ -55,6 +68,28 @@ void main()
|
|||||||
gl_Position.z *= exp2(-32.0f); // integer->float depth
|
gl_Position.z *= exp2(-32.0f); // integer->float depth
|
||||||
gl_Position.y = -gl_Position.y;
|
gl_Position.y = -gl_Position.y;
|
||||||
|
|
||||||
|
#if VS_ACCURATE_PRIMS == ACCURATE_LINES
|
||||||
|
accurate_prims_index = (gl_VertexIndex - BaseVertex) / 6;
|
||||||
|
vsOut.t = vec4(0.0f);
|
||||||
|
vsOut.ti = vec4(0.0f);
|
||||||
|
vsOut.c = vec4(0.0f);
|
||||||
|
return; // Don't send line vertex attributes - they are interpolated manually in the fragment shader.
|
||||||
|
#elif VS_ACCURATE_PRIMS == ACCURATE_TRIANGLES
|
||||||
|
uint vertex_id = gl_VertexIndex - BaseVertex;
|
||||||
|
uint prim_id = vertex_id / 21;
|
||||||
|
accurate_triangles_interior = uint((vertex_id - 21 * prim_id) < 3); // First 3 vertices in each group of 21 is interior.
|
||||||
|
if (!bool(accurate_triangles_interior))
|
||||||
|
{
|
||||||
|
uint edge = (vertex_id - 21 * prim_id - 3) / 6; // Each group of 6 vertices after first 3 is one edge.
|
||||||
|
accurate_prims_index = 3 * prim_id + edge;
|
||||||
|
vsOut.t = vec4(0.0f);
|
||||||
|
vsOut.ti = vec4(0.0f);
|
||||||
|
vsOut.c = vec4(0.0f);
|
||||||
|
return; // Don't send edge vertex attributes - they are interpolated manually in the fragment shader.
|
||||||
|
}
|
||||||
|
// Send the interior vertex attributes for fixed function interpolation.
|
||||||
|
#endif
|
||||||
|
|
||||||
#if VS_TME
|
#if VS_TME
|
||||||
vec2 uv = a_uv - TextureOffset;
|
vec2 uv = a_uv - TextureOffset;
|
||||||
vec2 st = a_st - TextureOffset;
|
vec2 st = a_st - TextureOffset;
|
||||||
@ -245,6 +280,11 @@ void main()
|
|||||||
#define GS_LINE 0
|
#define GS_LINE 0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef ZTST_GEQUAL
|
||||||
|
#define ZTST_GEQUAL 2
|
||||||
|
#define ZTST_GREATER 3
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef PS_FST
|
#ifndef PS_FST
|
||||||
#define PS_FST 0
|
#define PS_FST 0
|
||||||
#define PS_WMS 0
|
#define PS_WMS 0
|
||||||
@ -298,9 +338,12 @@ void main()
|
|||||||
#define AFAIL_NEEDS_RT (PS_AFAIL == 3 && PS_NO_COLOR1)
|
#define AFAIL_NEEDS_RT (PS_AFAIL == 3 && PS_NO_COLOR1)
|
||||||
|
|
||||||
#define PS_FEEDBACK_LOOP_IS_NEEDED (PS_TEX_IS_FB == 1 || AFAIL_NEEDS_RT || PS_FBMASK || SW_BLEND_NEEDS_RT || SW_AD_TO_HW || (PS_DATE >= 5))
|
#define PS_FEEDBACK_LOOP_IS_NEEDED (PS_TEX_IS_FB == 1 || AFAIL_NEEDS_RT || PS_FBMASK || SW_BLEND_NEEDS_RT || SW_AD_TO_HW || (PS_DATE >= 5))
|
||||||
|
#define PS_FEEDBACK_LOOP_IS_NEEDED_DEPTH ((PS_ACCURATE_PRIMS == ACCURATE_TRIANGLES) && PS_ACCURATE_PRIMS_AA && PS_ZCLAMP)
|
||||||
|
|
||||||
#define NEEDS_TEX (PS_TFX != 4)
|
#define NEEDS_TEX (PS_TFX != 4)
|
||||||
|
|
||||||
|
vec4 FragCoord;
|
||||||
|
|
||||||
layout(std140, set = 0, binding = 1) uniform cb1
|
layout(std140, set = 0, binding = 1) uniform cb1
|
||||||
{
|
{
|
||||||
vec3 FogColor;
|
vec3 FogColor;
|
||||||
@ -320,8 +363,71 @@ layout(std140, set = 0, binding = 1) uniform cb1
|
|||||||
mat4 DitherMatrix;
|
mat4 DitherMatrix;
|
||||||
float ScaledScaleFactor;
|
float ScaledScaleFactor;
|
||||||
float RcpScaleFactor;
|
float RcpScaleFactor;
|
||||||
|
uint _pad0;
|
||||||
|
uint _pad1;
|
||||||
|
|
||||||
|
uint accurate_prims_base_index;
|
||||||
|
uint _pad2;
|
||||||
|
uint _pad3;
|
||||||
|
uint _pad4;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#if PS_ACCURATE_PRIMS
|
||||||
|
// Working copy of the interpolants when PS_ACCURATE_PRIMS is enabled.
// In this configuration vsIn is a plain global, not a stage input (the real input block is vsInReal):
//  - main() copies vsInReal into it for triangle interiors;
//  - InterpolateAttributesManual() fills it for lines and triangle edges.
// It keeps the name vsIn that the non-accurate path gives to the input block itself.
struct
|
||||||
|
{
|
||||||
|
vec4 t;
|
||||||
|
vec4 ti;
|
||||||
|
vec4 c;
|
||||||
|
} vsIn;
|
||||||
|
|
||||||
|
// Real stage input at location 0 when PS_ACCURATE_PRIMS is enabled.
// main() reads it only for triangle interiors, where it is copied member by member into vsIn.
layout(location = 0) in VSOutput
|
||||||
|
{
|
||||||
|
vec4 t;
|
||||||
|
||||||
|
vec4 ti;
|
||||||
|
// The colour is interpolated only when PS_IIP is non-zero; otherwise it is declared flat.
#if PS_IIP != 0
|
||||||
|
vec4 c;
|
||||||
|
#else
|
||||||
|
flat vec4 c;
|
||||||
|
#endif
|
||||||
|
} vsInReal;
|
||||||
|
|
||||||
|
layout(location = 7) flat in uint accurate_prims_index;
|
||||||
|
#if PS_ACCURATE_PRIMS == ACCURATE_TRIANGLES
|
||||||
|
layout(location = 8) flat in uint accurate_triangles_interior;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// One record per accurately drawn line or triangle edge, read from accurate_prims_data[].
// Members suffixed 0 belong to the first endpoint and members suffixed 1 to the second.
// The trailing numbers are the byte offsets of each member as annotated by the author.
struct AccuratePrimsEdgeData
|
||||||
|
{
|
||||||
|
// Interpolated attributes
|
||||||
|
// t_float0/1, t_int0/1 and c0/1 are blended into vsIn.t, vsIn.ti and vsIn.c by InterpolateAttributesManual().
vec4 t_float0; // 0
|
||||||
|
vec4 t_float1; // 16
|
||||||
|
vec4 t_int0; // 32
|
||||||
|
vec4 t_int1; // 48
|
||||||
|
vec4 c0; // 64
|
||||||
|
vec4 c1; // 80
|
||||||
|
// Endpoint positions; the shader code in this file only reads the .z component (depth).
vec4 p0; // 96
|
||||||
|
vec4 p1; // 112
|
||||||
|
// Used by HandleAccurateTrianglesEdge(): the fragment is discarded when
// dot(edgeN, ivec4(xy_i, 1, 0)) <= 0 for either of these.
ivec4 edge0; // 128
|
||||||
|
ivec4 edge1; // 144
|
||||||
|
// Endpoint x/y in 4-bit fixed point (16 units per pixel).
ivec2 xy0; // 160
|
||||||
|
ivec2 xy1; // 168
|
||||||
|
// Non-zero: x is the major axis; zero: y is the major axis.
uint step_x; // 176
|
||||||
|
// Lines only: when zero, the pixel at that endpoint's rounded major coordinate is discarded.
uint draw0; // 180
|
||||||
|
uint draw1; // 184
|
||||||
|
// Triangle edges only: select the pixel and coverage chosen in HandleAccurateTrianglesEdge().
uint top_left; // 188
|
||||||
|
uint side; // 192
|
||||||
|
uint _pad0; // 196
|
||||||
|
uint _pad1; // 200
|
||||||
|
uint _pad2; // 204
|
||||||
|
// Total 208
|
||||||
|
};
|
||||||
|
|
||||||
|
// Read-only storage buffer holding the per-line / per-edge records.
// The fragment shader indexes it with accurate_prims_base_index + accurate_prims_index.
layout (std140, set = 0, binding = 3) readonly buffer AccuratePrimsEdgeDataBuffer {
|
||||||
|
AccuratePrimsEdgeData accurate_prims_data[];
|
||||||
|
};
|
||||||
|
|
||||||
|
#else // PS_ACCURATE_PRIMS
|
||||||
|
|
||||||
layout(location = 0) in VSOutput
|
layout(location = 0) in VSOutput
|
||||||
{
|
{
|
||||||
vec4 t;
|
vec4 t;
|
||||||
@ -333,6 +439,8 @@ layout(location = 0) in VSOutput
|
|||||||
#endif
|
#endif
|
||||||
} vsIn;
|
} vsIn;
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
#if !PS_NO_COLOR && !PS_NO_COLOR1
|
#if !PS_NO_COLOR && !PS_NO_COLOR1
|
||||||
layout(location = 0, index = 0) out vec4 o_col0;
|
layout(location = 0, index = 0) out vec4 o_col0;
|
||||||
layout(location = 0, index = 1) out vec4 o_col1;
|
layout(location = 0, index = 1) out vec4 o_col1;
|
||||||
@ -345,13 +453,21 @@ layout(set = 1, binding = 0) uniform sampler2D Texture;
|
|||||||
layout(set = 1, binding = 1) uniform texture2D Palette;
|
layout(set = 1, binding = 1) uniform texture2D Palette;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if PS_FEEDBACK_LOOP_IS_NEEDED
|
#if PS_FEEDBACK_LOOP_IS_NEEDED || PS_FEEDBACK_LOOP_IS_NEEDED_DEPTH
|
||||||
#if defined(DISABLE_TEXTURE_BARRIER) || defined(HAS_FEEDBACK_LOOP_LAYOUT)
|
#if defined(DISABLE_TEXTURE_BARRIER) || defined(HAS_FEEDBACK_LOOP_LAYOUT)
|
||||||
layout(set = 1, binding = 2) uniform texture2D RtSampler;
|
layout(set = 1, binding = 2) uniform texture2D RtSampler;
|
||||||
vec4 sample_from_rt() { return texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0); }
|
vec4 sample_from_rt() { return texelFetch(RtSampler, ivec2(FragCoord.xy), 0); }
|
||||||
|
#if PS_FEEDBACK_LOOP_IS_NEEDED_DEPTH
|
||||||
|
layout(set = 1, binding = 4) uniform texture2D DepthSampler;
|
||||||
|
vec4 sample_from_depth() { return texelFetch(DepthSampler, ivec2(FragCoord.xy), 0); }
|
||||||
|
#endif
|
||||||
#else
|
#else
|
||||||
layout(input_attachment_index = 0, set = 1, binding = 2) uniform subpassInput RtSampler;
|
layout(input_attachment_index = 0, set = 1, binding = 2) uniform subpassInput RtSampler;
|
||||||
vec4 sample_from_rt() { return subpassLoad(RtSampler); }
|
vec4 sample_from_rt() { return subpassLoad(RtSampler); }
|
||||||
|
#if PS_FEEDBACK_LOOP_IS_NEEDED_DEPTH
|
||||||
|
layout(input_attachment_index = 1, set = 1, binding = 4) uniform subpassInput DepthSampler;
|
||||||
|
vec4 sample_from_depth() { return subpassLoad(DepthSampler); }
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -925,19 +1041,19 @@ vec4 ps_color()
|
|||||||
#if !NEEDS_TEX
|
#if !NEEDS_TEX
|
||||||
vec4 T = vec4(0.0f);
|
vec4 T = vec4(0.0f);
|
||||||
#elif PS_CHANNEL_FETCH == 1
|
#elif PS_CHANNEL_FETCH == 1
|
||||||
vec4 T = fetch_red(ivec2(gl_FragCoord.xy));
|
vec4 T = fetch_red(ivec2(FragCoord.xy));
|
||||||
#elif PS_CHANNEL_FETCH == 2
|
#elif PS_CHANNEL_FETCH == 2
|
||||||
vec4 T = fetch_green(ivec2(gl_FragCoord.xy));
|
vec4 T = fetch_green(ivec2(FragCoord.xy));
|
||||||
#elif PS_CHANNEL_FETCH == 3
|
#elif PS_CHANNEL_FETCH == 3
|
||||||
vec4 T = fetch_blue(ivec2(gl_FragCoord.xy));
|
vec4 T = fetch_blue(ivec2(FragCoord.xy));
|
||||||
#elif PS_CHANNEL_FETCH == 4
|
#elif PS_CHANNEL_FETCH == 4
|
||||||
vec4 T = fetch_alpha(ivec2(gl_FragCoord.xy));
|
vec4 T = fetch_alpha(ivec2(FragCoord.xy));
|
||||||
#elif PS_CHANNEL_FETCH == 5
|
#elif PS_CHANNEL_FETCH == 5
|
||||||
vec4 T = fetch_rgb(ivec2(gl_FragCoord.xy));
|
vec4 T = fetch_rgb(ivec2(FragCoord.xy));
|
||||||
#elif PS_CHANNEL_FETCH == 6
|
#elif PS_CHANNEL_FETCH == 6
|
||||||
vec4 T = fetch_gXbY(ivec2(gl_FragCoord.xy));
|
vec4 T = fetch_gXbY(ivec2(FragCoord.xy));
|
||||||
#elif PS_DEPTH_FMT > 0
|
#elif PS_DEPTH_FMT > 0
|
||||||
vec4 T = sample_depth(st_int, ivec2(gl_FragCoord.xy));
|
vec4 T = sample_depth(st_int, ivec2(FragCoord.xy));
|
||||||
#else
|
#else
|
||||||
vec4 T = sample_color(st);
|
vec4 T = sample_color(st);
|
||||||
#endif
|
#endif
|
||||||
@ -985,9 +1101,9 @@ void ps_dither(inout vec3 C, float As)
|
|||||||
ivec2 fpos;
|
ivec2 fpos;
|
||||||
|
|
||||||
#if PS_DITHER == 2
|
#if PS_DITHER == 2
|
||||||
fpos = ivec2(gl_FragCoord.xy);
|
fpos = ivec2(FragCoord.xy);
|
||||||
#else
|
#else
|
||||||
fpos = ivec2(gl_FragCoord.xy * RcpScaleFactor);
|
fpos = ivec2(FragCoord.xy * RcpScaleFactor);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
float value = DitherMatrix[fpos.y & 3][fpos.x & 3];
|
float value = DitherMatrix[fpos.y & 3][fpos.x & 3];
|
||||||
@ -1228,11 +1344,232 @@ void ps_blend(inout vec4 Color, inout vec4 As_rgba)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if PS_ACCURATE_PRIMS
|
||||||
|
// Interpolate vertex attributes over a line/edge manually.
// weight0 scales the endpoint-0 values and weight1 the endpoint-1 values; the sum is
// divided by (weight0 + weight1). Nothing is returned: the results are written to the
// globals vsIn.t, vsIn.ti, vsIn.c and FragCoord.z.
// NOTE(review): the division assumes weight0 + weight1 != 0; no guard is visible here - confirm with callers.
|
||||||
|
void InterpolateAttributesManual(AccuratePrimsEdgeData data, int weight0, int weight1)
|
||||||
|
{
|
||||||
|
float weight0_f = float(weight0);
|
||||||
|
float weight1_f = float(weight1);
|
||||||
|
float weight_total = float(weight0 + weight1);
|
||||||
|
|
||||||
|
// Weighted average of the two endpoint values for every attribute (and for depth).
vec4 t_float_interp = (weight1_f * data.t_float1 + weight0_f * data.t_float0) / weight_total;
|
||||||
|
vec4 t_int_interp = (weight1_f * data.t_int1 + weight0_f * data.t_int0) / weight_total;
|
||||||
|
vec4 c_interp = (weight1_f * data.c1 + weight0_f * data.c0) / weight_total;
|
||||||
|
float z_interp = (weight1_f * data.p1.z + weight0_f * data.p0.z) / weight_total;
|
||||||
|
|
||||||
|
// No interpolation for constant attributes.
// Per component: where both endpoints hold exactly the same value, that value is used
// as-is instead of the weighted average computed above.
|
||||||
|
vsIn.t = mix(t_float_interp, data.t_float1, equal(data.t_float1, data.t_float0));
|
||||||
|
vsIn.ti = mix(t_int_interp, data.t_int1, equal(data.t_int1, data.t_int0));
|
||||||
|
vsIn.c = mix(c_interp, data.c1, equal(data.c1, data.c0));
|
||||||
|
FragCoord.z = (data.p1.z == data.p0.z) ? data.p1.z : z_interp;
|
||||||
|
|
||||||
|
// Clamp attributes. Fog/Z are normalized.
// Colour is limited to [0, 255]; vsIn.t.z and the depth are limited to [0, 1].
|
||||||
|
vsIn.c = clamp(vsIn.c, 0.0f, 255.0f);
|
||||||
|
vsIn.t.z = clamp(vsIn.t.z, 0.0f, 1.0f);
|
||||||
|
FragCoord.z = clamp(FragCoord.z, 0.0f, 1.0f);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if PS_ACCURATE_PRIMS == ACCURATE_LINES
|
||||||
|
// Per-fragment test for PS_ACCURATE_PRIMS == ACCURATE_LINES.
// Loads this line's record, discards the fragment unless its pixel is the one selected by
// the fixed-point line evaluation below, writes the coverage to alpha_coverage
// (128 without AA, 0..127 with PS_ACCURATE_PRIMS_AA) and then interpolates the
// attributes with InterpolateAttributesManual().
void HandleAccurateLines(out float alpha_coverage)
|
||||||
|
{
|
||||||
|
AccuratePrimsEdgeData data = accurate_prims_data[accurate_prims_base_index + accurate_prims_index];
|
||||||
|
|
||||||
|
ivec2 xy0 = data.xy0;
|
||||||
|
ivec2 xy1 = data.xy1;
|
||||||
|
ivec2 dxy = xy1 - xy0;
|
||||||
|
// Endpoints rounded to a whole pixel: add half a pixel (8), then clear the 4 fractional bits.
ivec2 xy0_i = (xy0 + 8) & ~0xF;
|
||||||
|
ivec2 xy1_i = (xy1 + 8) & ~0xF;
|
||||||
|
bool step_x = bool(data.step_x);
|
||||||
|
bool draw0 = bool(data.draw0);
|
||||||
|
bool draw1 = bool(data.draw1);
|
||||||
|
|
||||||
|
// 4-bit fixed point: 16 subpixels per pixel
|
||||||
|
ivec2 xy_i = 16 * ivec2(floor(FragCoord.xy)); // Subtract half-integer pixel center.
|
||||||
|
|
||||||
|
// Determine major/minor axes
// step_x selects x as the major axis; otherwise y is the major axis.
|
||||||
|
int major0 = step_x ? xy0.x : xy0.y;
|
||||||
|
int major1 = step_x ? xy1.x : xy1.y;
|
||||||
|
int minor0 = step_x ? xy0.y : xy0.x;
|
||||||
|
int minor1 = step_x ? xy1.y : xy1.x;
|
||||||
|
int major_i = step_x ? xy_i.x : xy_i.y;
|
||||||
|
int minor_i = step_x ? xy_i.y : xy_i.x;
|
||||||
|
// NOTE(review): d_major is used as a divisor below; assumes it is never 0 - confirm against the code that builds the record.
int d_major = step_x ? dxy.x : dxy.y;
|
||||||
|
int d_major_scaled = 16 * d_major;
|
||||||
|
|
||||||
|
int major0_i = step_x ? xy0_i.x : xy0_i.y;
|
||||||
|
int major1_i = step_x ? xy1_i.x : xy1_i.y;
|
||||||
|
|
||||||
|
// Discard if outside line range
|
||||||
|
if (major_i < min(major0_i, major1_i) ||
|
||||||
|
major_i > max(major0_i, major1_i))
|
||||||
|
discard;
|
||||||
|
|
||||||
|
// Drop the pixel at an endpoint's rounded major coordinate when that endpoint's draw flag is clear.
if ((major_i == major0_i && !draw0) ||
|
||||||
|
(major_i == major1_i && !draw1))
|
||||||
|
discard;
|
||||||
|
|
||||||
|
// Distances along the major axis to the opposite endpoint; weight0 + weight1 == major1 - major0 == d_major.
int weight0 = major1 - major_i;
|
||||||
|
int weight1 = major_i - major0;
|
||||||
|
|
||||||
|
// Compute minor axis line in fixed-point
// minor_line is the interpolated minor coordinate still multiplied by d_major (the division happens below).
|
||||||
|
int minor_line = weight1 * minor1 + weight0 * minor0;
|
||||||
|
|
||||||
|
#if PS_ACCURATE_PRIMS_AA
|
||||||
|
// Proper fixed-point AA rounding
// Two candidate pixels along the minor axis: the one obtained by clearing the fractional
// bits of the line position, and the next pixel (16 units further).
|
||||||
|
int minor_i_expected_0 = (minor_line / d_major) & ~0xF;
|
||||||
|
int minor_i_expected_1 = minor_i_expected_0 + 16;
|
||||||
|
// Coverage numerators over d_major_scaled; alpha_i_0 + alpha_i_1 == d_major_scaled.
int alpha_i_0 = d_major_scaled - (minor_line - d_major * minor_i_expected_0);
|
||||||
|
int alpha_i_1 = d_major_scaled - alpha_i_0;
|
||||||
|
|
||||||
|
// Keep the fragment only if it is one of the two candidates and take that candidate's coverage.
int alpha_i;
|
||||||
|
if (minor_i == minor_i_expected_0)
|
||||||
|
alpha_i = alpha_i_0;
|
||||||
|
else if (minor_i == minor_i_expected_1)
|
||||||
|
alpha_i = alpha_i_1;
|
||||||
|
else
|
||||||
|
discard;
|
||||||
|
// Make sure that the output alpha is always <= 127 for AA.
|
||||||
|
alpha_coverage = floor(clamp(128.0f * float(alpha_i) / float(d_major_scaled), 0.0f, 127.0f));
|
||||||
|
#else
|
||||||
|
// Non-AA: fixed-point rounding and 4-bit alignment
// (2 * minor_line + d_major_scaled) / (2 * d_major) is minor_line / d_major plus half a pixel (8)
// evaluated in one integer division; clearing the low 4 bits then snaps it to a pixel.
|
||||||
|
int minor_i_expected = ((2 * minor_line + d_major_scaled) / (2 * d_major)) & ~0xF;
|
||||||
|
if (minor_i != minor_i_expected)
|
||||||
|
discard;
|
||||||
|
alpha_coverage = 128.0f;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Interpolate attributes
|
||||||
|
InterpolateAttributesManual(data, weight0, weight1);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if PS_ACCURATE_PRIMS == ACCURATE_TRIANGLES
|
||||||
|
// Per-fragment test for the edge geometry of PS_ACCURATE_PRIMS == ACCURATE_TRIANGLES.
// Loads this edge's record, discards the fragment unless its pixel is the single pixel
// selected for this position along the edge, writes the coverage to alpha_coverage
// (0..127 with PS_ACCURATE_PRIMS_AA, otherwise 128) and then interpolates the attributes
// with InterpolateAttributesManual().
void HandleAccurateTrianglesEdge(out float alpha_coverage)
|
||||||
|
{
|
||||||
|
AccuratePrimsEdgeData data = accurate_prims_data[accurate_prims_base_index + accurate_prims_index];
|
||||||
|
|
||||||
|
ivec2 xy0 = data.xy0;
|
||||||
|
ivec2 xy1 = data.xy1;
|
||||||
|
ivec2 dxy = xy1 - xy0;
|
||||||
|
// Endpoints rounded to a whole pixel: add half a pixel (8), then clear the 4 fractional bits.
ivec2 xy0_i = (xy0 + 8) & ~0xF;
|
||||||
|
ivec2 xy1_i = (xy1 + 8) & ~0xF;
|
||||||
|
bool step_x = bool(data.step_x);
|
||||||
|
bool side = bool(data.side);
|
||||||
|
bool top_left = bool(data.top_left);
|
||||||
|
|
||||||
|
// 4-bit fixed point: 16 subpixels per pixel
|
||||||
|
ivec2 xy_i = 16 * ivec2(floor(FragCoord.xy)); // Subtract half-integer pixel center.
|
||||||
|
|
||||||
|
// Determine major/minor axes
// step_x selects x as the major axis; otherwise y is the major axis.
|
||||||
|
int major0 = step_x ? xy0.x : xy0.y;
|
||||||
|
int major1 = step_x ? xy1.x : xy1.y;
|
||||||
|
int minor0 = step_x ? xy0.y : xy0.x;
|
||||||
|
int minor1 = step_x ? xy1.y : xy1.x;
|
||||||
|
int major_i = step_x ? xy_i.x : xy_i.y;
|
||||||
|
int minor_i = step_x ? xy_i.y : xy_i.x;
|
||||||
|
// NOTE(review): d_major is used as a divisor below; assumes it is never 0 - confirm against the code that builds the record.
int d_major = step_x ? dxy.x : dxy.y;
|
||||||
|
int d_major_scaled = 16 * d_major;
|
||||||
|
|
||||||
|
int major0_i = step_x ? xy0_i.x : xy0_i.y;
|
||||||
|
int major1_i = step_x ? xy1_i.x : xy1_i.y;
|
||||||
|
|
||||||
|
// Discard if outside edge range.
|
||||||
|
// Note: this is not exactly what the SW rasterizer does.
|
||||||
|
// See the note in GSRasterizer::DrawEdgeTriangle() about the asymmetry in X and Y bounds checking.
|
||||||
|
if (major_i < min(major0_i, major1_i) ||
|
||||||
|
major_i > max(major0_i, major1_i))
|
||||||
|
discard;
|
||||||
|
|
||||||
|
// Discard if on wrong side of other edges
// The pixel is kept only if both edge expressions evaluate strictly positive.
// NOTE(review): core GLSL defines dot() for floating-point vectors only, so these ivec4 operands are
// presumably converted to float implicitly - confirm the float precision is enough for the sign test.
|
||||||
|
if (dot(data.edge0, ivec4(xy_i, 1, 0)) <= 0 ||
|
||||||
|
dot(data.edge1, ivec4(xy_i, 1, 0)) <= 0)
|
||||||
|
discard;
|
||||||
|
|
||||||
|
// Distances along the major axis to the opposite endpoint; weight0 + weight1 == major1 - major0 == d_major.
int weight0 = major1 - major_i;
|
||||||
|
int weight1 = major_i - major0;
|
||||||
|
|
||||||
|
// Compute minor axis line in fixed-point
// minor_line is the interpolated minor coordinate still multiplied by d_major;
// minor_i_expected is that coordinate after the integer division.
|
||||||
|
int minor_line = weight1 * minor1 + weight0 * minor0;
|
||||||
|
int minor_i_expected = minor_line / d_major;
|
||||||
|
// Candidate pixels: fractional bits cleared, and the next pixel (16 units further).
int minor_i_expected_0 = minor_i_expected & ~0xF;
|
||||||
|
int minor_i_expected_1 = minor_i_expected_0 + 16;
|
||||||
|
// Coverage numerators over d_major_scaled; alpha_i_0 + alpha_i_1 == d_major_scaled.
int alpha_i_0 = d_major_scaled - (minor_line - d_major * minor_i_expected_0);
|
||||||
|
int alpha_i_1 = d_major_scaled - alpha_i_0;
|
||||||
|
|
||||||
|
// Proper fixed-point AA rounding
// Exactly one pixel is accepted per major position; which one depends on side / top_left.
|
||||||
|
int alpha_i;
|
||||||
|
if ((minor_i_expected & 0xF) == 0)
|
||||||
|
{
|
||||||
|
// On a pixel center
// top_left set: coverage 0 and the expected pixel moves one pixel (-16 if side, else +16).
// top_left clear: full coverage (d_major_scaled) on the pixel itself.
|
||||||
|
alpha_i = top_left ? 0 : d_major_scaled;
|
||||||
|
minor_i_expected += top_left ? (side ? -16 : 16) : 0;
|
||||||
|
}
|
||||||
|
// Not on a pixel center: side picks the first candidate, otherwise the second one.
else if (side)
|
||||||
|
{
|
||||||
|
minor_i_expected = minor_i_expected_0;
|
||||||
|
alpha_i = alpha_i_0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
minor_i_expected = minor_i_expected_1;
|
||||||
|
alpha_i = alpha_i_1;
|
||||||
|
}
|
||||||
|
if (minor_i != minor_i_expected)
|
||||||
|
discard;
|
||||||
|
|
||||||
|
#if PS_ACCURATE_PRIMS_AA
|
||||||
|
// Make sure that the output alpha is always <= 127 for AA.
|
||||||
|
alpha_coverage = floor(clamp(128.0f * float(alpha_i) / float(d_major_scaled), 0.0f, 127.0f));
|
||||||
|
#else
|
||||||
|
// Without AA the pixel selection above still applies, but the coverage is always full.
alpha_coverage = 128.0f;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Interpolate attributes
|
||||||
|
InterpolateAttributesManual(data, weight0, weight1);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
void main()
|
void main()
|
||||||
{
|
{
|
||||||
|
FragCoord = gl_FragCoord;
|
||||||
|
|
||||||
|
#if PS_ACCURATE_PRIMS
|
||||||
|
float alpha_coverage;
|
||||||
|
#if PS_ACCURATE_PRIMS == ACCURATE_LINES
|
||||||
|
HandleAccurateLines(alpha_coverage);
|
||||||
|
#elif PS_ACCURATE_PRIMS == ACCURATE_TRIANGLES
|
||||||
|
if (bool(accurate_triangles_interior))
|
||||||
|
{
|
||||||
|
alpha_coverage = 128.0f;
|
||||||
|
vsIn.t = vsInReal.t;
|
||||||
|
vsIn.ti = vsInReal.ti;
|
||||||
|
vsIn.c = vsInReal.c;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
HandleAccurateTrianglesEdge(alpha_coverage);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#endif // PS_ACCURATE_PRIMS
|
||||||
|
|
||||||
|
#if PS_FEEDBACK_LOOP_IS_NEEDED_DEPTH
|
||||||
|
float current_depth = sample_from_depth().r;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if PS_ZCLAMP && (PS_ZTST == ZTST_GEQUAL || PS_ZTST == ZTST_GREATER)
|
||||||
|
#if PS_ZTST == ZTST_GEQUAL
|
||||||
|
if (FragCoord.z < current_depth)
|
||||||
|
discard;
|
||||||
|
#elif PS_ZTST == ZTST_GREATER
|
||||||
|
if (FragCoord.z <= current_depth)
|
||||||
|
discard;
|
||||||
|
#endif
|
||||||
|
#endif // PS_ZTST
|
||||||
|
|
||||||
#if PS_SCANMSK & 2
|
#if PS_SCANMSK & 2
|
||||||
// fail depth test on prohibited lines
|
// fail depth test on prohibited lines
|
||||||
if ((int(gl_FragCoord.y) & 1) == (PS_SCANMSK & 1))
|
if ((int(FragCoord.y) & 1) == (PS_SCANMSK & 1))
|
||||||
discard;
|
discard;
|
||||||
#endif
|
#endif
|
||||||
#if PS_DATE >= 5
|
#if PS_DATE >= 5
|
||||||
@ -1267,7 +1604,7 @@ void main()
|
|||||||
#endif // PS_DATE >= 5
|
#endif // PS_DATE >= 5
|
||||||
|
|
||||||
#if PS_DATE == 3
|
#if PS_DATE == 3
|
||||||
int stencil_ceil = int(texelFetch(PrimMinTexture, ivec2(gl_FragCoord.xy), 0).r);
|
int stencil_ceil = int(texelFetch(PrimMinTexture, ivec2(FragCoord.xy), 0).r);
|
||||||
// Note gl_PrimitiveID == stencil_ceil will be the primitive that will update
|
// Note gl_PrimitiveID == stencil_ceil will be the primitive that will update
|
||||||
// the bad alpha value so we must keep it.
|
// the bad alpha value so we must keep it.
|
||||||
|
|
||||||
@ -1277,6 +1614,20 @@ void main()
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
vec4 C = ps_color();
|
vec4 C = ps_color();
|
||||||
|
|
||||||
|
#if PS_FIXED_ONE_A
|
||||||
|
// AA (Fixed one) will output a coverage of 1.0 as alpha
|
||||||
|
C.a = 128.0f;
|
||||||
|
#elif PS_ACCURATE_PRIMS_AA
|
||||||
|
// AA: coverage is computed in alpha_coverage
|
||||||
|
#if PS_ACCURATE_PRIMS_AA_ABE
|
||||||
|
if (floor(C.a) == 128.0f) // According to manual & hardware tests the coverage is only used if the fragment alpha is 128.
|
||||||
|
C.a = alpha_coverage;
|
||||||
|
#else
|
||||||
|
C.a = alpha_coverage;
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
bool atst_pass = atst(C);
|
bool atst_pass = atst(C);
|
||||||
|
|
||||||
#if PS_AFAIL == 0 // KEEP or ATST off
|
#if PS_AFAIL == 0 // KEEP or ATST off
|
||||||
@ -1284,13 +1635,6 @@ void main()
|
|||||||
discard;
|
discard;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Must be done before alpha correction
|
|
||||||
|
|
||||||
// AA (Fixed one) will output a coverage of 1.0 as alpha
|
|
||||||
#if PS_FIXED_ONE_A
|
|
||||||
C.a = 128.0f;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if SW_AD_TO_HW
|
#if SW_AD_TO_HW
|
||||||
#if PS_RTA_CORRECTION
|
#if PS_RTA_CORRECTION
|
||||||
vec4 RT = trunc(sample_from_rt() * 128.0f + 0.1f);
|
vec4 RT = trunc(sample_from_rt() * 128.0f + 0.1f);
|
||||||
@ -1327,7 +1671,7 @@ void main()
|
|||||||
#else
|
#else
|
||||||
ps_blend(C, alpha_blend);
|
ps_blend(C, alpha_blend);
|
||||||
|
|
||||||
#if PS_SHUFFLE
|
#if PS_SHUFFLE
|
||||||
#if !PS_READ16_SRC && !PS_SHUFFLE_SAME && !(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE)
|
#if !PS_READ16_SRC && !PS_SHUFFLE_SAME && !(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE)
|
||||||
uvec4 denorm_c_after = uvec4(C);
|
uvec4 denorm_c_after = uvec4(C);
|
||||||
#if (PS_PROCESS_BA & SHUFFLE_READ)
|
#if (PS_PROCESS_BA & SHUFFLE_READ)
|
||||||
@ -1401,9 +1745,15 @@ void main()
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if PS_ZCLAMP
|
#if PS_ZCLAMP
|
||||||
gl_FragDepth = min(gl_FragCoord.z, MaxDepthPS);
|
#if PS_ACCURATE_PRIMS == ACCURATE_TRIANGLES
|
||||||
|
if (bool(accurate_triangles_interior))
|
||||||
|
gl_FragDepth = min(FragCoord.z, MaxDepthPS);
|
||||||
|
else
|
||||||
|
gl_FragDepth = current_depth; // No depth update for triangle edges.
|
||||||
|
#else
|
||||||
|
gl_FragDepth = min(FragCoord.z, MaxDepthPS);
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif // PS_DATE
|
#endif // PS_DATE
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -757,6 +757,7 @@ struct Pcsx2Config
|
|||||||
PreloadFrameWithGSData : 1,
|
PreloadFrameWithGSData : 1,
|
||||||
Mipmap : 1,
|
Mipmap : 1,
|
||||||
HWMipmap : 1,
|
HWMipmap : 1,
|
||||||
|
HWAccuratePrims: 1,
|
||||||
ManualUserHacks : 1,
|
ManualUserHacks : 1,
|
||||||
UserHacks_AlignSpriteX : 1,
|
UserHacks_AlignSpriteX : 1,
|
||||||
UserHacks_CPUFBConversion : 1,
|
UserHacks_CPUFBConversion : 1,
|
||||||
|
|||||||
@ -431,6 +431,10 @@ const char* GSState::GetFlushReasonString(GSFlushReason reason)
|
|||||||
return "VSYNC";
|
return "VSYNC";
|
||||||
case GSFlushReason::GSREOPEN:
|
case GSFlushReason::GSREOPEN:
|
||||||
return "GS REOPEN";
|
return "GS REOPEN";
|
||||||
|
case GSFlushReason::VERTEXCOUNT:
|
||||||
|
return "VERTEX COUNT";
|
||||||
|
case GSFlushReason::VERTEXCOUNTEXPANDED:
|
||||||
|
return "VERTEX COUNT EXPANDED";
|
||||||
case GSFlushReason::UNKNOWN:
|
case GSFlushReason::UNKNOWN:
|
||||||
default:
|
default:
|
||||||
return "UNKNOWN";
|
return "UNKNOWN";
|
||||||
@ -3265,6 +3269,20 @@ void GSState::UpdateVertexKick()
|
|||||||
|
|
||||||
m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZF2] = m_fpGIFPackedRegHandlerSTQRGBAXYZF2[prim];
|
m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZF2] = m_fpGIFPackedRegHandlerSTQRGBAXYZF2[prim];
|
||||||
m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZ2] = m_fpGIFPackedRegHandlerSTQRGBAXYZ2[prim];
|
m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZ2] = m_fpGIFPackedRegHandlerSTQRGBAXYZ2[prim];
|
||||||
|
|
||||||
|
if (UsingAccuratePrims())
|
||||||
|
{
|
||||||
|
if (GSUtil::GetPrimClass(prim) == GS_LINE_CLASS)
|
||||||
|
m_vertex_expansion_factor = 3;
|
||||||
|
else if (GSUtil::GetPrimClass(prim) == GS_TRIANGLE_CLASS)
|
||||||
|
m_vertex_expansion_factor = 7;
|
||||||
|
else
|
||||||
|
pxFail("Wrong primitive class."); // Impossible.
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
m_vertex_expansion_factor = 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSState::GrowVertexBuffer()
|
void GSState::GrowVertexBuffer()
|
||||||
@ -4891,6 +4909,12 @@ __forceinline void GSState::VertexKick(u32 skip)
|
|||||||
constexpr u32 max_vertices = MaxVerticesForPrim(prim);
|
constexpr u32 max_vertices = MaxVerticesForPrim(prim);
|
||||||
if (max_vertices != 0 && m_vertex.tail >= max_vertices)
|
if (max_vertices != 0 && m_vertex.tail >= max_vertices)
|
||||||
Flush(VERTEXCOUNT);
|
Flush(VERTEXCOUNT);
|
||||||
|
|
||||||
|
if (m_vertex_expansion_factor != 1)
|
||||||
|
{
|
||||||
|
if (max_vertices != 0 && (m_vertex_expansion_factor * m_index.tail) >= max_vertices)
|
||||||
|
Flush(VERTEXCOUNTEXPANDED);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Checks if region repeat is used (applying it does something to at least one of the values in min...max)
|
/// Checks if region repeat is used (applying it does something to at least one of the values in min...max)
|
||||||
@ -5227,12 +5251,15 @@ void GSState::CalcAlphaMinMax(const int tex_alpha_min, const int tex_alpha_max)
|
|||||||
// Limit max to 255 as we send 500 when we don't know, makes calculating 24/16bit easier.
|
// Limit max to 255 as we send 500 when we don't know, makes calculating 24/16bit easier.
|
||||||
int min = tex_alpha_min, max = std::min(tex_alpha_max, 255);
|
int min = tex_alpha_min, max = std::min(tex_alpha_max, 255);
|
||||||
|
|
||||||
if (IsCoverageAlpha())
|
if (IsCoverageAlphaFixedOne())
|
||||||
{
|
{
|
||||||
// HW renderer doesn't currently support AA, so its min is 128.
|
// HW renderer doesn't support AA1, assume alpha is constant 128.
|
||||||
// If we add AA support to the HW renderer, this will need to be changed.
|
min = 128;
|
||||||
// (Will probably only be supported with ROV/FBFetch so we would want to check for that.)
|
max = 128;
|
||||||
min = GSIsHardwareRenderer() ? 128 : 0;
|
}
|
||||||
|
else if (IsCoverageAlphaSupported())
|
||||||
|
{
|
||||||
|
min = 0;
|
||||||
max = 128;
|
max = 128;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -5527,7 +5554,24 @@ bool GSState::IsMipMapActive()
|
|||||||
|
|
||||||
bool GSState::IsCoverageAlpha()
|
bool GSState::IsCoverageAlpha()
|
||||||
{
|
{
|
||||||
return !PRIM->ABE && PRIM->AA1 && (m_vt.m_primclass == GS_LINE_CLASS || m_vt.m_primclass == GS_TRIANGLE_CLASS);
|
return PRIM->AA1 && (m_vt.m_primclass == GS_LINE_CLASS || m_vt.m_primclass == GS_TRIANGLE_CLASS);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool GSState::IsCoverageAlphaFixedOne()
|
||||||
|
{
|
||||||
|
return IsCoverageAlpha() && !PRIM->ABE && !IsCoverageAlphaSupported();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reports whether this renderer can produce real AA1 coverage alpha.
/// This base implementation always answers false. The member is declared virtual
/// in the class, so a derived renderer may override it (presumably the hardware
/// renderer when accurate prims are available; confirm against the overriding class).
bool GSState::IsCoverageAlphaSupported()
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reports whether the current primitive should go through the accurate-prims path.
/// True only when the device advertises the accurate_prims feature and the current
/// primitive type (PRIM->PRIM) is either a line class primitive, or a triangle class
/// primitive with AA1 set.
bool GSState::UsingAccuratePrims()
|
||||||
|
{
|
||||||
|
return g_gs_device->Features().accurate_prims &&
|
||||||
|
(GSUtil::GetPrimClass(PRIM->PRIM) == GS_LINE_CLASS ||
|
||||||
|
(GSUtil::GetPrimClass(PRIM->PRIM) == GS_TRIANGLE_CLASS && PRIM->AA1));
|
||||||
}
|
}
|
||||||
|
|
||||||
GIFRegTEX0 GSState::GetTex0Layer(u32 lod)
|
GIFRegTEX0 GSState::GetTex0Layer(u32 lod)
|
||||||
|
|||||||
@ -165,6 +165,8 @@ protected:
|
|||||||
u32 tail;
|
u32 tail;
|
||||||
} m_draw_index = {};
|
} m_draw_index = {};
|
||||||
|
|
||||||
|
int m_vertex_expansion_factor = 1;
|
||||||
|
|
||||||
void UpdateContext();
|
void UpdateContext();
|
||||||
void UpdateScissor();
|
void UpdateScissor();
|
||||||
|
|
||||||
@ -207,6 +209,9 @@ protected:
|
|||||||
bool IsMipMapDraw();
|
bool IsMipMapDraw();
|
||||||
bool IsMipMapActive();
|
bool IsMipMapActive();
|
||||||
bool IsCoverageAlpha();
|
bool IsCoverageAlpha();
|
||||||
|
bool IsCoverageAlphaFixedOne();
|
||||||
|
virtual bool IsCoverageAlphaSupported();
|
||||||
|
bool UsingAccuratePrims();
|
||||||
void CalcAlphaMinMax(const int tex_min, const int tex_max);
|
void CalcAlphaMinMax(const int tex_min, const int tex_max);
|
||||||
void CorrectATEAlphaMinMax(const u32 atst, const int aref);
|
void CorrectATEAlphaMinMax(const u32 atst, const int aref);
|
||||||
|
|
||||||
@ -327,6 +332,7 @@ public:
|
|||||||
VSYNC = 1 << 13,
|
VSYNC = 1 << 13,
|
||||||
GSREOPEN = 1 << 14,
|
GSREOPEN = 1 << 14,
|
||||||
VERTEXCOUNT = 1 << 15,
|
VERTEXCOUNT = 1 << 15,
|
||||||
|
VERTEXCOUNTEXPANDED = 1 << 16,
|
||||||
};
|
};
|
||||||
|
|
||||||
GSFlushReason m_state_flush_reason = UNKNOWN;
|
GSFlushReason m_state_flush_reason = UNKNOWN;
|
||||||
|
|||||||
@ -57,6 +57,16 @@ public:
|
|||||||
return (std::memcmp(this, &v, sizeof(*this)) != 0);
|
return (std::memcmp(this, &v, sizeof(*this)) != 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
constexpr GSVector2T operator+(const GSVector2T& v) const
|
||||||
|
{
|
||||||
|
return {x + v.x, y + v.y};
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr GSVector2T operator-(const GSVector2T& v) const
|
||||||
|
{
|
||||||
|
return {x - v.x, y - v.y};
|
||||||
|
}
|
||||||
|
|
||||||
constexpr GSVector2T operator*(const GSVector2T& v) const
|
constexpr GSVector2T operator*(const GSVector2T& v) const
|
||||||
{
|
{
|
||||||
return { x * v.x, y * v.y };
|
return { x * v.x, y * v.y };
|
||||||
@ -81,6 +91,11 @@ public:
|
|||||||
typedef GSVector2T<float> GSVector2;
|
typedef GSVector2T<float> GSVector2;
|
||||||
typedef GSVector2T<int> GSVector2i;
|
typedef GSVector2T<int> GSVector2i;
|
||||||
|
|
||||||
|
constexpr GSVector2i operator&(const GSVector2i& v0, const GSVector2i& v1)
|
||||||
|
{
|
||||||
|
return {v0.x & v1.x, v0.y & v1.y};
|
||||||
|
}
|
||||||
|
|
||||||
class GSVector4;
|
class GSVector4;
|
||||||
class GSVector4i;
|
class GSVector4i;
|
||||||
|
|
||||||
|
|||||||
@ -289,6 +289,41 @@ struct HWBlend
|
|||||||
BlendFactor src, dst;
|
BlendFactor src, dst;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct alignas(16) AccuratePrimsEdgeData
|
||||||
|
{
|
||||||
|
// Interpolated attributes
|
||||||
|
GSVector4 t_float0; // 0
|
||||||
|
GSVector4 t_float1; // 16
|
||||||
|
GSVector4 t_int0; // 32
|
||||||
|
GSVector4 t_int1; // 48
|
||||||
|
GSVector4 c0; // 64
|
||||||
|
GSVector4 c1; // 80
|
||||||
|
GSVector4 p0; // 96
|
||||||
|
GSVector4 p1; // 112
|
||||||
|
GSVector4i edge0; // 128
|
||||||
|
GSVector4i edge1; // 144
|
||||||
|
GSVector2i xy0; // 160
|
||||||
|
GSVector2i xy1; // 168
|
||||||
|
u32 step_x; // 176
|
||||||
|
u32 draw0; // 180
|
||||||
|
u32 draw1; // 184
|
||||||
|
u32 top_left; // 188
|
||||||
|
u32 side; // 192
|
||||||
|
u32 _pad0; // 196
|
||||||
|
u32 _pad1; // 200
|
||||||
|
u32 _pad2; // 204
|
||||||
|
// Total 208
|
||||||
|
};
|
||||||
|
|
||||||
|
static_assert(sizeof(AccuratePrimsEdgeData) == 208);
|
||||||
|
|
||||||
|
enum
|
||||||
|
{
|
||||||
|
ACCURATE_PRIMS_DISABLE = 0,
|
||||||
|
ACCURATE_PRIMS_LINE = 1,
|
||||||
|
ACCURATE_PRIMS_TRIANGLE = 2
|
||||||
|
};
|
||||||
|
|
||||||
struct alignas(16) GSHWDrawConfig
|
struct alignas(16) GSHWDrawConfig
|
||||||
{
|
{
|
||||||
enum class Topology: u8
|
enum class Topology: u8
|
||||||
@ -316,7 +351,7 @@ struct alignas(16) GSHWDrawConfig
|
|||||||
u8 iip : 1;
|
u8 iip : 1;
|
||||||
u8 point_size : 1; ///< Set when points need to be expanded without VS expanding.
|
u8 point_size : 1; ///< Set when points need to be expanded without VS expanding.
|
||||||
VSExpand expand : 2;
|
VSExpand expand : 2;
|
||||||
u8 _free : 2;
|
u8 accurate_prims : 2; // 0 - disables; 1 - lines; 2 - triangles.
|
||||||
};
|
};
|
||||||
u8 key;
|
u8 key;
|
||||||
};
|
};
|
||||||
@ -354,6 +389,7 @@ struct alignas(16) GSHWDrawConfig
|
|||||||
u32 date : 3;
|
u32 date : 3;
|
||||||
u32 atst : 3;
|
u32 atst : 3;
|
||||||
u32 afail : 2;
|
u32 afail : 2;
|
||||||
|
u32 ztst : 2;
|
||||||
// Color sampling
|
// Color sampling
|
||||||
u32 fst : 1; // Investigate to do it on the VS
|
u32 fst : 1; // Investigate to do it on the VS
|
||||||
u32 tfx : 3;
|
u32 tfx : 3;
|
||||||
@ -414,6 +450,11 @@ struct alignas(16) GSHWDrawConfig
|
|||||||
|
|
||||||
// Scan mask
|
// Scan mask
|
||||||
u32 scanmsk : 2;
|
u32 scanmsk : 2;
|
||||||
|
|
||||||
|
// Accurate prims (lines/triangles)
|
||||||
|
u32 accurate_prims : 2; // 0 - disabled; 1 - lines; 2 - triangles
|
||||||
|
u32 accurate_prims_aa : 1;
|
||||||
|
u32 accurate_prims_aa_abe : 1;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct
|
struct
|
||||||
@ -435,6 +476,13 @@ struct alignas(16) GSHWDrawConfig
|
|||||||
return channel_fb || tex_is_fb || fbmask || (date >= 5) || sw_blend_needs_rt;
|
return channel_fb || tex_is_fb || fbmask || (date >= 5) || sw_blend_needs_rt;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__fi bool IsFeedbackLoopDepth() const
|
||||||
|
{
|
||||||
|
// Note: Manual depth testing/interpolation for accurate prims is bundled with zclamp to reduce pipeline combinations.
|
||||||
|
// The zclamp is used to indicate that either Z write or Z testing is enabled.
|
||||||
|
return (accurate_prims == ACCURATE_PRIMS_TRIANGLE) && accurate_prims_aa && zclamp;
|
||||||
|
}
|
||||||
|
|
||||||
/// Disables color output from the pixel shader, this is done when all channels are masked.
|
/// Disables color output from the pixel shader, this is done when all channels are masked.
|
||||||
__fi void DisableColorOutput()
|
__fi void DisableColorOutput()
|
||||||
{
|
{
|
||||||
@ -579,6 +627,7 @@ struct alignas(16) GSHWDrawConfig
|
|||||||
GSVector2 texture_offset;
|
GSVector2 texture_offset;
|
||||||
GSVector2 point_size;
|
GSVector2 point_size;
|
||||||
GSVector2i max_depth;
|
GSVector2i max_depth;
|
||||||
|
GSVector2i base_vertex;
|
||||||
__fi VSConstantBuffer()
|
__fi VSConstantBuffer()
|
||||||
{
|
{
|
||||||
memset(static_cast<void*>(this), 0, sizeof(*this));
|
memset(static_cast<void*>(this), 0, sizeof(*this));
|
||||||
@ -628,6 +677,8 @@ struct alignas(16) GSHWDrawConfig
|
|||||||
|
|
||||||
GSVector4 ScaleFactor;
|
GSVector4 ScaleFactor;
|
||||||
|
|
||||||
|
GSVector4i accurate_prims_base_index;
|
||||||
|
|
||||||
__fi PSConstantBuffer()
|
__fi PSConstantBuffer()
|
||||||
{
|
{
|
||||||
memset(static_cast<void*>(this), 0, sizeof(*this));
|
memset(static_cast<void*>(this), 0, sizeof(*this));
|
||||||
@ -745,6 +796,9 @@ struct alignas(16) GSHWDrawConfig
|
|||||||
SetDATM datm : 2;
|
SetDATM datm : 2;
|
||||||
bool line_expand : 1;
|
bool line_expand : 1;
|
||||||
|
|
||||||
|
bool accurate_prims;
|
||||||
|
std::vector<AccuratePrimsEdgeData>* accurate_prims_edge_data;
|
||||||
|
|
||||||
struct AlphaPass
|
struct AlphaPass
|
||||||
{
|
{
|
||||||
alignas(8) PSSelector ps;
|
alignas(8) PSSelector ps;
|
||||||
@ -843,6 +897,7 @@ public:
|
|||||||
bool stencil_buffer : 1; ///< Supports stencil buffer, and can use for DATE.
|
bool stencil_buffer : 1; ///< Supports stencil buffer, and can use for DATE.
|
||||||
bool cas_sharpening : 1; ///< Supports sufficient functionality for contrast adaptive sharpening.
|
bool cas_sharpening : 1; ///< Supports sufficient functionality for contrast adaptive sharpening.
|
||||||
bool test_and_sample_depth: 1; ///< Supports concurrently binding the depth-stencil buffer for sampling and depth testing.
|
bool test_and_sample_depth: 1; ///< Supports concurrently binding the depth-stencil buffer for sampling and depth testing.
|
||||||
|
bool accurate_prims : 1; ///< Supports AA1 triangles/lines and accurate lines shaders.
|
||||||
FeatureSupport()
|
FeatureSupport()
|
||||||
{
|
{
|
||||||
memset(this, 0, sizeof(*this));
|
memset(this, 0, sizeof(*this));
|
||||||
|
|||||||
@ -14,6 +14,7 @@
|
|||||||
#include "common/Error.h"
|
#include "common/Error.h"
|
||||||
#include "common/Path.h"
|
#include "common/Path.h"
|
||||||
#include "common/StringUtil.h"
|
#include "common/StringUtil.h"
|
||||||
|
#include "common/ScopedGuard.h"
|
||||||
|
|
||||||
#include "imgui.h"
|
#include "imgui.h"
|
||||||
#include "IconsFontAwesome6.h"
|
#include "IconsFontAwesome6.h"
|
||||||
@ -395,6 +396,39 @@ bool GSDevice11::Create(GSVSyncMode vsync_mode, bool allow_present_throttle)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bd = {};
|
||||||
|
|
||||||
|
if (m_features.accurate_prims)
|
||||||
|
{
|
||||||
|
bd.Usage = D3D11_USAGE_DEFAULT;
|
||||||
|
bd.CPUAccessFlags = 0;
|
||||||
|
bd.ByteWidth = ACCURATE_PRIMS_BUFFER_SIZE;
|
||||||
|
bd.BindFlags = D3D11_BIND_SHADER_RESOURCE;
|
||||||
|
bd.StructureByteStride = sizeof(AccuratePrimsEdgeData);
|
||||||
|
bd.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
|
||||||
|
|
||||||
|
if (FAILED(m_dev->CreateBuffer(&bd, nullptr, m_accurate_prims_b.put())))
|
||||||
|
{
|
||||||
|
Console.Error("D3D11: Failed to create accurate prims buffer.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
const CD3D11_SHADER_RESOURCE_VIEW_DESC accurate_prims_b_srv_desc(
|
||||||
|
D3D11_SRV_DIMENSION_BUFFER, DXGI_FORMAT_UNKNOWN, 0,
|
||||||
|
ACCURATE_PRIMS_BUFFER_SIZE / sizeof(AccuratePrimsEdgeData));
|
||||||
|
|
||||||
|
if (FAILED(m_dev->CreateShaderResourceView(m_accurate_prims_b.get(), &accurate_prims_b_srv_desc,
|
||||||
|
m_accurate_prims_b_srv.put())))
|
||||||
|
{
|
||||||
|
Console.Error("D3D11: Failed to create accurate prims buffer SRV.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If MAX_TEXTURES changes, please change the register for this buffer in the shader.
|
||||||
|
static_assert(MAX_TEXTURES == 5);
|
||||||
|
m_ctx->PSSetShaderResources(5, 1, m_accurate_prims_b_srv.addressof());
|
||||||
|
}
|
||||||
|
|
||||||
// rasterizer
|
// rasterizer
|
||||||
|
|
||||||
memset(&rd, 0, sizeof(rd));
|
memset(&rd, 0, sizeof(rd));
|
||||||
@ -541,6 +575,8 @@ void GSDevice11::Destroy()
|
|||||||
m_expand_vb_srv.reset();
|
m_expand_vb_srv.reset();
|
||||||
m_expand_vb.reset();
|
m_expand_vb.reset();
|
||||||
m_expand_ib.reset();
|
m_expand_ib.reset();
|
||||||
|
m_accurate_prims_b.reset();
|
||||||
|
m_accurate_prims_b_srv.reset();
|
||||||
|
|
||||||
m_vs.clear();
|
m_vs.clear();
|
||||||
m_vs_cb.reset();
|
m_vs_cb.reset();
|
||||||
@ -599,6 +635,8 @@ void GSDevice11::SetFeatures(IDXGIAdapter1* adapter)
|
|||||||
m_max_texture_size = (m_feature_level >= D3D_FEATURE_LEVEL_11_0) ?
|
m_max_texture_size = (m_feature_level >= D3D_FEATURE_LEVEL_11_0) ?
|
||||||
D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION :
|
D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION :
|
||||||
D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION;
|
D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION;
|
||||||
|
|
||||||
|
m_features.accurate_prims = GSConfig.HWAccuratePrims;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool GSDevice11::HasSurface() const
|
bool GSDevice11::HasSurface() const
|
||||||
@ -1665,6 +1703,7 @@ void GSDevice11::SetupVS(VSSelector sel, const GSHWDrawConfig::VSConstantBuffer*
|
|||||||
sm.AddMacro("VS_FST", sel.fst);
|
sm.AddMacro("VS_FST", sel.fst);
|
||||||
sm.AddMacro("VS_IIP", sel.iip);
|
sm.AddMacro("VS_IIP", sel.iip);
|
||||||
sm.AddMacro("VS_EXPAND", static_cast<int>(sel.expand));
|
sm.AddMacro("VS_EXPAND", static_cast<int>(sel.expand));
|
||||||
|
sm.AddMacro("VS_ACCURATE_PRIMS", static_cast<int>(sel.accurate_prims));
|
||||||
|
|
||||||
static constexpr const D3D11_INPUT_ELEMENT_DESC layout[] =
|
static constexpr const D3D11_INPUT_ELEMENT_DESC layout[] =
|
||||||
{
|
{
|
||||||
@ -1766,6 +1805,10 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant
|
|||||||
sm.AddMacro("PS_TEX_IS_FB", sel.tex_is_fb);
|
sm.AddMacro("PS_TEX_IS_FB", sel.tex_is_fb);
|
||||||
sm.AddMacro("PS_NO_COLOR", sel.no_color);
|
sm.AddMacro("PS_NO_COLOR", sel.no_color);
|
||||||
sm.AddMacro("PS_NO_COLOR1", sel.no_color1);
|
sm.AddMacro("PS_NO_COLOR1", sel.no_color1);
|
||||||
|
sm.AddMacro("PS_ACCURATE_PRIMS", sel.accurate_prims);
|
||||||
|
sm.AddMacro("PS_ACCURATE_PRIMS_AA", sel.accurate_prims_aa);
|
||||||
|
sm.AddMacro("PS_ACCURATE_PRIMS_AA_ABE", sel.accurate_prims_aa_abe);
|
||||||
|
sm.AddMacro("PS_ZTST", sel.ztst);
|
||||||
|
|
||||||
wil::com_ptr_nothrow<ID3D11PixelShader> ps = m_shader_cache.GetPixelShader(m_dev.get(), m_tfx_source, sm.GetPtr(), "ps_main");
|
wil::com_ptr_nothrow<ID3D11PixelShader> ps = m_shader_cache.GetPixelShader(m_dev.get(), m_tfx_source, sm.GetPtr(), "ps_main");
|
||||||
i = m_ps.try_emplace(sel, std::move(ps)).first;
|
i = m_ps.try_emplace(sel, std::move(ps)).first;
|
||||||
@ -2280,6 +2323,32 @@ bool GSDevice11::IASetExpandVertexBuffer(const void* vertex, u32 stride, u32 cou
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Uploads the edge data of an accurate-prims draw into the structured buffer that is
/// bound as a pixel shader resource, and resets the base index in the PS constants.
/// Does nothing (and succeeds) when config.accurate_prims is not set.
/// @param config Draw configuration; cb_ps.accurate_prims_base_index.x is written on upload.
/// @return false when no edge data is attached or it does not fit in the buffer, true otherwise.
bool GSDevice11::SetupAccuratePrims(GSHWDrawConfig& config)
{
	if (!config.accurate_prims)
		return true;

	// A draw flagged for accurate prims without edge data cannot be rendered; fail instead of dereferencing null.
	if (!config.accurate_prims_edge_data)
		return false;

	// Check the capacity in elements before converting to bytes, so the byte size can never wrap around u32.
	constexpr size_t max_count = ACCURATE_PRIMS_BUFFER_SIZE / sizeof(AccuratePrimsEdgeData);
	const size_t count = config.accurate_prims_edge_data->size();
	if (count > max_count)
		return false;

	const u32 size = static_cast<u32>(count * sizeof(AccuratePrimsEdgeData));

	// Performance note: UpdateSubresource() copies data to a temp staging buffer to avoid stalling the GPU,
	// so a manual ring buffer is not needed here like VK/DX12.
	D3D11_BOX dst_region{};
	dst_region.left = 0;
	dst_region.right = size;
	dst_region.top = 0;
	dst_region.bottom = 1;
	dst_region.front = 0;
	dst_region.back = 1;
	m_ctx->UpdateSubresource(m_accurate_prims_b.get(), 0, &dst_region, config.accurate_prims_edge_data->data(), size, 0);

	config.cb_ps.accurate_prims_base_index.x = 0; // No offsetting needed like DX12/VK since we don't use a ring buffer.

	return true;
}
|
||||||
|
|
||||||
u16* GSDevice11::IAMapIndexBuffer(u32 count)
|
u16* GSDevice11::IAMapIndexBuffer(u32 count)
|
||||||
{
|
{
|
||||||
if (count > (INDEX_BUFFER_SIZE / sizeof(u16)))
|
if (count > (INDEX_BUFFER_SIZE / sizeof(u16)))
|
||||||
@ -2583,6 +2652,18 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
|
|||||||
{
|
{
|
||||||
const GSVector2i rtsize = (config.rt ? config.rt : config.ds)->GetSize();
|
const GSVector2i rtsize = (config.rt ? config.rt : config.ds)->GetSize();
|
||||||
GSTexture* colclip_rt = g_gs_device->GetColorClipTexture();
|
GSTexture* colclip_rt = g_gs_device->GetColorClipTexture();
|
||||||
|
GSTexture* draw_rt_clone = nullptr;
|
||||||
|
GSTexture* draw_ds_clone = nullptr;
|
||||||
|
GSTexture* primid_texture = nullptr;
|
||||||
|
|
||||||
|
ScopedGuard recycle_temp_textures([&]() {
|
||||||
|
if (draw_rt_clone)
|
||||||
|
Recycle(draw_rt_clone);
|
||||||
|
if (draw_ds_clone)
|
||||||
|
Recycle(draw_ds_clone);
|
||||||
|
if (primid_texture)
|
||||||
|
Recycle(primid_texture);
|
||||||
|
});
|
||||||
|
|
||||||
if (colclip_rt)
|
if (colclip_rt)
|
||||||
{
|
{
|
||||||
@ -2627,7 +2708,6 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
|
|||||||
|
|
||||||
// Destination Alpha Setup
|
// Destination Alpha Setup
|
||||||
const bool multidraw_fb_copy = m_features.multidraw_fb_copy && (config.require_one_barrier || config.require_full_barrier);
|
const bool multidraw_fb_copy = m_features.multidraw_fb_copy && (config.require_one_barrier || config.require_full_barrier);
|
||||||
GSTexture* primid_texture = nullptr;
|
|
||||||
if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::PrimIDTracking)
|
if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::PrimIDTracking)
|
||||||
{
|
{
|
||||||
primid_texture = CreateRenderTarget(rtsize.x, rtsize.y, GSTexture::Format::PrimID, false);
|
primid_texture = CreateRenderTarget(rtsize.x, rtsize.y, GSTexture::Format::PrimID, false);
|
||||||
@ -2652,7 +2732,7 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
config.cb_vs.max_depth.y = m_vertex.start;
|
config.cb_vs.base_vertex = m_vertex.start;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -2663,6 +2743,12 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!SetupAccuratePrims(config))
|
||||||
|
{
|
||||||
|
Console.Error("D3D11: Failed to setup accurate prims");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (config.vs.UseExpandIndexBuffer())
|
if (config.vs.UseExpandIndexBuffer())
|
||||||
{
|
{
|
||||||
IASetIndexBuffer(m_expand_ib.get());
|
IASetIndexBuffer(m_expand_ib.get());
|
||||||
@ -2742,8 +2828,6 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
|
|||||||
draw_ds = m_state.cached_dsv;
|
draw_ds = m_state.cached_dsv;
|
||||||
}
|
}
|
||||||
|
|
||||||
GSTexture* draw_rt_clone = nullptr;
|
|
||||||
|
|
||||||
if (draw_rt && (config.require_one_barrier || (config.require_full_barrier && m_features.multidraw_fb_copy) || (config.tex && config.tex == config.rt)))
|
if (draw_rt && (config.require_one_barrier || (config.require_full_barrier && m_features.multidraw_fb_copy) || (config.tex && config.tex == config.rt)))
|
||||||
{
|
{
|
||||||
// Requires a copy of the RT.
|
// Requires a copy of the RT.
|
||||||
@ -2754,6 +2838,15 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
|
|||||||
Console.Warning("D3D11: Failed to allocate temp texture for RT copy.");
|
Console.Warning("D3D11: Failed to allocate temp texture for RT copy.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Depth feedback: SendHWDraw copies draw_ds into this clone and binds the clone as a shader resource.
if (draw_ds && config.require_full_barrier && m_features.multidraw_fb_copy && config.ps.IsFeedbackLoopDepth())
{
	// Requires a copy of the DS.
	draw_ds_clone = CreateTexture(rtsize.x, rtsize.y, 1, draw_ds->GetFormat(), true);

	// Check the texture that was just requested (the DS clone, not the RT clone).
	if (!draw_ds_clone)
		Console.Warning("D3D11: Failed to allocate temp texture for DS copy.");
}
|
||||||
|
|
||||||
OMSetRenderTargets(draw_rt, draw_ds, &config.scissor, read_only_dsv);
|
OMSetRenderTargets(draw_rt, draw_ds, &config.scissor, read_only_dsv);
|
||||||
SetupOM(config.depth, OMBlendSelector(config.colormask, config.blend), config.blend.constant);
|
SetupOM(config.depth, OMBlendSelector(config.colormask, config.blend), config.blend.constant);
|
||||||
|
|
||||||
@ -2761,7 +2854,7 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
|
|||||||
if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::StencilOne && multidraw_fb_copy)
|
if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::StencilOne && multidraw_fb_copy)
|
||||||
m_ctx->ClearDepthStencilView(*static_cast<GSTexture11*>(draw_ds), D3D11_CLEAR_STENCIL, 0.0f, 1);
|
m_ctx->ClearDepthStencilView(*static_cast<GSTexture11*>(draw_ds), D3D11_CLEAR_STENCIL, 0.0f, 1);
|
||||||
|
|
||||||
SendHWDraw(config, draw_rt_clone, draw_rt, config.require_one_barrier, config.require_full_barrier, false);
|
SendHWDraw(config, draw_rt_clone, draw_rt, draw_ds_clone, draw_ds, config.require_one_barrier, config.require_full_barrier, false);
|
||||||
|
|
||||||
if (config.blend_multi_pass.enable)
|
if (config.blend_multi_pass.enable)
|
||||||
{
|
{
|
||||||
@ -2787,15 +2880,10 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
|
|||||||
}
|
}
|
||||||
|
|
||||||
SetupOM(config.alpha_second_pass.depth, OMBlendSelector(config.alpha_second_pass.colormask, config.blend), config.blend.constant);
|
SetupOM(config.alpha_second_pass.depth, OMBlendSelector(config.alpha_second_pass.colormask, config.blend), config.blend.constant);
|
||||||
SendHWDraw(config, draw_rt_clone, draw_rt, config.alpha_second_pass.require_one_barrier, config.alpha_second_pass.require_full_barrier, true);
|
SendHWDraw(config, draw_rt_clone, draw_rt, draw_ds_clone, draw_ds,
|
||||||
|
config.alpha_second_pass.require_one_barrier, config.alpha_second_pass.require_full_barrier, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (draw_rt_clone)
|
|
||||||
Recycle(draw_rt_clone);
|
|
||||||
|
|
||||||
if (primid_texture)
|
|
||||||
Recycle(primid_texture);
|
|
||||||
|
|
||||||
if (colclip_rt)
|
if (colclip_rt)
|
||||||
{
|
{
|
||||||
config.colclip_update_area = config.colclip_update_area.runion(config.drawarea);
|
config.colclip_update_area = config.colclip_update_area.runion(config.drawarea);
|
||||||
@ -2814,19 +2902,29 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDevice11::SendHWDraw(const GSHWDrawConfig& config, GSTexture* draw_rt_clone, GSTexture* draw_rt, const bool one_barrier, const bool full_barrier, const bool skip_first_barrier)
|
void GSDevice11::SendHWDraw(const GSHWDrawConfig& config,
|
||||||
|
GSTexture* draw_rt_clone, GSTexture* draw_rt, GSTexture* draw_ds_clone, GSTexture* draw_ds,
|
||||||
|
const bool one_barrier, const bool full_barrier, const bool skip_first_barrier)
|
||||||
{
|
{
|
||||||
if (draw_rt_clone)
|
if (draw_rt_clone || draw_ds_clone)
|
||||||
{
|
{
|
||||||
#ifdef PCSX2_DEVBUILD
|
#ifdef PCSX2_DEVBUILD
|
||||||
if ((one_barrier || full_barrier) && !config.ps.IsFeedbackLoop()) [[unlikely]]
|
if ((one_barrier || full_barrier) && !(config.ps.IsFeedbackLoop() || config.ps.IsFeedbackLoopDepth())) [[unlikely]]
|
||||||
Console.Warning("D3D11: Possible unnecessary copy detected.");
|
Console.Warning("D3D11: Possible unnecessary copy detected.");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
auto CopyAndBind = [&](GSVector4i drawarea) {
|
auto CopyAndBind = [&](GSVector4i drawarea) {
|
||||||
CopyRect(draw_rt, draw_rt_clone, drawarea, drawarea.left, drawarea.top);
|
if (draw_rt_clone)
|
||||||
|
CopyRect(draw_rt, draw_rt_clone, drawarea, drawarea.left, drawarea.top);
|
||||||
|
if (draw_ds_clone)
|
||||||
|
CopyRect(draw_ds, draw_ds_clone, drawarea, drawarea.left, drawarea.top);
|
||||||
if (one_barrier || full_barrier)
|
if (one_barrier || full_barrier)
|
||||||
PSSetShaderResource(2, draw_rt_clone);
|
{
|
||||||
|
if (draw_rt_clone)
|
||||||
|
PSSetShaderResource(2, draw_rt_clone);
|
||||||
|
if (draw_ds_clone)
|
||||||
|
PSSetShaderResource(4, draw_ds_clone);
|
||||||
|
}
|
||||||
if (config.tex && config.tex == config.rt)
|
if (config.tex && config.tex == config.rt)
|
||||||
PSSetShaderResource(0, draw_rt_clone);
|
PSSetShaderResource(0, draw_rt_clone);
|
||||||
};
|
};
|
||||||
|
|||||||
@ -83,10 +83,14 @@ public:
|
|||||||
private:
|
private:
|
||||||
enum : u32
|
enum : u32
|
||||||
{
|
{
|
||||||
MAX_TEXTURES = 4,
|
MAX_TEXTURES = 5,
|
||||||
MAX_SAMPLERS = 1,
|
MAX_SAMPLERS = 1,
|
||||||
VERTEX_BUFFER_SIZE = 32 * 1024 * 1024,
|
VERTEX_BUFFER_SIZE = 32 * 1024 * 1024,
|
||||||
INDEX_BUFFER_SIZE = 16 * 1024 * 1024,
|
INDEX_BUFFER_SIZE = 16 * 1024 * 1024,
|
||||||
|
|
||||||
|
// Structured buffer size must be multiple of element size.
|
||||||
|
ACCURATE_PRIMS_BUFFER_SIZE = (32 * 1024 * 1024 / sizeof(AccuratePrimsEdgeData)) * sizeof(AccuratePrimsEdgeData),
|
||||||
|
|
||||||
NUM_TIMESTAMP_QUERIES = 5,
|
NUM_TIMESTAMP_QUERIES = 5,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -126,6 +130,8 @@ private:
|
|||||||
wil::com_ptr_nothrow<ID3D11Buffer> m_expand_vb;
|
wil::com_ptr_nothrow<ID3D11Buffer> m_expand_vb;
|
||||||
wil::com_ptr_nothrow<ID3D11Buffer> m_expand_ib;
|
wil::com_ptr_nothrow<ID3D11Buffer> m_expand_ib;
|
||||||
wil::com_ptr_nothrow<ID3D11ShaderResourceView> m_expand_vb_srv;
|
wil::com_ptr_nothrow<ID3D11ShaderResourceView> m_expand_vb_srv;
|
||||||
|
wil::com_ptr_nothrow<ID3D11Buffer> m_accurate_prims_b;
|
||||||
|
wil::com_ptr_nothrow<ID3D11ShaderResourceView> m_accurate_prims_b_srv;
|
||||||
|
|
||||||
D3D_FEATURE_LEVEL m_feature_level = D3D_FEATURE_LEVEL_10_0;
|
D3D_FEATURE_LEVEL m_feature_level = D3D_FEATURE_LEVEL_10_0;
|
||||||
u32 m_vb_pos = 0; // bytes
|
u32 m_vb_pos = 0; // bytes
|
||||||
@ -317,6 +323,7 @@ public:
|
|||||||
void IAUnmapVertexBuffer(u32 stride, u32 count);
|
void IAUnmapVertexBuffer(u32 stride, u32 count);
|
||||||
bool IASetVertexBuffer(const void* vertex, u32 stride, u32 count);
|
bool IASetVertexBuffer(const void* vertex, u32 stride, u32 count);
|
||||||
bool IASetExpandVertexBuffer(const void* vertex, u32 stride, u32 count);
|
bool IASetExpandVertexBuffer(const void* vertex, u32 stride, u32 count);
|
||||||
|
bool SetupAccuratePrims(GSHWDrawConfig& config);
|
||||||
|
|
||||||
u16* IAMapIndexBuffer(u32 count);
|
u16* IAMapIndexBuffer(u32 count);
|
||||||
void IAUnmapIndexBuffer(u32 count);
|
void IAUnmapIndexBuffer(u32 count);
|
||||||
@ -345,7 +352,9 @@ public:
|
|||||||
void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, u8 afix);
|
void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, u8 afix);
|
||||||
|
|
||||||
void RenderHW(GSHWDrawConfig& config) override;
|
void RenderHW(GSHWDrawConfig& config) override;
|
||||||
void SendHWDraw(const GSHWDrawConfig& config, GSTexture* draw_rt_clone, GSTexture* draw_rt, const bool one_barrier, const bool full_barrier, const bool skip_first_barrier);
|
void SendHWDraw(const GSHWDrawConfig& config,
|
||||||
|
GSTexture* draw_rt_clone, GSTexture* draw_rt, GSTexture* draw_ds_clone, GSTexture* draw_ds,
|
||||||
|
const bool one_barrier, const bool full_barrier, const bool skip_first_barrier);
|
||||||
|
|
||||||
void ClearSamplerCache() override;
|
void ClearSamplerCache() override;
|
||||||
|
|
||||||
|
|||||||
@ -20,29 +20,33 @@ D3D12StreamBuffer::~D3D12StreamBuffer()
|
|||||||
Destroy();
|
Destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool D3D12StreamBuffer::Create(u32 size)
|
bool D3D12StreamBuffer::Create(u32 size, bool default_heap)
|
||||||
{
|
{
|
||||||
const D3D12_RESOURCE_DESC resource_desc = {D3D12_RESOURCE_DIMENSION_BUFFER, 0, size, 1, 1, 1, DXGI_FORMAT_UNKNOWN,
|
const D3D12_RESOURCE_DESC resource_desc = {D3D12_RESOURCE_DIMENSION_BUFFER, 0, size, 1, 1, 1, DXGI_FORMAT_UNKNOWN,
|
||||||
{1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_NONE};
|
{1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_NONE};
|
||||||
|
|
||||||
D3D12MA::ALLOCATION_DESC allocationDesc = {};
|
D3D12MA::ALLOCATION_DESC allocationDesc = {};
|
||||||
allocationDesc.Flags = D3D12MA::ALLOCATION_FLAG_COMMITTED;
|
allocationDesc.Flags = D3D12MA::ALLOCATION_FLAG_COMMITTED;
|
||||||
allocationDesc.HeapType = D3D12_HEAP_TYPE_UPLOAD;
|
allocationDesc.HeapType = default_heap ? D3D12_HEAP_TYPE_DEFAULT : D3D12_HEAP_TYPE_UPLOAD;
|
||||||
|
|
||||||
wil::com_ptr_nothrow<ID3D12Resource> buffer;
|
wil::com_ptr_nothrow<ID3D12Resource> buffer;
|
||||||
wil::com_ptr_nothrow<D3D12MA::Allocation> allocation;
|
wil::com_ptr_nothrow<D3D12MA::Allocation> allocation;
|
||||||
HRESULT hr = GSDevice12::GetInstance()->GetAllocator()->CreateResource(&allocationDesc, &resource_desc,
|
HRESULT hr = GSDevice12::GetInstance()->GetAllocator()->CreateResource(&allocationDesc, &resource_desc,
|
||||||
D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, allocation.put(), IID_PPV_ARGS(buffer.put()));
|
default_heap ? D3D12_RESOURCE_STATE_COMMON : D3D12_RESOURCE_STATE_GENERIC_READ,
|
||||||
|
nullptr, allocation.put(), IID_PPV_ARGS(buffer.put()));
|
||||||
pxAssertMsg(SUCCEEDED(hr), "Allocate buffer");
|
pxAssertMsg(SUCCEEDED(hr), "Allocate buffer");
|
||||||
if (FAILED(hr))
|
if (FAILED(hr))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
static const D3D12_RANGE read_range = {};
|
static const D3D12_RANGE read_range = {};
|
||||||
u8* host_pointer;
|
u8* host_pointer = nullptr;
|
||||||
hr = buffer->Map(0, &read_range, reinterpret_cast<void**>(&host_pointer));
|
if (!default_heap)
|
||||||
pxAssertMsg(SUCCEEDED(hr), "Map buffer");
|
{
|
||||||
if (FAILED(hr))
|
hr = buffer->Map(0, &read_range, reinterpret_cast<void**>(&host_pointer));
|
||||||
return false;
|
pxAssertMsg(SUCCEEDED(hr), "Map buffer");
|
||||||
|
if (FAILED(hr))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
Destroy(true);
|
Destroy(true);
|
||||||
|
|
||||||
@ -51,6 +55,7 @@ bool D3D12StreamBuffer::Create(u32 size)
|
|||||||
m_host_pointer = host_pointer;
|
m_host_pointer = host_pointer;
|
||||||
m_size = size;
|
m_size = size;
|
||||||
m_gpu_pointer = m_buffer->GetGPUVirtualAddress();
|
m_gpu_pointer = m_buffer->GetGPUVirtualAddress();
|
||||||
|
m_default_heap = default_heap;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -148,6 +153,7 @@ void D3D12StreamBuffer::Destroy(bool defer)
|
|||||||
m_current_offset = 0;
|
m_current_offset = 0;
|
||||||
m_current_space = 0;
|
m_current_space = 0;
|
||||||
m_current_gpu_position = 0;
|
m_current_gpu_position = 0;
|
||||||
|
m_default_heap = false;
|
||||||
m_tracked_fences.clear();
|
m_tracked_fences.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -22,7 +22,7 @@ public:
|
|||||||
D3D12StreamBuffer();
|
D3D12StreamBuffer();
|
||||||
~D3D12StreamBuffer();
|
~D3D12StreamBuffer();
|
||||||
|
|
||||||
bool Create(u32 size);
|
bool Create(u32 size, bool default_heap = false);
|
||||||
|
|
||||||
__fi bool IsValid() const { return static_cast<bool>(m_buffer); }
|
__fi bool IsValid() const { return static_cast<bool>(m_buffer); }
|
||||||
__fi ID3D12Resource* GetBuffer() const { return m_buffer.get(); }
|
__fi ID3D12Resource* GetBuffer() const { return m_buffer.get(); }
|
||||||
@ -54,7 +54,8 @@ private:
|
|||||||
wil::com_ptr_nothrow<ID3D12Resource> m_buffer;
|
wil::com_ptr_nothrow<ID3D12Resource> m_buffer;
|
||||||
wil::com_ptr_nothrow<D3D12MA::Allocation> m_allocation;
|
wil::com_ptr_nothrow<D3D12MA::Allocation> m_allocation;
|
||||||
D3D12_GPU_VIRTUAL_ADDRESS m_gpu_pointer = {};
|
D3D12_GPU_VIRTUAL_ADDRESS m_gpu_pointer = {};
|
||||||
u8* m_host_pointer = nullptr;
|
u8* m_host_pointer = nullptr; // Only used for upload heaps.
|
||||||
|
bool m_default_heap = false; // False for upload heap; true for default heap.
|
||||||
|
|
||||||
// List of fences and the corresponding positions in the buffer
|
// List of fences and the corresponding positions in the buffer
|
||||||
std::deque<std::pair<u64, u32>> m_tracked_fences;
|
std::deque<std::pair<u64, u32>> m_tracked_fences;
|
||||||
|
|||||||
@ -624,52 +624,91 @@ bool GSDevice12::SetGPUTimingEnabled(bool enabled)
|
|||||||
bool GSDevice12::AllocatePreinitializedGPUBuffer(u32 size, ID3D12Resource** gpu_buffer,
|
bool GSDevice12::AllocatePreinitializedGPUBuffer(u32 size, ID3D12Resource** gpu_buffer,
|
||||||
D3D12MA::Allocation** gpu_allocation, const std::function<void(void*)>& fill_callback)
|
D3D12MA::Allocation** gpu_allocation, const std::function<void(void*)>& fill_callback)
|
||||||
{
|
{
|
||||||
// Try to place the fixed index buffer in GPU local memory.
|
// Allocate and fill staging buffer
|
||||||
// Use the staging buffer to copy into it.
|
ID3D12Resource* cpu_buffer = AllocateUploadStagingBuffer(size, fill_callback);
|
||||||
|
|
||||||
|
// Create GPU buffer
|
||||||
const D3D12_RESOURCE_DESC rd = {D3D12_RESOURCE_DIMENSION_BUFFER, 0, size, 1, 1, 1, DXGI_FORMAT_UNKNOWN, {1, 0},
|
const D3D12_RESOURCE_DESC rd = {D3D12_RESOURCE_DIMENSION_BUFFER, 0, size, 1, 1, 1, DXGI_FORMAT_UNKNOWN, {1, 0},
|
||||||
D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_NONE};
|
D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_NONE};
|
||||||
|
|
||||||
const D3D12MA::ALLOCATION_DESC cpu_ad = {D3D12MA::ALLOCATION_FLAG_NONE, D3D12_HEAP_TYPE_UPLOAD};
|
|
||||||
|
|
||||||
ComPtr<ID3D12Resource> cpu_buffer;
|
|
||||||
ComPtr<D3D12MA::Allocation> cpu_allocation;
|
|
||||||
HRESULT hr = m_allocator->CreateResource(
|
|
||||||
&cpu_ad, &rd, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, cpu_allocation.put(), IID_PPV_ARGS(cpu_buffer.put()));
|
|
||||||
pxAssertMsg(SUCCEEDED(hr), "Allocate CPU buffer");
|
|
||||||
if (FAILED(hr))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
static constexpr const D3D12_RANGE read_range = {};
|
|
||||||
const D3D12_RANGE write_range = {0, size};
|
|
||||||
void* mapped;
|
|
||||||
hr = cpu_buffer->Map(0, &read_range, &mapped);
|
|
||||||
pxAssertMsg(SUCCEEDED(hr), "Map CPU buffer");
|
|
||||||
if (FAILED(hr))
|
|
||||||
return false;
|
|
||||||
fill_callback(mapped);
|
|
||||||
cpu_buffer->Unmap(0, &write_range);
|
|
||||||
|
|
||||||
const D3D12MA::ALLOCATION_DESC gpu_ad = {D3D12MA::ALLOCATION_FLAG_COMMITTED, D3D12_HEAP_TYPE_DEFAULT};
|
const D3D12MA::ALLOCATION_DESC gpu_ad = {D3D12MA::ALLOCATION_FLAG_COMMITTED, D3D12_HEAP_TYPE_DEFAULT};
|
||||||
|
HRESULT hr = m_allocator->CreateResource(
|
||||||
hr = m_allocator->CreateResource(
|
|
||||||
&gpu_ad, &rd, D3D12_RESOURCE_STATE_COMMON, nullptr, gpu_allocation, IID_PPV_ARGS(gpu_buffer));
|
&gpu_ad, &rd, D3D12_RESOURCE_STATE_COMMON, nullptr, gpu_allocation, IID_PPV_ARGS(gpu_buffer));
|
||||||
pxAssertMsg(SUCCEEDED(hr), "Allocate GPU buffer");
|
pxAssertMsg(SUCCEEDED(hr), "Allocate GPU buffer");
|
||||||
if (FAILED(hr))
|
if (FAILED(hr))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
GetInitCommandList()->CopyBufferRegion(*gpu_buffer, 0, cpu_buffer.get(), 0, size);
|
// Copy the data
|
||||||
|
GetInitCommandList()->CopyBufferRegion(*gpu_buffer, 0, cpu_buffer, 0, size);
|
||||||
|
|
||||||
|
// Transition GPU buffer to COPY_DEST
|
||||||
D3D12_RESOURCE_BARRIER rb = {D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, D3D12_RESOURCE_BARRIER_FLAG_NONE};
|
D3D12_RESOURCE_BARRIER rb = {D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, D3D12_RESOURCE_BARRIER_FLAG_NONE};
|
||||||
rb.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
|
rb.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
|
||||||
rb.Transition.pResource = *gpu_buffer;
|
rb.Transition.pResource = *gpu_buffer;
|
||||||
rb.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; // COMMON -> COPY_DEST at first use.
|
rb.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; // COMMON -> COPY_DEST at first use.
|
||||||
rb.Transition.StateAfter = D3D12_RESOURCE_STATE_INDEX_BUFFER;
|
rb.Transition.StateAfter = D3D12_RESOURCE_STATE_INDEX_BUFFER;
|
||||||
GetInitCommandList()->ResourceBarrier(1, &rb);
|
GetInitCommandList()->ResourceBarrier(1, &rb);
|
||||||
|
|
||||||
DeferResourceDestruction(cpu_allocation.get(), cpu_buffer.get());
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Writes `size` bytes into the shared texture upload stream buffer.
//   size       - number of bytes to reserve and commit.
//   write_data - called with the host pointer of the reserved region to fill it.
//   offset_out - receives the offset of the reserved region within the returned buffer.
// Returns the stream buffer resource, or nullptr if space could not be reserved even after
// executing the current command list. offset_out is left untouched on failure.
ID3D12Resource* GSDevice12::WriteTextureUploadBuffer(u32 size, std::function<void(void*)> write_data, u32& offset_out)
{
	bool reserved = m_texture_stream_buffer.ReserveMemory(size, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT);
	if (!reserved)
	{
		// No room: execute the pending command list, then try exactly once more.
		GSDevice12::GetInstance()->ExecuteCommandList(
			false, "While waiting for %u bytes in texture upload buffer", size);
		reserved = m_texture_stream_buffer.ReserveMemory(size, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT);
	}

	if (!reserved)
	{
		Console.Error("Failed to reserve texture upload memory (%u bytes).", size);
		return nullptr;
	}

	// Report where the data lands, let the caller fill it, then commit the region.
	offset_out = m_texture_stream_buffer.GetCurrentOffset();
	write_data(m_texture_stream_buffer.GetCurrentHostPointer());
	m_texture_stream_buffer.CommitMemory(size);

	return m_texture_stream_buffer.GetBuffer();
}
|
||||||
|
|
||||||
|
// Creates a one-off upload-heap buffer of `size` bytes, lets `write_data` fill it through a mapped
// pointer, and queues it for deferred destruction.
//   size       - size of the staging buffer in bytes.
//   write_data - called once with the mapped memory to write the contents.
// Returns a non-owning pointer to the staging resource, or nullptr if creation or mapping failed.
ID3D12Resource* GSDevice12::AllocateUploadStagingBuffer(u32 size, std::function<void(void*)> write_data)
{
	// Describe and create the upload-heap buffer.
	const D3D12_RESOURCE_DESC staging_desc = {D3D12_RESOURCE_DIMENSION_BUFFER, 0, size, 1, 1, 1,
		DXGI_FORMAT_UNKNOWN, {1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_NONE};
	const D3D12MA::ALLOCATION_DESC staging_alloc_desc = {D3D12MA::ALLOCATION_FLAG_NONE, D3D12_HEAP_TYPE_UPLOAD};

	wil::com_ptr_nothrow<ID3D12Resource> staging;
	wil::com_ptr_nothrow<D3D12MA::Allocation> staging_allocation;
	HRESULT hr = GetAllocator()->CreateResource(&staging_alloc_desc, &staging_desc,
		D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, staging_allocation.put(), IID_PPV_ARGS(staging.put()));
	if (FAILED(hr))
	{
		Console.WriteLn("(AllocateUploadStagingBuffer) CreateCommittedResource() failed with %08X", hr);
		return nullptr;
	}

	// Map with an empty read range: the CPU only writes to this buffer.
	static constexpr const D3D12_RANGE empty_read_range = {};
	void* mapped = nullptr;
	hr = staging->Map(0, &empty_read_range, &mapped);
	if (FAILED(hr))
	{
		Console.WriteLn("(AllocateUploadStagingBuffer) Map() failed with %08X", hr);
		return nullptr;
	}

	// Let the caller fill the buffer, then unmap declaring the whole buffer as written.
	write_data(mapped);
	const D3D12_RANGE written_range = {0, size};
	staging->Unmap(0, &written_range);

	// The buffer is only needed for the copy, so queue it for freeing once the command buffer
	// finishes. Per the original note, this also adds the reference that keeps the buffer alive
	// after the local smart pointers go out of scope.
	DeferResourceDestruction(staging_allocation.get(), staging.get());
	return staging.get();
}
|
||||||
|
|
||||||
RenderAPI GSDevice12::GetRenderAPI() const
|
RenderAPI GSDevice12::GetRenderAPI() const
|
||||||
{
|
{
|
||||||
return RenderAPI::D3D12;
|
return RenderAPI::D3D12;
|
||||||
@ -1250,6 +1289,8 @@ bool GSDevice12::CheckFeatures(const u32& vendor_id)
|
|||||||
DXGI_FEATURE_PRESENT_ALLOW_TEARING, &allow_tearing_supported, sizeof(allow_tearing_supported));
|
DXGI_FEATURE_PRESENT_ALLOW_TEARING, &allow_tearing_supported, sizeof(allow_tearing_supported));
|
||||||
m_allow_tearing_supported = (SUCCEEDED(hr) && allow_tearing_supported == TRUE);
|
m_allow_tearing_supported = (SUCCEEDED(hr) && allow_tearing_supported == TRUE);
|
||||||
|
|
||||||
|
m_features.accurate_prims = GSConfig.HWAccuratePrims;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2178,6 +2219,93 @@ void GSDevice12::IASetIndexBuffer(const void* index, size_t count)
|
|||||||
m_index_stream_buffer.CommitMemory(size);
|
m_index_stream_buffer.CommitMemory(size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Uploads this draw's accurate-prims edge data into the accurate-prims stream buffer and records
// the destination offset in m_accurate_prims_stream_buffer_offset.
// Does nothing unless config.accurate_prims is set. Ends the current render pass if one is active,
// since the copy is recorded on the command list outside of it.
void GSDevice12::SetupAccuratePrimsBuffer(GSHWDrawConfig& config)
{
	if (!config.accurate_prims)
		return;

	// Force the accurate prims SRV to be rebound.
	m_dirty_flags |= DIRTY_FLAG_PS_ACCURATE_PRIMS_BUFFER_BINDING;

	const u32 num_edges = config.accurate_prims_edge_data->size();
	const u32 num_bytes = num_edges * sizeof(AccuratePrimsEdgeData);

	// Reserve the destination region; on failure execute the command list and retry once.
	if (!m_accurate_prims_stream_buffer.ReserveMemory(num_bytes, sizeof(AccuratePrimsEdgeData)))
	{
		ExecuteCommandListAndRestartRenderPass(false, "Uploading bytes to accurate prims buffer");
		if (!m_accurate_prims_stream_buffer.ReserveMemory(num_bytes, sizeof(AccuratePrimsEdgeData)))
			pxFailRel("Failed to reserve space for accurate prims");
	}

	const u32 dst_offset = m_accurate_prims_stream_buffer.GetCurrentOffset();

	if (InRenderPass())
		EndRenderPass();

	// Stage the edge data in CPU-writable upload memory.
	const auto copy_edge_data = [&](void* map_ptr) {
		std::memcpy(map_ptr, config.accurate_prims_edge_data->data(), num_bytes);
	};

	ID3D12Resource* src_buffer;
	u32 src_offset;
	if (num_bytes <= m_texture_stream_buffer.GetSize() / 2)
	{
		// Small enough to go through the shared texture streaming buffer.
		src_buffer = WriteTextureUploadBuffer(num_bytes, copy_edge_data, src_offset);
	}
	else
	{
		// Data larger than half the streaming buffer gets its own staging buffer; otherwise the
		// allocation would either fail or require lots of cmdbuffer submissions.
		src_buffer = AllocateUploadStagingBuffer(num_bytes, copy_edge_data);
		src_offset = 0;
	}

	if (!src_buffer)
	{
		Console.Error("Failed to get upload buffer for accurate prims data.");
		return;
	}

	// Pixel shader resource -> copy destination, so the buffer can receive the copy.
	D3D12_RESOURCE_BARRIER to_copy_dest = {};
	to_copy_dest.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
	to_copy_dest.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
	to_copy_dest.Transition.pResource = m_accurate_prims_stream_buffer.GetBuffer();
	to_copy_dest.Transition.Subresource = 0;
	to_copy_dest.Transition.StateBefore = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
	to_copy_dest.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST;
	GetCommandList()->ResourceBarrier(1, &to_copy_dest);

	// Copy from the upload memory into the reserved GPU region.
	GetCommandList()->CopyBufferRegion(
		m_accurate_prims_stream_buffer.GetBuffer(), dst_offset, src_buffer, src_offset, num_bytes);

	// Commit the GPU region.
	m_accurate_prims_stream_buffer.CommitMemory(num_bytes);

	// Copy destination -> pixel shader resource, since the next draw reads the buffer.
	D3D12_RESOURCE_BARRIER to_shader_resource = {};
	to_shader_resource.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
	to_shader_resource.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
	to_shader_resource.Transition.pResource = m_accurate_prims_stream_buffer.GetBuffer();
	to_shader_resource.Transition.Subresource = 0;
	to_shader_resource.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST;
	to_shader_resource.Transition.StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
	GetCommandList()->ResourceBarrier(1, &to_shader_resource);

	// Remembered for the constant buffer (see SetupAccuratePrimsConstants).
	m_accurate_prims_stream_buffer_offset = dst_offset;
}
|
||||||
|
|
||||||
|
// Fills in the accurate-prims fields of the VS/PS constant buffers and submits both.
// Does nothing unless config.accurate_prims is set.
void GSDevice12::SetupAccuratePrimsConstants(GSHWDrawConfig& config)
{
	if (!config.accurate_prims)
		return;

	// The stream buffer offset is stored in bytes; the pixel shader constant is an element index.
	const u32 first_edge_index = m_accurate_prims_stream_buffer_offset / sizeof(AccuratePrimsEdgeData);

	config.cb_vs.base_vertex = m_vertex.start;
	config.cb_ps.accurate_prims_base_index.x = first_edge_index;

	SetVSConstantBuffer(config.cb_vs);
	SetPSConstantBuffer(config.cb_ps);
}
|
||||||
|
|
||||||
void GSDevice12::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i& scissor)
|
void GSDevice12::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i& scissor)
|
||||||
{
|
{
|
||||||
GSTexture12* vkRt = static_cast<GSTexture12*>(rt);
|
GSTexture12* vkRt = static_cast<GSTexture12*>(rt);
|
||||||
@ -2305,9 +2433,9 @@ bool GSDevice12::GetTextureGroupDescriptors(
|
|||||||
}
|
}
|
||||||
|
|
||||||
D3D12_CPU_DESCRIPTOR_HANDLE dst_handle = *gpu_handle;
|
D3D12_CPU_DESCRIPTOR_HANDLE dst_handle = *gpu_handle;
|
||||||
D3D12_CPU_DESCRIPTOR_HANDLE src_handles[NUM_TFX_TEXTURES];
|
D3D12_CPU_DESCRIPTOR_HANDLE src_handles[NUM_TOTAL_TFX_TEXTURES];
|
||||||
UINT src_sizes[NUM_TFX_TEXTURES];
|
UINT src_sizes[NUM_TOTAL_TFX_TEXTURES];
|
||||||
pxAssert(count <= NUM_TFX_TEXTURES);
|
pxAssert(count <= NUM_TOTAL_TFX_TEXTURES);
|
||||||
for (u32 i = 0; i < count; i++)
|
for (u32 i = 0; i < count; i++)
|
||||||
{
|
{
|
||||||
src_handles[i] = cpu_handles[i];
|
src_handles[i] = cpu_handles[i];
|
||||||
@ -2365,6 +2493,39 @@ bool GSDevice12::CreateBuffers()
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!m_accurate_prims_stream_buffer.Create(
|
||||||
|
m_features.accurate_prims ? ACCURATE_PRIMS_BUFFER_SIZE : sizeof(AccuratePrimsEdgeData), true))
|
||||||
|
{
|
||||||
|
Host::ReportErrorAsync("GS", "Failed to allocate accurate prims buffer");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!m_descriptor_heap_manager.Allocate(&m_accurate_prims_srv_descriptor_cpu))
|
||||||
|
{
|
||||||
|
Console.Error("Failed to allocate accurate prims CPU descriptor");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (m_features.accurate_prims)
|
||||||
|
{
|
||||||
|
// Transition to accurate prims buffer to pixel shader resource and create the shader resource view.
|
||||||
|
const D3D12_RESOURCE_BARRIER barrier = {
|
||||||
|
D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
|
||||||
|
D3D12_RESOURCE_BARRIER_FLAG_NONE,
|
||||||
|
{{m_accurate_prims_stream_buffer.GetBuffer(), 0,
|
||||||
|
D3D12_RESOURCE_STATE_COMMON,
|
||||||
|
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE}}};
|
||||||
|
GetInitCommandList()->ResourceBarrier(1, &barrier);
|
||||||
|
|
||||||
|
D3D12_SHADER_RESOURCE_VIEW_DESC desc = {
|
||||||
|
DXGI_FORMAT_UNKNOWN, D3D12_SRV_DIMENSION_BUFFER, D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING};
|
||||||
|
desc.Buffer.FirstElement = 0;
|
||||||
|
desc.Buffer.NumElements = ACCURATE_PRIMS_BUFFER_SIZE / sizeof(AccuratePrimsEdgeData);
|
||||||
|
desc.Buffer.StructureByteStride = sizeof(AccuratePrimsEdgeData);
|
||||||
|
m_device->CreateShaderResourceView(m_accurate_prims_stream_buffer.GetBuffer(), &desc,
|
||||||
|
m_accurate_prims_srv_descriptor_cpu.cpu_handle);
|
||||||
|
}
|
||||||
|
|
||||||
if (!m_vertex_constant_buffer.Create(VERTEX_UNIFORM_BUFFER_SIZE))
|
if (!m_vertex_constant_buffer.Create(VERTEX_UNIFORM_BUFFER_SIZE))
|
||||||
{
|
{
|
||||||
Host::ReportErrorAsync("GS", "Failed to allocate vertex uniform buffer");
|
Host::ReportErrorAsync("GS", "Failed to allocate vertex uniform buffer");
|
||||||
@ -2415,9 +2576,11 @@ bool GSDevice12::CreateRootSignatures()
|
|||||||
rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL);
|
rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL);
|
||||||
rsb.AddCBVParameter(1, D3D12_SHADER_VISIBILITY_PIXEL);
|
rsb.AddCBVParameter(1, D3D12_SHADER_VISIBILITY_PIXEL);
|
||||||
rsb.AddSRVParameter(0, D3D12_SHADER_VISIBILITY_VERTEX);
|
rsb.AddSRVParameter(0, D3D12_SHADER_VISIBILITY_VERTEX);
|
||||||
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 2, D3D12_SHADER_VISIBILITY_PIXEL);
|
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 2, D3D12_SHADER_VISIBILITY_PIXEL); // Source / Palette
|
||||||
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, NUM_TFX_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL);
|
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, NUM_TFX_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL);
|
||||||
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 2, 2, D3D12_SHADER_VISIBILITY_PIXEL);
|
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 2, 2, D3D12_SHADER_VISIBILITY_PIXEL); // RT / PrimID
|
||||||
|
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 4, 1, D3D12_SHADER_VISIBILITY_PIXEL); // Depth
|
||||||
|
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 5, 1, D3D12_SHADER_VISIBILITY_PIXEL); // Accurate Prims
|
||||||
if (!(m_tfx_root_signature = rsb.Create()))
|
if (!(m_tfx_root_signature = rsb.Create()))
|
||||||
return false;
|
return false;
|
||||||
D3D12::SetObjectName(m_tfx_root_signature.get(), "TFX root signature");
|
D3D12::SetObjectName(m_tfx_root_signature.get(), "TFX root signature");
|
||||||
@ -2805,6 +2968,7 @@ void GSDevice12::DestroyResources()
|
|||||||
m_vertex_constant_buffer.Destroy(false);
|
m_vertex_constant_buffer.Destroy(false);
|
||||||
m_index_stream_buffer.Destroy(false);
|
m_index_stream_buffer.Destroy(false);
|
||||||
m_vertex_stream_buffer.Destroy(false);
|
m_vertex_stream_buffer.Destroy(false);
|
||||||
|
m_accurate_prims_stream_buffer.Destroy(false);
|
||||||
|
|
||||||
m_utility_root_signature.reset();
|
m_utility_root_signature.reset();
|
||||||
m_tfx_root_signature.reset();
|
m_tfx_root_signature.reset();
|
||||||
@ -2818,6 +2982,7 @@ void GSDevice12::DestroyResources()
|
|||||||
m_shader_cache.Close();
|
m_shader_cache.Close();
|
||||||
|
|
||||||
m_descriptor_heap_manager.Free(&m_null_srv_descriptor);
|
m_descriptor_heap_manager.Free(&m_null_srv_descriptor);
|
||||||
|
m_descriptor_heap_manager.Free(&m_accurate_prims_srv_descriptor_cpu);
|
||||||
m_timestamp_query_buffer.reset();
|
m_timestamp_query_buffer.reset();
|
||||||
m_timestamp_query_allocation.reset();
|
m_timestamp_query_allocation.reset();
|
||||||
m_sampler_heap_manager.Destroy();
|
m_sampler_heap_manager.Destroy();
|
||||||
@ -2851,6 +3016,7 @@ const ID3DBlob* GSDevice12::GetTFXVertexShader(GSHWDrawConfig::VSSelector sel)
|
|||||||
sm.AddMacro("VS_FST", sel.fst);
|
sm.AddMacro("VS_FST", sel.fst);
|
||||||
sm.AddMacro("VS_IIP", sel.iip);
|
sm.AddMacro("VS_IIP", sel.iip);
|
||||||
sm.AddMacro("VS_EXPAND", static_cast<int>(sel.expand));
|
sm.AddMacro("VS_EXPAND", static_cast<int>(sel.expand));
|
||||||
|
sm.AddMacro("VS_ACCURATE_PRIMS", static_cast<int>(sel.accurate_prims));
|
||||||
|
|
||||||
const char* entry_point = (sel.expand != GSHWDrawConfig::VSExpand::None) ? "vs_main_expand" : "vs_main";
|
const char* entry_point = (sel.expand != GSHWDrawConfig::VSExpand::None) ? "vs_main_expand" : "vs_main";
|
||||||
ComPtr<ID3DBlob> vs(m_shader_cache.GetVertexShader(m_tfx_source, sm.GetPtr(), entry_point));
|
ComPtr<ID3DBlob> vs(m_shader_cache.GetVertexShader(m_tfx_source, sm.GetPtr(), entry_point));
|
||||||
@ -2922,6 +3088,10 @@ const ID3DBlob* GSDevice12::GetTFXPixelShader(const GSHWDrawConfig::PSSelector&
|
|||||||
sm.AddMacro("PS_TEX_IS_FB", sel.tex_is_fb);
|
sm.AddMacro("PS_TEX_IS_FB", sel.tex_is_fb);
|
||||||
sm.AddMacro("PS_NO_COLOR", sel.no_color);
|
sm.AddMacro("PS_NO_COLOR", sel.no_color);
|
||||||
sm.AddMacro("PS_NO_COLOR1", sel.no_color1);
|
sm.AddMacro("PS_NO_COLOR1", sel.no_color1);
|
||||||
|
sm.AddMacro("PS_ACCURATE_PRIMS", sel.accurate_prims);
|
||||||
|
sm.AddMacro("PS_ACCURATE_PRIMS_AA", sel.accurate_prims_aa);
|
||||||
|
sm.AddMacro("PS_ACCURATE_PRIMS_AA_ABE", sel.accurate_prims_aa_abe);
|
||||||
|
sm.AddMacro("PS_ZTST", sel.ztst);
|
||||||
|
|
||||||
ComPtr<ID3DBlob> ps(m_shader_cache.GetPixelShader(m_tfx_source, sm.GetPtr(), "ps_main"));
|
ComPtr<ID3DBlob> ps(m_shader_cache.GetPixelShader(m_tfx_source, sm.GetPtr(), "ps_main"));
|
||||||
it = m_tfx_pixel_shaders.emplace(sel, std::move(ps)).first;
|
it = m_tfx_pixel_shaders.emplace(sel, std::move(ps)).first;
|
||||||
@ -3155,6 +3325,7 @@ void GSDevice12::InvalidateCachedState()
|
|||||||
m_tfx_textures_handle_gpu.Clear();
|
m_tfx_textures_handle_gpu.Clear();
|
||||||
m_tfx_samplers_handle_gpu.Clear();
|
m_tfx_samplers_handle_gpu.Clear();
|
||||||
m_tfx_rt_textures_handle_gpu.Clear();
|
m_tfx_rt_textures_handle_gpu.Clear();
|
||||||
|
m_tfx_depth_textures_handle_gpu.Clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDevice12::SetVertexBuffer(D3D12_GPU_VIRTUAL_ADDRESS buffer, size_t size, size_t stride)
|
void GSDevice12::SetVertexBuffer(D3D12_GPU_VIRTUAL_ADDRESS buffer, size_t size, size_t stride)
|
||||||
@ -3236,7 +3407,11 @@ void GSDevice12::PSSetShaderResource(int i, GSTexture* sr, bool check_state)
|
|||||||
return;
|
return;
|
||||||
|
|
||||||
m_tfx_textures[i] = handle;
|
m_tfx_textures[i] = handle;
|
||||||
m_dirty_flags |= (i < 2) ? DIRTY_FLAG_TFX_TEXTURES : DIRTY_FLAG_TFX_RT_TEXTURES;
|
m_dirty_flags |=
|
||||||
|
(i < 2) ? DIRTY_FLAG_TFX_TEXTURES :
|
||||||
|
(i < 4) ? DIRTY_FLAG_TFX_RT_TEXTURES :
|
||||||
|
(i < 5) ? DIRTY_FLAG_TFX_DEPTH_TEXTURES :
|
||||||
|
0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDevice12::PSSetSampler(GSHWDrawConfig::SamplerSelector sel)
|
void GSDevice12::PSSetSampler(GSHWDrawConfig::SamplerSelector sel)
|
||||||
@ -3642,6 +3817,17 @@ bool GSDevice12::ApplyTFXState(bool already_execed)
|
|||||||
flags |= DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2;
|
flags |= DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Depth texture (slot 4) changed: fetch a GPU descriptor group for it and mark the depth
// descriptor table for rebinding.
if (flags & DIRTY_FLAG_TFX_DEPTH_TEXTURES)
{
	if (!GetTextureGroupDescriptors(&m_tfx_depth_textures_handle_gpu, m_tfx_textures.data() + 4, 1))
	{
		// Out of descriptor groups: execute the command list and redo the state application.
		ExecuteCommandListAndRestartRenderPass(false, "Ran out of TFX depth descriptor groups");
		return ApplyTFXState(true);
	}

	flags |= DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_3;
}
|
||||||
|
|
||||||
ID3D12GraphicsCommandList* cmdlist = GetCommandList();
|
ID3D12GraphicsCommandList* cmdlist = GetCommandList();
|
||||||
|
|
||||||
if (m_current_root_signature != RootSignature::TFX)
|
if (m_current_root_signature != RootSignature::TFX)
|
||||||
@ -3649,7 +3835,8 @@ bool GSDevice12::ApplyTFXState(bool already_execed)
|
|||||||
m_current_root_signature = RootSignature::TFX;
|
m_current_root_signature = RootSignature::TFX;
|
||||||
flags |= DIRTY_FLAG_VS_CONSTANT_BUFFER_BINDING | DIRTY_FLAG_PS_CONSTANT_BUFFER_BINDING |
|
flags |= DIRTY_FLAG_VS_CONSTANT_BUFFER_BINDING | DIRTY_FLAG_PS_CONSTANT_BUFFER_BINDING |
|
||||||
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE | DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE |
|
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE | DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE |
|
||||||
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2 | DIRTY_FLAG_PIPELINE;
|
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2 | DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_3 |
|
||||||
|
DIRTY_FLAG_PIPELINE;
|
||||||
cmdlist->SetGraphicsRootSignature(m_tfx_root_signature.get());
|
cmdlist->SetGraphicsRootSignature(m_tfx_root_signature.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3662,12 +3849,28 @@ bool GSDevice12::ApplyTFXState(bool already_execed)
|
|||||||
cmdlist->SetGraphicsRootShaderResourceView(TFX_ROOT_SIGNATURE_PARAM_VS_SRV,
|
cmdlist->SetGraphicsRootShaderResourceView(TFX_ROOT_SIGNATURE_PARAM_VS_SRV,
|
||||||
m_vertex_stream_buffer.GetGPUPointer() + m_vertex.start * sizeof(GSVertex));
|
m_vertex_stream_buffer.GetGPUPointer() + m_vertex.start * sizeof(GSVertex));
|
||||||
}
|
}
|
||||||
|
if (flags & DIRTY_FLAG_PS_ACCURATE_PRIMS_BUFFER_BINDING)
|
||||||
|
{
|
||||||
|
if (!GetDescriptorAllocator().Allocate(1, &m_accurate_prims_srv_descriptor_gpu))
|
||||||
|
{
|
||||||
|
Console.Error("Failed to allocate accurate prims GPU descriptor");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
m_device.get()->CopyDescriptorsSimple(
|
||||||
|
1, m_accurate_prims_srv_descriptor_gpu, m_accurate_prims_srv_descriptor_cpu, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
||||||
|
|
||||||
|
cmdlist->SetGraphicsRootDescriptorTable(TFX_ROOT_SIGNATURE_PARAM_PS_ACCURATE_PRIMS_SRV, m_accurate_prims_srv_descriptor_gpu);
|
||||||
|
|
||||||
|
}
|
||||||
if (flags & DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE)
|
if (flags & DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE)
|
||||||
cmdlist->SetGraphicsRootDescriptorTable(TFX_ROOT_SIGNATURE_PARAM_PS_TEXTURES, m_tfx_textures_handle_gpu);
|
cmdlist->SetGraphicsRootDescriptorTable(TFX_ROOT_SIGNATURE_PARAM_PS_TEXTURES, m_tfx_textures_handle_gpu);
|
||||||
if (flags & DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE)
|
if (flags & DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE)
|
||||||
cmdlist->SetGraphicsRootDescriptorTable(TFX_ROOT_SIGNATURE_PARAM_PS_SAMPLERS, m_tfx_samplers_handle_gpu);
|
cmdlist->SetGraphicsRootDescriptorTable(TFX_ROOT_SIGNATURE_PARAM_PS_SAMPLERS, m_tfx_samplers_handle_gpu);
|
||||||
if (flags & DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2)
|
if (flags & DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2)
|
||||||
cmdlist->SetGraphicsRootDescriptorTable(TFX_ROOT_SIGNATURE_PARAM_PS_RT_TEXTURES, m_tfx_rt_textures_handle_gpu);
|
cmdlist->SetGraphicsRootDescriptorTable(TFX_ROOT_SIGNATURE_PARAM_PS_RT_TEXTURES, m_tfx_rt_textures_handle_gpu);
|
||||||
|
if (flags & DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_3)
|
||||||
|
cmdlist->SetGraphicsRootDescriptorTable(TFX_ROOT_SIGNATURE_PARAM_PS_DEPTH_TEXTURES, m_tfx_depth_textures_handle_gpu);
|
||||||
|
|
||||||
ApplyBaseState(flags, cmdlist);
|
ApplyBaseState(flags, cmdlist);
|
||||||
return true;
|
return true;
|
||||||
@ -3832,12 +4035,26 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
|
|||||||
GSTexture12* draw_rt = static_cast<GSTexture12*>(config.rt);
|
GSTexture12* draw_rt = static_cast<GSTexture12*>(config.rt);
|
||||||
GSTexture12* draw_ds = static_cast<GSTexture12*>(config.ds);
|
GSTexture12* draw_ds = static_cast<GSTexture12*>(config.ds);
|
||||||
GSTexture12* draw_rt_clone = nullptr;
|
GSTexture12* draw_rt_clone = nullptr;
|
||||||
|
GSTexture12* draw_ds_clone = nullptr;
|
||||||
|
GSTexture12* date_image = nullptr;
|
||||||
|
|
||||||
|
ScopedGuard recycle_temp_textures([&]() {
|
||||||
|
if (draw_rt_clone)
|
||||||
|
Recycle(draw_rt_clone);
|
||||||
|
if (draw_ds_clone)
|
||||||
|
Recycle(draw_ds_clone);
|
||||||
|
if (date_image)
|
||||||
|
Recycle(date_image);
|
||||||
|
});
|
||||||
|
|
||||||
// Align the render area to 128x128, hopefully avoiding render pass restarts for small render area changes (e.g. Ratchet and Clank).
|
// Align the render area to 128x128, hopefully avoiding render pass restarts for small render area changes (e.g. Ratchet and Clank).
|
||||||
const GSVector2i rtsize(config.rt ? config.rt->GetSize() : config.ds->GetSize());
|
const GSVector2i rtsize(config.rt ? config.rt->GetSize() : config.ds->GetSize());
|
||||||
|
|
||||||
PipelineSelector& pipe = m_pipeline_selector;
|
PipelineSelector& pipe = m_pipeline_selector;
|
||||||
|
|
||||||
|
// Copying buffers needs to done outside render pass so do this early.
|
||||||
|
SetupAccuratePrimsBuffer(config);
|
||||||
|
|
||||||
// figure out the pipeline
|
// figure out the pipeline
|
||||||
UpdateHWPipelineSelector(config);
|
UpdateHWPipelineSelector(config);
|
||||||
|
|
||||||
@ -3906,7 +4123,6 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Primitive ID tracking DATE setup.
|
// Primitive ID tracking DATE setup.
|
||||||
GSTexture12* date_image = nullptr;
|
|
||||||
if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::PrimIDTracking)
|
if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::PrimIDTracking)
|
||||||
{
|
{
|
||||||
GSTexture* backup_rt = config.rt;
|
GSTexture* backup_rt = config.rt;
|
||||||
@ -3994,6 +4210,15 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
|
|||||||
Console.Warning("D3D12: Failed to allocate temp texture for RT copy.");
|
Console.Warning("D3D12: Failed to allocate temp texture for RT copy.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (draw_ds && config.require_full_barrier && m_features.multidraw_fb_copy && config.ps.IsFeedbackLoopDepth())
{
	// Requires a copy of the DS.
	// Used as "bind ds" flag when texture barrier is unsupported for tex is fb.
	draw_ds_clone = static_cast<GSTexture12*>(CreateTexture(rtsize.x, rtsize.y, 1, draw_ds->GetFormat(), true));

	// Check the DS clone that was just requested, not the RT clone: the RT clone may legitimately be
	// null here, and a failed DS allocation would otherwise go unreported.
	if (!draw_ds_clone)
		Console.Warning("D3D12: Failed to allocate temp texture for DS copy.");
}
|
||||||
|
|
||||||
OMSetRenderTargets(draw_rt, draw_ds, config.scissor);
|
OMSetRenderTargets(draw_rt, draw_ds, config.scissor);
|
||||||
|
|
||||||
// Begin render pass if new target or out of the area.
|
// Begin render pass if new target or out of the area.
|
||||||
@ -4040,7 +4265,8 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
|
|||||||
UploadHWDrawVerticesAndIndices(config);
|
UploadHWDrawVerticesAndIndices(config);
|
||||||
|
|
||||||
// now we can do the actual draw
|
// now we can do the actual draw
|
||||||
SendHWDraw(pipe, config, draw_rt_clone, draw_rt, config.require_one_barrier, config.require_full_barrier, false);
|
SendHWDraw(pipe, config, draw_rt_clone, draw_rt, draw_ds_clone, draw_ds,
|
||||||
|
config.require_one_barrier, config.require_full_barrier, false);
|
||||||
|
|
||||||
// blend second pass
|
// blend second pass
|
||||||
if (config.blend_multi_pass.enable)
|
if (config.blend_multi_pass.enable)
|
||||||
@ -4070,15 +4296,10 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
|
|||||||
pipe.cms = config.alpha_second_pass.colormask;
|
pipe.cms = config.alpha_second_pass.colormask;
|
||||||
pipe.dss = config.alpha_second_pass.depth;
|
pipe.dss = config.alpha_second_pass.depth;
|
||||||
pipe.bs = config.blend;
|
pipe.bs = config.blend;
|
||||||
SendHWDraw(pipe, config, draw_rt_clone, draw_rt, config.alpha_second_pass.require_one_barrier, config.alpha_second_pass.require_full_barrier, true);
|
SendHWDraw(pipe, config, draw_rt_clone, draw_rt, draw_ds_clone, draw_ds,
|
||||||
|
config.alpha_second_pass.require_one_barrier, config.alpha_second_pass.require_full_barrier, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (draw_rt_clone)
|
|
||||||
Recycle(draw_rt_clone);
|
|
||||||
|
|
||||||
if (date_image)
|
|
||||||
Recycle(date_image);
|
|
||||||
|
|
||||||
// now blit the colclip texture back to the original target
|
// now blit the colclip texture back to the original target
|
||||||
if (colclip_rt)
|
if (colclip_rt)
|
||||||
{
|
{
|
||||||
@ -4113,23 +4334,40 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDevice12::SendHWDraw(const PipelineSelector& pipe, const GSHWDrawConfig& config, GSTexture12* draw_rt_clone, GSTexture12* draw_rt, const bool one_barrier, const bool full_barrier, const bool skip_first_barrier)
|
void GSDevice12::SendHWDraw(const PipelineSelector& pipe, const GSHWDrawConfig& config,
|
||||||
|
GSTexture12* draw_rt_clone, GSTexture12* draw_rt,
|
||||||
|
GSTexture12* draw_ds_clone, GSTexture12* draw_ds,
|
||||||
|
const bool one_barrier, const bool full_barrier, const bool skip_first_barrier)
|
||||||
{
|
{
|
||||||
if (draw_rt_clone)
|
if (draw_rt_clone || draw_ds_clone)
|
||||||
{
|
{
|
||||||
|
|
||||||
#ifdef PCSX2_DEVBUILD
|
#ifdef PCSX2_DEVBUILD
|
||||||
if ((one_barrier || full_barrier) && !config.ps.IsFeedbackLoop()) [[unlikely]]
|
if ((one_barrier || full_barrier) && !(config.ps.IsFeedbackLoop() || config.ps.IsFeedbackLoopDepth())) [[unlikely]]
|
||||||
Console.Warning("D3D12: Possible unnecessary copy detected.");
|
Console.Warning("D3D12: Possible unnecessary copy detected.");
|
||||||
#endif
|
#endif
|
||||||
auto CopyAndBind = [&](GSVector4i drawarea) {
|
auto CopyAndBind = [&](GSVector4i drawarea) {
|
||||||
EndRenderPass();
|
EndRenderPass();
|
||||||
|
|
||||||
CopyRect(draw_rt, draw_rt_clone, drawarea, drawarea.left, drawarea.top);
|
if (draw_rt_clone)
|
||||||
draw_rt->TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET);
|
{
|
||||||
|
CopyRect(draw_rt, draw_rt_clone, drawarea, drawarea.left, drawarea.top);
|
||||||
|
draw_rt->TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (draw_ds_clone)
|
||||||
|
{
|
||||||
|
CopyRect(draw_ds, draw_ds_clone, drawarea, drawarea.left, drawarea.top);
|
||||||
|
draw_ds->TransitionToState(D3D12_RESOURCE_STATE_DEPTH_WRITE);
|
||||||
|
}
|
||||||
|
|
||||||
if (one_barrier || full_barrier)
|
if (one_barrier || full_barrier)
|
||||||
PSSetShaderResource(2, draw_rt_clone, true);
|
{
|
||||||
|
if (draw_rt_clone)
|
||||||
|
PSSetShaderResource(2, draw_rt_clone, true);
|
||||||
|
if (draw_ds_clone)
|
||||||
|
PSSetShaderResource(4, draw_ds_clone, true);
|
||||||
|
}
|
||||||
if (config.tex && config.tex == config.rt)
|
if (config.tex && config.tex == config.rt)
|
||||||
PSSetShaderResource(0, draw_rt_clone, true);
|
PSSetShaderResource(0, draw_rt_clone, true);
|
||||||
};
|
};
|
||||||
@ -4158,7 +4396,6 @@ void GSDevice12::SendHWDraw(const PipelineSelector& pipe, const GSHWDrawConfig&
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Optimization: For alpha second pass we can reuse the copy snapshot from the first pass.
|
// Optimization: For alpha second pass we can reuse the copy snapshot from the first pass.
|
||||||
if (!skip_first_barrier)
|
if (!skip_first_barrier)
|
||||||
CopyAndBind(config.drawarea);
|
CopyAndBind(config.drawarea);
|
||||||
@ -4182,7 +4419,7 @@ void GSDevice12::UpdateHWPipelineSelector(GSHWDrawConfig& config)
|
|||||||
m_pipeline_selector.ds = config.ds != nullptr;
|
m_pipeline_selector.ds = config.ds != nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDevice12::UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config)
|
void GSDevice12::UploadHWDrawVerticesAndIndices(GSHWDrawConfig& config)
|
||||||
{
|
{
|
||||||
IASetVertexBuffer(config.verts, sizeof(GSVertex), config.nverts);
|
IASetVertexBuffer(config.verts, sizeof(GSVertex), config.nverts);
|
||||||
|
|
||||||
@ -4200,4 +4437,7 @@ void GSDevice12::UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config)
|
|||||||
{
|
{
|
||||||
IASetIndexBuffer(config.indices, config.nindices);
|
IASetIndexBuffer(config.indices, config.nindices);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Needs to be done after vertex offset is set.
|
||||||
|
SetupAccuratePrimsConstants(config);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -129,6 +129,8 @@ public:
|
|||||||
// Allocates a temporary CPU staging buffer, fires the callback with it to populate, then copies to a GPU buffer.
|
// Allocates a temporary CPU staging buffer, fires the callback with it to populate, then copies to a GPU buffer.
|
||||||
bool AllocatePreinitializedGPUBuffer(u32 size, ID3D12Resource** gpu_buffer, D3D12MA::Allocation** gpu_allocation,
|
bool AllocatePreinitializedGPUBuffer(u32 size, ID3D12Resource** gpu_buffer, D3D12MA::Allocation** gpu_allocation,
|
||||||
const std::function<void(void*)>& fill_callback);
|
const std::function<void(void*)>& fill_callback);
|
||||||
|
ID3D12Resource* AllocateUploadStagingBuffer(u32 size, std::function<void(void*)> write_data);
|
||||||
|
ID3D12Resource* WriteTextureUploadBuffer(u32 size, std::function<void(void*)> write_data, u32& offset_out);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
struct CommandListResources
|
struct CommandListResources
|
||||||
@ -256,7 +258,8 @@ public:
|
|||||||
NUM_TFX_CONSTANT_BUFFERS = 2,
|
NUM_TFX_CONSTANT_BUFFERS = 2,
|
||||||
NUM_TFX_TEXTURES = 2,
|
NUM_TFX_TEXTURES = 2,
|
||||||
NUM_TFX_RT_TEXTURES = 2,
|
NUM_TFX_RT_TEXTURES = 2,
|
||||||
NUM_TOTAL_TFX_TEXTURES = NUM_TFX_TEXTURES + NUM_TFX_RT_TEXTURES,
|
NUM_TFX_DEPTH_TEXTURES = 1,
|
||||||
|
NUM_TOTAL_TFX_TEXTURES = NUM_TFX_TEXTURES + NUM_TFX_RT_TEXTURES + NUM_TFX_DEPTH_TEXTURES,
|
||||||
NUM_TFX_SAMPLERS = 1,
|
NUM_TFX_SAMPLERS = 1,
|
||||||
NUM_UTILITY_TEXTURES = 1,
|
NUM_UTILITY_TEXTURES = 1,
|
||||||
NUM_UTILITY_SAMPLERS = 1,
|
NUM_UTILITY_SAMPLERS = 1,
|
||||||
@ -264,6 +267,10 @@ public:
|
|||||||
|
|
||||||
VERTEX_BUFFER_SIZE = 32 * 1024 * 1024,
|
VERTEX_BUFFER_SIZE = 32 * 1024 * 1024,
|
||||||
INDEX_BUFFER_SIZE = 16 * 1024 * 1024,
|
INDEX_BUFFER_SIZE = 16 * 1024 * 1024,
|
||||||
|
|
||||||
|
// Structured buffer size must be multiple of element size.
|
||||||
|
ACCURATE_PRIMS_BUFFER_SIZE = (32 * 1024 * 1024 / sizeof(AccuratePrimsEdgeData)) * sizeof(AccuratePrimsEdgeData),
|
||||||
|
|
||||||
VERTEX_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024,
|
VERTEX_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024,
|
||||||
FRAGMENT_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024,
|
FRAGMENT_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024,
|
||||||
|
|
||||||
@ -273,6 +280,8 @@ public:
|
|||||||
TFX_ROOT_SIGNATURE_PARAM_PS_TEXTURES = 3,
|
TFX_ROOT_SIGNATURE_PARAM_PS_TEXTURES = 3,
|
||||||
TFX_ROOT_SIGNATURE_PARAM_PS_SAMPLERS = 4,
|
TFX_ROOT_SIGNATURE_PARAM_PS_SAMPLERS = 4,
|
||||||
TFX_ROOT_SIGNATURE_PARAM_PS_RT_TEXTURES = 5,
|
TFX_ROOT_SIGNATURE_PARAM_PS_RT_TEXTURES = 5,
|
||||||
|
TFX_ROOT_SIGNATURE_PARAM_PS_DEPTH_TEXTURES = 6,
|
||||||
|
TFX_ROOT_SIGNATURE_PARAM_PS_ACCURATE_PRIMS_SRV = 7,
|
||||||
|
|
||||||
UTILITY_ROOT_SIGNATURE_PARAM_PUSH_CONSTANTS = 0,
|
UTILITY_ROOT_SIGNATURE_PARAM_PUSH_CONSTANTS = 0,
|
||||||
UTILITY_ROOT_SIGNATURE_PARAM_PS_TEXTURES = 1,
|
UTILITY_ROOT_SIGNATURE_PARAM_PS_TEXTURES = 1,
|
||||||
@ -299,6 +308,10 @@ private:
|
|||||||
|
|
||||||
D3D12StreamBuffer m_vertex_stream_buffer;
|
D3D12StreamBuffer m_vertex_stream_buffer;
|
||||||
D3D12StreamBuffer m_index_stream_buffer;
|
D3D12StreamBuffer m_index_stream_buffer;
|
||||||
|
D3D12StreamBuffer m_accurate_prims_stream_buffer;
|
||||||
|
u32 m_accurate_prims_stream_buffer_offset = 0; // Ring buffer offset for the current draw.
|
||||||
|
D3D12DescriptorHandle m_accurate_prims_srv_descriptor_cpu;
|
||||||
|
D3D12DescriptorHandle m_accurate_prims_srv_descriptor_gpu;
|
||||||
D3D12StreamBuffer m_vertex_constant_buffer;
|
D3D12StreamBuffer m_vertex_constant_buffer;
|
||||||
D3D12StreamBuffer m_pixel_constant_buffer;
|
D3D12StreamBuffer m_pixel_constant_buffer;
|
||||||
D3D12StreamBuffer m_texture_stream_buffer;
|
D3D12StreamBuffer m_texture_stream_buffer;
|
||||||
@ -455,6 +468,8 @@ public:
|
|||||||
|
|
||||||
void IASetVertexBuffer(const void* vertex, size_t stride, size_t count);
|
void IASetVertexBuffer(const void* vertex, size_t stride, size_t count);
|
||||||
void IASetIndexBuffer(const void* index, size_t count);
|
void IASetIndexBuffer(const void* index, size_t count);
|
||||||
|
void SetupAccuratePrimsBuffer(GSHWDrawConfig& config);
|
||||||
|
void SetupAccuratePrimsConstants(GSHWDrawConfig& config);
|
||||||
|
|
||||||
void PSSetShaderResource(int i, GSTexture* sr, bool check_state);
|
void PSSetShaderResource(int i, GSTexture* sr, bool check_state);
|
||||||
void PSSetSampler(GSHWDrawConfig::SamplerSelector sel);
|
void PSSetSampler(GSHWDrawConfig::SamplerSelector sel);
|
||||||
@ -466,10 +481,13 @@ public:
|
|||||||
bool BindDrawPipeline(const PipelineSelector& p);
|
bool BindDrawPipeline(const PipelineSelector& p);
|
||||||
|
|
||||||
void RenderHW(GSHWDrawConfig& config) override;
|
void RenderHW(GSHWDrawConfig& config) override;
|
||||||
void SendHWDraw(const PipelineSelector& pipe, const GSHWDrawConfig& config, GSTexture12* draw_rt_clone, GSTexture12* draw_rt, const bool one_barrier, const bool full_barrier, const bool skip_first_barrier);
|
void SendHWDraw(const PipelineSelector& pipe, const GSHWDrawConfig& config,
|
||||||
|
GSTexture12* draw_rt_clone, GSTexture12* draw_rt,
|
||||||
|
GSTexture12* draw_ds_clone, GSTexture12* draw_ds,
|
||||||
|
const bool one_barrier, const bool full_barrier, const bool skip_first_barrier);
|
||||||
|
|
||||||
void UpdateHWPipelineSelector(GSHWDrawConfig& config);
|
void UpdateHWPipelineSelector(GSHWDrawConfig& config);
|
||||||
void UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config);
|
void UploadHWDrawVerticesAndIndices(GSHWDrawConfig& config);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
/// Ends any render pass, executes the command buffer, and invalidates cached state.
|
/// Ends any render pass, executes the command buffer, and invalidates cached state.
|
||||||
@ -527,33 +545,37 @@ private:
|
|||||||
DIRTY_FLAG_TFX_TEXTURES = (1 << 2),
|
DIRTY_FLAG_TFX_TEXTURES = (1 << 2),
|
||||||
DIRTY_FLAG_TFX_SAMPLERS = (1 << 3),
|
DIRTY_FLAG_TFX_SAMPLERS = (1 << 3),
|
||||||
DIRTY_FLAG_TFX_RT_TEXTURES = (1 << 4),
|
DIRTY_FLAG_TFX_RT_TEXTURES = (1 << 4),
|
||||||
|
DIRTY_FLAG_TFX_DEPTH_TEXTURES = (1 << 5),
|
||||||
|
|
||||||
DIRTY_FLAG_VS_CONSTANT_BUFFER_BINDING = (1 << 5),
|
DIRTY_FLAG_VS_CONSTANT_BUFFER_BINDING = (1 << 6),
|
||||||
DIRTY_FLAG_PS_CONSTANT_BUFFER_BINDING = (1 << 6),
|
DIRTY_FLAG_PS_CONSTANT_BUFFER_BINDING = (1 << 7),
|
||||||
DIRTY_FLAG_VS_VERTEX_BUFFER_BINDING = (1 << 7),
|
DIRTY_FLAG_VS_VERTEX_BUFFER_BINDING = (1 << 8),
|
||||||
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE = (1 << 8),
|
DIRTY_FLAG_PS_ACCURATE_PRIMS_BUFFER_BINDING = (1 << 9),
|
||||||
DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE = (1 << 9),
|
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE = (1 << 10),
|
||||||
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2 = (1 << 10),
|
DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE = (1 << 11),
|
||||||
|
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2 = (1 << 12),
|
||||||
|
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_3 = (1 << 13),
|
||||||
|
|
||||||
DIRTY_FLAG_VERTEX_BUFFER = (1 << 11),
|
DIRTY_FLAG_VERTEX_BUFFER = (1 << 14),
|
||||||
DIRTY_FLAG_INDEX_BUFFER = (1 << 12),
|
DIRTY_FLAG_INDEX_BUFFER = (1 << 15),
|
||||||
DIRTY_FLAG_PRIMITIVE_TOPOLOGY = (1 << 13),
|
DIRTY_FLAG_PRIMITIVE_TOPOLOGY = (1 << 16),
|
||||||
DIRTY_FLAG_VIEWPORT = (1 << 14),
|
DIRTY_FLAG_VIEWPORT = (1 << 17),
|
||||||
DIRTY_FLAG_SCISSOR = (1 << 15),
|
DIRTY_FLAG_SCISSOR = (1 << 18),
|
||||||
DIRTY_FLAG_RENDER_TARGET = (1 << 16),
|
DIRTY_FLAG_RENDER_TARGET = (1 << 19),
|
||||||
DIRTY_FLAG_PIPELINE = (1 << 17),
|
DIRTY_FLAG_PIPELINE = (1 << 20),
|
||||||
DIRTY_FLAG_BLEND_CONSTANTS = (1 << 18),
|
DIRTY_FLAG_BLEND_CONSTANTS = (1 << 21),
|
||||||
DIRTY_FLAG_STENCIL_REF = (1 << 19),
|
DIRTY_FLAG_STENCIL_REF = (1 << 22),
|
||||||
|
|
||||||
DIRTY_BASE_STATE = DIRTY_FLAG_VS_CONSTANT_BUFFER_BINDING | DIRTY_FLAG_PS_CONSTANT_BUFFER_BINDING |
|
DIRTY_BASE_STATE = DIRTY_FLAG_VS_CONSTANT_BUFFER_BINDING | DIRTY_FLAG_PS_CONSTANT_BUFFER_BINDING |
|
||||||
DIRTY_FLAG_VS_VERTEX_BUFFER_BINDING | DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE |
|
DIRTY_FLAG_VS_VERTEX_BUFFER_BINDING | DIRTY_FLAG_PS_ACCURATE_PRIMS_BUFFER_BINDING |
|
||||||
DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE | DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2 |
|
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE | DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE |
|
||||||
|
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2 | DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_3 |
|
||||||
DIRTY_FLAG_VERTEX_BUFFER | DIRTY_FLAG_INDEX_BUFFER | DIRTY_FLAG_PRIMITIVE_TOPOLOGY |
|
DIRTY_FLAG_VERTEX_BUFFER | DIRTY_FLAG_INDEX_BUFFER | DIRTY_FLAG_PRIMITIVE_TOPOLOGY |
|
||||||
DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR | DIRTY_FLAG_RENDER_TARGET | DIRTY_FLAG_PIPELINE |
|
DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR | DIRTY_FLAG_RENDER_TARGET | DIRTY_FLAG_PIPELINE |
|
||||||
DIRTY_FLAG_BLEND_CONSTANTS | DIRTY_FLAG_STENCIL_REF,
|
DIRTY_FLAG_BLEND_CONSTANTS | DIRTY_FLAG_STENCIL_REF,
|
||||||
|
|
||||||
DIRTY_TFX_STATE =
|
DIRTY_TFX_STATE = DIRTY_BASE_STATE | DIRTY_FLAG_TFX_TEXTURES | DIRTY_FLAG_TFX_SAMPLERS |
|
||||||
DIRTY_BASE_STATE | DIRTY_FLAG_TFX_TEXTURES | DIRTY_FLAG_TFX_SAMPLERS | DIRTY_FLAG_TFX_RT_TEXTURES,
|
DIRTY_FLAG_TFX_RT_TEXTURES | DIRTY_FLAG_TFX_DEPTH_TEXTURES,
|
||||||
DIRTY_UTILITY_STATE = DIRTY_BASE_STATE,
|
DIRTY_UTILITY_STATE = DIRTY_BASE_STATE,
|
||||||
DIRTY_CONSTANT_BUFFER_STATE = DIRTY_FLAG_VS_CONSTANT_BUFFER | DIRTY_FLAG_PS_CONSTANT_BUFFER,
|
DIRTY_CONSTANT_BUFFER_STATE = DIRTY_FLAG_VS_CONSTANT_BUFFER | DIRTY_FLAG_PS_CONSTANT_BUFFER,
|
||||||
};
|
};
|
||||||
@ -594,6 +616,7 @@ private:
|
|||||||
D3D12DescriptorHandle m_tfx_textures_handle_gpu;
|
D3D12DescriptorHandle m_tfx_textures_handle_gpu;
|
||||||
D3D12DescriptorHandle m_tfx_samplers_handle_gpu;
|
D3D12DescriptorHandle m_tfx_samplers_handle_gpu;
|
||||||
D3D12DescriptorHandle m_tfx_rt_textures_handle_gpu;
|
D3D12DescriptorHandle m_tfx_rt_textures_handle_gpu;
|
||||||
|
D3D12DescriptorHandle m_tfx_depth_textures_handle_gpu;
|
||||||
|
|
||||||
D3D12DescriptorHandle m_utility_texture_cpu;
|
D3D12DescriptorHandle m_utility_texture_cpu;
|
||||||
D3D12DescriptorHandle m_utility_texture_gpu;
|
D3D12DescriptorHandle m_utility_texture_gpu;
|
||||||
|
|||||||
@ -350,43 +350,6 @@ ID3D12GraphicsCommandList* GSTexture12::GetCommandBufferForUpdate()
|
|||||||
return dev->GetInitCommandList();
|
return dev->GetInitCommandList();
|
||||||
}
|
}
|
||||||
|
|
||||||
ID3D12Resource* GSTexture12::AllocateUploadStagingBuffer(
|
|
||||||
const void* data, u32 pitch, u32 upload_pitch, u32 height) const
|
|
||||||
{
|
|
||||||
const u32 buffer_size = CalcUploadSize(height, upload_pitch);
|
|
||||||
wil::com_ptr_nothrow<ID3D12Resource> resource;
|
|
||||||
wil::com_ptr_nothrow<D3D12MA::Allocation> allocation;
|
|
||||||
|
|
||||||
const D3D12MA::ALLOCATION_DESC allocation_desc = {D3D12MA::ALLOCATION_FLAG_NONE, D3D12_HEAP_TYPE_UPLOAD};
|
|
||||||
const D3D12_RESOURCE_DESC resource_desc = {D3D12_RESOURCE_DIMENSION_BUFFER, 0, buffer_size, 1, 1, 1,
|
|
||||||
DXGI_FORMAT_UNKNOWN, {1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_NONE};
|
|
||||||
HRESULT hr = GSDevice12::GetInstance()->GetAllocator()->CreateResource(&allocation_desc, &resource_desc,
|
|
||||||
D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, allocation.put(), IID_PPV_ARGS(resource.put()));
|
|
||||||
if (FAILED(hr))
|
|
||||||
{
|
|
||||||
Console.WriteLn("(AllocateUploadStagingBuffer) CreateCommittedResource() failed with %08X", hr);
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
void* map_ptr;
|
|
||||||
hr = resource->Map(0, nullptr, &map_ptr);
|
|
||||||
if (FAILED(hr))
|
|
||||||
{
|
|
||||||
Console.WriteLn("(AllocateUploadStagingBuffer) Map() failed with %08X", hr);
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
CopyTextureDataForUpload(map_ptr, data, pitch, upload_pitch, height);
|
|
||||||
|
|
||||||
const D3D12_RANGE write_range = {0, buffer_size};
|
|
||||||
resource->Unmap(0, &write_range);
|
|
||||||
|
|
||||||
// Immediately queue it for freeing after the command buffer finishes, since it's only needed for the copy.
|
|
||||||
// This adds the reference needed to keep the buffer alive.
|
|
||||||
GSDevice12::GetInstance()->DeferResourceDestruction(allocation.get(), resource.get());
|
|
||||||
return resource.get();
|
|
||||||
}
|
|
||||||
|
|
||||||
void GSTexture12::CopyTextureDataForUpload(void* dst, const void* src, u32 pitch, u32 upload_pitch, u32 height) const
|
void GSTexture12::CopyTextureDataForUpload(void* dst, const void* src, u32 pitch, u32 upload_pitch, u32 height) const
|
||||||
{
|
{
|
||||||
const u32 block_size = GetCompressedBlockSize();
|
const u32 block_size = GetCompressedBlockSize();
|
||||||
@ -406,7 +369,7 @@ bool GSTexture12::Update(const GSVector4i& r, const void* data, int pitch, int l
|
|||||||
const u32 width = Common::AlignUpPow2(r.width(), block_size);
|
const u32 width = Common::AlignUpPow2(r.width(), block_size);
|
||||||
const u32 height = Common::AlignUpPow2(r.height(), block_size);
|
const u32 height = Common::AlignUpPow2(r.height(), block_size);
|
||||||
const u32 upload_pitch = Common::AlignUpPow2<u32>(pitch, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
|
const u32 upload_pitch = Common::AlignUpPow2<u32>(pitch, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
|
||||||
const u32 required_size = CalcUploadSize(r.height(), upload_pitch);
|
const u32 required_size = CalcUploadSize(height, upload_pitch);
|
||||||
|
|
||||||
D3D12_TEXTURE_COPY_LOCATION srcloc;
|
D3D12_TEXTURE_COPY_LOCATION srcloc;
|
||||||
srcloc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
|
srcloc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
|
||||||
@ -416,35 +379,25 @@ bool GSTexture12::Update(const GSVector4i& r, const void* data, int pitch, int l
|
|||||||
srcloc.PlacedFootprint.Footprint.Format = m_dxgi_format;
|
srcloc.PlacedFootprint.Footprint.Format = m_dxgi_format;
|
||||||
srcloc.PlacedFootprint.Footprint.RowPitch = upload_pitch;
|
srcloc.PlacedFootprint.Footprint.RowPitch = upload_pitch;
|
||||||
|
|
||||||
|
const auto upload_data = [&](void* map_ptr) {
|
||||||
|
CopyTextureDataForUpload(map_ptr, data, pitch, upload_pitch, height);
|
||||||
|
};
|
||||||
|
|
||||||
// If the texture is larger than half our streaming buffer size, use a separate buffer.
|
// If the texture is larger than half our streaming buffer size, use a separate buffer.
|
||||||
// Otherwise allocation will either fail, or require lots of cmdbuffer submissions.
|
// Otherwise allocation will either fail, or require lots of cmdbuffer submissions.
|
||||||
if (required_size > (GSDevice12::GetInstance()->GetTextureStreamBuffer().GetSize() / 2))
|
if (required_size > (GSDevice12::GetInstance()->GetTextureStreamBuffer().GetSize() / 2))
|
||||||
{
|
{
|
||||||
srcloc.pResource = AllocateUploadStagingBuffer(data, pitch, upload_pitch, height);
|
srcloc.pResource = GSDevice12::GetInstance()->AllocateUploadStagingBuffer(required_size, upload_data);
|
||||||
if (!srcloc.pResource)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
srcloc.PlacedFootprint.Offset = 0;
|
srcloc.PlacedFootprint.Offset = 0;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
D3D12StreamBuffer& sbuffer = GSDevice12::GetInstance()->GetTextureStreamBuffer();
|
u32 offset;
|
||||||
if (!sbuffer.ReserveMemory(required_size, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT))
|
srcloc.pResource = GSDevice12::GetInstance()->WriteTextureUploadBuffer(required_size, upload_data, offset);
|
||||||
{
|
srcloc.PlacedFootprint.Offset = offset;
|
||||||
GSDevice12::GetInstance()->ExecuteCommandList(
|
|
||||||
false, "While waiting for %u bytes in texture upload buffer", required_size);
|
|
||||||
if (!sbuffer.ReserveMemory(required_size, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT))
|
|
||||||
{
|
|
||||||
Console.Error("Failed to reserve texture upload memory (%u bytes).", required_size);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
srcloc.pResource = sbuffer.GetBuffer();
|
|
||||||
srcloc.PlacedFootprint.Offset = sbuffer.GetCurrentOffset();
|
|
||||||
CopyTextureDataForUpload(sbuffer.GetCurrentHostPointer(), data, pitch, upload_pitch, height);
|
|
||||||
sbuffer.CommitMemory(required_size);
|
|
||||||
}
|
}
|
||||||
|
if (!srcloc.pResource)
|
||||||
|
return false;
|
||||||
|
|
||||||
ID3D12GraphicsCommandList* cmdlist = GetCommandBufferForUpdate();
|
ID3D12GraphicsCommandList* cmdlist = GetCommandBufferForUpdate();
|
||||||
GL_PUSH("GSTexture12::Update({%d,%d} %dx%d Lvl:%u", r.x, r.y, r.width(), r.height(), layer);
|
GL_PUSH("GSTexture12::Update({%d,%d} %dx%d Lvl:%u", r.x, r.y, r.width(), r.height(), layer);
|
||||||
|
|||||||
@ -79,7 +79,6 @@ private:
|
|||||||
static bool CreateUAVDescriptor(ID3D12Resource* resource, DXGI_FORMAT format, D3D12DescriptorHandle* dh);
|
static bool CreateUAVDescriptor(ID3D12Resource* resource, DXGI_FORMAT format, D3D12DescriptorHandle* dh);
|
||||||
|
|
||||||
ID3D12GraphicsCommandList* GetCommandBufferForUpdate();
|
ID3D12GraphicsCommandList* GetCommandBufferForUpdate();
|
||||||
ID3D12Resource* AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 height) const;
|
|
||||||
void CopyTextureDataForUpload(void* dst, const void* src, u32 pitch, u32 upload_pitch, u32 height) const;
|
void CopyTextureDataForUpload(void* dst, const void* src, u32 pitch, u32 upload_pitch, u32 height) const;
|
||||||
|
|
||||||
wil::com_ptr_nothrow<ID3D12Resource> m_resource;
|
wil::com_ptr_nothrow<ID3D12Resource> m_resource;
|
||||||
|
|||||||
@ -291,6 +291,360 @@ void GSRendererHW::Lines2Sprites()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds a padded quad covering the segment v0 -> v1 and writes it to `out` as two
// triangles (6 vertices, corner order {0,1,2} and {1,2,3}).
//
// v0/v1: segment endpoints. The code converts them to float by dividing by 16 and
//        converts offsets back by multiplying by 16 (presumably 1/16-pixel fixed point
//        like GSVertex::XYZ - confirm against callers).
// out:   must have room for 6 vertices. Only XYZ.X / XYZ.Y are filled in; every other
//        field of each output vertex is zeroed.
//
// The quad extends 2 units (after the /16 conversion) beyond each endpoint along the
// segment direction and 2 units to either side along its normal. Resulting coordinates
// are clamped to [0, 0xFFFF].
static __forceinline void GetCoveringQuad(const GSVector2i& v0, const GSVector2i& v1, GSVertex* out)
{
	const float x0 = static_cast<float>(v0.x) / 16.0f;
	const float y0 = static_cast<float>(v0.y) / 16.0f;
	const float x1 = static_cast<float>(v1.x) / 16.0f;
	const float y1 = static_cast<float>(v1.y) / 16.0f;

	float dx = x1 - x0;
	float dy = y1 - y0;
	const float d_len = sqrtf(dx * dx + dy * dy);
	if (d_len > 0.0f)
	{
		// Rescale the direction to length 2.
		dx = 2.0f * dx / d_len;
		dy = 2.0f * dy / d_len;
	}
	else
	{
		// Zero-length segment: there is no direction to normalise, and dividing by zero
		// would produce NaN (converting NaN to int is undefined behaviour). Fall back to
		// an axis-aligned direction so the quad still pads the point on all sides.
		dx = 2.0f;
		dy = 0.0f;
	}

	// Normal: the direction rotated by 90 degrees.
	const float nx = -dy;
	const float ny = dx;

	// Offsets converted back to the integer units of v0/v1.
	const int dxi = static_cast<int>(16.0f * dx);
	const int dyi = static_cast<int>(16.0f * dy);
	const int nxi = static_cast<int>(16.0f * nx);
	const int nyi = static_cast<int>(16.0f * ny);

	const auto clamp_coord = [](int value) {
		return static_cast<u32>(std::clamp<int>(value, 0, 0xFFFF));
	};

	GSVertex v[4];
	std::memset(v, 0, sizeof(v));

	// Corners behind v0 (either side of the segment).
	v[0].XYZ.X = clamp_coord(v0.x - dxi - nxi);
	v[0].XYZ.Y = clamp_coord(v0.y - dyi - nyi);

	v[1].XYZ.X = clamp_coord(v0.x - dxi + nxi);
	v[1].XYZ.Y = clamp_coord(v0.y - dyi + nyi);

	// Corners past v1 (either side of the segment).
	v[2].XYZ.X = clamp_coord(v1.x + dxi - nxi);
	v[2].XYZ.Y = clamp_coord(v1.y + dyi - nyi);

	v[3].XYZ.X = clamp_coord(v1.x + dxi + nxi);
	v[3].XYZ.Y = clamp_coord(v1.y + dyi + nyi);

	// First triangle.
	out[0] = v[0];
	out[1] = v[1];
	out[2] = v[2];

	// Second triangle, sharing the v[1]-v[2] diagonal.
	out[3] = v[1];
	out[4] = v[2];
	out[5] = v[3];
}
|
||||||
|
|
||||||
|
void GSRendererHW::GetAccuratePrimsEdgeVertexAttributes(const GSVertex& vtx0, const GSVertex& vtx1, const GSVertex* vtx_provoking, AccuratePrimsEdgeData& data)
|
||||||
|
{
|
||||||
|
GSVector2i v0 = { static_cast<int>(vtx0.XYZ.X), static_cast<int>(vtx0.XYZ.Y) };
|
||||||
|
GSVector2i v1 = { static_cast<int>(vtx1.XYZ.X), static_cast<int>(vtx1.XYZ.Y) };
|
||||||
|
|
||||||
|
// Interpolated attributes - mimicks transformations done in vertex shader.
|
||||||
|
GSVector2 uv0 = GSVector2(static_cast<float>(vtx0.U), static_cast<float>(vtx0.V)) - m_conf.cb_vs.texture_offset;
|
||||||
|
GSVector2 uv1 = GSVector2(static_cast<float>(vtx1.U), static_cast<float>(vtx1.V)) - m_conf.cb_vs.texture_offset;
|
||||||
|
GSVector2 uv0_scale = uv0 * m_conf.cb_vs.texture_scale;
|
||||||
|
GSVector2 uv1_scale = uv1 * m_conf.cb_vs.texture_scale;
|
||||||
|
GSVector2 st0 = GSVector2(vtx0.ST.S, vtx0.ST.T) - m_conf.cb_vs.texture_offset;
|
||||||
|
GSVector2 st1 = GSVector2(vtx1.ST.S, vtx1.ST.T) - m_conf.cb_vs.texture_offset;
|
||||||
|
GSVector2 st0_scale = PRIM->TME ? st0 / m_conf.cb_vs.texture_scale : GSVector2(0);
|
||||||
|
GSVector2 st1_scale = PRIM->TME ? st1 / m_conf.cb_vs.texture_scale : GSVector2(0);
|
||||||
|
|
||||||
|
float fog0;
|
||||||
|
float fog1;
|
||||||
|
if (vtx_provoking)
|
||||||
|
{
|
||||||
|
fog0 = fog1 = static_cast<float>(vtx_provoking->FOG) / 255.0f;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
fog0 = static_cast<float>(vtx0.FOG) / 255.0f;
|
||||||
|
fog1 = static_cast<float>(vtx1.FOG) / 255.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
data.t_float0 = GSVector4(st0.x, st0.y, fog0, vtx0.RGBAQ.Q);
|
||||||
|
data.t_float1 = GSVector4(st1.x, st1.y, fog1, vtx1.RGBAQ.Q);
|
||||||
|
data.t_int0 = GSVector4(uv0_scale.x, uv0_scale.y);
|
||||||
|
data.t_int1 = GSVector4(uv1_scale.x, uv1_scale.y);
|
||||||
|
|
||||||
|
if (m_conf.vs.fst)
|
||||||
|
{
|
||||||
|
data.t_int0.z = uv0.x;
|
||||||
|
data.t_int0.w = uv0.y;
|
||||||
|
data.t_int1.z = uv1.x;
|
||||||
|
data.t_int1.w = uv1.y;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
data.t_int0.z = st0_scale.x;
|
||||||
|
data.t_int0.w = st0_scale.y;
|
||||||
|
data.t_int1.z = st1_scale.x;
|
||||||
|
data.t_int1.w = st1_scale.y;
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr float exp_min32 = 0x1p-32f;
|
||||||
|
float z0 = static_cast<float>(std::min(vtx0.XYZ.Z, static_cast<u32>(m_conf.cb_vs.max_depth.x)));
|
||||||
|
float z1 = static_cast<float>(std::min(vtx1.XYZ.Z, static_cast<u32>(m_conf.cb_vs.max_depth.x)));
|
||||||
|
|
||||||
|
GSVector2 xy0 = GSVector2(v0.x, v0.y) - GSVector2(0.05f);
|
||||||
|
GSVector2 xy1 = GSVector2(v1.x, v1.y) - GSVector2(0.05f);
|
||||||
|
|
||||||
|
xy0 = xy0 * m_conf.cb_vs.vertex_scale - m_conf.cb_vs.vertex_offset;
|
||||||
|
xy1 = xy1 * m_conf.cb_vs.vertex_scale - m_conf.cb_vs.vertex_offset;
|
||||||
|
|
||||||
|
GSRendererType renderer = GSGetCurrentRenderer();
|
||||||
|
float y_sign = (renderer == GSRendererType::DX11 || renderer == GSRendererType::DX12) ? -1.0f : 1.0f;
|
||||||
|
data.p0 = GSVector4(xy0.x, y_sign * xy0.y, z0 * exp_min32, 1.0f);
|
||||||
|
data.p1 = GSVector4(xy1.x, y_sign * xy1.y, z1 * exp_min32, 1.0f);
|
||||||
|
|
||||||
|
if (vtx_provoking)
|
||||||
|
{
|
||||||
|
data.c0 = data.c1 = GSVector4(
|
||||||
|
static_cast<float>(vtx_provoking->RGBAQ.R),
|
||||||
|
static_cast<float>(vtx_provoking->RGBAQ.G),
|
||||||
|
static_cast<float>(vtx_provoking->RGBAQ.B),
|
||||||
|
static_cast<float>(vtx_provoking->RGBAQ.A));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
data.c0 = GSVector4(
|
||||||
|
static_cast<float>(vtx0.RGBAQ.R),
|
||||||
|
static_cast<float>(vtx0.RGBAQ.G),
|
||||||
|
static_cast<float>(vtx0.RGBAQ.B),
|
||||||
|
static_cast<float>(vtx0.RGBAQ.A));
|
||||||
|
data.c1 = GSVector4(
|
||||||
|
static_cast<float>(vtx1.RGBAQ.R),
|
||||||
|
static_cast<float>(vtx1.RGBAQ.G),
|
||||||
|
static_cast<float>(vtx1.RGBAQ.B),
|
||||||
|
static_cast<float>(vtx1.RGBAQ.A));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void GSRendererHW::ExpandAccurateTrianglesEdge(
|
||||||
|
const GSVertex& vtx0,
|
||||||
|
const GSVertex& vtx1,
|
||||||
|
const GSVertex* vtx_provoking,
|
||||||
|
const GSVector4i& edge0,
|
||||||
|
const GSVector4i& edge1,
|
||||||
|
bool top_left,
|
||||||
|
AccuratePrimsEdgeData& data,
|
||||||
|
GSVertex* vertex_out)
|
||||||
|
{
|
||||||
|
const GSVector2i v0 = { static_cast<int>(vtx0.XYZ.X), static_cast<int>(vtx0.XYZ.Y) };
|
||||||
|
const GSVector2i v1 = { static_cast<int>(vtx1.XYZ.X), static_cast<int>(vtx1.XYZ.Y) };
|
||||||
|
|
||||||
|
const GSVector4i& xyof = m_context->scissor.xyof;
|
||||||
|
|
||||||
|
data.xy0 = GSVector2i(v0.x - xyof.x, v0.y - xyof.y);
|
||||||
|
data.xy1 = GSVector2i(v1.x - xyof.x, v1.y - xyof.y);
|
||||||
|
const GSVector2i dxy = data.xy1 - data.xy0;
|
||||||
|
const bool pos_x = dxy.x >= 0;
|
||||||
|
const bool pos_y = dxy.y >= 0;
|
||||||
|
data.edge0 = edge0;
|
||||||
|
data.edge1 = edge1;
|
||||||
|
data.step_x = std::abs(dxy.x) >= std::abs(dxy.y);
|
||||||
|
data.side = top_left != (data.step_x && (dxy.y != 0) && (pos_x == pos_y));
|
||||||
|
|
||||||
|
GetAccuratePrimsEdgeVertexAttributes(vtx0, vtx1, vtx_provoking, data);
|
||||||
|
|
||||||
|
GetCoveringQuad(v0, v1, vertex_out);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Vertex orderings that sort a triangle's three vertices by ascending Y.
//
// The row is selected by a 3-bit mask built from the comparisons
// (y0 > y1, y0 > y2, y1 > y2) - presumably bit 0, 1 and 2 respectively, matching the
// lane order of the vector compare used by the caller. Each row lists the original
// vertex indices in sorted order; only the first three entries are read, the fourth
// is zero padding. Masks 2 and 5 describe contradictory orderings and hold zeros.
static const u8 s_ysort[8][4] = {
	/* 0 */ {0, 1, 2, 0}, // y0 <= y1 <= y2
	/* 1 */ {1, 0, 2, 0}, // y1 < y0 <= y2
	/* 2 */ {0, 0, 0, 0}, // impossible
	/* 3 */ {1, 2, 0, 0}, // y1 <= y2 < y0
	/* 4 */ {0, 2, 1, 0}, // y0 <= y2 < y1
	/* 5 */ {0, 0, 0, 0}, // impossible
	/* 6 */ {2, 0, 1, 0}, // y2 < y0 <= y1
	/* 7 */ {2, 1, 0, 0}, // y2 < y1 < y0
};
|
||||||
|
|
||||||
|
void GSRendererHW::ExpandAccurateTrianglesVertices()
|
||||||
|
{
|
||||||
|
constexpr int verts_per_prim = 21; // 3 verts for triangle interior; 3 x 6 verts for the edges.
|
||||||
|
const int prims = m_index.tail / 3;
|
||||||
|
|
||||||
|
while (m_vertex.maxcount < static_cast<u32>(prims * verts_per_prim))
|
||||||
|
GrowVertexBuffer();
|
||||||
|
|
||||||
|
m_accurate_prims_edge_data.clear();
|
||||||
|
m_accurate_prims_edge_data.resize(3 * prims);
|
||||||
|
|
||||||
|
const GSVector4i& xyof = m_context->scissor.xyof;
|
||||||
|
|
||||||
|
const bool flat_shade = !PRIM->IIP;
|
||||||
|
const int provoking_offset = g_gs_device->Features().provoking_vertex_last ? 2 : 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < prims; i++)
|
||||||
|
{
|
||||||
|
// Code from GSRasterizer
|
||||||
|
const GSVertex& vtx0_orig = m_vertex.buff[m_index.buff[3 * i + 0]];
|
||||||
|
const GSVertex& vtx1_orig = m_vertex.buff[m_index.buff[3 * i + 1]];
|
||||||
|
const GSVertex& vtx2_orig = m_vertex.buff[m_index.buff[3 * i + 2]];
|
||||||
|
|
||||||
|
const GSVector2i v0_orig = { static_cast<int>(vtx0_orig.XYZ.X) - xyof.x, static_cast<int>(vtx0_orig.XYZ.Y) - xyof.y };
|
||||||
|
const GSVector2i v1_orig = { static_cast<int>(vtx1_orig.XYZ.X) - xyof.x, static_cast<int>(vtx1_orig.XYZ.Y) - xyof.y };
|
||||||
|
const GSVector2i v2_orig = { static_cast<int>(vtx2_orig.XYZ.X) - xyof.x, static_cast<int>(vtx2_orig.XYZ.Y) - xyof.y };
|
||||||
|
|
||||||
|
GSVector4i y0011(v0_orig.y, v0_orig.y, v1_orig.y, v1_orig.y);
|
||||||
|
GSVector4i y1221(v1_orig.y, v2_orig.y, v2_orig.y, v1_orig.y);
|
||||||
|
|
||||||
|
int m1 = GSVector4::cast(y0011 > y1221).mask() & 7;
|
||||||
|
|
||||||
|
const u8* idx = s_ysort[m1];
|
||||||
|
|
||||||
|
const GSVertex* vtx[3] = { &vtx0_orig, &vtx1_orig, &vtx2_orig };
|
||||||
|
const GSVector2i* v[3] = { &v0_orig, &v1_orig, &v2_orig };
|
||||||
|
|
||||||
|
const GSVertex& vtx0 = *vtx[idx[0]];
|
||||||
|
const GSVertex& vtx1 = *vtx[idx[1]];
|
||||||
|
const GSVertex& vtx2 = *vtx[idx[2]];
|
||||||
|
const GSVertex* vtx_provoking = flat_shade ? vtx[idx[provoking_offset]] : nullptr;
|
||||||
|
|
||||||
|
const GSVector2i& v0 = *v[idx[0]];
|
||||||
|
const GSVector2i& v1 = *v[idx[1]];
|
||||||
|
const GSVector2i& v2 = *v[idx[2]];
|
||||||
|
|
||||||
|
y0011 = GSVector4i(v0.y, v0.y, v1.y, v1.y);
|
||||||
|
y1221 = GSVector4i(v1.y, v2.y, v2.y, v1.y);
|
||||||
|
|
||||||
|
m1 = GSVector4::cast(y0011 == y1221).mask() & 7;
|
||||||
|
|
||||||
|
if (m1 == 7)
|
||||||
|
continue; // Degenerate triangle.
|
||||||
|
|
||||||
|
GSVector2i dv0 = v1 - v0;
|
||||||
|
GSVector2i dv1 = v2 - v0;
|
||||||
|
GSVector2i dv2 = v2 - v1;
|
||||||
|
|
||||||
|
int cross = dv0.y * dv1.x - dv0.x * dv1.y;
|
||||||
|
|
||||||
|
if (cross == 0)
|
||||||
|
continue; // Degenerate triangle
|
||||||
|
|
||||||
|
bool clockwise = cross < 0;
|
||||||
|
|
||||||
|
const bool tl0 = (v0.y == v1.y) || !clockwise;
|
||||||
|
const bool tl1 = clockwise;
|
||||||
|
const bool tl2 = (v1.y != v2.y) && !clockwise;
|
||||||
|
|
||||||
|
GSVector4i edge0 = GSVector4i( dv0.y, -dv0.x, 0, 0);
|
||||||
|
GSVector4i edge1 = GSVector4i(-dv1.y, dv1.x, 0, 0);
|
||||||
|
GSVector4i edge2 = GSVector4i( dv2.y, -dv2.x, 0, 0);
|
||||||
|
|
||||||
|
edge0.z = v1.x * v0.y - v0.x * v1.y;
|
||||||
|
edge1.z = v0.x * v2.y - v2.x * v0.y;
|
||||||
|
edge2.z = v2.x * v1.y - v1.x * v2.y;
|
||||||
|
|
||||||
|
if (clockwise)
|
||||||
|
{
|
||||||
|
edge0 = GSVector4i(0) - edge0;
|
||||||
|
edge1 = GSVector4i(0) - edge1;
|
||||||
|
edge2 = GSVector4i(0) - edge2;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bias for top-left edges.
|
||||||
|
edge0.z += tl0 ? 1 : 0;
|
||||||
|
edge1.z += tl1 ? 1 : 0;
|
||||||
|
edge2.z += tl2 ? 1 : 0;
|
||||||
|
|
||||||
|
// Interior triangle
|
||||||
|
m_vertex.buff_copy[verts_per_prim * i + 0] = vtx0;
|
||||||
|
m_vertex.buff_copy[verts_per_prim * i + 1] = vtx1;
|
||||||
|
m_vertex.buff_copy[verts_per_prim * i + 2] = vtx2;
|
||||||
|
|
||||||
|
// Edges
|
||||||
|
ExpandAccurateTrianglesEdge(vtx0, vtx1, vtx_provoking, edge1, edge2, tl0, m_accurate_prims_edge_data[3 * i + 0],
|
||||||
|
&m_vertex.buff_copy[verts_per_prim * i + 3]);
|
||||||
|
ExpandAccurateTrianglesEdge(vtx0, vtx2, vtx_provoking, edge2, edge0, tl1, m_accurate_prims_edge_data[3 * i + 1],
|
||||||
|
&m_vertex.buff_copy[verts_per_prim * i + 9]);
|
||||||
|
ExpandAccurateTrianglesEdge(vtx1, vtx2, vtx_provoking, edge0, edge1, tl2, m_accurate_prims_edge_data[3 * i + 2],
|
||||||
|
&m_vertex.buff_copy[verts_per_prim * i + 15]);
|
||||||
|
}
|
||||||
|
|
||||||
|
m_index.tail = prims * verts_per_prim;
|
||||||
|
for (std::size_t i = 0; i < m_index.tail; i++)
|
||||||
|
{
|
||||||
|
m_index.buff[i] = i;
|
||||||
|
}
|
||||||
|
m_vertex.next = m_vertex.tail = m_vertex.head = m_index.tail;
|
||||||
|
|
||||||
|
std::swap(m_vertex.buff, m_vertex.buff_copy);
|
||||||
|
}
|
||||||
|
|
||||||
|
void GSRendererHW::ExpandAccurateLinesVertices()
|
||||||
|
{
|
||||||
|
constexpr int verts_per_prim = 6; // 6 verts to form quad covering each line.
|
||||||
|
const int prims = m_index.tail / 2;
|
||||||
|
|
||||||
|
const bool flat_shade = !PRIM->IIP;
|
||||||
|
const int provoking_offset = g_gs_device->Features().provoking_vertex_last ? 1 : 0;
|
||||||
|
|
||||||
|
const auto ExitRule = [](const GSVector2i& d, bool step_x, bool pos_step) {
|
||||||
|
int dist = std::abs(d.x) + std::abs(d.y);
|
||||||
|
if (dist < 8)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (step_x)
|
||||||
|
{
|
||||||
|
bool x_good = pos_step ? (d.x > 0) : (d.x < 0);
|
||||||
|
return x_good && (dist > 8 || d.y >= 0);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
bool y_good = pos_step ? (d.y > 0) : (d.y < 0);
|
||||||
|
return y_good && (dist > 8 || d.x >= 0);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
while (m_vertex.maxcount < static_cast<u32>(verts_per_prim * prims))
|
||||||
|
GrowVertexBuffer();
|
||||||
|
|
||||||
|
m_accurate_prims_edge_data.clear();
|
||||||
|
m_accurate_prims_edge_data.resize(prims);
|
||||||
|
|
||||||
|
const GSVector4i& xyof = m_context->scissor.xyof;
|
||||||
|
|
||||||
|
for (int i = 0; i < prims; i++)
|
||||||
|
{
|
||||||
|
const GSVertex& vtx0 = m_vertex.buff[m_index.buff[2 * i + 0]];
|
||||||
|
const GSVertex& vtx1 = m_vertex.buff[m_index.buff[2 * i + 1]];
|
||||||
|
const GSVertex* vtx_provoking = flat_shade ? &m_vertex.buff[m_index.buff[2 * i + provoking_offset]] : nullptr;
|
||||||
|
|
||||||
|
const GSVector2i v0 = { static_cast<int>(vtx0.XYZ.X), static_cast<int>(vtx0.XYZ.Y) };
|
||||||
|
const GSVector2i v1 = { static_cast<int>(vtx1.XYZ.X), static_cast<int>(vtx1.XYZ.Y) };
|
||||||
|
|
||||||
|
AccuratePrimsEdgeData& data = m_accurate_prims_edge_data[i];
|
||||||
|
|
||||||
|
data.xy0 = GSVector2i(v0.x - xyof.x, v0.y - xyof.y);
|
||||||
|
data.xy1 = GSVector2i(v1.x - xyof.x, v1.y - xyof.y);
|
||||||
|
const GSVector2i dxy = data.xy1 - data.xy0;
|
||||||
|
const GSVector2i xy0_i = (data.xy0 + 8) & GSVector2i(~0xF);
|
||||||
|
const GSVector2i xy1_i = (data.xy1 + 8) & GSVector2i(~0xF);
|
||||||
|
data.step_x = std::abs(dxy.x) >= std::abs(dxy.y);
|
||||||
|
bool pos_step = data.step_x ? dxy.x >= 0 : dxy.y >= 0;
|
||||||
|
data.draw0 = !ExitRule(data.xy0 - xy0_i, data.step_x, pos_step);
|
||||||
|
data.draw1 = ExitRule(data.xy1 - xy1_i, data.step_x, pos_step);
|
||||||
|
|
||||||
|
GetAccuratePrimsEdgeVertexAttributes(vtx0, vtx1, vtx_provoking, data);
|
||||||
|
|
||||||
|
GetCoveringQuad(v0, v1, &m_vertex.buff_copy[i * verts_per_prim]);
|
||||||
|
}
|
||||||
|
|
||||||
|
m_index.tail = prims * verts_per_prim;
|
||||||
|
for (std::size_t i = 0; i < m_index.tail; i++)
|
||||||
|
{
|
||||||
|
m_index.buff[i] = i;
|
||||||
|
}
|
||||||
|
m_vertex.next = m_vertex.tail = m_vertex.head = m_index.tail;
|
||||||
|
|
||||||
|
std::swap(m_vertex.buff, m_vertex.buff_copy);
|
||||||
|
}
|
||||||
|
|
||||||
void GSRendererHW::ExpandLineIndices()
|
void GSRendererHW::ExpandLineIndices()
|
||||||
{
|
{
|
||||||
const u32 process_count = (m_index.tail + 7) / 8 * 8;
|
const u32 process_count = (m_index.tail + 7) / 8 * 8;
|
||||||
@ -2471,7 +2825,7 @@ void GSRendererHW::Draw()
|
|||||||
|
|
||||||
// Need to fix the alpha test, since the alpha will be fixed to 1.0 if ABE is disabled and AA1 is enabled
|
// Need to fix the alpha test, since the alpha will be fixed to 1.0 if ABE is disabled and AA1 is enabled
|
||||||
// So if it doesn't meet the condition, always fail, if it does, always pass (turn off the test).
|
// So if it doesn't meet the condition, always fail, if it does, always pass (turn off the test).
|
||||||
if (IsCoverageAlpha() && m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST > 1)
|
if (IsCoverageAlphaFixedOne() && m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST > 1)
|
||||||
{
|
{
|
||||||
const float aref = static_cast<float>(m_cached_ctx.TEST.AREF);
|
const float aref = static_cast<float>(m_cached_ctx.TEST.AREF);
|
||||||
const int old_ATST = m_cached_ctx.TEST.ATST;
|
const int old_ATST = m_cached_ctx.TEST.ATST;
|
||||||
@ -5018,21 +5372,37 @@ void GSRendererHW::SetupIA(float target_scale, float sx, float sy, bool req_vert
|
|||||||
|
|
||||||
case GS_LINE_CLASS:
|
case GS_LINE_CLASS:
|
||||||
{
|
{
|
||||||
m_conf.topology = GSHWDrawConfig::Topology::Line;
|
if (features.accurate_prims)
|
||||||
m_conf.indices_per_prim = 2;
|
|
||||||
if (unscale_pt_ln)
|
|
||||||
{
|
{
|
||||||
if (features.line_expand)
|
GL_INS("HW: Using accurate lines");
|
||||||
|
ExpandAccurateLinesVertices();
|
||||||
|
m_conf.accurate_prims = true;
|
||||||
|
m_conf.accurate_prims_edge_data = &m_accurate_prims_edge_data;
|
||||||
|
m_conf.vs.accurate_prims = ACCURATE_PRIMS_LINE;
|
||||||
|
m_conf.ps.accurate_prims = ACCURATE_PRIMS_LINE;
|
||||||
|
m_conf.ps.accurate_prims_aa = (PRIM->AA1 != 0);
|
||||||
|
m_conf.ps.accurate_prims_aa_abe = (PRIM->ABE != 0);
|
||||||
|
m_conf.topology = GSHWDrawConfig::Topology::Triangle;
|
||||||
|
m_conf.indices_per_prim = 6;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
m_conf.topology = GSHWDrawConfig::Topology::Line;
|
||||||
|
m_conf.indices_per_prim = 2;
|
||||||
|
if (unscale_pt_ln)
|
||||||
{
|
{
|
||||||
m_conf.line_expand = true;
|
if (features.line_expand)
|
||||||
}
|
{
|
||||||
else if (features.vs_expand)
|
m_conf.line_expand = true;
|
||||||
{
|
}
|
||||||
m_conf.vs.expand = GSHWDrawConfig::VSExpand::Line;
|
else if (features.vs_expand)
|
||||||
m_conf.cb_vs.point_size = GSVector2(16.0f * sx, 16.0f * sy);
|
{
|
||||||
m_conf.topology = GSHWDrawConfig::Topology::Triangle;
|
m_conf.vs.expand = GSHWDrawConfig::VSExpand::Line;
|
||||||
m_conf.indices_per_prim = 6;
|
m_conf.cb_vs.point_size = GSVector2(16.0f * sx, 16.0f * sy);
|
||||||
ExpandLineIndices();
|
m_conf.topology = GSHWDrawConfig::Topology::Triangle;
|
||||||
|
m_conf.indices_per_prim = 6;
|
||||||
|
ExpandLineIndices();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -5076,6 +5446,20 @@ void GSRendererHW::SetupIA(float target_scale, float sx, float sy, bool req_vert
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case GS_TRIANGLE_CLASS:
|
case GS_TRIANGLE_CLASS:
|
||||||
|
if (features.accurate_prims && PRIM->AA1)
|
||||||
|
{
|
||||||
|
GL_INS("HW: Using accurate triangles");
|
||||||
|
ExpandAccurateTrianglesVertices();
|
||||||
|
m_conf.accurate_prims = true;
|
||||||
|
m_conf.accurate_prims_edge_data = &m_accurate_prims_edge_data;
|
||||||
|
m_conf.vs.accurate_prims = ACCURATE_PRIMS_TRIANGLE;
|
||||||
|
m_conf.ps.accurate_prims = ACCURATE_PRIMS_TRIANGLE;
|
||||||
|
m_conf.ps.accurate_prims_aa = (PRIM->AA1 != 0);
|
||||||
|
m_conf.ps.accurate_prims_aa_abe = (PRIM->ABE != 0);
|
||||||
|
m_conf.topology = GSHWDrawConfig::Topology::Triangle;
|
||||||
|
m_conf.indices_per_prim = 21;
|
||||||
|
}
|
||||||
|
else
|
||||||
{
|
{
|
||||||
m_conf.topology = GSHWDrawConfig::Topology::Triangle;
|
m_conf.topology = GSHWDrawConfig::Topology::Triangle;
|
||||||
m_conf.indices_per_prim = 3;
|
m_conf.indices_per_prim = 3;
|
||||||
@ -5130,6 +5514,10 @@ void GSRendererHW::EmulateZbuffer(const GSTextureCache::Target* ds)
|
|||||||
m_conf.depth.ztst = ZTST_ALWAYS;
|
m_conf.depth.ztst = ZTST_ALWAYS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Accurate prims requires a manual depth interpolation in the pixel shader.
|
||||||
|
// Piggy-back on Z clamp to avoid creating more pipeline combinations.
|
||||||
|
bool accurate_prims_clamp_z = UsingAccuratePrims() && (m_conf.depth.zwe || m_conf.depth.ztst != ZTST_ALWAYS);
|
||||||
|
|
||||||
// On the real GS we appear to do clamping on the max z value the format allows.
|
// On the real GS we appear to do clamping on the max z value the format allows.
|
||||||
// Clamping is done after rasterization.
|
// Clamping is done after rasterization.
|
||||||
const u32 max_z = 0xFFFFFFFF >> (GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].fmt * 8);
|
const u32 max_z = 0xFFFFFFFF >> (GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].fmt * 8);
|
||||||
@ -5139,16 +5527,23 @@ void GSRendererHW::EmulateZbuffer(const GSTextureCache::Target* ds)
|
|||||||
//ps_cb.MaxDepth = GSVector4(0.0f, 0.0f, 0.0f, 1.0f);
|
//ps_cb.MaxDepth = GSVector4(0.0f, 0.0f, 0.0f, 1.0f);
|
||||||
m_conf.ps.zclamp = 0;
|
m_conf.ps.zclamp = 0;
|
||||||
|
|
||||||
if (clamp_z)
|
if (clamp_z || accurate_prims_clamp_z)
|
||||||
{
|
{
|
||||||
if (m_vt.m_primclass == GS_SPRITE_CLASS || m_vt.m_primclass == GS_POINT_CLASS)
|
if (m_vt.m_primclass == GS_SPRITE_CLASS || m_vt.m_primclass == GS_POINT_CLASS)
|
||||||
{
|
{
|
||||||
m_conf.cb_vs.max_depth = GSVector2i(max_z);
|
m_conf.cb_vs.max_depth = GSVector2i(max_z);
|
||||||
}
|
}
|
||||||
else if (!m_cached_ctx.ZBUF.ZMSK)
|
else if (!m_cached_ctx.ZBUF.ZMSK || accurate_prims_clamp_z)
|
||||||
{
|
{
|
||||||
m_conf.cb_ps.TA_MaxDepth_Af.z = static_cast<float>(max_z) * 0x1p-32f;
|
m_conf.cb_ps.TA_MaxDepth_Af.z = static_cast<float>(max_z) * 0x1p-32f;
|
||||||
m_conf.ps.zclamp = 1;
|
m_conf.ps.zclamp = 1;
|
||||||
|
if (accurate_prims_clamp_z && m_vt.m_primclass == GS_TRIANGLE_CLASS && PRIM->AA1 &&
|
||||||
|
m_cached_ctx.TEST.ZTE && (m_conf.depth.ztst == ZTST_GEQUAL || m_conf.depth.ztst == ZTST_GREATER))
|
||||||
|
{
|
||||||
|
// For HW AA1 with triangles we must do Z test in the shader to get proper
|
||||||
|
// updating of the Z buffer (interior triangle points update the Z buffer but edges should not).
|
||||||
|
m_conf.ps.ztst = m_conf.depth.ztst;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -5619,15 +6014,13 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, const boo
|
|||||||
{
|
{
|
||||||
const GIFRegALPHA& ALPHA = m_context->ALPHA;
|
const GIFRegALPHA& ALPHA = m_context->ALPHA;
|
||||||
{
|
{
|
||||||
// AA1: Blending needs to be enabled on draw.
|
|
||||||
const bool AA1 = PRIM->AA1 && (m_vt.m_primclass == GS_LINE_CLASS || m_vt.m_primclass == GS_TRIANGLE_CLASS);
|
|
||||||
// PABE: Check condition early as an optimization, no blending when As < 128.
|
// PABE: Check condition early as an optimization, no blending when As < 128.
|
||||||
// For Cs*As + Cd*(1 - As) if As is 128 then blending can be disabled as well.
|
// For Cs*As + Cd*(1 - As) if As is 128 then blending can be disabled as well.
|
||||||
const bool PABE_skip = m_draw_env->PABE.PABE &&
|
const bool PABE_skip = m_draw_env->PABE.PABE &&
|
||||||
((GetAlphaMinMax().max < 128) || (GetAlphaMinMax().max == 128 && ALPHA.A == 0 && ALPHA.B == 1 && ALPHA.C == 0 && ALPHA.D == 1));
|
((GetAlphaMinMax().max < 128) || (GetAlphaMinMax().max == 128 && ALPHA.A == 0 && ALPHA.B == 1 && ALPHA.C == 0 && ALPHA.D == 1));
|
||||||
|
|
||||||
// No blending or coverage anti-aliasing so early exit
|
// No blending or coverage anti-aliasing so early exit
|
||||||
if (PABE_skip || !(NeedsBlending() || AA1))
|
if (PABE_skip || !(NeedsBlending() || IsCoverageAlpha()))
|
||||||
{
|
{
|
||||||
m_conf.blend = {};
|
m_conf.blend = {};
|
||||||
m_conf.ps.no_color1 = true;
|
m_conf.ps.no_color1 = true;
|
||||||
@ -7315,8 +7708,8 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
|
|||||||
const bool is_overlap_alpha = m_prim_overlap != PRIM_OVERLAP_NO && !(m_cached_ctx.FRAME.FBMSK & 0x80000000);
|
const bool is_overlap_alpha = m_prim_overlap != PRIM_OVERLAP_NO && !(m_cached_ctx.FRAME.FBMSK & 0x80000000);
|
||||||
if (m_cached_ctx.TEST.DATM == 0)
|
if (m_cached_ctx.TEST.DATM == 0)
|
||||||
{
|
{
|
||||||
// Some pixles are >= 1 so some fail, or some pixels get written but the written alpha matches or exceeds 1 (so overlap doesn't always pass).
|
// Some pixels are >= 1 so some fail, or some pixels get written but the written alpha matches or exceeds 1 (so overlap doesn't always pass).
|
||||||
DATE = rt->m_alpha_max >= 128 || (is_overlap_alpha && rt->m_alpha_min < 128 && (GetAlphaMinMax().max >= 128 || (m_context->FBA.FBA || IsCoverageAlpha())));
|
DATE = rt->m_alpha_max >= 128 || (is_overlap_alpha && rt->m_alpha_min < 128 && (GetAlphaMinMax().max >= 128 || (m_context->FBA.FBA || IsCoverageAlphaFixedOne())));
|
||||||
|
|
||||||
// All pixels fail.
|
// All pixels fail.
|
||||||
if (DATE && rt->m_alpha_min >= 128)
|
if (DATE && rt->m_alpha_min >= 128)
|
||||||
@ -7324,8 +7717,8 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// Some pixles are < 1 so some fail, or some pixels get written but the written alpha goes below 1 (so overlap doesn't always pass).
|
// Some pixels are < 1 so some fail, or some pixels get written but the written alpha goes below 1 (so overlap doesn't always pass).
|
||||||
DATE = rt->m_alpha_min < 128 || (is_overlap_alpha && rt->m_alpha_max >= 128 && (GetAlphaMinMax().min < 128 && !(m_context->FBA.FBA || IsCoverageAlpha())));
|
DATE = rt->m_alpha_min < 128 || (is_overlap_alpha && rt->m_alpha_max >= 128 && (GetAlphaMinMax().min < 128 && !(m_context->FBA.FBA || IsCoverageAlphaFixedOne())));
|
||||||
|
|
||||||
// All pixels fail.
|
// All pixels fail.
|
||||||
if (DATE && rt->m_alpha_max < 128)
|
if (DATE && rt->m_alpha_max < 128)
|
||||||
@ -7477,7 +7870,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
|
|||||||
}
|
}
|
||||||
// When Blending is disabled and Edge Anti Aliasing is enabled,
|
// When Blending is disabled and Edge Anti Aliasing is enabled,
|
||||||
// the output alpha is Coverage (which we force to 128) so DATE will fail/pass guaranteed on second pass.
|
// the output alpha is Coverage (which we force to 128) so DATE will fail/pass guaranteed on second pass.
|
||||||
else if (m_conf.colormask.wa && (m_context->FBA.FBA || IsCoverageAlpha()) && features.stencil_buffer)
|
else if (m_conf.colormask.wa && (m_context->FBA.FBA || IsCoverageAlphaFixedOne()) && features.stencil_buffer)
|
||||||
{
|
{
|
||||||
GL_PERF("DATE: Fast with FBA, all pixels will be >= 128");
|
GL_PERF("DATE: Fast with FBA, all pixels will be >= 128");
|
||||||
DATE_one = !m_cached_ctx.TEST.DATM;
|
DATE_one = !m_cached_ctx.TEST.DATM;
|
||||||
@ -7663,7 +8056,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
|
|||||||
}
|
}
|
||||||
|
|
||||||
// AA1: Set alpha source to coverage 128 when there is no alpha blending.
|
// AA1: Set alpha source to coverage 128 when there is no alpha blending.
|
||||||
m_conf.ps.fixed_one_a = IsCoverageAlpha();
|
m_conf.ps.fixed_one_a = IsCoverageAlphaFixedOne();
|
||||||
|
|
||||||
if ((!IsOpaque() || m_context->ALPHA.IsBlack()) && rt && ((m_conf.colormask.wrgba & 0x7) || (m_texture_shuffle && !m_copy_16bit_to_target_shuffle && !m_same_group_texture_shuffle)))
|
if ((!IsOpaque() || m_context->ALPHA.IsBlack()) && rt && ((m_conf.colormask.wrgba & 0x7) || (m_texture_shuffle && !m_copy_16bit_to_target_shuffle && !m_same_group_texture_shuffle)))
|
||||||
{
|
{
|
||||||
@ -8030,6 +8423,23 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
|
|||||||
m_conf.require_full_barrier = false;
|
m_conf.require_full_barrier = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((features.texture_barrier || features.multidraw_fb_copy) && UsingAccuratePrims() &&
|
||||||
|
(m_vt.m_primclass == GS_TRIANGLE_CLASS) && PRIM->AA1 && m_conf.ps.zclamp)
|
||||||
|
{
|
||||||
|
// Manual depth test in the shader requires full barrier.
|
||||||
|
if (m_prim_overlap == PRIM_OVERLAP_NO)
|
||||||
|
m_conf.require_one_barrier = true;
|
||||||
|
else
|
||||||
|
m_conf.require_full_barrier = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (m_conf.require_full_barrier && (g_gs_device->Features().texture_barrier || g_gs_device->Features().multidraw_fb_copy))
|
||||||
|
{
|
||||||
|
ComputeDrawlistGetSize(rt->m_scale);
|
||||||
|
m_conf.drawlist = &m_drawlist;
|
||||||
|
m_conf.drawlist_bbox = &m_drawlist_bbox;
|
||||||
|
}
|
||||||
|
|
||||||
// rs
|
// rs
|
||||||
const GSVector4i hacked_scissor = m_channel_shuffle ? GSVector4i::cxpr(0, 0, 1024, 1024) : m_context->scissor.in;
|
const GSVector4i hacked_scissor = m_channel_shuffle ? GSVector4i::cxpr(0, 0, 1024, 1024) : m_context->scissor.in;
|
||||||
const GSVector4i scissor(GSVector4i(GSVector4(rtscale) * GSVector4(hacked_scissor)).rintersect(GSVector4i::loadh(rtsize)));
|
const GSVector4i scissor(GSVector4i(GSVector4(rtscale) * GSVector4(hacked_scissor)).rintersect(GSVector4i::loadh(rtsize)));
|
||||||
@ -8124,14 +8534,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
|
|||||||
m_conf.cb_ps.FogColor_AREF.a = m_conf.alpha_second_pass.ps_aref;
|
m_conf.cb_ps.FogColor_AREF.a = m_conf.alpha_second_pass.ps_aref;
|
||||||
m_conf.alpha_second_pass.enable = false;
|
m_conf.alpha_second_pass.enable = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_conf.require_full_barrier && (g_gs_device->Features().texture_barrier || g_gs_device->Features().multidraw_fb_copy))
|
|
||||||
{
|
|
||||||
ComputeDrawlistGetSize(rt->m_scale);
|
|
||||||
m_conf.drawlist = &m_drawlist;
|
|
||||||
m_conf.drawlist_bbox = &m_drawlist_bbox;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!m_channel_shuffle_width)
|
if (!m_channel_shuffle_width)
|
||||||
g_gs_device->RenderHW(m_conf);
|
g_gs_device->RenderHW(m_conf);
|
||||||
else
|
else
|
||||||
@ -9574,3 +9977,10 @@ std::size_t GSRendererHW::ComputeDrawlistGetSize(float scale)
|
|||||||
}
|
}
|
||||||
return m_drawlist.size();
|
return m_drawlist.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool GSRendererHW::IsCoverageAlphaSupported()
|
||||||
|
{
|
||||||
|
return IsCoverageAlpha() &&
|
||||||
|
((m_vt.m_primclass == GS_LINE_CLASS || m_vt.m_primclass == GS_TRIANGLE_CLASS) &&
|
||||||
|
g_gs_device->Features().accurate_prims);
|
||||||
|
}
|
||||||
|
|||||||
@ -137,6 +137,21 @@ private:
|
|||||||
bool IsUsingCsInBlend();
|
bool IsUsingCsInBlend();
|
||||||
bool IsUsingAsInBlend();
|
bool IsUsingAsInBlend();
|
||||||
|
|
||||||
|
void GetAccuratePrimsEdgeVertexAttributes(
|
||||||
|
const GSVertex& vtx0,
|
||||||
|
const GSVertex& vtx1,
|
||||||
|
const GSVertex* vtx_provoking,
|
||||||
|
AccuratePrimsEdgeData& data);
|
||||||
|
void ExpandAccurateTrianglesEdge(
|
||||||
|
const GSVertex& vtx0,
|
||||||
|
const GSVertex& vtx1,
|
||||||
|
const GSVertex* vtx_provoking,
|
||||||
|
const GSVector4i& edge0,
|
||||||
|
const GSVector4i& edge1,
|
||||||
|
bool top_left,
|
||||||
|
AccuratePrimsEdgeData& data,
|
||||||
|
GSVertex* vertex_out);
|
||||||
|
|
||||||
// We modify some of the context registers to optimize away unnecessary operations.
|
// We modify some of the context registers to optimize away unnecessary operations.
|
||||||
// Instead of messing with the real context, we copy them and use those instead.
|
// Instead of messing with the real context, we copy them and use those instead.
|
||||||
struct HWCachedCtx
|
struct HWCachedCtx
|
||||||
@ -205,6 +220,8 @@ private:
|
|||||||
std::unique_ptr<GSTextureCacheSW::Texture> m_sw_texture[7 + 1];
|
std::unique_ptr<GSTextureCacheSW::Texture> m_sw_texture[7 + 1];
|
||||||
std::unique_ptr<GSVirtualAlignedClass<32>> m_sw_rasterizer;
|
std::unique_ptr<GSVirtualAlignedClass<32>> m_sw_rasterizer;
|
||||||
|
|
||||||
|
std::vector<AccuratePrimsEdgeData> m_accurate_prims_edge_data;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSRendererHW();
|
GSRendererHW();
|
||||||
virtual ~GSRendererHW() override;
|
virtual ~GSRendererHW() override;
|
||||||
@ -221,6 +238,8 @@ public:
|
|||||||
void Lines2Sprites();
|
void Lines2Sprites();
|
||||||
bool VerifyIndices();
|
bool VerifyIndices();
|
||||||
void ExpandLineIndices();
|
void ExpandLineIndices();
|
||||||
|
void ExpandAccurateLinesVertices();
|
||||||
|
void ExpandAccurateTrianglesVertices();
|
||||||
void ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, bool& shuffle_across, GSTextureCache::Target* rt, GSTextureCache::Source* tex);
|
void ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, bool& shuffle_across, GSTextureCache::Target* rt, GSTextureCache::Source* tex);
|
||||||
GSVector4 RealignTargetTextureCoordinate(const GSTextureCache::Source* tex);
|
GSVector4 RealignTargetTextureCoordinate(const GSTextureCache::Source* tex);
|
||||||
GSVector4i ComputeBoundingBox(const GSVector2i& rtsize, float rtscale);
|
GSVector4i ComputeBoundingBox(const GSVector2i& rtsize, float rtscale);
|
||||||
@ -273,4 +292,6 @@ public:
|
|||||||
|
|
||||||
/// Compute the drawlist (if not already present) and bounding boxes for the current draw.
|
/// Compute the drawlist (if not already present) and bounding boxes for the current draw.
|
||||||
std::size_t ComputeDrawlistGetSize(float scale);
|
std::size_t ComputeDrawlistGetSize(float scale);
|
||||||
|
|
||||||
|
bool IsCoverageAlphaSupported() override;
|
||||||
};
|
};
|
||||||
|
|||||||
@ -94,6 +94,11 @@ struct GSMTLMainVSUniform
|
|||||||
vector_float2 texture_offset;
|
vector_float2 texture_offset;
|
||||||
vector_float2 point_size;
|
vector_float2 point_size;
|
||||||
uint max_depth;
|
uint max_depth;
|
||||||
|
uint _pad0;
|
||||||
|
uint base_vertex;
|
||||||
|
uint _pad1;
|
||||||
|
uint _pad2;
|
||||||
|
uint _pad3;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct GSMTLMainPSUniform
|
struct GSMTLMainPSUniform
|
||||||
@ -134,6 +139,8 @@ struct GSMTLMainPSUniform
|
|||||||
matrix_float4x4 dither_matrix;
|
matrix_float4x4 dither_matrix;
|
||||||
|
|
||||||
vector_float4 scale_factor;
|
vector_float4 scale_factor;
|
||||||
|
|
||||||
|
vector_uint4 accurate_prims_base_index;
|
||||||
};
|
};
|
||||||
|
|
||||||
enum GSMTLAttributes
|
enum GSMTLAttributes
|
||||||
|
|||||||
@ -310,10 +310,10 @@ namespace
|
|||||||
};
|
};
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
std::unique_ptr<GLStreamBuffer> GLStreamBuffer::Create(GLenum target, u32 size)
|
std::unique_ptr<GLStreamBuffer> GLStreamBuffer::Create(GLenum target, u32 size, bool nonsyncing)
|
||||||
{
|
{
|
||||||
std::unique_ptr<GLStreamBuffer> buf;
|
std::unique_ptr<GLStreamBuffer> buf;
|
||||||
if (GLAD_GL_VERSION_4_4 || GLAD_GL_ARB_buffer_storage || GLAD_GL_EXT_buffer_storage)
|
if (!nonsyncing && (GLAD_GL_VERSION_4_4 || GLAD_GL_ARB_buffer_storage || GLAD_GL_EXT_buffer_storage))
|
||||||
{
|
{
|
||||||
buf = BufferStorageStreamBuffer::Create(target, size);
|
buf = BufferStorageStreamBuffer::Create(target, size);
|
||||||
if (buf)
|
if (buf)
|
||||||
|
|||||||
@ -38,7 +38,7 @@ public:
|
|||||||
/// Returns the minimum granularity of blocks which sync objects will be created around.
|
/// Returns the minimum granularity of blocks which sync objects will be created around.
|
||||||
virtual u32 GetChunkSize() const = 0;
|
virtual u32 GetChunkSize() const = 0;
|
||||||
|
|
||||||
static std::unique_ptr<GLStreamBuffer> Create(GLenum target, u32 size);
|
static std::unique_ptr<GLStreamBuffer> Create(GLenum target, u32 size, bool nonsyncing = false);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
GLStreamBuffer(GLenum target, GLuint buffer_id, u32 size);
|
GLStreamBuffer(GLenum target, GLuint buffer_id, u32 size);
|
||||||
|
|||||||
@ -26,6 +26,7 @@ static constexpr u32 g_ps_cb_index = 0;
|
|||||||
|
|
||||||
static constexpr u32 VERTEX_BUFFER_SIZE = 32 * 1024 * 1024;
|
static constexpr u32 VERTEX_BUFFER_SIZE = 32 * 1024 * 1024;
|
||||||
static constexpr u32 INDEX_BUFFER_SIZE = 16 * 1024 * 1024;
|
static constexpr u32 INDEX_BUFFER_SIZE = 16 * 1024 * 1024;
|
||||||
|
static constexpr u32 ACCURATE_PRIMS_BUFFER_SIZE = 32 * 1024 * 1024;
|
||||||
static constexpr u32 VERTEX_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024;
|
static constexpr u32 VERTEX_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024;
|
||||||
static constexpr u32 FRAGMENT_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024;
|
static constexpr u32 FRAGMENT_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024;
|
||||||
static constexpr u32 TEXTURE_UPLOAD_BUFFER_SIZE = 128 * 1024 * 1024;
|
static constexpr u32 TEXTURE_UPLOAD_BUFFER_SIZE = 128 * 1024 * 1024;
|
||||||
@ -258,10 +259,18 @@ bool GSDeviceOGL::Create(GSVSyncMode vsync_mode, bool allow_present_throttle)
|
|||||||
|
|
||||||
m_vertex_stream_buffer = GLStreamBuffer::Create(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE);
|
m_vertex_stream_buffer = GLStreamBuffer::Create(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE);
|
||||||
m_index_stream_buffer = GLStreamBuffer::Create(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE);
|
m_index_stream_buffer = GLStreamBuffer::Create(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE);
|
||||||
|
if (m_features.accurate_prims)
|
||||||
|
{
|
||||||
|
// Performance note: prefer a non-syncing buffer for accurate prims so that it is more likely to be GPU local.
|
||||||
|
// Rationale: we expect this buffer to be updated relatively rarely and it's used as a pixel shader resource.
|
||||||
|
m_accurate_prims_stream_buffer = GLStreamBuffer::Create(GL_ARRAY_BUFFER, ACCURATE_PRIMS_BUFFER_SIZE, true);
|
||||||
|
}
|
||||||
m_vertex_uniform_stream_buffer = GLStreamBuffer::Create(GL_UNIFORM_BUFFER, VERTEX_UNIFORM_BUFFER_SIZE);
|
m_vertex_uniform_stream_buffer = GLStreamBuffer::Create(GL_UNIFORM_BUFFER, VERTEX_UNIFORM_BUFFER_SIZE);
|
||||||
m_fragment_uniform_stream_buffer = GLStreamBuffer::Create(GL_UNIFORM_BUFFER, FRAGMENT_UNIFORM_BUFFER_SIZE);
|
m_fragment_uniform_stream_buffer = GLStreamBuffer::Create(GL_UNIFORM_BUFFER, FRAGMENT_UNIFORM_BUFFER_SIZE);
|
||||||
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &m_uniform_buffer_alignment);
|
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &m_uniform_buffer_alignment);
|
||||||
if (!m_vertex_stream_buffer || !m_index_stream_buffer || !m_vertex_uniform_stream_buffer || !m_fragment_uniform_stream_buffer)
|
if (!m_vertex_stream_buffer || !m_index_stream_buffer ||
|
||||||
|
(m_features.accurate_prims && !m_accurate_prims_stream_buffer) ||
|
||||||
|
!m_vertex_uniform_stream_buffer || !m_fragment_uniform_stream_buffer)
|
||||||
{
|
{
|
||||||
Host::ReportErrorAsync("GS", "Failed to create vertex/index/uniform streaming buffers");
|
Host::ReportErrorAsync("GS", "Failed to create vertex/index/uniform streaming buffers");
|
||||||
return false;
|
return false;
|
||||||
@ -303,6 +312,11 @@ bool GSDeviceOGL::Create(GSVSyncMode vsync_mode, bool allow_present_throttle)
|
|||||||
glBufferData(GL_ELEMENT_ARRAY_BUFFER, EXPAND_BUFFER_SIZE, expand_data.get(), GL_STATIC_DRAW);
|
glBufferData(GL_ELEMENT_ARRAY_BUFFER, EXPAND_BUFFER_SIZE, expand_data.get(), GL_STATIC_DRAW);
|
||||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 2, m_vertex_stream_buffer->GetGLBufferId(), 0, VERTEX_BUFFER_SIZE);
|
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 2, m_vertex_stream_buffer->GetGLBufferId(), 0, VERTEX_BUFFER_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (m_features.accurate_prims)
|
||||||
|
{
|
||||||
|
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 3, m_accurate_prims_stream_buffer->GetGLBufferId(), 0, ACCURATE_PRIMS_BUFFER_SIZE);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ****************************************************************
|
// ****************************************************************
|
||||||
@ -770,6 +784,8 @@ bool GSDeviceOGL::CheckFeatures()
|
|||||||
m_features.line_expand ? "hardware" : (m_features.vs_expand ? "vertex expanding" : "UNSUPPORTED"),
|
m_features.line_expand ? "hardware" : (m_features.vs_expand ? "vertex expanding" : "UNSUPPORTED"),
|
||||||
m_features.vs_expand ? "vertex expanding" : "CPU");
|
m_features.vs_expand ? "vertex expanding" : "CPU");
|
||||||
|
|
||||||
|
m_features.accurate_prims = GSConfig.HWAccuratePrims;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -840,6 +856,7 @@ void GSDeviceOGL::DestroyResources()
|
|||||||
|
|
||||||
m_fragment_uniform_stream_buffer.reset();
|
m_fragment_uniform_stream_buffer.reset();
|
||||||
m_vertex_uniform_stream_buffer.reset();
|
m_vertex_uniform_stream_buffer.reset();
|
||||||
|
m_accurate_prims_stream_buffer.reset();
|
||||||
|
|
||||||
glBindVertexArray(0);
|
glBindVertexArray(0);
|
||||||
if (m_expand_ibo != 0)
|
if (m_expand_ibo != 0)
|
||||||
@ -1330,8 +1347,9 @@ std::string GSDeviceOGL::GetVSSource(VSSelector sel)
|
|||||||
std::string macro = fmt::format("#define VS_FST {}\n", static_cast<u32>(sel.fst))
|
std::string macro = fmt::format("#define VS_FST {}\n", static_cast<u32>(sel.fst))
|
||||||
+ fmt::format("#define VS_IIP {}\n", static_cast<u32>(sel.iip))
|
+ fmt::format("#define VS_IIP {}\n", static_cast<u32>(sel.iip))
|
||||||
+ fmt::format("#define VS_POINT_SIZE {}\n", static_cast<u32>(sel.point_size))
|
+ fmt::format("#define VS_POINT_SIZE {}\n", static_cast<u32>(sel.point_size))
|
||||||
+ fmt::format("#define VS_EXPAND {}\n", static_cast<int>(sel.expand));
|
+ fmt::format("#define VS_EXPAND {}\n", static_cast<int>(sel.expand))
|
||||||
|
+ fmt::format("#define VS_ACCURATE_PRIMS {}\n", static_cast<int>(sel.accurate_prims))
|
||||||
|
;
|
||||||
std::string src = GenGlslHeader("vs_main", GL_VERTEX_SHADER, macro);
|
std::string src = GenGlslHeader("vs_main", GL_VERTEX_SHADER, macro);
|
||||||
src += m_shader_tfx_vgs;
|
src += m_shader_tfx_vgs;
|
||||||
return src;
|
return src;
|
||||||
@ -1396,6 +1414,10 @@ std::string GSDeviceOGL::GetPSSource(const PSSelector& sel)
|
|||||||
+ fmt::format("#define PS_SCANMSK {}\n", sel.scanmsk)
|
+ fmt::format("#define PS_SCANMSK {}\n", sel.scanmsk)
|
||||||
+ fmt::format("#define PS_NO_COLOR {}\n", sel.no_color)
|
+ fmt::format("#define PS_NO_COLOR {}\n", sel.no_color)
|
||||||
+ fmt::format("#define PS_NO_COLOR1 {}\n", sel.no_color1)
|
+ fmt::format("#define PS_NO_COLOR1 {}\n", sel.no_color1)
|
||||||
|
+ fmt::format("#define PS_ACCURATE_PRIMS {}\n", sel.accurate_prims)
|
||||||
|
+ fmt::format("#define PS_ACCURATE_PRIMS_AA {}\n", sel.accurate_prims_aa)
|
||||||
|
+ fmt::format("#define PS_ACCURATE_PRIMS_AA_ABE {}\n", sel.accurate_prims_aa_abe)
|
||||||
|
+ fmt::format("#define PS_ZTST {}\n", sel.ztst)
|
||||||
;
|
;
|
||||||
|
|
||||||
std::string src = GenGlslHeader("ps_main", GL_FRAGMENT_SHADER, macro);
|
std::string src = GenGlslHeader("ps_main", GL_FRAGMENT_SHADER, macro);
|
||||||
@ -2012,6 +2034,21 @@ void GSDeviceOGL::ClearSamplerCache()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Uploads the draw's accurate-prims edge data into the dedicated stream buffer and stores
// the base offsets the shaders use into the draw's VS/PS constant buffers.
// Draws that do not use accurate prims are left untouched.
void GSDeviceOGL::SetupAccuratePrims(GSHWDrawConfig& config)
{
	if (!config.accurate_prims)
		return;

	const u32 num_entries = config.accurate_prims_edge_data->size();
	const u32 num_bytes = num_entries * sizeof(AccuratePrimsEdgeData);

	// Map with an alignment of one entry; the mapping's index_aligned is presumably expressed
	// in units of that alignment (i.e. whole entries) — confirm against GLStreamBuffer::Map().
	auto mapping = m_accurate_prims_stream_buffer->Map(sizeof(AccuratePrimsEdgeData), num_bytes);
	std::memcpy(mapping.pointer, config.accurate_prims_edge_data->data(), num_bytes);
	m_accurate_prims_stream_buffer->Unmap(num_bytes);

	// Publish where this draw's vertices and edge data start.
	config.cb_vs.base_vertex.x = m_vertex.start;
	config.cb_ps.accurate_prims_base_index.x = mapping.index_aligned;
}
|
||||||
|
|
||||||
bool GSDeviceOGL::CreateCASPrograms()
|
bool GSDeviceOGL::CreateCASPrograms()
|
||||||
{
|
{
|
||||||
std::optional<std::string> cas_source = ReadShaderSource("shaders/opengl/cas.glsl");
|
std::optional<std::string> cas_source = ReadShaderSource("shaders/opengl/cas.glsl");
|
||||||
@ -2525,6 +2562,8 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
|
|||||||
IASetVertexBuffer(config.verts, config.nverts, GetVertexAlignment(config.vs.expand));
|
IASetVertexBuffer(config.verts, config.nverts, GetVertexAlignment(config.vs.expand));
|
||||||
m_vertex.start *= GetExpansionFactor(config.vs.expand);
|
m_vertex.start *= GetExpansionFactor(config.vs.expand);
|
||||||
|
|
||||||
|
SetupAccuratePrims(config);
|
||||||
|
|
||||||
if (config.vs.UseExpandIndexBuffer())
|
if (config.vs.UseExpandIndexBuffer())
|
||||||
{
|
{
|
||||||
IASetVAO(m_expand_vao);
|
IASetVAO(m_expand_vao);
|
||||||
@ -2554,6 +2593,8 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
|
|||||||
PSSetShaderResource(2, draw_rt_clone);
|
PSSetShaderResource(2, draw_rt_clone);
|
||||||
else if (config.require_one_barrier || config.require_full_barrier)
|
else if (config.require_one_barrier || config.require_full_barrier)
|
||||||
PSSetShaderResource(2, colclip_rt ? colclip_rt : config.rt);
|
PSSetShaderResource(2, colclip_rt ? colclip_rt : config.rt);
|
||||||
|
if ((config.require_one_barrier || config.require_full_barrier) && config.ps.IsFeedbackLoopDepth())
|
||||||
|
PSSetShaderResource(4, config.ds);
|
||||||
|
|
||||||
SetupSampler(config.sampler);
|
SetupSampler(config.sampler);
|
||||||
|
|
||||||
@ -2671,7 +2712,7 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
|
|||||||
OMSetRenderTargets(draw_rt, draw_ds, &config.scissor);
|
OMSetRenderTargets(draw_rt, draw_ds, &config.scissor);
|
||||||
OMSetColorMaskState(config.colormask);
|
OMSetColorMaskState(config.colormask);
|
||||||
SetupOM(config.depth);
|
SetupOM(config.depth);
|
||||||
|
|
||||||
// Clear stencil as close as possible to the RT bind, to avoid framebuffer swaps.
|
// Clear stencil as close as possible to the RT bind, to avoid framebuffer swaps.
|
||||||
if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::StencilOne && m_features.texture_barrier)
|
if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::StencilOne && m_features.texture_barrier)
|
||||||
{
|
{
|
||||||
@ -2761,7 +2802,7 @@ void GSDeviceOGL::SendHWDraw(const GSHWDrawConfig& config, bool one_barrier, boo
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef PCSX2_DEVBUILD
|
#ifdef PCSX2_DEVBUILD
|
||||||
if ((one_barrier || full_barrier) && !config.ps.IsFeedbackLoop()) [[unlikely]]
|
if ((one_barrier || full_barrier) && !(config.ps.IsFeedbackLoop() || config.ps.IsFeedbackLoopDepth())) [[unlikely]]
|
||||||
Console.Warning("OpenGL: Possible unnecessary barrier detected.");
|
Console.Warning("OpenGL: Possible unnecessary barrier detected.");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|||||||
@ -157,6 +157,7 @@ private:
|
|||||||
|
|
||||||
std::unique_ptr<GLStreamBuffer> m_vertex_stream_buffer;
|
std::unique_ptr<GLStreamBuffer> m_vertex_stream_buffer;
|
||||||
std::unique_ptr<GLStreamBuffer> m_index_stream_buffer;
|
std::unique_ptr<GLStreamBuffer> m_index_stream_buffer;
|
||||||
|
std::unique_ptr<GLStreamBuffer> m_accurate_prims_stream_buffer;
|
||||||
GLuint m_expand_ibo = 0;
|
GLuint m_expand_ibo = 0;
|
||||||
GLuint m_vao = 0;
|
GLuint m_vao = 0;
|
||||||
GLuint m_expand_vao = 0;
|
GLuint m_expand_vao = 0;
|
||||||
@ -346,6 +347,7 @@ public:
|
|||||||
void IASetPrimitiveTopology(GLenum topology);
|
void IASetPrimitiveTopology(GLenum topology);
|
||||||
void IASetVertexBuffer(const void* vertices, size_t count, size_t align_multiplier = 1);
|
void IASetVertexBuffer(const void* vertices, size_t count, size_t align_multiplier = 1);
|
||||||
void IASetIndexBuffer(const void* index, size_t count);
|
void IASetIndexBuffer(const void* index, size_t count);
|
||||||
|
void SetupAccuratePrims(GSHWDrawConfig& config);
|
||||||
|
|
||||||
void PSSetShaderResource(int i, GSTexture* sr);
|
void PSSetShaderResource(int i, GSTexture* sr);
|
||||||
void PSSetSamplerState(GLuint ss);
|
void PSSetSamplerState(GLuint ss);
|
||||||
|
|||||||
@ -82,6 +82,8 @@ protected:
|
|||||||
template <u32 primclass>
|
template <u32 primclass>
|
||||||
void RewriteVerticesIfSTOverflow();
|
void RewriteVerticesIfSTOverflow();
|
||||||
|
|
||||||
|
// This renderer reports coverage-alpha support unconditionally.
bool IsCoverageAlphaSupported() override { return true; }
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSRendererSW(int threads);
|
GSRendererSW(int threads);
|
||||||
~GSRendererSW() override;
|
~GSRendererSW() override;
|
||||||
|
|||||||
@ -41,6 +41,7 @@ enum : u32
|
|||||||
|
|
||||||
VERTEX_BUFFER_SIZE = 32 * 1024 * 1024,
|
VERTEX_BUFFER_SIZE = 32 * 1024 * 1024,
|
||||||
INDEX_BUFFER_SIZE = 16 * 1024 * 1024,
|
INDEX_BUFFER_SIZE = 16 * 1024 * 1024,
|
||||||
|
ACCURATE_PRIMS_BUFFER_SIZE = 32 * 1024 * 1024,
|
||||||
VERTEX_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024,
|
VERTEX_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024,
|
||||||
FRAGMENT_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024,
|
FRAGMENT_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024,
|
||||||
TEXTURE_BUFFER_SIZE = 64 * 1024 * 1024,
|
TEXTURE_BUFFER_SIZE = 64 * 1024 * 1024,
|
||||||
@ -932,7 +933,7 @@ bool GSDeviceVK::CreateGlobalDescriptorPool()
|
|||||||
{
|
{
|
||||||
static constexpr const VkDescriptorPoolSize pool_sizes[] = {
|
static constexpr const VkDescriptorPoolSize pool_sizes[] = {
|
||||||
{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 2},
|
{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 2},
|
||||||
{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2},
|
{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 3},
|
||||||
};
|
};
|
||||||
|
|
||||||
VkDescriptorPoolCreateInfo pool_create_info = {VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, nullptr,
|
VkDescriptorPoolCreateInfo pool_create_info = {VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, nullptr,
|
||||||
@ -1501,12 +1502,13 @@ VkRenderPass GSDeviceVK::CreateCachedRenderPass(RenderPassCacheKey key)
|
|||||||
VkAttachmentReference* color_reference_ptr = nullptr;
|
VkAttachmentReference* color_reference_ptr = nullptr;
|
||||||
VkAttachmentReference depth_reference;
|
VkAttachmentReference depth_reference;
|
||||||
VkAttachmentReference* depth_reference_ptr = nullptr;
|
VkAttachmentReference* depth_reference_ptr = nullptr;
|
||||||
VkAttachmentReference input_reference;
|
std::array<VkAttachmentReference, 2> input_reference;
|
||||||
VkAttachmentReference* input_reference_ptr = nullptr;
|
u32 num_subpass_inputs = 0;
|
||||||
VkSubpassDependency subpass_dependency;
|
std::array<VkSubpassDependency, 2> subpass_dependency;
|
||||||
VkSubpassDependency* subpass_dependency_ptr = nullptr;
|
u32 num_subpass_dependencies = 0;
|
||||||
std::array<VkAttachmentDescription, 2> attachments;
|
std::array<VkAttachmentDescription, 2> attachments;
|
||||||
u32 num_attachments = 0;
|
u32 num_attachments = 0;
|
||||||
|
bool actual_color_feedback_loop = false;
|
||||||
if (key.color_format != VK_FORMAT_UNDEFINED)
|
if (key.color_format != VK_FORMAT_UNDEFINED)
|
||||||
{
|
{
|
||||||
const VkImageLayout layout =
|
const VkImageLayout layout =
|
||||||
@ -1522,28 +1524,32 @@ VkRenderPass GSDeviceVK::CreateCachedRenderPass(RenderPassCacheKey key)
|
|||||||
|
|
||||||
if (key.color_feedback_loop)
|
if (key.color_feedback_loop)
|
||||||
{
|
{
|
||||||
|
actual_color_feedback_loop = true;
|
||||||
|
|
||||||
if (!UseFeedbackLoopLayout())
|
if (!UseFeedbackLoopLayout())
|
||||||
{
|
{
|
||||||
input_reference.attachment = num_attachments;
|
pxAssert(num_subpass_inputs == 0); // Must always have the color input first.
|
||||||
input_reference.layout = layout;
|
input_reference[num_subpass_inputs].attachment = num_attachments;
|
||||||
input_reference_ptr = &input_reference;
|
input_reference[num_subpass_inputs].layout = layout;
|
||||||
|
num_subpass_inputs++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!m_features.framebuffer_fetch)
|
if (!m_features.framebuffer_fetch)
|
||||||
{
|
{
|
||||||
|
pxAssert(num_subpass_dependencies == 0); // Must always have the color input first.
|
||||||
// don't need the framebuffer-local dependency when we have rasterization order attachment access
|
// don't need the framebuffer-local dependency when we have rasterization order attachment access
|
||||||
subpass_dependency.srcSubpass = 0;
|
subpass_dependency[num_subpass_dependencies].srcSubpass = 0;
|
||||||
subpass_dependency.dstSubpass = 0;
|
subpass_dependency[num_subpass_dependencies].dstSubpass = 0;
|
||||||
subpass_dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
|
subpass_dependency[num_subpass_dependencies].srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
|
||||||
subpass_dependency.dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
|
subpass_dependency[num_subpass_dependencies].dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
|
||||||
subpass_dependency.srcAccessMask =
|
subpass_dependency[num_subpass_dependencies].srcAccessMask =
|
||||||
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
|
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
|
||||||
subpass_dependency.dstAccessMask =
|
subpass_dependency[num_subpass_dependencies].dstAccessMask =
|
||||||
UseFeedbackLoopLayout() ? VK_ACCESS_SHADER_READ_BIT : VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
|
UseFeedbackLoopLayout() ? VK_ACCESS_SHADER_READ_BIT : VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
|
||||||
subpass_dependency.dependencyFlags =
|
subpass_dependency[num_subpass_dependencies].dependencyFlags =
|
||||||
UseFeedbackLoopLayout() ? (VK_DEPENDENCY_BY_REGION_BIT | VK_DEPENDENCY_FEEDBACK_LOOP_BIT_EXT) :
|
UseFeedbackLoopLayout() ? (VK_DEPENDENCY_BY_REGION_BIT | VK_DEPENDENCY_FEEDBACK_LOOP_BIT_EXT) :
|
||||||
VK_DEPENDENCY_BY_REGION_BIT;
|
VK_DEPENDENCY_BY_REGION_BIT;
|
||||||
subpass_dependency_ptr = &subpass_dependency;
|
num_subpass_dependencies++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1562,6 +1568,41 @@ VkRenderPass GSDeviceVK::CreateCachedRenderPass(RenderPassCacheKey key)
|
|||||||
depth_reference.attachment = num_attachments;
|
depth_reference.attachment = num_attachments;
|
||||||
depth_reference.layout = layout;
|
depth_reference.layout = layout;
|
||||||
depth_reference_ptr = &depth_reference;
|
depth_reference_ptr = &depth_reference;
|
||||||
|
|
||||||
|
if (actual_color_feedback_loop && key.depth_sampling)
|
||||||
|
{
|
||||||
|
// Note: We only allow depth to be bound in a feedback loop if color is already bound as such.
|
||||||
|
// This is partly because it doesn't seem likely that we will ever need a depth feedback loop
|
||||||
|
// without a color feedback loop and to simplify the indices for subpass inputs (0 for color; 1 for depth);
|
||||||
|
|
||||||
|
if (!UseFeedbackLoopLayout())
|
||||||
|
{
|
||||||
|
pxAssert(num_subpass_inputs == 1); // Must always have the color input first.
|
||||||
|
input_reference[num_subpass_inputs].attachment = num_attachments;
|
||||||
|
input_reference[num_subpass_inputs].layout = layout;
|
||||||
|
num_subpass_inputs++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!m_features.framebuffer_fetch)
|
||||||
|
{
|
||||||
|
pxAssert(num_subpass_dependencies == 1); // Must always have the color input first.
|
||||||
|
// don't need the framebuffer-local dependency when we have rasterization order attachment access
|
||||||
|
subpass_dependency[num_subpass_dependencies].srcSubpass = 0;
|
||||||
|
subpass_dependency[num_subpass_dependencies].dstSubpass = 0;
|
||||||
|
subpass_dependency[num_subpass_dependencies].srcStageMask =
|
||||||
|
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
|
||||||
|
subpass_dependency[num_subpass_dependencies].dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
|
||||||
|
subpass_dependency[num_subpass_dependencies].srcAccessMask =
|
||||||
|
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
|
||||||
|
subpass_dependency[num_subpass_dependencies].dstAccessMask =
|
||||||
|
UseFeedbackLoopLayout() ? VK_ACCESS_SHADER_READ_BIT : VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
|
||||||
|
subpass_dependency[num_subpass_dependencies].dependencyFlags =
|
||||||
|
UseFeedbackLoopLayout() ? (VK_DEPENDENCY_BY_REGION_BIT | VK_DEPENDENCY_FEEDBACK_LOOP_BIT_EXT) :
|
||||||
|
VK_DEPENDENCY_BY_REGION_BIT;
|
||||||
|
num_subpass_dependencies++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
num_attachments++;
|
num_attachments++;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1569,11 +1610,11 @@ VkRenderPass GSDeviceVK::CreateCachedRenderPass(RenderPassCacheKey key)
|
|||||||
(key.color_feedback_loop && m_optional_extensions.vk_ext_rasterization_order_attachment_access) ?
|
(key.color_feedback_loop && m_optional_extensions.vk_ext_rasterization_order_attachment_access) ?
|
||||||
VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_COLOR_ACCESS_BIT_EXT :
|
VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_COLOR_ACCESS_BIT_EXT :
|
||||||
0;
|
0;
|
||||||
const VkSubpassDescription subpass = {subpass_flags, VK_PIPELINE_BIND_POINT_GRAPHICS, input_reference_ptr ? 1u : 0u,
|
const VkSubpassDescription subpass = {subpass_flags, VK_PIPELINE_BIND_POINT_GRAPHICS, num_subpass_inputs,
|
||||||
input_reference_ptr ? input_reference_ptr : nullptr, color_reference_ptr ? 1u : 0u,
|
num_subpass_inputs ? input_reference.data() : nullptr, color_reference_ptr ? 1u : 0u,
|
||||||
color_reference_ptr ? color_reference_ptr : nullptr, nullptr, depth_reference_ptr, 0, nullptr};
|
color_reference_ptr ? color_reference_ptr : nullptr, nullptr, depth_reference_ptr, 0, nullptr};
|
||||||
const VkRenderPassCreateInfo pass_info = {VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, nullptr, 0u, num_attachments,
|
const VkRenderPassCreateInfo pass_info = {VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, nullptr, 0u, num_attachments,
|
||||||
attachments.data(), 1u, &subpass, subpass_dependency_ptr ? 1u : 0u, subpass_dependency_ptr};
|
attachments.data(), 1u, &subpass, num_subpass_dependencies, num_subpass_dependencies ? subpass_dependency.data() : nullptr};
|
||||||
|
|
||||||
VkRenderPass pass;
|
VkRenderPass pass;
|
||||||
const VkResult res = vkCreateRenderPass(m_device, &pass_info, nullptr, &pass);
|
const VkResult res = vkCreateRenderPass(m_device, &pass_info, nullptr, &pass);
|
||||||
@ -2679,6 +2720,8 @@ bool GSDeviceVK::CheckFeatures()
|
|||||||
|
|
||||||
m_max_texture_size = m_device_properties.limits.maxImageDimension2D;
|
m_max_texture_size = m_device_properties.limits.maxImageDimension2D;
|
||||||
|
|
||||||
|
m_features.accurate_prims = GSConfig.HWAccuratePrims;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3363,6 +3406,135 @@ void GSDeviceVK::IASetIndexBuffer(const void* index, size_t count)
|
|||||||
SetIndexBuffer(m_index_stream_buffer.GetBuffer());
|
SetIndexBuffer(m_index_stream_buffer.GetBuffer());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Uploads the draw's accurate-prims edge data into the accurate prims storage buffer.
//
// The data is first written to a host-visible upload buffer (the shared texture stream buffer,
// or a one-off staging buffer for large payloads), then copied into the accurate prims buffer
// with vkCmdCopyBuffer, followed by a transfer -> fragment-shader barrier. On success the byte
// offset of the uploaded region is stored in m_accurate_prims_stream_buffer_offset.
//
// Does nothing when config.accurate_prims is not set. If no upload buffer can be obtained an
// error is logged and the function returns without committing the region or updating the offset.
void GSDeviceVK::SetupAccuratePrimsBuffer(GSHWDrawConfig& config)
{
	if (!config.accurate_prims)
		return;

	const u32 count = config.accurate_prims_edge_data->size();
	const u32 size = count * sizeof(AccuratePrimsEdgeData);

	// Reserve the GPU region, aligned to one entry. If there is no room, execute the current
	// command buffer and try once more.
	if (!m_accurate_prims_stream_buffer.ReserveMemory(size, sizeof(AccuratePrimsEdgeData)))
	{
		ExecuteCommandBufferAndRestartRenderPass(false, "Uploading bytes to accurate prims buffer");
		if (!m_accurate_prims_stream_buffer.ReserveMemory(size, sizeof(AccuratePrimsEdgeData)))
			pxFailRel("Failed to reserve space for accurate prims");
	}

	const u32 offset = m_accurate_prims_stream_buffer.GetCurrentOffset();

	// The buffer copy below is a transfer command and must be recorded outside a render pass.
	if (InRenderPass())
		EndRenderPass();

	// Copy data to an upload buffer.
	VkBuffer upload_buffer;
	u32 upload_buffer_offset;

	const auto upload_data = [&](void* map_ptr) {
		std::memcpy(map_ptr, config.accurate_prims_edge_data->data(), size);
	};

	// If the edge data is larger than half the texture streaming buffer, use a separate staging
	// buffer. Otherwise allocation will either fail, or require lots of cmdbuffer submissions.
	if (size > m_texture_stream_buffer.GetCurrentSize() / 2)
	{
		upload_buffer_offset = 0;
		upload_buffer = AllocateUploadStagingBuffer(size, upload_data);
	}
	else
	{
		upload_buffer = WriteTextureUploadBuffer(size, upload_data, upload_buffer_offset);
	}

	if (upload_buffer == VK_NULL_HANDLE)
	{
		Console.Error("Failed to get upload buffer for accurate prims data.");
		return;
	}

	// Copy data from upload to GPU buffer.
	const VkBufferCopy copy_region = {upload_buffer_offset, offset, size};
	vkCmdCopyBuffer(GetCurrentCommandBuffer(), upload_buffer, m_accurate_prims_stream_buffer.GetBuffer(), 1, &copy_region);

	// Commit the GPU region.
	m_accurate_prims_stream_buffer.CommitMemory(size);

	// Issue the barrier since this will be used next draw: make the transfer write visible to
	// fragment-shader reads of the uploaded range.
	VkBufferMemoryBarrier barrier = {
		VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, nullptr,
		VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT,
		VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
		m_accurate_prims_stream_buffer.GetBuffer(), offset, size};
	vkCmdPipelineBarrier(GetCurrentCommandBuffer(),
		VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
		0, 0, nullptr, 1, &barrier, 0, nullptr);

	m_accurate_prims_stream_buffer_offset = offset; // Save this for the constant buffer.
}
|
||||||
|
|
||||||
|
// Writes the accurate-prims base offsets into the draw's constant buffers and re-submits both
// constant buffers. Does nothing when the draw does not use accurate prims.
//
// This is kept apart from SetupAccuratePrimsBuffer(): that function records the buffer upload
// (ending any active render pass), while this one only patches and sets the constants.
void GSDeviceVK::SetupAccuratePrimsConstants(GSHWDrawConfig& config)
{
	if (!config.accurate_prims)
		return;

	// Convert the saved byte offset of the uploaded edge data into an entry index.
	config.cb_ps.accurate_prims_base_index.x = m_accurate_prims_stream_buffer_offset / sizeof(AccuratePrimsEdgeData);
	config.cb_vs.base_vertex = m_vertex.start;

	SetVSConstantBuffer(config.cb_vs);
	SetPSConstantBuffer(config.cb_ps);
}
|
||||||
|
|
||||||
|
// Stages `size` bytes in the texture upload stream buffer.
//
// write_data is called with the host pointer of the reserved region and is expected to fill it.
// offset_out receives the region's offset within the returned buffer.
// Returns the stream buffer's VkBuffer, or VK_NULL_HANDLE if space could not be reserved even
// after executing the current command buffer (offset_out is left untouched in that case).
VkBuffer GSDeviceVK::WriteTextureUploadBuffer(u32 size, std::function<void(void*)> write_data, u32& offset_out)
{
	const auto try_reserve = [this, size]() {
		return m_texture_stream_buffer.ReserveMemory(size, GetBufferCopyOffsetAlignment());
	};

	bool reserved = try_reserve();
	if (!reserved)
	{
		// No room: execute the current command buffer, then retry exactly once.
		ExecuteCommandBuffer(
			false, "While waiting for %u bytes in texture upload buffer", size);
		reserved = try_reserve();
	}

	if (!reserved)
	{
		Console.Error("Failed to reserve texture upload memory (%u bytes).", size);
		return VK_NULL_HANDLE;
	}

	// Report the offset before committing, then let the caller fill the reserved region.
	offset_out = m_texture_stream_buffer.GetCurrentOffset();
	write_data(m_texture_stream_buffer.GetCurrentHostPointer());
	m_texture_stream_buffer.CommitMemory(size);

	return m_texture_stream_buffer.GetBuffer();
}
|
||||||
|
|
||||||
|
// Creates a one-off, persistently mapped transfer-source buffer of `size` bytes, lets write_data
// fill it through the mapped pointer, flushes it, and returns it.
//
// The buffer is handed to DeferBufferDestruction() right after creation, so the caller must not
// destroy it. Returns VK_NULL_HANDLE (after logging) if the allocation fails.
VkBuffer GSDeviceVK::AllocateUploadStagingBuffer(u32 size, std::function<void(void*)> write_data)
{
	VkBufferCreateInfo buffer_info = {};
	buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
	buffer_info.size = static_cast<VkDeviceSize>(size);
	buffer_info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
	buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;

	// The coherent bit is deliberately not requested here. It was set in StreamBuffer mainly for
	// MoltenVK, which would upload the whole buffer on smaller uploads; this buffer is written
	// in full anyway.
	VmaAllocationCreateInfo alloc_create_info = {};
	alloc_create_info.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
	alloc_create_info.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;

	VmaAllocationInfo alloc_info;
	VkBuffer staging_buffer;
	VmaAllocation staging_allocation;
	const VkResult result = vmaCreateBuffer(GSDeviceVK::GetInstance()->GetAllocator(), &buffer_info,
		&alloc_create_info, &staging_buffer, &staging_allocation, &alloc_info);
	if (result != VK_SUCCESS)
	{
		LOG_VULKAN_ERROR(result, "(AllocateUploadStagingBuffer) vmaCreateBuffer() failed: ");
		return VK_NULL_HANDLE;
	}

	// Queue the buffer for freeing once the command buffer finishes; it is only needed for the copy.
	GSDeviceVK::GetInstance()->DeferBufferDestruction(staging_buffer, staging_allocation);

	// Fill the mapped memory, then flush the written range.
	write_data(alloc_info.pMappedData);
	vmaFlushAllocation(GSDeviceVK::GetInstance()->GetAllocator(), staging_allocation, 0, size);

	return staging_buffer;
}
|
||||||
|
|
||||||
void GSDeviceVK::OMSetRenderTargets(
|
void GSDeviceVK::OMSetRenderTargets(
|
||||||
GSTexture* rt, GSTexture* ds, const GSVector4i& scissor, FeedbackLoopFlag feedback_loop)
|
GSTexture* rt, GSTexture* ds, const GSVector4i& scissor, FeedbackLoopFlag feedback_loop)
|
||||||
{
|
{
|
||||||
@ -3379,12 +3551,15 @@ void GSDeviceVK::OMSetRenderTargets(
|
|||||||
if (vkRt)
|
if (vkRt)
|
||||||
{
|
{
|
||||||
m_current_framebuffer =
|
m_current_framebuffer =
|
||||||
vkRt->GetLinkedFramebuffer(vkDs, (feedback_loop & FeedbackLoopFlag_ReadAndWriteRT) != 0);
|
vkRt->GetLinkedFramebuffer(vkDs,
|
||||||
|
(feedback_loop & FeedbackLoopFlag_ReadAndWriteRT) != 0,
|
||||||
|
(feedback_loop & FeedbackLoopFlag_ReadAndWriteDepth) != 0);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
pxAssert(!(feedback_loop & FeedbackLoopFlag_ReadAndWriteRT));
|
pxAssert(!(feedback_loop & FeedbackLoopFlag_ReadAndWriteRT) &&
|
||||||
m_current_framebuffer = vkDs->GetLinkedFramebuffer(nullptr, false);
|
!(feedback_loop & FeedbackLoopFlag_ReadAndWriteDepth));
|
||||||
|
m_current_framebuffer = vkDs->GetLinkedFramebuffer(nullptr, false, false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (InRenderPass())
|
else if (InRenderPass())
|
||||||
@ -3494,7 +3669,21 @@ void GSDeviceVK::OMSetRenderTargets(
|
|||||||
if (vkDs)
|
if (vkDs)
|
||||||
{
|
{
|
||||||
// need to update descriptors to reflect the new layout
|
// need to update descriptors to reflect the new layout
|
||||||
if (feedback_loop & FeedbackLoopFlag_ReadDS)
|
if (feedback_loop & FeedbackLoopFlag_ReadAndWriteDepth)
|
||||||
|
{
|
||||||
|
// NVIDIA drivers appear to return random garbage when sampling the RT via a feedback loop, if the load op for
|
||||||
|
// the render pass is CLEAR. Using vkCmdClearAttachments() doesn't work, so we have to clear the image instead.
|
||||||
|
// Note: DS feedback loop was added later - we will assume that the same issue is relevant.
|
||||||
|
if (vkDs->GetState() == GSTexture::State::Cleared && IsDeviceNVIDIA())
|
||||||
|
vkDs->CommitClear();
|
||||||
|
|
||||||
|
if (vkDs->GetLayout() != GSTextureVK::Layout::FeedbackLoop)
|
||||||
|
{
|
||||||
|
m_dirty_flags |= (DIRTY_FLAG_TFX_TEXTURE_0 << TFX_TEXTURE_DEPTH);
|
||||||
|
vkDs->TransitionToLayout(GSTextureVK::Layout::FeedbackLoop);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (feedback_loop & FeedbackLoopFlag_ReadDepth)
|
||||||
{
|
{
|
||||||
if (vkDs->GetLayout() != GSTextureVK::Layout::FeedbackLoop)
|
if (vkDs->GetLayout() != GSTextureVK::Layout::FeedbackLoop)
|
||||||
{
|
{
|
||||||
@ -3675,6 +3864,16 @@ bool GSDeviceVK::CreateBuffers()
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (m_features.accurate_prims)
|
||||||
|
{
|
||||||
|
if (!m_accurate_prims_stream_buffer.Create(
|
||||||
|
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, ACCURATE_PRIMS_BUFFER_SIZE, true))
|
||||||
|
{
|
||||||
|
Host::ReportErrorAsync("GS", "Failed to allocate accurate prims buffer");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (!m_vertex_uniform_stream_buffer.Create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VERTEX_UNIFORM_BUFFER_SIZE))
|
if (!m_vertex_uniform_stream_buffer.Create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VERTEX_UNIFORM_BUFFER_SIZE))
|
||||||
{
|
{
|
||||||
Host::ReportErrorAsync("GS", "Failed to allocate vertex uniform buffer");
|
Host::ReportErrorAsync("GS", "Failed to allocate vertex uniform buffer");
|
||||||
@ -3734,6 +3933,8 @@ bool GSDeviceVK::CreatePipelineLayouts()
|
|||||||
dslb.AddBinding(1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
|
dslb.AddBinding(1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
|
||||||
if (m_features.vs_expand)
|
if (m_features.vs_expand)
|
||||||
dslb.AddBinding(2, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_VERTEX_BIT);
|
dslb.AddBinding(2, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_VERTEX_BIT);
|
||||||
|
if (m_features.accurate_prims)
|
||||||
|
dslb.AddBinding(3, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
|
||||||
if ((m_tfx_ubo_ds_layout = dslb.Create(dev)) == VK_NULL_HANDLE)
|
if ((m_tfx_ubo_ds_layout = dslb.Create(dev)) == VK_NULL_HANDLE)
|
||||||
return false;
|
return false;
|
||||||
Vulkan::SetObjectName(dev, m_tfx_ubo_ds_layout, "TFX UBO descriptor layout");
|
Vulkan::SetObjectName(dev, m_tfx_ubo_ds_layout, "TFX UBO descriptor layout");
|
||||||
@ -3743,9 +3944,13 @@ bool GSDeviceVK::CreatePipelineLayouts()
|
|||||||
dslb.AddBinding(TFX_TEXTURE_PALETTE, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
|
dslb.AddBinding(TFX_TEXTURE_PALETTE, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
|
||||||
dslb.AddBinding(TFX_TEXTURE_RT,
|
dslb.AddBinding(TFX_TEXTURE_RT,
|
||||||
(m_features.texture_barrier && !UseFeedbackLoopLayout()) ? VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT :
|
(m_features.texture_barrier && !UseFeedbackLoopLayout()) ? VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT :
|
||||||
VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
|
VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
|
||||||
1, VK_SHADER_STAGE_FRAGMENT_BIT);
|
1, VK_SHADER_STAGE_FRAGMENT_BIT);
|
||||||
dslb.AddBinding(TFX_TEXTURE_PRIMID, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
|
dslb.AddBinding(TFX_TEXTURE_PRIMID, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
|
||||||
|
dslb.AddBinding(TFX_TEXTURE_DEPTH,
|
||||||
|
(m_features.texture_barrier && !UseFeedbackLoopLayout()) ? VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT :
|
||||||
|
VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
|
||||||
|
1, VK_SHADER_STAGE_FRAGMENT_BIT);
|
||||||
if ((m_tfx_texture_ds_layout = dslb.Create(dev)) == VK_NULL_HANDLE)
|
if ((m_tfx_texture_ds_layout = dslb.Create(dev)) == VK_NULL_HANDLE)
|
||||||
return false;
|
return false;
|
||||||
Vulkan::SetObjectName(dev, m_tfx_texture_ds_layout, "TFX texture descriptor layout");
|
Vulkan::SetObjectName(dev, m_tfx_texture_ds_layout, "TFX texture descriptor layout");
|
||||||
@ -4603,6 +4808,7 @@ void GSDeviceVK::DestroyResources()
|
|||||||
m_fragment_uniform_stream_buffer.Destroy(false);
|
m_fragment_uniform_stream_buffer.Destroy(false);
|
||||||
m_vertex_uniform_stream_buffer.Destroy(false);
|
m_vertex_uniform_stream_buffer.Destroy(false);
|
||||||
m_index_stream_buffer.Destroy(false);
|
m_index_stream_buffer.Destroy(false);
|
||||||
|
m_accurate_prims_stream_buffer.Destroy(false);
|
||||||
m_vertex_stream_buffer.Destroy(false);
|
m_vertex_stream_buffer.Destroy(false);
|
||||||
if (m_expand_index_buffer != VK_NULL_HANDLE)
|
if (m_expand_index_buffer != VK_NULL_HANDLE)
|
||||||
vmaDestroyBuffer(m_allocator, m_expand_index_buffer, m_expand_index_buffer_allocation);
|
vmaDestroyBuffer(m_allocator, m_expand_index_buffer, m_expand_index_buffer_allocation);
|
||||||
@ -4670,6 +4876,7 @@ VkShaderModule GSDeviceVK::GetTFXVertexShader(GSHWDrawConfig::VSSelector sel)
|
|||||||
AddMacro(ss, "VS_POINT_SIZE", sel.point_size);
|
AddMacro(ss, "VS_POINT_SIZE", sel.point_size);
|
||||||
AddMacro(ss, "VS_EXPAND", static_cast<int>(sel.expand));
|
AddMacro(ss, "VS_EXPAND", static_cast<int>(sel.expand));
|
||||||
AddMacro(ss, "VS_PROVOKING_VERTEX_LAST", static_cast<int>(m_features.provoking_vertex_last));
|
AddMacro(ss, "VS_PROVOKING_VERTEX_LAST", static_cast<int>(m_features.provoking_vertex_last));
|
||||||
|
AddMacro(ss, "VS_ACCURATE_PRIMS", static_cast<int>(sel.accurate_prims));
|
||||||
ss << m_tfx_source;
|
ss << m_tfx_source;
|
||||||
|
|
||||||
VkShaderModule mod = g_vulkan_shader_cache->GetVertexShader(ss.str());
|
VkShaderModule mod = g_vulkan_shader_cache->GetVertexShader(ss.str());
|
||||||
@ -4744,6 +4951,10 @@ VkShaderModule GSDeviceVK::GetTFXFragmentShader(const GSHWDrawConfig::PSSelector
|
|||||||
AddMacro(ss, "PS_TEX_IS_FB", sel.tex_is_fb);
|
AddMacro(ss, "PS_TEX_IS_FB", sel.tex_is_fb);
|
||||||
AddMacro(ss, "PS_NO_COLOR", sel.no_color);
|
AddMacro(ss, "PS_NO_COLOR", sel.no_color);
|
||||||
AddMacro(ss, "PS_NO_COLOR1", sel.no_color1);
|
AddMacro(ss, "PS_NO_COLOR1", sel.no_color1);
|
||||||
|
AddMacro(ss, "PS_ACCURATE_PRIMS", sel.accurate_prims);
|
||||||
|
AddMacro(ss, "PS_ACCURATE_PRIMS_AA", sel.accurate_prims_aa);
|
||||||
|
AddMacro(ss, "PS_ACCURATE_PRIMS_AA_ABE", sel.accurate_prims_aa_abe);
|
||||||
|
AddMacro(ss, "PS_ZTST", sel.ztst);
|
||||||
ss << m_tfx_source;
|
ss << m_tfx_source;
|
||||||
|
|
||||||
VkShaderModule mod = g_vulkan_shader_cache->GetFragmentShader(ss.str());
|
VkShaderModule mod = g_vulkan_shader_cache->GetFragmentShader(ss.str());
|
||||||
@ -4945,6 +5156,11 @@ bool GSDeviceVK::CreatePersistentDescriptorSets()
|
|||||||
dsub.AddBufferDescriptorWrite(m_tfx_ubo_descriptor_set, 2, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
dsub.AddBufferDescriptorWrite(m_tfx_ubo_descriptor_set, 2, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||||
m_vertex_stream_buffer.GetBuffer(), 0, VERTEX_BUFFER_SIZE);
|
m_vertex_stream_buffer.GetBuffer(), 0, VERTEX_BUFFER_SIZE);
|
||||||
}
|
}
|
||||||
|
if (m_features.accurate_prims)
|
||||||
|
{
|
||||||
|
dsub.AddBufferDescriptorWrite(m_tfx_ubo_descriptor_set, 3, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||||
|
m_accurate_prims_stream_buffer.GetBuffer(), 0, ACCURATE_PRIMS_BUFFER_SIZE);
|
||||||
|
}
|
||||||
dsub.Update(dev);
|
dsub.Update(dev);
|
||||||
Vulkan::SetObjectName(dev, m_tfx_ubo_descriptor_set, "Persistent TFX UBO set");
|
Vulkan::SetObjectName(dev, m_tfx_ubo_descriptor_set, "Persistent TFX UBO set");
|
||||||
return true;
|
return true;
|
||||||
@ -5341,11 +5557,15 @@ bool GSDeviceVK::ApplyTFXState(bool already_execed)
|
|||||||
m_current_pipeline_layout = PipelineLayout::TFX;
|
m_current_pipeline_layout = PipelineLayout::TFX;
|
||||||
flags |= DIRTY_FLAG_TFX_UBO | DIRTY_FLAG_TFX_TEXTURES;
|
flags |= DIRTY_FLAG_TFX_UBO | DIRTY_FLAG_TFX_TEXTURES;
|
||||||
|
|
||||||
// Clear out the RT binding if feedback loop isn't on, because it'll be in the wrong state and make
|
// Clear out the RT/DS binding if feedback loop isn't on, because it'll be in the wrong state and make
|
||||||
// the validation layer cranky. Not a big deal since we need to write it anyway.
|
// the validation layer cranky. Not a big deal since we need to write it anyway.
|
||||||
const GSTextureVK::Layout rt_tex_layout = m_tfx_textures[TFX_TEXTURE_RT]->GetLayout();
|
std::array<TFX_TEXTURES, 2> texture_types = { TFX_TEXTURE_RT, TFX_TEXTURE_DEPTH };
|
||||||
if (rt_tex_layout != GSTextureVK::Layout::FeedbackLoop && rt_tex_layout != GSTextureVK::Layout::ShaderReadOnly)
|
for (u32 texture_type : texture_types)
|
||||||
m_tfx_textures[TFX_TEXTURE_RT] = m_null_texture.get();
|
{
|
||||||
|
const GSTextureVK::Layout tex_layout = m_tfx_textures[texture_type]->GetLayout();
|
||||||
|
if (tex_layout != GSTextureVK::Layout::FeedbackLoop && tex_layout != GSTextureVK::Layout::ShaderReadOnly)
|
||||||
|
m_tfx_textures[texture_type] = m_null_texture.get();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flags & DIRTY_FLAG_TFX_UBO)
|
if (flags & DIRTY_FLAG_TFX_UBO)
|
||||||
@ -5386,6 +5606,19 @@ bool GSDeviceVK::ApplyTFXState(bool already_execed)
|
|||||||
dsub.AddImageDescriptorWrite(VK_NULL_HANDLE, TFX_TEXTURE_PRIMID,
|
dsub.AddImageDescriptorWrite(VK_NULL_HANDLE, TFX_TEXTURE_PRIMID,
|
||||||
m_tfx_textures[TFX_TEXTURE_PRIMID]->GetView(), m_tfx_textures[TFX_TEXTURE_PRIMID]->GetVkLayout());
|
m_tfx_textures[TFX_TEXTURE_PRIMID]->GetView(), m_tfx_textures[TFX_TEXTURE_PRIMID]->GetVkLayout());
|
||||||
}
|
}
|
||||||
|
if (flags & DIRTY_FLAG_TFX_TEXTURE_DEPTH)
|
||||||
|
{
|
||||||
|
if (m_features.texture_barrier && !UseFeedbackLoopLayout())
|
||||||
|
{
|
||||||
|
dsub.AddInputAttachmentDescriptorWrite(
|
||||||
|
VK_NULL_HANDLE, TFX_TEXTURE_DEPTH, m_tfx_textures[TFX_TEXTURE_DEPTH]->GetView(), VK_IMAGE_LAYOUT_GENERAL);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
dsub.AddImageDescriptorWrite(VK_NULL_HANDLE, TFX_TEXTURE_DEPTH, m_tfx_textures[TFX_TEXTURE_DEPTH]->GetView(),
|
||||||
|
m_tfx_textures[TFX_TEXTURE_DEPTH]->GetVkLayout());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
dsub.PushUpdate(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_tfx_pipeline_layout, TFX_DESCRIPTOR_SET_TEXTURES);
|
dsub.PushUpdate(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_tfx_pipeline_layout, TFX_DESCRIPTOR_SET_TEXTURES);
|
||||||
}
|
}
|
||||||
@ -5545,13 +5778,15 @@ GSTextureVK* GSDeviceVK::SetupPrimitiveTrackingDATE(GSHWDrawConfig& config)
|
|||||||
|
|
||||||
void GSDeviceVK::RenderHW(GSHWDrawConfig& config)
|
void GSDeviceVK::RenderHW(GSHWDrawConfig& config)
|
||||||
{
|
{
|
||||||
|
|
||||||
const GSVector2i rtsize(config.rt ? config.rt->GetSize() : config.ds->GetSize());
|
const GSVector2i rtsize(config.rt ? config.rt->GetSize() : config.ds->GetSize());
|
||||||
GSTextureVK* draw_rt = static_cast<GSTextureVK*>(config.rt);
|
GSTextureVK* draw_rt = static_cast<GSTextureVK*>(config.rt);
|
||||||
GSTextureVK* draw_ds = static_cast<GSTextureVK*>(config.ds);
|
GSTextureVK* draw_ds = static_cast<GSTextureVK*>(config.ds);
|
||||||
GSTextureVK* draw_rt_clone = nullptr;
|
GSTextureVK* draw_rt_clone = nullptr;
|
||||||
GSTextureVK* colclip_rt = static_cast<GSTextureVK*>(g_gs_device->GetColorClipTexture());
|
GSTextureVK* colclip_rt = static_cast<GSTextureVK*>(g_gs_device->GetColorClipTexture());
|
||||||
|
|
||||||
|
// Copying buffers needs to be done outside a render pass, so do this early.
|
||||||
|
SetupAccuratePrimsBuffer(config);
|
||||||
|
|
||||||
// stream buffer in first, in case we need to exec
|
// stream buffer in first, in case we need to exec
|
||||||
SetVSConstantBuffer(config.cb_vs);
|
SetVSConstantBuffer(config.cb_vs);
|
||||||
SetPSConstantBuffer(config.cb_ps);
|
SetPSConstantBuffer(config.cb_ps);
|
||||||
@ -5597,8 +5832,12 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config)
|
|||||||
UpdateHWPipelineSelector(config, pipe);
|
UpdateHWPipelineSelector(config, pipe);
|
||||||
|
|
||||||
// If we don't have a barrier but the texture was drawn to last draw, end the pass to insert a barrier.
|
// If we don't have a barrier but the texture was drawn to last draw, end the pass to insert a barrier.
|
||||||
if (InRenderPass() && !pipe.IsRTFeedbackLoop() && (config.tex == m_current_render_target || config.tex == m_current_depth_target))
|
if (InRenderPass())
|
||||||
EndRenderPass();
|
{
|
||||||
|
if ((!pipe.IsRTFeedbackLoop() && config.tex == m_current_render_target) ||
|
||||||
|
(!pipe.IsDepthFeedbackLoop() && config.tex == m_current_depth_target))
|
||||||
|
EndRenderPass();
|
||||||
|
}
|
||||||
|
|
||||||
// now blit the colclip texture back to the original target
|
// now blit the colclip texture back to the original target
|
||||||
if (colclip_rt)
|
if (colclip_rt)
|
||||||
@ -5781,20 +6020,31 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config)
|
|||||||
// Despite the layout changing enforcing the execution dependency between previous draws and the first
|
// Despite the layout changing enforcing the execution dependency between previous draws and the first
|
||||||
// input attachment read, it still wants the region/fragment-local barrier...
|
// input attachment read, it still wants the region/fragment-local barrier...
|
||||||
|
|
||||||
const bool skip_first_barrier =
|
bool skip_first_barrier = !pipe.ps.colclip_hw && !IsDeviceAMD();
|
||||||
(draw_rt && draw_rt->GetLayout() != GSTextureVK::Layout::FeedbackLoop && !pipe.ps.colclip_hw && !IsDeviceAMD());
|
if (draw_rt)
|
||||||
|
skip_first_barrier = skip_first_barrier && draw_rt->GetLayout() != GSTextureVK::Layout::FeedbackLoop;
|
||||||
|
if (draw_ds)
|
||||||
|
skip_first_barrier = skip_first_barrier && draw_ds->GetLayout() != GSTextureVK::Layout::FeedbackLoop;
|
||||||
|
|
||||||
OMSetRenderTargets(draw_rt, draw_ds, config.scissor, static_cast<FeedbackLoopFlag>(pipe.feedback_loop_flags));
|
OMSetRenderTargets(draw_rt, draw_ds, config.scissor, static_cast<FeedbackLoopFlag>(pipe.feedback_loop_flags));
|
||||||
if (pipe.IsRTFeedbackLoop())
|
if (pipe.IsRTFeedbackLoop())
|
||||||
{
|
{
|
||||||
pxAssertMsg(m_features.texture_barrier, "Texture barriers enabled");
|
pxAssertMsg(m_features.texture_barrier, "Texture barriers enabled");
|
||||||
PSSetShaderResource(2, draw_rt, false);
|
PSSetShaderResource(TFX_TEXTURE_RT, draw_rt, false);
|
||||||
|
|
||||||
// If this is the first draw to the target as a feedback loop, make sure we re-generate the texture descriptor.
|
// If this is the first draw to the target as a feedback loop, make sure we re-generate the texture descriptor.
|
||||||
// Otherwise, we might have a previous descriptor left over, that has the RT in a different state.
|
// Otherwise, we might have a previous descriptor left over, that has the RT in a different state.
|
||||||
m_dirty_flags |= (skip_first_barrier ? static_cast<u32>(DIRTY_FLAG_TFX_TEXTURE_RT) : 0);
|
m_dirty_flags |= (skip_first_barrier ? static_cast<u32>(DIRTY_FLAG_TFX_TEXTURE_RT) : 0);
|
||||||
}
|
}
|
||||||
|
if (pipe.IsDepthFeedbackLoop())
|
||||||
|
{
|
||||||
|
pxAssertMsg(m_features.texture_barrier, "Texture barriers enabled");
|
||||||
|
PSSetShaderResource(TFX_TEXTURE_DEPTH, draw_ds, false);
|
||||||
|
|
||||||
|
// If this is the first draw to the target as a feedback loop, make sure we re-generate the texture descriptor.
|
||||||
|
// Otherwise, we might have a previous descriptor left over, that has the DS in a different state.
|
||||||
|
m_dirty_flags |= (skip_first_barrier ? static_cast<u32>(DIRTY_FLAG_TFX_TEXTURE_DEPTH) : 0);
|
||||||
|
}
|
||||||
// Begin render pass if new target or out of the area.
|
// Begin render pass if new target or out of the area.
|
||||||
if (!InRenderPass())
|
if (!InRenderPass())
|
||||||
{
|
{
|
||||||
@ -5868,7 +6118,8 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config)
|
|||||||
|
|
||||||
// now we can do the actual draw
|
// now we can do the actual draw
|
||||||
if (BindDrawPipeline(pipe))
|
if (BindDrawPipeline(pipe))
|
||||||
SendHWDraw(config, draw_rt, config.require_one_barrier, config.require_full_barrier, skip_first_barrier);
|
SendHWDraw(config, draw_rt, pipe.IsDepthFeedbackLoop() ? draw_ds : nullptr,
|
||||||
|
config.require_one_barrier, config.require_full_barrier, skip_first_barrier);
|
||||||
|
|
||||||
// blend second pass
|
// blend second pass
|
||||||
if (config.blend_multi_pass.enable)
|
if (config.blend_multi_pass.enable)
|
||||||
@ -5903,8 +6154,8 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config)
|
|||||||
pipe.bs = config.blend;
|
pipe.bs = config.blend;
|
||||||
if (BindDrawPipeline(pipe))
|
if (BindDrawPipeline(pipe))
|
||||||
{
|
{
|
||||||
SendHWDraw(config, draw_rt, config.alpha_second_pass.require_one_barrier,
|
SendHWDraw(config, draw_rt, pipe.IsDepthFeedbackLoop() ? draw_ds : nullptr,
|
||||||
config.alpha_second_pass.require_full_barrier, false);
|
config.alpha_second_pass.require_one_barrier, config.alpha_second_pass.require_full_barrier, false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -5981,19 +6232,24 @@ void GSDeviceVK::UpdateHWPipelineSelector(GSHWDrawConfig& config, PipelineSelect
|
|||||||
pipe.rt = config.rt != nullptr;
|
pipe.rt = config.rt != nullptr;
|
||||||
pipe.ds = config.ds != nullptr;
|
pipe.ds = config.ds != nullptr;
|
||||||
pipe.line_width = config.line_expand;
|
pipe.line_width = config.line_expand;
|
||||||
pipe.feedback_loop_flags =
|
pipe.feedback_loop_flags = FeedbackLoopFlag_None;
|
||||||
(m_features.texture_barrier &&
|
if (m_features.texture_barrier && (config.ps.IsFeedbackLoop() || config.require_one_barrier || config.require_full_barrier))
|
||||||
(config.ps.IsFeedbackLoop() || config.require_one_barrier || config.require_full_barrier)) ?
|
{
|
||||||
FeedbackLoopFlag_ReadAndWriteRT :
|
pipe.feedback_loop_flags |= FeedbackLoopFlag_ReadAndWriteRT;
|
||||||
FeedbackLoopFlag_None;
|
|
||||||
pipe.feedback_loop_flags |=
|
// We only allow DS feedback loop if RT is already in a feedback loop.
|
||||||
(config.tex && config.tex == config.ds) ? FeedbackLoopFlag_ReadDS : FeedbackLoopFlag_None;
|
pipe.feedback_loop_flags |= (pipe.ds && config.ps.IsFeedbackLoopDepth()) ? FeedbackLoopFlag_ReadAndWriteDepth : FeedbackLoopFlag_None;
|
||||||
|
}
|
||||||
|
if (!(pipe.feedback_loop_flags & FeedbackLoopFlag_ReadAndWriteDepth))
|
||||||
|
{
|
||||||
|
pipe.feedback_loop_flags |= (config.tex && config.tex == config.ds) ? FeedbackLoopFlag_ReadDepth : FeedbackLoopFlag_None;
|
||||||
|
}
|
||||||
|
|
||||||
// enable point size in the vertex shader if we're rendering points regardless of upscaling.
|
// enable point size in the vertex shader if we're rendering points regardless of upscaling.
|
||||||
pipe.vs.point_size |= (config.topology == GSHWDrawConfig::Topology::Point);
|
pipe.vs.point_size |= (config.topology == GSHWDrawConfig::Topology::Point);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDeviceVK::UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config)
|
void GSDeviceVK::UploadHWDrawVerticesAndIndices(GSHWDrawConfig& config)
|
||||||
{
|
{
|
||||||
IASetVertexBuffer(config.verts, sizeof(GSVertex), config.nverts, GetVertexAlignment(config.vs.expand));
|
IASetVertexBuffer(config.verts, sizeof(GSVertex), config.nverts, GetVertexAlignment(config.vs.expand));
|
||||||
m_vertex.start *= GetExpansionFactor(config.vs.expand);
|
m_vertex.start *= GetExpansionFactor(config.vs.expand);
|
||||||
@ -6008,6 +6264,9 @@ void GSDeviceVK::UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config)
|
|||||||
{
|
{
|
||||||
IASetIndexBuffer(config.indices, config.nindices);
|
IASetIndexBuffer(config.indices, config.nindices);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Needs to be done after vertex offset is set.
|
||||||
|
SetupAccuratePrimsConstants(config);
|
||||||
}
|
}
|
||||||
|
|
||||||
VkImageMemoryBarrier GSDeviceVK::GetColorBufferBarrier(GSTextureVK* rt) const
|
VkImageMemoryBarrier GSDeviceVK::GetColorBufferBarrier(GSTextureVK* rt) const
|
||||||
@ -6021,13 +6280,31 @@ VkImageMemoryBarrier GSDeviceVK::GetColorBufferBarrier(GSTextureVK* rt) const
|
|||||||
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, rt->GetImage(), {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u}};
|
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, rt->GetImage(), {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u}};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VkImageMemoryBarrier GSDeviceVK::GetDepthStencilBufferBarrier(GSTextureVK* ds) const
|
||||||
|
{
|
||||||
|
const VkImageLayout layout =
|
||||||
|
UseFeedbackLoopLayout() ? VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT : VK_IMAGE_LAYOUT_GENERAL;
|
||||||
|
const VkAccessFlags dst_access =
|
||||||
|
UseFeedbackLoopLayout() ? VK_ACCESS_SHADER_READ_BIT : VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
|
||||||
|
return {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, nullptr,
|
||||||
|
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, dst_access, layout, layout,
|
||||||
|
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, ds->GetImage(),
|
||||||
|
{VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, 0u, 1u, 0u, 1u}};
|
||||||
|
}
|
||||||
|
|
||||||
VkDependencyFlags GSDeviceVK::GetColorBufferBarrierFlags() const
|
VkDependencyFlags GSDeviceVK::GetColorBufferBarrierFlags() const
|
||||||
{
|
{
|
||||||
return UseFeedbackLoopLayout() ? (VK_DEPENDENCY_BY_REGION_BIT | VK_DEPENDENCY_FEEDBACK_LOOP_BIT_EXT) :
|
return UseFeedbackLoopLayout() ? (VK_DEPENDENCY_BY_REGION_BIT | VK_DEPENDENCY_FEEDBACK_LOOP_BIT_EXT) :
|
||||||
VK_DEPENDENCY_BY_REGION_BIT;
|
VK_DEPENDENCY_BY_REGION_BIT;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt,
|
VkDependencyFlags GSDeviceVK::GetDepthStencilBufferBarrierFlags() const
|
||||||
|
{
|
||||||
|
return UseFeedbackLoopLayout() ? (VK_DEPENDENCY_BY_REGION_BIT | VK_DEPENDENCY_FEEDBACK_LOOP_BIT_EXT) :
|
||||||
|
VK_DEPENDENCY_BY_REGION_BIT;
|
||||||
|
}
|
||||||
|
|
||||||
|
void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, GSTextureVK* draw_ds,
|
||||||
bool one_barrier, bool full_barrier, bool skip_first_barrier)
|
bool one_barrier, bool full_barrier, bool skip_first_barrier)
|
||||||
{
|
{
|
||||||
if (!m_features.texture_barrier) [[unlikely]]
|
if (!m_features.texture_barrier) [[unlikely]]
|
||||||
@ -6037,21 +6314,48 @@ void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt,
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef PCSX2_DEVBUILD
|
#ifdef PCSX2_DEVBUILD
|
||||||
if ((one_barrier || full_barrier) && !m_pipeline_selector.ps.IsFeedbackLoop()) [[unlikely]]
|
if ((one_barrier || full_barrier) && !(m_pipeline_selector.ps.IsFeedbackLoop() || m_pipeline_selector.ps.IsFeedbackLoopDepth())) [[unlikely]]
|
||||||
Console.Warning("VK: Possible unnecessary barrier detected.");
|
Console.Warning("VK: Possible unnecessary barrier detected.");
|
||||||
#endif
|
#endif
|
||||||
const VkDependencyFlags barrier_flags = GetColorBufferBarrierFlags();
|
std::array<VkDependencyFlags, 2> barrier_flags = {
|
||||||
|
GetColorBufferBarrierFlags(),
|
||||||
|
GetDepthStencilBufferBarrierFlags(),
|
||||||
|
};
|
||||||
|
std::array<VkImageMemoryBarrier, 2> barrier;
|
||||||
|
u32 barriers_per_draw = 0;
|
||||||
|
if (full_barrier || one_barrier)
|
||||||
|
{
|
||||||
|
if (draw_rt)
|
||||||
|
barrier[barriers_per_draw++] = GetColorBufferBarrier(draw_rt);
|
||||||
|
if (draw_ds)
|
||||||
|
barrier[barriers_per_draw++] = GetDepthStencilBufferBarrier(draw_ds);
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto IssueBarriers = [&]() {
|
||||||
|
if (draw_rt)
|
||||||
|
{
|
||||||
|
vkCmdPipelineBarrier(GetCurrentCommandBuffer(),
|
||||||
|
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
|
||||||
|
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, barrier_flags[0], 0, nullptr, 0, nullptr, 1, &barrier[0]);
|
||||||
|
}
|
||||||
|
if (draw_ds)
|
||||||
|
{
|
||||||
|
vkCmdPipelineBarrier(GetCurrentCommandBuffer(),
|
||||||
|
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
|
||||||
|
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, barrier_flags[1], 0, nullptr, 0, nullptr, 1, &barrier[1]);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
if (full_barrier)
|
if (full_barrier)
|
||||||
{
|
{
|
||||||
pxAssert(config.drawlist && !config.drawlist->empty());
|
pxAssert(config.drawlist && !config.drawlist->empty());
|
||||||
|
|
||||||
const VkImageMemoryBarrier barrier = GetColorBufferBarrier(draw_rt);
|
|
||||||
const u32 indices_per_prim = config.indices_per_prim;
|
const u32 indices_per_prim = config.indices_per_prim;
|
||||||
const u32 draw_list_size = static_cast<u32>(config.drawlist->size());
|
const u32 draw_list_size = static_cast<u32>(config.drawlist->size());
|
||||||
|
|
||||||
GL_PUSH("Split the draw");
|
GL_PUSH("Split the draw");
|
||||||
g_perfmon.Put(
|
g_perfmon.Put(GSPerfMon::Barriers,
|
||||||
GSPerfMon::Barriers, static_cast<u32>(draw_list_size) - static_cast<u32>(skip_first_barrier));
|
barriers_per_draw * (static_cast<u32>(draw_list_size) - static_cast<u32>(skip_first_barrier)));
|
||||||
|
|
||||||
u32 p = 0;
|
u32 p = 0;
|
||||||
u32 n = 0;
|
u32 n = 0;
|
||||||
@ -6066,8 +6370,7 @@ void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt,
|
|||||||
|
|
||||||
for (; n < draw_list_size; n++)
|
for (; n < draw_list_size; n++)
|
||||||
{
|
{
|
||||||
vkCmdPipelineBarrier(GetCurrentCommandBuffer(), VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
|
IssueBarriers();
|
||||||
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, barrier_flags, 0, nullptr, 0, nullptr, 1, &barrier);
|
|
||||||
|
|
||||||
const u32 count = (*config.drawlist)[n] * indices_per_prim;
|
const u32 count = (*config.drawlist)[n] * indices_per_prim;
|
||||||
DrawIndexedPrimitive(p, count);
|
DrawIndexedPrimitive(p, count);
|
||||||
@ -6079,11 +6382,8 @@ void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt,
|
|||||||
|
|
||||||
if (one_barrier && !skip_first_barrier)
|
if (one_barrier && !skip_first_barrier)
|
||||||
{
|
{
|
||||||
g_perfmon.Put(GSPerfMon::Barriers, 1);
|
g_perfmon.Put(GSPerfMon::Barriers, barriers_per_draw);
|
||||||
|
IssueBarriers();
|
||||||
const VkImageMemoryBarrier barrier = GetColorBufferBarrier(draw_rt);
|
|
||||||
vkCmdPipelineBarrier(GetCurrentCommandBuffer(), VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
|
|
||||||
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, barrier_flags, 0, nullptr, 0, nullptr, 1, &barrier);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DrawIndexedPrimitive();
|
DrawIndexedPrimitive();
|
||||||
|
|||||||
@ -98,6 +98,8 @@ public:
|
|||||||
__fi VkCommandBuffer GetCurrentCommandBuffer() const { return m_current_command_buffer; }
|
__fi VkCommandBuffer GetCurrentCommandBuffer() const { return m_current_command_buffer; }
|
||||||
__fi VKStreamBuffer& GetTextureUploadBuffer() { return m_texture_stream_buffer; }
|
__fi VKStreamBuffer& GetTextureUploadBuffer() { return m_texture_stream_buffer; }
|
||||||
VkCommandBuffer GetCurrentInitCommandBuffer();
|
VkCommandBuffer GetCurrentInitCommandBuffer();
|
||||||
|
VkBuffer AllocateUploadStagingBuffer(u32 size, std::function<void(void*)> write_data);
|
||||||
|
VkBuffer WriteTextureUploadBuffer(u32 size, std::function<void(void*)> write_data, u32& offset_out);
|
||||||
|
|
||||||
/// Allocates a descriptor set from the pool reserved for the current frame.
|
/// Allocates a descriptor set from the pool reserved for the current frame.
|
||||||
VkDescriptorSet AllocatePersistentDescriptorSet(VkDescriptorSetLayout set_layout);
|
VkDescriptorSet AllocatePersistentDescriptorSet(VkDescriptorSetLayout set_layout);
|
||||||
@ -293,7 +295,8 @@ public:
|
|||||||
{
|
{
|
||||||
FeedbackLoopFlag_None = 0,
|
FeedbackLoopFlag_None = 0,
|
||||||
FeedbackLoopFlag_ReadAndWriteRT = 1,
|
FeedbackLoopFlag_ReadAndWriteRT = 1,
|
||||||
FeedbackLoopFlag_ReadDS = 2,
|
FeedbackLoopFlag_ReadDepth = 2,
|
||||||
|
FeedbackLoopFlag_ReadAndWriteDepth = 4,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct alignas(8) PipelineSelector
|
struct alignas(8) PipelineSelector
|
||||||
@ -308,7 +311,7 @@ public:
|
|||||||
u32 rt : 1;
|
u32 rt : 1;
|
||||||
u32 ds : 1;
|
u32 ds : 1;
|
||||||
u32 line_width : 1;
|
u32 line_width : 1;
|
||||||
u32 feedback_loop_flags : 2;
|
u32 feedback_loop_flags : 3;
|
||||||
};
|
};
|
||||||
|
|
||||||
u32 key;
|
u32 key;
|
||||||
@ -326,7 +329,8 @@ public:
|
|||||||
__fi PipelineSelector() { std::memset(this, 0, sizeof(*this)); }
|
__fi PipelineSelector() { std::memset(this, 0, sizeof(*this)); }
|
||||||
|
|
||||||
__fi bool IsRTFeedbackLoop() const { return ((feedback_loop_flags & FeedbackLoopFlag_ReadAndWriteRT) != 0); }
|
__fi bool IsRTFeedbackLoop() const { return ((feedback_loop_flags & FeedbackLoopFlag_ReadAndWriteRT) != 0); }
|
||||||
__fi bool IsTestingAndSamplingDepth() const { return ((feedback_loop_flags & FeedbackLoopFlag_ReadDS) != 0); }
|
__fi bool IsDepthFeedbackLoop() const { return ((feedback_loop_flags & FeedbackLoopFlag_ReadAndWriteDepth) != 0); }
|
||||||
|
__fi bool IsTestingAndSamplingDepth() const { return ((feedback_loop_flags & (FeedbackLoopFlag_ReadDepth | FeedbackLoopFlag_ReadAndWriteDepth)) != 0); }
|
||||||
};
|
};
|
||||||
static_assert(sizeof(PipelineSelector) == 24, "Pipeline selector is 24 bytes");
|
static_assert(sizeof(PipelineSelector) == 24, "Pipeline selector is 24 bytes");
|
||||||
|
|
||||||
@ -357,10 +361,11 @@ public:
|
|||||||
};
|
};
|
||||||
enum TFX_TEXTURES : u32
|
enum TFX_TEXTURES : u32
|
||||||
{
|
{
|
||||||
TFX_TEXTURE_TEXTURE,
|
TFX_TEXTURE_TEXTURE = 0,
|
||||||
TFX_TEXTURE_PALETTE,
|
TFX_TEXTURE_PALETTE,
|
||||||
TFX_TEXTURE_RT,
|
TFX_TEXTURE_RT,
|
||||||
TFX_TEXTURE_PRIMID,
|
TFX_TEXTURE_PRIMID,
|
||||||
|
TFX_TEXTURE_DEPTH,
|
||||||
|
|
||||||
NUM_TFX_TEXTURES
|
NUM_TFX_TEXTURES
|
||||||
};
|
};
|
||||||
@ -377,6 +382,8 @@ private:
|
|||||||
|
|
||||||
VKStreamBuffer m_vertex_stream_buffer;
|
VKStreamBuffer m_vertex_stream_buffer;
|
||||||
VKStreamBuffer m_index_stream_buffer;
|
VKStreamBuffer m_index_stream_buffer;
|
||||||
|
VKStreamBuffer m_accurate_prims_stream_buffer;
|
||||||
|
u32 m_accurate_prims_stream_buffer_offset = 0; // Ring buffer offset for the current draw.
|
||||||
VKStreamBuffer m_vertex_uniform_stream_buffer;
|
VKStreamBuffer m_vertex_uniform_stream_buffer;
|
||||||
VKStreamBuffer m_fragment_uniform_stream_buffer;
|
VKStreamBuffer m_fragment_uniform_stream_buffer;
|
||||||
VKStreamBuffer m_texture_stream_buffer;
|
VKStreamBuffer m_texture_stream_buffer;
|
||||||
@ -559,6 +566,9 @@ public:
|
|||||||
void PSSetShaderResource(int i, GSTexture* sr, bool check_state);
|
void PSSetShaderResource(int i, GSTexture* sr, bool check_state);
|
||||||
void PSSetSampler(GSHWDrawConfig::SamplerSelector sel);
|
void PSSetSampler(GSHWDrawConfig::SamplerSelector sel);
|
||||||
|
|
||||||
|
void SetupAccuratePrimsBuffer(GSHWDrawConfig& config);
|
||||||
|
void SetupAccuratePrimsConstants(GSHWDrawConfig& config);
|
||||||
|
|
||||||
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i& scissor,
|
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i& scissor,
|
||||||
FeedbackLoopFlag feedback_loop = FeedbackLoopFlag_None);
|
FeedbackLoopFlag feedback_loop = FeedbackLoopFlag_None);
|
||||||
|
|
||||||
@ -568,10 +578,12 @@ public:
|
|||||||
|
|
||||||
void RenderHW(GSHWDrawConfig& config) override;
|
void RenderHW(GSHWDrawConfig& config) override;
|
||||||
void UpdateHWPipelineSelector(GSHWDrawConfig& config, PipelineSelector& pipe);
|
void UpdateHWPipelineSelector(GSHWDrawConfig& config, PipelineSelector& pipe);
|
||||||
void UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config);
|
void UploadHWDrawVerticesAndIndices(GSHWDrawConfig& config);
|
||||||
VkImageMemoryBarrier GetColorBufferBarrier(GSTextureVK* rt) const;
|
VkImageMemoryBarrier GetColorBufferBarrier(GSTextureVK* rt) const;
|
||||||
VkDependencyFlags GetColorBufferBarrierFlags() const;
|
VkDependencyFlags GetColorBufferBarrierFlags() const;
|
||||||
void SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt,
|
VkImageMemoryBarrier GetDepthStencilBufferBarrier(GSTextureVK* ds) const;
|
||||||
|
VkDependencyFlags GetDepthStencilBufferBarrierFlags() const;
|
||||||
|
void SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, GSTextureVK* draw_ds,
|
||||||
bool one_barrier, bool full_barrier, bool skip_first_barrier);
|
bool one_barrier, bool full_barrier, bool skip_first_barrier);
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
@ -621,25 +633,27 @@ public:
|
|||||||
private:
|
private:
|
||||||
enum DIRTY_FLAG : u32
|
enum DIRTY_FLAG : u32
|
||||||
{
|
{
|
||||||
DIRTY_FLAG_TFX_TEXTURE_0 = (1 << 0), // 0, 1, 2, 3
|
DIRTY_FLAG_TFX_TEXTURE_0 = (1 << 0), // 0, 1, 2, 3, 4
|
||||||
DIRTY_FLAG_TFX_UBO = (1 << 4),
|
DIRTY_FLAG_TFX_UBO = (1 << 5),
|
||||||
DIRTY_FLAG_UTILITY_TEXTURE = (1 << 5),
|
DIRTY_FLAG_UTILITY_TEXTURE = (1 << 6),
|
||||||
DIRTY_FLAG_BLEND_CONSTANTS = (1 << 6),
|
DIRTY_FLAG_BLEND_CONSTANTS = (1 << 7),
|
||||||
DIRTY_FLAG_LINE_WIDTH = (1 << 7),
|
DIRTY_FLAG_LINE_WIDTH = (1 << 8),
|
||||||
DIRTY_FLAG_INDEX_BUFFER = (1 << 8),
|
DIRTY_FLAG_INDEX_BUFFER = (1 << 9),
|
||||||
DIRTY_FLAG_VIEWPORT = (1 << 9),
|
DIRTY_FLAG_VIEWPORT = (1 << 10),
|
||||||
DIRTY_FLAG_SCISSOR = (1 << 10),
|
DIRTY_FLAG_SCISSOR = (1 << 11),
|
||||||
DIRTY_FLAG_PIPELINE = (1 << 11),
|
DIRTY_FLAG_PIPELINE = (1 << 12),
|
||||||
DIRTY_FLAG_VS_CONSTANT_BUFFER = (1 << 12),
|
DIRTY_FLAG_VS_CONSTANT_BUFFER = (1 << 13),
|
||||||
DIRTY_FLAG_PS_CONSTANT_BUFFER = (1 << 13),
|
DIRTY_FLAG_PS_CONSTANT_BUFFER = (1 << 14),
|
||||||
|
|
||||||
DIRTY_FLAG_TFX_TEXTURE_TEX = (DIRTY_FLAG_TFX_TEXTURE_0 << 0),
|
DIRTY_FLAG_TFX_TEXTURE_TEX = (DIRTY_FLAG_TFX_TEXTURE_0 << 0),
|
||||||
DIRTY_FLAG_TFX_TEXTURE_PALETTE = (DIRTY_FLAG_TFX_TEXTURE_0 << 1),
|
DIRTY_FLAG_TFX_TEXTURE_PALETTE = (DIRTY_FLAG_TFX_TEXTURE_0 << 1),
|
||||||
DIRTY_FLAG_TFX_TEXTURE_RT = (DIRTY_FLAG_TFX_TEXTURE_0 << 2),
|
DIRTY_FLAG_TFX_TEXTURE_RT = (DIRTY_FLAG_TFX_TEXTURE_0 << 2),
|
||||||
DIRTY_FLAG_TFX_TEXTURE_PRIMID = (DIRTY_FLAG_TFX_TEXTURE_0 << 3),
|
DIRTY_FLAG_TFX_TEXTURE_PRIMID = (DIRTY_FLAG_TFX_TEXTURE_0 << 3),
|
||||||
|
DIRTY_FLAG_TFX_TEXTURE_DEPTH = (DIRTY_FLAG_TFX_TEXTURE_0 << 4),
|
||||||
|
|
||||||
DIRTY_FLAG_TFX_TEXTURES = DIRTY_FLAG_TFX_TEXTURE_TEX | DIRTY_FLAG_TFX_TEXTURE_PALETTE |
|
DIRTY_FLAG_TFX_TEXTURES = DIRTY_FLAG_TFX_TEXTURE_TEX | DIRTY_FLAG_TFX_TEXTURE_PALETTE |
|
||||||
DIRTY_FLAG_TFX_TEXTURE_RT | DIRTY_FLAG_TFX_TEXTURE_PRIMID,
|
DIRTY_FLAG_TFX_TEXTURE_RT | DIRTY_FLAG_TFX_TEXTURE_PRIMID |
|
||||||
|
DIRTY_FLAG_TFX_TEXTURE_DEPTH,
|
||||||
|
|
||||||
DIRTY_BASE_STATE = DIRTY_FLAG_INDEX_BUFFER | DIRTY_FLAG_PIPELINE | DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR |
|
DIRTY_BASE_STATE = DIRTY_FLAG_INDEX_BUFFER | DIRTY_FLAG_PIPELINE | DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR |
|
||||||
DIRTY_FLAG_BLEND_CONSTANTS | DIRTY_FLAG_LINE_WIDTH,
|
DIRTY_FLAG_BLEND_CONSTANTS | DIRTY_FLAG_LINE_WIDTH,
|
||||||
|
|||||||
@ -114,7 +114,7 @@ std::unique_ptr<GSTextureVK> GSTextureVK::Create(Type type, Format format, int w
|
|||||||
VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
|
VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
|
||||||
VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT |
|
VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT |
|
||||||
(GSDeviceVK::GetInstance()->UseFeedbackLoopLayout() ? VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT
|
(GSDeviceVK::GetInstance()->UseFeedbackLoopLayout() ? VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT
|
||||||
: 0);
|
: VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT);
|
||||||
vci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
|
vci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@ -198,7 +198,7 @@ void GSTextureVK::Destroy(bool defer)
|
|||||||
|
|
||||||
if (m_type == Type::RenderTarget || m_type == Type::DepthStencil)
|
if (m_type == Type::RenderTarget || m_type == Type::DepthStencil)
|
||||||
{
|
{
|
||||||
for (const auto& [other_tex, fb, feedback] : m_framebuffers)
|
for (const auto& [other_tex, fb, feedback_color, feedback_depth] : m_framebuffers)
|
||||||
{
|
{
|
||||||
if (other_tex)
|
if (other_tex)
|
||||||
{
|
{
|
||||||
@ -270,38 +270,6 @@ void GSTextureVK::CopyTextureDataForUpload(void* dst, const void* src, u32 pitch
|
|||||||
StringUtil::StrideMemCpy(dst, upload_pitch, src, pitch, std::min(upload_pitch, pitch), count);
|
StringUtil::StrideMemCpy(dst, upload_pitch, src, pitch, std::min(upload_pitch, pitch), count);
|
||||||
}
|
}
|
||||||
|
|
||||||
VkBuffer GSTextureVK::AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 height) const
|
|
||||||
{
|
|
||||||
const u32 size = upload_pitch * height;
|
|
||||||
const VkBufferCreateInfo bci = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, nullptr, 0, static_cast<VkDeviceSize>(size),
|
|
||||||
VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_SHARING_MODE_EXCLUSIVE, 0, nullptr};
|
|
||||||
|
|
||||||
// Don't worry about setting the coherent bit for this upload, the main reason we had
|
|
||||||
// that set in StreamBuffer was for MoltenVK, which would upload the whole buffer on
|
|
||||||
// smaller uploads, but we're writing to the whole thing anyway.
|
|
||||||
VmaAllocationCreateInfo aci = {};
|
|
||||||
aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
|
|
||||||
aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
|
|
||||||
|
|
||||||
VmaAllocationInfo ai;
|
|
||||||
VkBuffer buffer;
|
|
||||||
VmaAllocation allocation;
|
|
||||||
VkResult res = vmaCreateBuffer(GSDeviceVK::GetInstance()->GetAllocator(), &bci, &aci, &buffer, &allocation, &ai);
|
|
||||||
if (res != VK_SUCCESS)
|
|
||||||
{
|
|
||||||
LOG_VULKAN_ERROR(res, "(AllocateUploadStagingBuffer) vmaCreateBuffer() failed: ");
|
|
||||||
return VK_NULL_HANDLE;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Immediately queue it for freeing after the command buffer finishes, since it's only needed for the copy.
|
|
||||||
GSDeviceVK::GetInstance()->DeferBufferDestruction(buffer, allocation);
|
|
||||||
|
|
||||||
// And write the data.
|
|
||||||
CopyTextureDataForUpload(ai.pMappedData, data, pitch, upload_pitch, height);
|
|
||||||
vmaFlushAllocation(GSDeviceVK::GetInstance()->GetAllocator(), allocation, 0, size);
|
|
||||||
return buffer;
|
|
||||||
}
|
|
||||||
|
|
||||||
void GSTextureVK::UpdateFromBuffer(VkCommandBuffer cmdbuf, int level, u32 x, u32 y, u32 width, u32 height,
|
void GSTextureVK::UpdateFromBuffer(VkCommandBuffer cmdbuf, int level, u32 x, u32 y, u32 width, u32 height,
|
||||||
u32 buffer_height, u32 row_length, VkBuffer buffer, u32 buffer_offset)
|
u32 buffer_height, u32 row_length, VkBuffer buffer, u32 buffer_offset)
|
||||||
{
|
{
|
||||||
@ -333,6 +301,10 @@ bool GSTextureVK::Update(const GSVector4i& r, const void* data, int pitch, int l
|
|||||||
const u32 upload_pitch = Common::AlignUpPow2(pitch, GSDeviceVK::GetInstance()->GetBufferCopyRowPitchAlignment());
|
const u32 upload_pitch = Common::AlignUpPow2(pitch, GSDeviceVK::GetInstance()->GetBufferCopyRowPitchAlignment());
|
||||||
const u32 required_size = CalcUploadSize(height, upload_pitch);
|
const u32 required_size = CalcUploadSize(height, upload_pitch);
|
||||||
|
|
||||||
|
const auto upload_data = [&](void* map_ptr) {
|
||||||
|
CopyTextureDataForUpload(map_ptr, data, pitch, upload_pitch, height);
|
||||||
|
};
|
||||||
|
|
||||||
// If the texture is larger than half our streaming buffer size, use a separate buffer.
|
// If the texture is larger than half our streaming buffer size, use a separate buffer.
|
||||||
// Otherwise allocation will either fail, or require lots of cmdbuffer submissions.
|
// Otherwise allocation will either fail, or require lots of cmdbuffer submissions.
|
||||||
VkBuffer buffer;
|
VkBuffer buffer;
|
||||||
@ -340,29 +312,14 @@ bool GSTextureVK::Update(const GSVector4i& r, const void* data, int pitch, int l
|
|||||||
if (required_size > (GSDeviceVK::GetInstance()->GetTextureUploadBuffer().GetCurrentSize() / 2))
|
if (required_size > (GSDeviceVK::GetInstance()->GetTextureUploadBuffer().GetCurrentSize() / 2))
|
||||||
{
|
{
|
||||||
buffer_offset = 0;
|
buffer_offset = 0;
|
||||||
buffer = AllocateUploadStagingBuffer(data, pitch, upload_pitch, height);
|
buffer = GSDeviceVK::GetInstance()->AllocateUploadStagingBuffer(required_size, upload_data);
|
||||||
if (buffer == VK_NULL_HANDLE)
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
VKStreamBuffer& sbuffer = GSDeviceVK::GetInstance()->GetTextureUploadBuffer();
|
buffer = GSDeviceVK::GetInstance()->WriteTextureUploadBuffer(required_size, upload_data, buffer_offset);
|
||||||
if (!sbuffer.ReserveMemory(required_size, GSDeviceVK::GetInstance()->GetBufferCopyOffsetAlignment()))
|
|
||||||
{
|
|
||||||
GSDeviceVK::GetInstance()->ExecuteCommandBuffer(
|
|
||||||
false, "While waiting for %u bytes in texture upload buffer", required_size);
|
|
||||||
if (!sbuffer.ReserveMemory(required_size, GSDeviceVK::GetInstance()->GetBufferCopyOffsetAlignment()))
|
|
||||||
{
|
|
||||||
Console.Error("Failed to reserve texture upload memory (%u bytes).", required_size);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
buffer = sbuffer.GetBuffer();
|
|
||||||
buffer_offset = sbuffer.GetCurrentOffset();
|
|
||||||
CopyTextureDataForUpload(sbuffer.GetCurrentHostPointer(), data, pitch, upload_pitch, height);
|
|
||||||
sbuffer.CommitMemory(required_size);
|
|
||||||
}
|
}
|
||||||
|
if (buffer == VK_NULL_HANDLE)
|
||||||
|
return false;
|
||||||
|
|
||||||
const VkCommandBuffer cmdbuf = GetCommandBufferForUpdate();
|
const VkCommandBuffer cmdbuf = GetCommandBufferForUpdate();
|
||||||
GL_PUSH("GSTextureVK::Update({%d,%d} %dx%d Lvl:%u", r.x, r.y, r.width(), r.height(), layer);
|
GL_PUSH("GSTextureVK::Update({%d,%d} %dx%d Lvl:%u", r.x, r.y, r.width(), r.height(), layer);
|
||||||
@ -738,16 +695,16 @@ void GSTextureVK::TransitionSubresourcesToLayout(
|
|||||||
|
|
||||||
VkFramebuffer GSTextureVK::GetFramebuffer(bool feedback_loop)
|
VkFramebuffer GSTextureVK::GetFramebuffer(bool feedback_loop)
|
||||||
{
|
{
|
||||||
return GetLinkedFramebuffer(nullptr, feedback_loop);
|
return GetLinkedFramebuffer(nullptr, feedback_loop, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
VkFramebuffer GSTextureVK::GetLinkedFramebuffer(GSTextureVK* depth_texture, bool feedback_loop)
|
VkFramebuffer GSTextureVK::GetLinkedFramebuffer(GSTextureVK* depth_texture, bool feedback_loop_color, bool feedback_loop_depth)
|
||||||
{
|
{
|
||||||
pxAssertRel(m_type != Type::Texture, "Texture is a render target");
|
pxAssertRel(m_type != Type::Texture, "Texture is a render target");
|
||||||
|
|
||||||
for (const auto& [other_tex, fb, other_feedback_loop] : m_framebuffers)
|
for (const auto& [other_tex, fb, other_feedback_loop_color, other_feedback_loop_depth] : m_framebuffers)
|
||||||
{
|
{
|
||||||
if (other_tex == depth_texture && other_feedback_loop == feedback_loop)
|
if (other_tex == depth_texture && other_feedback_loop_color == feedback_loop_color && other_feedback_loop_depth == feedback_loop_depth)
|
||||||
return fb;
|
return fb;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -756,7 +713,7 @@ VkFramebuffer GSTextureVK::GetLinkedFramebuffer(GSTextureVK* depth_texture, bool
|
|||||||
(m_type != GSTexture::Type::DepthStencil) ? (depth_texture ? depth_texture->m_vk_format : VK_FORMAT_UNDEFINED) :
|
(m_type != GSTexture::Type::DepthStencil) ? (depth_texture ? depth_texture->m_vk_format : VK_FORMAT_UNDEFINED) :
|
||||||
m_vk_format,
|
m_vk_format,
|
||||||
VK_ATTACHMENT_LOAD_OP_LOAD, VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_LOAD,
|
VK_ATTACHMENT_LOAD_OP_LOAD, VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_LOAD,
|
||||||
VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_STORE_OP_DONT_CARE, feedback_loop);
|
VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_STORE_OP_DONT_CARE, feedback_loop_color, feedback_loop_depth);
|
||||||
if (!rp)
|
if (!rp)
|
||||||
return VK_NULL_HANDLE;
|
return VK_NULL_HANDLE;
|
||||||
|
|
||||||
@ -771,9 +728,9 @@ VkFramebuffer GSTextureVK::GetLinkedFramebuffer(GSTextureVK* depth_texture, bool
|
|||||||
if (!fb)
|
if (!fb)
|
||||||
return VK_NULL_HANDLE;
|
return VK_NULL_HANDLE;
|
||||||
|
|
||||||
m_framebuffers.emplace_back(depth_texture, fb, feedback_loop);
|
m_framebuffers.emplace_back(depth_texture, fb, feedback_loop_color, feedback_loop_depth);
|
||||||
if (depth_texture)
|
if (depth_texture)
|
||||||
depth_texture->m_framebuffers.emplace_back(this, fb, feedback_loop);
|
depth_texture->m_framebuffers.emplace_back(this, fb, feedback_loop_color, feedback_loop_depth);
|
||||||
return fb;
|
return fb;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -73,7 +73,7 @@ public:
|
|||||||
/// Framebuffers are lazily allocated.
|
/// Framebuffers are lazily allocated.
|
||||||
VkFramebuffer GetFramebuffer(bool feedback_loop);
|
VkFramebuffer GetFramebuffer(bool feedback_loop);
|
||||||
|
|
||||||
VkFramebuffer GetLinkedFramebuffer(GSTextureVK* depth_texture, bool feedback_loop);
|
VkFramebuffer GetLinkedFramebuffer(GSTextureVK* depth_texture, bool feedback_loop_color, bool feedback_loop_depth);
|
||||||
|
|
||||||
// Call when the texture is bound to the pipeline, or read from in a copy.
|
// Call when the texture is bound to the pipeline, or read from in a copy.
|
||||||
__fi void SetUseFenceCounter(u64 counter) { m_use_fence_counter = counter; }
|
__fi void SetUseFenceCounter(u64 counter) { m_use_fence_counter = counter; }
|
||||||
@ -84,7 +84,6 @@ private:
|
|||||||
|
|
||||||
VkCommandBuffer GetCommandBufferForUpdate();
|
VkCommandBuffer GetCommandBufferForUpdate();
|
||||||
void CopyTextureDataForUpload(void* dst, const void* src, u32 pitch, u32 upload_pitch, u32 height) const;
|
void CopyTextureDataForUpload(void* dst, const void* src, u32 pitch, u32 upload_pitch, u32 height) const;
|
||||||
VkBuffer AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 height) const;
|
|
||||||
void UpdateFromBuffer(VkCommandBuffer cmdbuf, int level, u32 x, u32 y, u32 width, u32 height, u32 buffer_height,
|
void UpdateFromBuffer(VkCommandBuffer cmdbuf, int level, u32 x, u32 y, u32 width, u32 height, u32 buffer_height,
|
||||||
u32 row_length, VkBuffer buffer, u32 buffer_offset);
|
u32 row_length, VkBuffer buffer, u32 buffer_offset);
|
||||||
|
|
||||||
@ -103,7 +102,7 @@ private:
|
|||||||
|
|
||||||
// linked framebuffer is combined with depth texture
|
// linked framebuffer is combined with depth texture
|
||||||
// list of color textures this depth texture is linked to or vice versa
|
// list of color textures this depth texture is linked to or vice versa
|
||||||
std::vector<std::tuple<GSTextureVK*, VkFramebuffer, bool>> m_framebuffers;
|
std::vector<std::tuple<GSTextureVK*, VkFramebuffer, bool, bool>> m_framebuffers;
|
||||||
};
|
};
|
||||||
|
|
||||||
class GSDownloadTextureVK final : public GSDownloadTexture
|
class GSDownloadTextureVK final : public GSDownloadTexture
|
||||||
|
|||||||
@ -19,6 +19,7 @@ VKStreamBuffer::VKStreamBuffer(VKStreamBuffer&& move)
|
|||||||
, m_allocation(move.m_allocation)
|
, m_allocation(move.m_allocation)
|
||||||
, m_buffer(move.m_buffer)
|
, m_buffer(move.m_buffer)
|
||||||
, m_host_pointer(move.m_host_pointer)
|
, m_host_pointer(move.m_host_pointer)
|
||||||
|
, m_device_local(move.m_device_local)
|
||||||
, m_tracked_fences(std::move(move.m_tracked_fences))
|
, m_tracked_fences(std::move(move.m_tracked_fences))
|
||||||
{
|
{
|
||||||
move.m_size = 0;
|
move.m_size = 0;
|
||||||
@ -28,6 +29,7 @@ VKStreamBuffer::VKStreamBuffer(VKStreamBuffer&& move)
|
|||||||
move.m_allocation = VK_NULL_HANDLE;
|
move.m_allocation = VK_NULL_HANDLE;
|
||||||
move.m_buffer = VK_NULL_HANDLE;
|
move.m_buffer = VK_NULL_HANDLE;
|
||||||
move.m_host_pointer = nullptr;
|
move.m_host_pointer = nullptr;
|
||||||
|
move.m_device_local = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
VKStreamBuffer::~VKStreamBuffer()
|
VKStreamBuffer::~VKStreamBuffer()
|
||||||
@ -48,19 +50,29 @@ VKStreamBuffer& VKStreamBuffer::operator=(VKStreamBuffer&& move)
|
|||||||
std::swap(m_buffer, move.m_buffer);
|
std::swap(m_buffer, move.m_buffer);
|
||||||
std::swap(m_host_pointer, move.m_host_pointer);
|
std::swap(m_host_pointer, move.m_host_pointer);
|
||||||
std::swap(m_tracked_fences, move.m_tracked_fences);
|
std::swap(m_tracked_fences, move.m_tracked_fences);
|
||||||
|
std::swap(m_device_local, move.m_device_local);
|
||||||
|
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool VKStreamBuffer::Create(VkBufferUsageFlags usage, u32 size)
|
bool VKStreamBuffer::Create(VkBufferUsageFlags usage, u32 size, bool device_local)
|
||||||
{
|
{
|
||||||
const VkBufferCreateInfo bci = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, nullptr, 0, static_cast<VkDeviceSize>(size),
|
const VkBufferCreateInfo bci = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, nullptr, 0, static_cast<VkDeviceSize>(size),
|
||||||
usage, VK_SHARING_MODE_EXCLUSIVE, 0, nullptr};
|
usage, VK_SHARING_MODE_EXCLUSIVE, 0, nullptr};
|
||||||
|
|
||||||
VmaAllocationCreateInfo aci = {};
|
VmaAllocationCreateInfo aci = {};
|
||||||
aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
|
if (device_local)
|
||||||
aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
|
{
|
||||||
aci.preferredFlags = VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
|
// GPU default buffer
|
||||||
|
aci.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// CPU upload buffer
|
||||||
|
aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
|
||||||
|
aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
|
||||||
|
aci.preferredFlags = VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
|
||||||
|
}
|
||||||
|
|
||||||
VmaAllocationInfo ai = {};
|
VmaAllocationInfo ai = {};
|
||||||
VkBuffer new_buffer = VK_NULL_HANDLE;
|
VkBuffer new_buffer = VK_NULL_HANDLE;
|
||||||
@ -83,7 +95,8 @@ bool VKStreamBuffer::Create(VkBufferUsageFlags usage, u32 size)
|
|||||||
m_tracked_fences.clear();
|
m_tracked_fences.clear();
|
||||||
m_allocation = new_allocation;
|
m_allocation = new_allocation;
|
||||||
m_buffer = new_buffer;
|
m_buffer = new_buffer;
|
||||||
m_host_pointer = static_cast<u8*>(ai.pMappedData);
|
m_host_pointer = device_local ? nullptr : static_cast<u8*>(ai.pMappedData);
|
||||||
|
m_device_local = device_local;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -104,6 +117,7 @@ void VKStreamBuffer::Destroy(bool defer)
|
|||||||
m_buffer = VK_NULL_HANDLE;
|
m_buffer = VK_NULL_HANDLE;
|
||||||
m_allocation = VK_NULL_HANDLE;
|
m_allocation = VK_NULL_HANDLE;
|
||||||
m_host_pointer = nullptr;
|
m_host_pointer = nullptr;
|
||||||
|
m_device_local = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool VKStreamBuffer::ReserveMemory(u32 num_bytes, u32 alignment)
|
bool VKStreamBuffer::ReserveMemory(u32 num_bytes, u32 alignment)
|
||||||
@ -180,8 +194,11 @@ void VKStreamBuffer::CommitMemory(u32 final_num_bytes)
|
|||||||
pxAssert((m_current_offset + final_num_bytes) <= m_size);
|
pxAssert((m_current_offset + final_num_bytes) <= m_size);
|
||||||
pxAssert(final_num_bytes <= m_current_space);
|
pxAssert(final_num_bytes <= m_current_space);
|
||||||
|
|
||||||
// For non-coherent mappings, flush the memory range
|
if (!m_device_local)
|
||||||
vmaFlushAllocation(GSDeviceVK::GetInstance()->GetAllocator(), m_allocation, m_current_offset, final_num_bytes);
|
{
|
||||||
|
// For non-coherent mappings, flush the memory range
|
||||||
|
vmaFlushAllocation(GSDeviceVK::GetInstance()->GetAllocator(), m_allocation, m_current_offset, final_num_bytes);
|
||||||
|
}
|
||||||
|
|
||||||
m_current_offset += final_num_bytes;
|
m_current_offset += final_num_bytes;
|
||||||
m_current_space -= final_num_bytes;
|
m_current_space -= final_num_bytes;
|
||||||
|
|||||||
@ -30,14 +30,13 @@ public:
|
|||||||
__fi u32 GetCurrentSpace() const { return m_current_space; }
|
__fi u32 GetCurrentSpace() const { return m_current_space; }
|
||||||
__fi u32 GetCurrentOffset() const { return m_current_offset; }
|
__fi u32 GetCurrentOffset() const { return m_current_offset; }
|
||||||
|
|
||||||
bool Create(VkBufferUsageFlags usage, u32 size);
|
bool Create(VkBufferUsageFlags usage, u32 size, bool device_local = false);
|
||||||
void Destroy(bool defer);
|
void Destroy(bool defer);
|
||||||
|
|
||||||
bool ReserveMemory(u32 num_bytes, u32 alignment);
|
bool ReserveMemory(u32 num_bytes, u32 alignment);
|
||||||
void CommitMemory(u32 final_num_bytes);
|
void CommitMemory(u32 final_num_bytes);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool AllocateBuffer(VkBufferUsageFlags usage, u32 size);
|
|
||||||
void UpdateCurrentFencePosition();
|
void UpdateCurrentFencePosition();
|
||||||
void UpdateGPUPosition();
|
void UpdateGPUPosition();
|
||||||
|
|
||||||
@ -51,7 +50,8 @@ private:
|
|||||||
|
|
||||||
VmaAllocation m_allocation = VK_NULL_HANDLE;
|
VmaAllocation m_allocation = VK_NULL_HANDLE;
|
||||||
VkBuffer m_buffer = VK_NULL_HANDLE;
|
VkBuffer m_buffer = VK_NULL_HANDLE;
|
||||||
u8* m_host_pointer = nullptr;
|
u8* m_host_pointer = nullptr; // Only used for upload buffers.
|
||||||
|
bool m_device_local = false; // False for upload buffer; true for default buffer.
|
||||||
|
|
||||||
// List of fences and the corresponding positions in the buffer
|
// List of fences and the corresponding positions in the buffer
|
||||||
std::deque<std::pair<u64, u32>> m_tracked_fences;
|
std::deque<std::pair<u64, u32>> m_tracked_fences;
|
||||||
|
|||||||
@ -751,6 +751,7 @@ Pcsx2Config::GSOptions::GSOptions()
|
|||||||
PreloadFrameWithGSData = false;
|
PreloadFrameWithGSData = false;
|
||||||
Mipmap = true;
|
Mipmap = true;
|
||||||
HWMipmap = true;
|
HWMipmap = true;
|
||||||
|
HWAccuratePrims = false;
|
||||||
|
|
||||||
ManualUserHacks = false;
|
ManualUserHacks = false;
|
||||||
UserHacks_AlignSpriteX = false;
|
UserHacks_AlignSpriteX = false;
|
||||||
@ -1021,6 +1022,7 @@ void Pcsx2Config::GSOptions::LoadSave(SettingsWrapper& wrap)
|
|||||||
SettingsWrapEntryEx(UpscaleMultiplier, "upscale_multiplier");
|
SettingsWrapEntryEx(UpscaleMultiplier, "upscale_multiplier");
|
||||||
|
|
||||||
SettingsWrapBitBoolEx(HWMipmap, "hw_mipmap");
|
SettingsWrapBitBoolEx(HWMipmap, "hw_mipmap");
|
||||||
|
SettingsWrapBitBoolEx(HWAccuratePrims, "HWAccuratePrims");
|
||||||
SettingsWrapIntEnumEx(AccurateBlendingUnit, "accurate_blending_unit");
|
SettingsWrapIntEnumEx(AccurateBlendingUnit, "accurate_blending_unit");
|
||||||
SettingsWrapIntEnumEx(TextureFiltering, "filter");
|
SettingsWrapIntEnumEx(TextureFiltering, "filter");
|
||||||
SettingsWrapIntEnumEx(TexturePreloading, "texture_preloading");
|
SettingsWrapIntEnumEx(TexturePreloading, "texture_preloading");
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user