GS/HW: Further improve clear behaviour
Some checks failed
🐧 Linux Builds / AppImage (push) Has been cancelled
🐧 Linux Builds / Flatpak (push) Has been cancelled
🍎 MacOS Builds / Defaults (push) Has been cancelled
🖥️ Windows Builds / Lint VS Project Files (push) Has been cancelled
🖥️ Windows Builds / CMake (push) Has been cancelled
🖥️ Windows Builds / SSE4 (push) Has been cancelled
🖥️ Windows Builds / AVX2 (push) Has been cancelled

This commit is contained in:
refractionpcsx2 2025-12-15 01:04:05 +00:00
parent 3088f83e13
commit 0f0ebbe480
4 changed files with 77 additions and 22 deletions

View File

@ -5490,6 +5490,11 @@ bool GSState::IsOpaque()
return true;
const GSDrawingContext* context = m_context;
const u32 fmsk = GSLocalMemory::m_psm[context->FRAME.PSM].fmsk;
// If we aren't drawing color, it's equivalent to opaque.
if ((context->FRAME.FBMSK & fmsk) == (fmsk & 0x00FFFFFF))
return true;
int amin = 0;
int amax = 0xff;

View File

@ -8903,17 +8903,17 @@ bool GSRendererHW::TryGSMemClear(bool no_rt, bool preserve_rt, bool invalidate_r
if (m_r.width() < ((static_cast<int>(m_cached_ctx.FRAME.FBW) - 1) * 64))
return false;
if (!no_rt && !preserve_rt)
if (!no_rt && (!preserve_rt || (IsOpaque() && m_cached_ctx.FRAME.FBMSK)))
{
ClearGSLocalMemory(m_context->offset.fb, m_r, GetConstantDirectWriteMemClearColor());
if (invalidate_rt)
if (invalidate_rt && !preserve_rt)
{
g_texture_cache->InvalidateVideoMem(m_context->offset.fb, m_r, false);
g_texture_cache->InvalidateContainedTargets(
GSLocalMemory::GetStartBlockAddress(
m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r),
rt_end_bp, m_cached_ctx.FRAME.PSM, m_cached_ctx.FRAME.FBW);
rt_end_bp, m_cached_ctx.FRAME.PSM, m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.FBMSK);
GSUploadQueue clear_queue;
clear_queue.draw = s_n;
@ -8924,6 +8924,13 @@ bool GSRendererHW::TryGSMemClear(bool no_rt, bool preserve_rt, bool invalidate_r
clear_queue.zero_clear = true;
m_draw_transfers.push_back(clear_queue);
}
else
{
g_texture_cache->InvalidateContainedTargets(
GSLocalMemory::GetStartBlockAddress(
m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r),
rt_end_bp, m_cached_ctx.FRAME.PSM, m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.FBMSK, true);
}
}
if (!no_ds && !preserve_z)
@ -8955,6 +8962,7 @@ void GSRendererHW::ClearGSLocalMemory(const GSOffset& off, const GSVector4i& r,
const int right = r.right;
const int bottom = r.bottom;
int top = r.top;
u32 drawing_mask = GSLocalMemory::m_psm[psm].depth ? 0x0 : m_cached_ctx.FRAME.FBMSK;
// Process the page aligned region first, then fall back to anything which is not.
// Since pages are linear in memory, we can do it basically with a vector memset.
@ -8970,22 +8978,34 @@ void GSRendererHW::ClearGSLocalMemory(const GSOffset& off, const GSVector4i& r,
if (format == GSLocalMemory::PSM_FMT_32)
{
const GSVector4i vcolor = GSVector4i(vert_color);
const GSVector4i vcolor = GSVector4i(vert_color & ~drawing_mask);
const u32 iterations_per_page = (pages_wide * pixels_per_page) / 4;
const GSVector4i mask = GSVector4i(drawing_mask);
pxAssert((off.bp() & (GS_BLOCKS_PER_PAGE - 1)) == 0);
for (u32 current_page = off.bp() >> 5; top < page_aligned_bottom; top += pgs.y, current_page += fbw)
{
current_page &= (GS_MAX_PAGES - 1);
GSVector4i* ptr = reinterpret_cast<GSVector4i*>(m_mem.vm8() + current_page * GS_PAGE_SIZE);
GSVector4i* const ptr_end = ptr + iterations_per_page;
if (drawing_mask)
{
while (ptr != ptr_end)
{
*ptr = (*ptr & mask) | vcolor;
ptr++;
}
}
else
{
while (ptr != ptr_end)
*(ptr++) = vcolor;
}
}
}
else if (format == GSLocalMemory::PSM_FMT_24)
{
const GSVector4i mask = GSVector4i::xff000000();
const GSVector4i vcolor = GSVector4i(vert_color & 0x00ffffffu);
const GSVector4i mask = GSVector4i::xff000000() | GSVector4i(drawing_mask);
const GSVector4i vcolor = GSVector4i((vert_color & 0x00ffffffu) & ~drawing_mask);
const u32 iterations_per_page = (pages_wide * pixels_per_page) / 4;
pxAssert((off.bp() & (GS_BLOCKS_PER_PAGE - 1)) == 0);
for (u32 current_page = off.bp() >> 5; top < page_aligned_bottom; top += pgs.y, current_page += fbw)
@ -9004,7 +9024,10 @@ void GSRendererHW::ClearGSLocalMemory(const GSOffset& off, const GSVector4i& r,
{
const u16 converted_color = ((vert_color >> 16) & 0x8000) | ((vert_color >> 9) & 0x7C00) |
((vert_color >> 6) & 0x7E0) | ((vert_color >> 3) & 0x1F);
const u16 converted_mask = ((drawing_mask >> 16) & 0x8000) | ((drawing_mask >> 9) & 0x7C00) |
((drawing_mask >> 6) & 0x7E0) | ((drawing_mask >> 3) & 0x1F);
const GSVector4i vcolor = GSVector4i::broadcast16(converted_color);
const GSVector4i mask = GSVector4i::broadcast16(converted_mask);
const u32 iterations_per_page = (pages_wide * pixels_per_page) / 8;
pxAssert((off.bp() & (GS_BLOCKS_PER_PAGE - 1)) == 0);
for (u32 current_page = off.bp() >> 5; top < page_aligned_bottom; top += pgs.y, current_page += fbw)
@ -9012,14 +9035,27 @@ void GSRendererHW::ClearGSLocalMemory(const GSOffset& off, const GSVector4i& r,
current_page &= (GS_MAX_PAGES - 1);
GSVector4i* ptr = reinterpret_cast<GSVector4i*>(m_mem.vm8() + current_page * GS_PAGE_SIZE);
GSVector4i* const ptr_end = ptr + iterations_per_page;
if (converted_mask)
{
while (ptr != ptr_end)
{
*ptr = (*ptr & mask) | vcolor;
ptr++;
}
}
else
{
while (ptr != ptr_end)
*(ptr++) = vcolor;
}
}
}
}
if (format == GSLocalMemory::PSM_FMT_32)
{
const u32 mask = drawing_mask;
const u32 vcolor = vert_color & ~mask;
// Based on WritePixel32
u32* vm = m_mem.vm32();
for (int y = top; y < bottom; y++)
@ -9027,25 +9063,28 @@ void GSRendererHW::ClearGSLocalMemory(const GSOffset& off, const GSVector4i& r,
GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(0, y);
for (int x = left; x < right; x++)
vm[pa.value(x)] = vert_color;
vm[pa.value(x)] = vcolor | (vm[pa.value(x)] & mask);
}
}
else if (format == GSLocalMemory::PSM_FMT_24)
{
// Based on WritePixel24
u32* vm = m_mem.vm32();
const u32 write_color = vert_color & 0xffffffu;
const u32 mask = drawing_mask | 0xff000000u;
const u32 write_color = (vert_color & 0xffffffu) & ~mask;
for (int y = top; y < bottom; y++)
{
GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(0, y);
for (int x = left; x < right; x++)
vm[pa.value(x)] = (vm[pa.value(x)] & 0xff000000u) | write_color;
vm[pa.value(x)] = (vm[pa.value(x)] & mask) | write_color;
}
}
else if (format == GSLocalMemory::PSM_FMT_16)
{
const u16 converted_color = ((vert_color >> 16) & 0x8000) | ((vert_color >> 9) & 0x7C00) | ((vert_color >> 6) & 0x7E0) | ((vert_color >> 3) & 0x1F);
const u16 converted_mask = ((drawing_mask >> 16) & 0x8000) | ((drawing_mask >> 9) & 0x7C00) |
((drawing_mask >> 6) & 0x7E0) | ((drawing_mask >> 3) & 0x1F);
const u16 converted_color = (((vert_color >> 16) & 0x8000) | ((vert_color >> 9) & 0x7C00) | ((vert_color >> 6) & 0x7E0) | ((vert_color >> 3) & 0x1F)) & ~converted_mask;
// Based on WritePixel16
u16* vm = m_mem.vm16();
@ -9054,7 +9093,7 @@ void GSRendererHW::ClearGSLocalMemory(const GSOffset& off, const GSVector4i& r,
GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle16).paMulti(0, y);
for (int x = left; x < right; x++)
vm[pa.value(x)] = converted_color;
vm[pa.value(x)] = converted_color | (vm[pa.value(x)] & converted_mask);
}
}
}

View File

@ -4270,16 +4270,17 @@ bool GSTextureCache::PrepareDownloadTexture(u32 width, u32 height, GSTexture::Fo
}
}
}*/
void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm, u32 write_bw)
void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm, u32 write_bw, u32 fb_mask, bool ignore_exact)
{
const bool preserve_alpha = (GSLocalMemory::m_psm[write_psm].trbpp == 24);
for (int type = 0; type < 2; type++)
const bool preserve_alpha = (GSLocalMemory::m_psm[write_psm].trbpp == 24) || (fb_mask & 0xFF000000);
for (int type = 0; type < (ignore_exact ? 1 : 2); type++)
{
auto& list = m_dst[type];
for (auto i = list.begin(); i != list.end();)
{
Target* const t = *i;
if (start_bp != t->m_TEX0.TBP0 && (t->m_TEX0.TBP0 > end_bp || t->UnwrappedEndBlock() < start_bp))
if ((ignore_exact && start_bp == t->m_TEX0.TBP0) || (start_bp != t->m_TEX0.TBP0 && (t->m_TEX0.TBP0 > end_bp || t->UnwrappedEndBlock() < start_bp)))
{
++i;
continue;
@ -4302,7 +4303,7 @@ void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 wr
{
RGBAMask mask;
mask._u32 = GSUtil::GetChannelMask(write_psm);
mask._u32 = GSUtil::GetChannelMask(write_psm, fb_mask);
AddDirtyRectTarget(t, invalidate_r, t->m_TEX0.PSM, t->m_TEX0.TBW, mask, false);
}
@ -4332,7 +4333,7 @@ void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 wr
t->m_valid_alpha_low &= preserve_alpha;
t->m_valid_alpha_high &= preserve_alpha;
t->m_valid_rgb = false;
t->m_valid_rgb &= (fb_mask & 0x00FFFFFF) != 0;
// Don't keep partial depth buffers around.
if ((!t->m_valid_alpha_low && !t->m_valid_alpha_high && !t->m_valid_rgb) || type == DepthStencil)
@ -4354,6 +4355,16 @@ void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 wr
delete t;
continue;
}
else if (ignore_exact && GSUtil::HasCompatibleBits(t->m_TEX0.PSM, write_psm))
{
RGBAMask mask;
mask._u32 = GSUtil::GetChannelMask(write_psm, fb_mask);
AddDirtyRectTarget(t, t->m_valid, t->m_TEX0.PSM, t->m_TEX0.TBW, mask, false);
t->m_valid_rgb |= !!(mask._u32 & 0x7);
t->m_valid_alpha_low |= mask.c.a;
t->m_valid_alpha_high |= mask.c.a;
}
GL_CACHE("TC: InvalidateContainedTargets: Clear RGB valid on %s[%x, %s]", to_string(type), t->m_TEX0.TBP0, GSUtil::GetPSMName(t->m_TEX0.PSM));
++i;

View File

@ -532,7 +532,7 @@ public:
bool HasTargetInHeightCache(u32 bp, u32 fbw, u32 psm, u32 max_age = std::numeric_limits<u32>::max(), bool move_front = true);
bool Has32BitTarget(u32 bp);
void InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm = PSMCT32, u32 write_bw = 1);
void InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm = PSMCT32, u32 write_bw = 1, u32 fb_mask = 0x00000000, bool ignore_exact = false);
void InvalidateVideoMemType(int type, u32 bp, u32 write_psm = PSMCT32, u32 write_fbmsk = 0, bool dirty_only = false);
void InvalidateVideoMemSubTarget(GSTextureCache::Target* rt);
void InvalidateVideoMem(const GSOffset& off, const GSVector4i& r, bool target = true);