Core: Postpone page table updates when DR is unset

Page table mappings are only used when DR is set, so if page tables are
updated while DR isn't set, we can defer updating the page table mappings
until DR gets set. This lets us batch page table updates in the Disney
Trio of Destruction, improving performance when the games are loading
data. It doesn't help much for GameCube games, because those run tlbie
with DR set.

The PowerPCState struct has had its members slightly reordered. I had to
put pagetable_update_pending less than 4 KiB from the start so AArch64's
LDRB (immediate) can access it, and I also took the opportunity to move
some other members around to cut down on padding.
This commit is contained in:
JosJuice 2025-06-20 09:16:55 +02:00
parent 083f3a7e0e
commit 7b885b857e
7 changed files with 94 additions and 19 deletions

View File

@ -513,6 +513,8 @@ void Jit64::MSRUpdated(const OpArg& msr, X64Reg scratch_reg)
{
ASSERT(!msr.IsSimpleReg(scratch_reg));
constexpr u32 dr_bit = 1 << UReg_MSR{}.DR.StartBit();
// Update mem_ptr
auto& memory = m_system.GetMemory();
if (msr.IsImm())
@ -524,7 +526,7 @@ void Jit64::MSRUpdated(const OpArg& msr, X64Reg scratch_reg)
{
MOV(64, R(RMEM), ImmPtr(memory.GetLogicalBase()));
MOV(64, R(scratch_reg), ImmPtr(memory.GetPhysicalBase()));
TEST(32, msr, Imm32(1 << (31 - 27)));
TEST(32, msr, Imm32(dr_bit));
CMOVcc(64, RMEM, R(scratch_reg), CC_Z);
}
MOV(64, PPCSTATE(mem_ptr), R(RMEM));
@ -548,6 +550,25 @@ void Jit64::MSRUpdated(const OpArg& msr, X64Reg scratch_reg)
OR(32, R(scratch_reg), Imm32(other_feature_flags));
MOV(32, PPCSTATE(feature_flags), R(scratch_reg));
}
// Call PageTableUpdatedFromJit if needed
if (!msr.IsImm() || UReg_MSR(msr.Imm32()).DR)
{
gpr.Flush();
fpr.Flush();
FixupBranch dr_unset;
if (!msr.IsImm())
{
TEST(32, msr, Imm32(dr_bit));
dr_unset = J_CC(CC_Z);
}
CMP(8, PPCSTATE(pagetable_update_pending), Imm8(0));
FixupBranch update_not_pending = J_CC(CC_E);
ABI_CallFunctionP(&PowerPC::MMU::PageTableUpdatedFromJit, &m_system.GetMMU());
SetJumpTarget(update_not_pending);
if (!msr.IsImm())
SetJumpTarget(dr_unset);
}
}
void Jit64::WriteExit(u32 destination, bool bl, u32 after)

View File

@ -436,11 +436,14 @@ void Jit64::mtmsr(UGeckoInstruction inst)
FALLBACK_IF(jo.fp_exceptions);
{
RCOpArg Rs = gpr.BindOrImm(inst.RS, RCMode::Read);
RegCache::Realize(Rs);
MOV(32, PPCSTATE(msr), Rs);
MSRUpdated(Rs, RSCRATCH2);
OpArg Rs_op_arg;
{
RCOpArg Rs = gpr.BindOrImm(inst.RS, RCMode::Read);
RegCache::Realize(Rs);
MOV(32, PPCSTATE(msr), Rs);
Rs_op_arg = Rs;
}
MSRUpdated(Rs_op_arg, RSCRATCH2);
}
gpr.Flush();

View File

@ -452,10 +452,27 @@ void JitArm64::MSRUpdated(u32 msr)
MOVI2R(WA, feature_flags);
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(feature_flags));
}
// Call PageTableUpdatedFromJit if needed
if (UReg_MSR(msr).DR)
{
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
auto WA = gpr.GetScopedReg();
static_assert(PPCSTATE_OFF(pagetable_update_pending) < 0x1000);
LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(pagetable_update_pending));
FixupBranch update_not_pending = CBZ(WA);
ABI_CallFunction(&PowerPC::MMU::PageTableUpdatedFromJit, &m_system.GetMMU());
SetJumpTarget(update_not_pending);
}
}
void JitArm64::MSRUpdated(ARM64Reg msr)
{
constexpr LogicalImm dr_bit(1ULL << UReg_MSR{}.DR.StartBit(), GPRSize::B32);
auto WA = gpr.GetScopedReg();
ARM64Reg XA = EncodeRegTo64(WA);
@ -463,7 +480,7 @@ void JitArm64::MSRUpdated(ARM64Reg msr)
auto& memory = m_system.GetMemory();
MOVP2R(MEM_REG, jo.fastmem ? memory.GetLogicalBase() : memory.GetLogicalPageMappingsBase());
MOVP2R(XA, jo.fastmem ? memory.GetPhysicalBase() : memory.GetPhysicalPageMappingsBase());
TST(msr, LogicalImm(1 << (31 - 27), GPRSize::B32));
TST(msr, dr_bit);
CSEL(MEM_REG, MEM_REG, XA, CCFlags::CC_NEQ);
STR(IndexType::Unsigned, MEM_REG, PPC_REG, PPCSTATE_OFF(mem_ptr));
@ -477,6 +494,18 @@ void JitArm64::MSRUpdated(ARM64Reg msr)
if (other_feature_flags != 0)
ORR(WA, WA, LogicalImm(other_feature_flags, GPRSize::B32));
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(feature_flags));
// Call PageTableUpdatedFromJit if needed
MOV(WA, msr);
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
FixupBranch dr_unset = TBZ(WA, dr_bit);
static_assert(PPCSTATE_OFF(pagetable_update_pending) < 0x1000);
LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(pagetable_update_pending));
FixupBranch update_not_pending = CBZ(WA);
ABI_CallFunction(&PowerPC::MMU::PageTableUpdatedFromJit, &m_system.GetMMU());
SetJumpTarget(update_not_pending);
SetJumpTarget(dr_unset);
}
void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return,

View File

@ -1240,7 +1240,10 @@ void MMU::SDRUpdated()
// Reacts to an SR update by refreshing the page table mappings.
//
// Page table mappings are only used while MSR.DR is set, so the rebuild is
// performed immediately only in that case. With DR clear, the work is deferred
// by raising pagetable_update_pending; the pending rebuild is carried out once
// an MSR update sets DR.
//
// There must be no unconditional PageTableUpdated() call here: it would rebuild
// the mappings even with DR clear (defeating the deferral) and rebuild them
// twice with DR set.
void MMU::SRUpdated()
{
  if (m_ppc_state.msr.DR)
    PageTableUpdated();
  else
    m_ppc_state.pagetable_update_pending = true;
}
enum class TLBLookupResult
@ -1331,11 +1334,15 @@ void MMU::InvalidateTLBEntry(u32 address)
m_ppc_state.tlb[PowerPC::DATA_TLB_INDEX][entry_index].Invalidate();
m_ppc_state.tlb[PowerPC::INST_TLB_INDEX][entry_index].Invalidate();
PageTableUpdated();
if (m_ppc_state.msr.DR)
PageTableUpdated();
else
m_ppc_state.pagetable_update_pending = true;
}
void MMU::PageTableUpdated()
{
m_ppc_state.pagetable_update_pending = false;
m_page_mappings.clear();
if (!m_system.GetJitInterface().WantsPageTableMappings())
@ -1456,6 +1463,11 @@ void MMU::PageTableUpdated()
m_memory.UpdatePageTableMappings(m_page_mappings);
}
// Static forwarding wrapper used by JIT-emitted code: the generated call passes
// the MMU instance as its only argument, and this simply runs PageTableUpdated()
// on that instance.
void MMU::PageTableUpdatedFromJit(MMU* mmu)
{
  MMU& target = *mmu;
  target.PageTableUpdated();
}
// Page Address Translation
template <const XCheckTLBFlag flag>
MMU::TranslateAddressResult MMU::TranslatePageAddress(const EffectiveAddress address, bool* wi)

View File

@ -248,6 +248,7 @@ public:
void SRUpdated();
void InvalidateTLBEntry(u32 address);
void PageTableUpdated();
static void PageTableUpdatedFromJit(MMU* mmu);
void DBATUpdated();
void IBATUpdated();

View File

@ -95,6 +95,7 @@ void PowerPCManager::DoState(PointerWrap& p)
p.DoArray(m_ppc_state.tlb);
p.Do(m_ppc_state.pagetable_base);
p.Do(m_ppc_state.pagetable_mask);
p.Do(m_ppc_state.pagetable_update_pending);
p.Do(m_ppc_state.reserve);
p.Do(m_ppc_state.reserve_address);
@ -277,6 +278,7 @@ void PowerPCManager::Reset()
{
m_ppc_state.pagetable_base = 0;
m_ppc_state.pagetable_mask = 0;
m_ppc_state.pagetable_update_pending = false;
m_ppc_state.tlb = {};
ResetRegisters();
@ -670,6 +672,9 @@ void PowerPCManager::MSRUpdated()
m_ppc_state.feature_flags = static_cast<CPUEmuFeatureFlags>(
(m_ppc_state.feature_flags & FEATURE_FLAG_PERFMON) | ((m_ppc_state.msr.Hex >> 4) & 0x3));
if (m_ppc_state.msr.DR && m_ppc_state.pagetable_update_pending)
m_system.GetMMU().PageTableUpdated();
m_system.GetJitInterface().UpdateMembase();
}

View File

@ -122,6 +122,9 @@ struct PowerPCState
u32 pc = 0; // program counter
u32 npc = 0;
// Storage for the stack pointer of the BLR optimization.
u8* stored_stack_pointer = nullptr;
// gather pipe pointer for JIT access
u8* gather_pipe_ptr = nullptr;
u8* gather_pipe_base_ptr = nullptr;
@ -157,6 +160,14 @@ struct PowerPCState
// lscbx
u16 xer_stringctrl = 0;
// Reservation monitor for lwarx and its friend stwcxd. These two don't really need to be
// this early in the struct, but due to how the padding works out, they fit nicely here.
u32 reserve_address;
bool reserve;
bool pagetable_update_pending = false;
bool m_enable_dcache = false;
#ifdef _M_X86_64
// This member exists only for the purpose of an assertion that its offset <= 0x100.
std::tuple<> above_fits_in_first_0x100;
@ -171,22 +182,15 @@ struct PowerPCState
// JitArm64 needs 64-bit alignment for SPR_TL.
alignas(8) u32 spr[1024]{};
// Storage for the stack pointer of the BLR optimization.
u8* stored_stack_pointer = nullptr;
u8* mem_ptr = nullptr;
std::array<std::array<TLBEntry, TLB_SIZE / TLB_WAYS>, NUM_TLBS> tlb;
u32 pagetable_base = 0;
u32 pagetable_mask = 0;
InstructionCache iCache;
bool m_enable_dcache = false;
Cache dCache;
std::array<std::array<TLBEntry, TLB_SIZE / TLB_WAYS>, NUM_TLBS> tlb;
// Reservation monitor for lwarx and its friend stwcxd.
bool reserve;
u32 reserve_address;
InstructionCache iCache;
Cache dCache;
void UpdateCR1()
{