diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt index c898c8f693e..7fc503e2f97 100644 --- a/Source/Core/Core/CMakeLists.txt +++ b/Source/Core/Core/CMakeLists.txt @@ -508,6 +508,8 @@ add_library(core PowerPC/Interpreter/Interpreter_Tables.cpp PowerPC/Interpreter/Interpreter.cpp PowerPC/Interpreter/Interpreter.h + PowerPC/JitCommon/ConstantPropagation.cpp + PowerPC/JitCommon/ConstantPropagation.h PowerPC/JitCommon/DivUtils.cpp PowerPC/JitCommon/DivUtils.h PowerPC/JitCommon/JitAsmCommon.cpp diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index 377b4388fb4..6cee5b971e3 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -42,6 +42,7 @@ #include "Core/PowerPC/Jit64Common/Jit64Constants.h" #include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h" #include "Core/PowerPC/Jit64Common/TrampolineCache.h" +#include "Core/PowerPC/JitCommon/ConstantPropagation.h" #include "Core/PowerPC/JitInterface.h" #include "Core/PowerPC/MMU.h" #include "Core/PowerPC/PPCAnalyst.h" @@ -369,6 +370,9 @@ void Jit64::FallBackToInterpreter(UGeckoInstruction inst) gpr.Reset(js.op->regsOut); fpr.Reset(js.op->GetFregsOut()); + // We must also update constant propagation + m_constant_propagation.ClearGPRs(js.op->regsOut); + if (js.op->opinfo->flags & FL_SET_MSR) EmitUpdateMembase(); @@ -921,6 +925,8 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) gpr.Start(); fpr.Start(); + m_constant_propagation.Clear(); + js.downcountAmount = 0; js.skipInstructions = 0; js.carryFlag = CarryFlag::InPPCState; @@ -1105,21 +1111,55 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) { gpr.Flush(); fpr.Flush(); + m_constant_propagation.Clear(); + + CompileInstruction(op); } else { - // If we have an input register that is going to be used again, load it pre-emptively, - // even if the instruction doesn't strictly need it in a register, to avoid redundant - // loads later. 
Of course, don't do this if we're already out of registers. - // As a bit of a heuristic, make sure we have at least one register left over for the - // output, which needs to be bound in the actual instruction compilation. - // TODO: make this smarter in the case that we're actually register-starved, i.e. - // prioritize the more important registers. - gpr.PreloadRegisters(op.regsIn & op.gprInUse & ~op.gprDiscardable); - fpr.PreloadRegisters(op.fregsIn & op.fprInXmm & ~op.fprDiscardable); - } + const JitCommon::ConstantPropagationResult constant_propagation_result = + m_constant_propagation.EvaluateInstruction(op.inst, opinfo->flags); - CompileInstruction(op); + if (!constant_propagation_result.instruction_fully_executed) + { + if (!bJITRegisterCacheOff) + { + // If we have an input register that is going to be used again, load it pre-emptively, + // even if the instruction doesn't strictly need it in a register, to avoid redundant + // loads later. Of course, don't do this if we're already out of registers. + // As a bit of a heuristic, make sure we have at least one register left over for the + // output, which needs to be bound in the actual instruction compilation. + // TODO: make this smarter in the case that we're actually register-starved, i.e. + // prioritize the more important registers. 
+ gpr.PreloadRegisters(op.regsIn & op.gprInUse & ~op.gprDiscardable); + fpr.PreloadRegisters(op.fregsIn & op.fprInXmm & ~op.fprDiscardable); + } + + CompileInstruction(op); + } + + m_constant_propagation.Apply(constant_propagation_result); + + if (constant_propagation_result.gpr >= 0) + { + // Mark the GPR as dirty in the register cache + gpr.SetImmediate32(constant_propagation_result.gpr, + constant_propagation_result.gpr_value); + } + + if (constant_propagation_result.instruction_fully_executed) + { + if (constant_propagation_result.carry) + FinalizeCarry(*constant_propagation_result.carry); + + if (constant_propagation_result.overflow) + GenerateConstantOverflow(*constant_propagation_result.overflow); + + // FinalizeImmediateRC is called last, because it may trigger branch merging + if (constant_propagation_result.compute_rc) + FinalizeImmediateRC(constant_propagation_result.gpr_value); + } + } js.fpr_is_store_safe = op.fprIsStoreSafeAfterInst; diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index e72397ccc52..8b96fda107a 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -31,6 +31,7 @@ #include "Core/PowerPC/Jit64Common/BlockCache.h" #include "Core/PowerPC/Jit64Common/Jit64AsmCommon.h" #include "Core/PowerPC/Jit64Common/TrampolineCache.h" +#include "Core/PowerPC/JitCommon/ConstantPropagation.h" #include "Core/PowerPC/JitCommon/JitBase.h" #include "Core/PowerPC/JitCommon/JitCache.h" @@ -83,6 +84,8 @@ public: void FlushRegistersBeforeSlowAccess(); + JitCommon::ConstantPropagation& GetConstantPropagation() { return m_constant_propagation; } + JitBlockCache* GetBlockCache() override { return &blocks; } void Trace(); @@ -122,6 +125,7 @@ public: void FinalizeCarry(Gen::CCFlags cond); void FinalizeCarry(bool ca); void ComputeRC(preg_t preg, bool needs_test = true, bool needs_sext = true); + void FinalizeImmediateRC(s32 value); void AndWithMask(Gen::X64Reg reg, u32 mask); void 
RotateLeft(int bits, Gen::X64Reg regOp, const Gen::OpArg& arg, u8 rotate); @@ -288,6 +292,8 @@ private: GPRRegCache gpr{*this}; FPURegCache fpr{*this}; + JitCommon::ConstantPropagation m_constant_propagation; + Jit64AsmRoutineManager asm_routines{*this}; HyoutaUtilities::RangeSizeSet m_free_ranges_near; diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index 65d2a4296b2..03bf2fc7867 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -150,7 +150,10 @@ void Jit64::ComputeRC(preg_t preg, bool needs_test, bool needs_sext) if (arg.IsImm()) { - MOV(64, PPCSTATE_CR(0), Imm32(arg.SImm32())); + const s32 value = arg.SImm32(); + arg.Unlock(); + FinalizeImmediateRC(value); + return; } else if (needs_sext) { @@ -164,33 +167,32 @@ void Jit64::ComputeRC(preg_t preg, bool needs_test, bool needs_sext) if (CheckMergedBranch(0)) { - if (arg.IsImm()) + if (needs_test) { - s32 offset = arg.SImm32(); + TEST(32, arg, arg); arg.Unlock(); - DoMergedBranchImmediate(offset); } else { - if (needs_test) - { - TEST(32, arg, arg); - arg.Unlock(); - } - else - { - // If an operand to the cmp/rc op we're merging with the branch isn't used anymore, it'd be - // better to flush it here so that we don't have to flush it on both sides of the branch. - // We don't want to do this if a test is needed though, because it would interrupt macro-op - // fusion. - arg.Unlock(); - gpr.Flush(~js.op->gprInUse); - } - DoMergedBranchCondition(); + // If an operand to the cmp/rc op we're merging with the branch isn't used anymore, it'd be + // better to flush it here so that we don't have to flush it on both sides of the branch. + // We don't want to do this if a test is needed though, because it would interrupt macro-op + // fusion. 
+ arg.Unlock(); + gpr.Flush(~js.op->gprInUse); } + DoMergedBranchCondition(); } } +void Jit64::FinalizeImmediateRC(s32 value) +{ + MOV(64, PPCSTATE_CR(0), Imm32(value)); + + if (CheckMergedBranch(0)) + DoMergedBranchImmediate(value); +} + // we can't do this optimization in the emitter because MOVZX and AND have different effects on // flags. void Jit64::AndWithMask(X64Reg reg, u32 mask) @@ -258,25 +260,18 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, if (a || binary || carry) { carry &= js.op->wantsCA; - if (gpr.IsImm(a) && !carry) + RCOpArg Ra = gpr.Use(a, RCMode::Read); + RCX64Reg Rd = gpr.Bind(d, RCMode::Write); + RegCache::Realize(Ra, Rd); + if (doop == Add && Ra.IsSimpleReg() && !carry && d != a) { - gpr.SetImmediate32(d, doop(gpr.Imm32(a), value)); + LEA(32, Rd, MDisp(Ra.GetSimpleReg(), value)); } else { - RCOpArg Ra = gpr.Use(a, RCMode::Read); - RCX64Reg Rd = gpr.Bind(d, RCMode::Write); - RegCache::Realize(Ra, Rd); - if (doop == Add && Ra.IsSimpleReg() && !carry && d != a) - { - LEA(32, Rd, MDisp(Ra.GetSimpleReg(), value)); - } - else - { - if (d != a) - MOV(32, Rd, Ra); - (this->*op)(32, Rd, Imm32(value)); // m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16; - } + if (d != a) + MOV(32, Rd, Ra); + (this->*op)(32, Rd, Imm32(value)); // m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16; } if (carry) FinalizeCarry(CC_C); @@ -302,12 +297,8 @@ void Jit64::reg_imm(UGeckoInstruction inst) switch (inst.OPCD) { case 14: // addi - // occasionally used as MOV - emulate, with immediate propagation - if (a != 0 && d != a && gpr.IsImm(a)) - { - gpr.SetImmediate32(d, gpr.Imm32(a) + (u32)(s32)inst.SIMM_16); - } - else if (a != 0 && d != a && inst.SIMM_16 == 0) + // occasionally used as MOV + if (a != 0 && d != a && inst.SIMM_16 == 0) { RCOpArg Ra = gpr.Use(a, RCMode::Read); RCX64Reg Rd = gpr.Bind(d, RCMode::Write); @@ -325,14 +316,6 @@ void Jit64::reg_imm(UGeckoInstruction inst) case 24: // ori case 25: // oris { - // check for nop - if (a == s && 
inst.UIMM == 0) - { - // Make the nop visible in the generated code. not much use but interesting if we see one. - NOP(); - return; - } - const u32 immediate = inst.OPCD == 24 ? inst.UIMM : inst.UIMM << 16; regimmop(a, s, true, immediate, Or, &XEmitter::OR); break; @@ -346,13 +329,6 @@ void Jit64::reg_imm(UGeckoInstruction inst) case 26: // xori case 27: // xoris { - if (s == a && inst.UIMM == 0) - { - // Make the nop visible in the generated code. - NOP(); - return; - } - const u32 immediate = inst.OPCD == 26 ? inst.UIMM : inst.UIMM << 16; regimmop(a, s, true, immediate, Xor, &XEmitter::XOR, false); break; @@ -704,29 +680,7 @@ void Jit64::boolX(UGeckoInstruction inst) bool needs_test = false; DEBUG_ASSERT_MSG(DYNA_REC, inst.OPCD == 31, "Invalid boolX"); - if (gpr.IsImm(s, b)) - { - const u32 rs_offset = gpr.Imm32(s); - const u32 rb_offset = gpr.Imm32(b); - - if (inst.SUBOP10 == 28) // andx - gpr.SetImmediate32(a, rs_offset & rb_offset); - else if (inst.SUBOP10 == 476) // nandx - gpr.SetImmediate32(a, ~(rs_offset & rb_offset)); - else if (inst.SUBOP10 == 60) // andcx - gpr.SetImmediate32(a, rs_offset & (~rb_offset)); - else if (inst.SUBOP10 == 444) // orx - gpr.SetImmediate32(a, rs_offset | rb_offset); - else if (inst.SUBOP10 == 124) // norx - gpr.SetImmediate32(a, ~(rs_offset | rb_offset)); - else if (inst.SUBOP10 == 412) // orcx - gpr.SetImmediate32(a, rs_offset | (~rb_offset)); - else if (inst.SUBOP10 == 316) // xorx - gpr.SetImmediate32(a, rs_offset ^ rb_offset); - else if (inst.SUBOP10 == 284) // eqvx - gpr.SetImmediate32(a, ~(rs_offset ^ rb_offset)); - } - else if (gpr.IsImm(s) || gpr.IsImm(b)) + if (gpr.IsImm(s) || gpr.IsImm(b)) { const auto [i, j] = gpr.IsImm(s) ? 
std::pair(s, b) : std::pair(b, s); u32 imm = gpr.Imm32(i); @@ -780,53 +734,46 @@ void Jit64::boolX(UGeckoInstruction inst) } else if (is_and) { - if (imm == 0) + RCOpArg Rj = gpr.Use(j, RCMode::Read); + RCX64Reg Ra = gpr.Bind(a, RCMode::Write); + RegCache::Realize(Rj, Ra); + + if (imm == 0xFFFFFFFF) { - gpr.SetImmediate32(a, final_not ? 0xFFFFFFFF : 0); + if (a != j) + MOV(32, Ra, Rj); + if (final_not || complement_b) + NOT(32, Ra); + needs_test = true; + } + else if (complement_b) + { + if (a != j) + MOV(32, Ra, Rj); + NOT(32, Ra); + AND(32, Ra, Imm32(imm)); } else { - RCOpArg Rj = gpr.Use(j, RCMode::Read); - RCX64Reg Ra = gpr.Bind(a, RCMode::Write); - RegCache::Realize(Rj, Ra); - - if (imm == 0xFFFFFFFF) + if (a == j) { - if (a != j) - MOV(32, Ra, Rj); - if (final_not || complement_b) - NOT(32, Ra); - needs_test = true; + AND(32, Ra, Imm32(imm)); } - else if (complement_b) + else if (s32(imm) >= -128 && s32(imm) <= 127) { - if (a != j) - MOV(32, Ra, Rj); - NOT(32, Ra); + MOV(32, Ra, Rj); AND(32, Ra, Imm32(imm)); } else { - if (a == j) - { - AND(32, Ra, Imm32(imm)); - } - else if (s32(imm) >= -128 && s32(imm) <= 127) - { - MOV(32, Ra, Rj); - AND(32, Ra, Imm32(imm)); - } - else - { - MOV(32, Ra, Imm32(imm)); - AND(32, Ra, Rj); - } + MOV(32, Ra, Imm32(imm)); + AND(32, Ra, Rj); + } - if (final_not) - { - NOT(32, Ra); - needs_test = true; - } + if (final_not) + { + NOT(32, Ra); + needs_test = true; } } } @@ -1079,13 +1026,8 @@ void Jit64::extsXx(UGeckoInstruction inst) int a = inst.RA, s = inst.RS; int size = inst.SUBOP10 == 922 ? 16 : 8; - if (gpr.IsImm(s)) { - gpr.SetImmediate32(a, (u32)(s32)(size == 16 ? 
(s16)gpr.Imm32(s) : (s8)gpr.Imm32(s))); - } - else - { - RCOpArg Rs = gpr.Use(s, RCMode::Read); + RCOpArg Rs = gpr.UseNoImm(s, RCMode::Read); RCX64Reg Ra = gpr.Bind(a, RCMode::Write); RegCache::Realize(Rs, Ra); MOVSX(32, size, Ra, Rs); @@ -1100,14 +1042,6 @@ void Jit64::subfic(UGeckoInstruction inst) JITDISABLE(bJITIntegerOff); int a = inst.RA, d = inst.RD, imm = inst.SIMM_16; - if (gpr.IsImm(a)) - { - u32 i = imm, j = gpr.Imm32(a); - gpr.SetImmediate32(d, i - j); - FinalizeCarry(j == 0 || (i > j - 1)); - return; - } - RCOpArg Ra = gpr.Use(a, RCMode::Read); RCX64Reg Rd = gpr.Bind(d, RCMode::Write); RegCache::Realize(Ra, Rd); @@ -1155,24 +1089,7 @@ void Jit64::subfx(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, d = inst.RD; const bool carry = !(inst.SUBOP10 & (1 << 5)); - if (a == b) - { - gpr.SetImmediate32(d, 0); - if (carry) - FinalizeCarry(true); - if (inst.OE) - GenerateConstantOverflow(false); - } - else if (gpr.IsImm(a, b)) - { - s32 i = gpr.SImm32(b), j = gpr.SImm32(a); - gpr.SetImmediate32(d, i - j); - if (carry) - FinalizeCarry(j == 0 || Interpreter::Helper_Carry((u32)i, 0u - (u32)j)); - if (inst.OE) - GenerateConstantOverflow((s64)i - (s64)j); - } - else if (gpr.IsImm(a)) + if (gpr.IsImm(a)) { s32 j = gpr.SImm32(a); RCOpArg Rb = gpr.Use(b, RCMode::Read); @@ -1259,13 +1176,6 @@ void Jit64::MultiplyImmediate(u32 imm, int a, int d, bool overflow) RCX64Reg Rd = gpr.Bind(d, RCMode::Write); RegCache::Realize(Ra, Rd); - // simplest cases first - if (imm == 0) - { - XOR(32, Rd, Rd); - return; - } - if (imm == (u32)-1) { if (d != a) @@ -1320,14 +1230,7 @@ void Jit64::mulli(UGeckoInstruction inst) int a = inst.RA, d = inst.RD; u32 imm = inst.SIMM_16; - if (gpr.IsImm(a)) - { - gpr.SetImmediate32(d, gpr.Imm32(a) * imm); - } - else - { - MultiplyImmediate(imm, a, d, false); - } + MultiplyImmediate(imm, a, d, false); } void Jit64::mullwx(UGeckoInstruction inst) @@ -1336,14 +1239,7 @@ void Jit64::mullwx(UGeckoInstruction inst) JITDISABLE(bJITIntegerOff); int a = 
inst.RA, b = inst.RB, d = inst.RD; - if (gpr.IsImm(a, b)) - { - s32 i = gpr.SImm32(a), j = gpr.SImm32(b); - gpr.SetImmediate32(d, i * j); - if (inst.OE) - GenerateConstantOverflow((s64)i * (s64)j); - } - else if (gpr.IsImm(a) || gpr.IsImm(b)) + if (gpr.IsImm(a) || gpr.IsImm(b)) { u32 imm = gpr.IsImm(a) ? gpr.Imm32(a) : gpr.Imm32(b); int src = gpr.IsImm(a) ? b : a; @@ -1385,14 +1281,7 @@ void Jit64::mulhwXx(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, d = inst.RD; bool sign = inst.SUBOP10 == 75; - if (gpr.IsImm(a, b)) - { - if (sign) - gpr.SetImmediate32(d, (u32)((u64)(((s64)gpr.SImm32(a) * (s64)gpr.SImm32(b))) >> 32)); - else - gpr.SetImmediate32(d, (u32)(((u64)gpr.Imm32(a) * (u64)gpr.Imm32(b)) >> 32)); - } - else if (sign) + if (sign) { RCOpArg Ra = gpr.Use(a, RCMode::Read); RCOpArg Rb = gpr.UseNoImm(b, RCMode::Read); @@ -1432,22 +1321,7 @@ void Jit64::divwux(UGeckoInstruction inst) JITDISABLE(bJITIntegerOff); int a = inst.RA, b = inst.RB, d = inst.RD; - if (gpr.IsImm(a, b)) - { - if (gpr.Imm32(b) == 0) - { - gpr.SetImmediate32(d, 0); - if (inst.OE) - GenerateConstantOverflow(true); - } - else - { - gpr.SetImmediate32(d, gpr.Imm32(a) / gpr.Imm32(b)); - if (inst.OE) - GenerateConstantOverflow(false); - } - } - else if (gpr.IsImm(b)) + if (gpr.IsImm(b)) { u32 divisor = gpr.Imm32(b); if (divisor == 0) @@ -1559,24 +1433,7 @@ void Jit64::divwx(UGeckoInstruction inst) JITDISABLE(bJITIntegerOff); int a = inst.RA, b = inst.RB, d = inst.RD; - if (gpr.IsImm(a, b)) - { - s32 i = gpr.SImm32(a), j = gpr.SImm32(b); - if (j == 0 || (i == (s32)0x80000000 && j == -1)) - { - const u32 result = i < 0 ? 
0xFFFFFFFF : 0x00000000; - gpr.SetImmediate32(d, result); - if (inst.OE) - GenerateConstantOverflow(true); - } - else - { - gpr.SetImmediate32(d, i / j); - if (inst.OE) - GenerateConstantOverflow(false); - } - } - else if (gpr.IsImm(a)) + if (gpr.IsImm(a)) { // Constant dividend const u32 dividend = gpr.Imm32(a); @@ -1879,16 +1736,7 @@ void Jit64::addx(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, d = inst.RD; bool carry = !(inst.SUBOP10 & (1 << 8)); - if (gpr.IsImm(a, b)) - { - const s32 i = gpr.SImm32(a), j = gpr.SImm32(b); - gpr.SetImmediate32(d, i + j); - if (carry) - FinalizeCarry(Interpreter::Helper_Carry(i, j)); - if (inst.OE) - GenerateConstantOverflow((s64)i + (s64)j); - } - else if (gpr.IsImm(a) || gpr.IsImm(b)) + if (gpr.IsImm(a) || gpr.IsImm(b)) { const auto [i, j] = gpr.IsImm(a) ? std::pair(a, b) : std::pair(b, a); const s32 imm = gpr.SImm32(i); @@ -2046,112 +1894,99 @@ void Jit64::rlwinmx(UGeckoInstruction inst) int a = inst.RA; int s = inst.RS; - if (gpr.IsImm(s)) + const bool left_shift = inst.SH && inst.MB == 0 && inst.ME == 31 - inst.SH; + const bool right_shift = inst.SH && inst.ME == 31 && inst.MB == 32 - inst.SH; + const bool field_extract = inst.SH && inst.ME == 31 && inst.MB > 32 - inst.SH; + const u32 mask = MakeRotationMask(inst.MB, inst.ME); + const u32 prerotate_mask = std::rotr(mask, inst.SH); + const bool simple_mask = mask == 0xff || mask == 0xffff; + const bool simple_prerotate_mask = prerotate_mask == 0xff || prerotate_mask == 0xffff; + // In case of a merged branch, track whether or not we've set flags. + // If not, we need to do a test later to get them. + bool needs_test = true; + // If we know the high bit can't be set, we can avoid doing a sign extend for flag storage. 
+ bool needs_sext = true; + int mask_size = inst.ME - inst.MB + 1; + + if (simple_mask && !(inst.SH & (mask_size - 1)) && !gpr.IsBound(s) && !gpr.IsImm(s)) { - u32 result = gpr.Imm32(s); - if (inst.SH != 0) - result = std::rotl(result, inst.SH); - result &= MakeRotationMask(inst.MB, inst.ME); - gpr.SetImmediate32(a, result); - if (inst.Rc) - ComputeRC(a); + // optimized case: byte/word extract from m_ppc_state + + // Note: If a == s, calling Realize(Ra) will allocate a host register for Rs, + // so we have to get mem_source from Rs before calling Realize(Ra) + + RCOpArg Rs = gpr.Use(s, RCMode::Read); + RegCache::Realize(Rs); + OpArg mem_source = Rs.Location(); + if (inst.SH) + mem_source.AddMemOffset((32 - inst.SH) >> 3); + Rs.Unlock(); + + RCX64Reg Ra = gpr.Bind(a, RCMode::Write); + RegCache::Realize(Ra); + MOVZX(32, mask_size, Ra, mem_source); + + needs_sext = false; } else { - const bool left_shift = inst.SH && inst.MB == 0 && inst.ME == 31 - inst.SH; - const bool right_shift = inst.SH && inst.ME == 31 && inst.MB == 32 - inst.SH; - const bool field_extract = inst.SH && inst.ME == 31 && inst.MB > 32 - inst.SH; - const u32 mask = MakeRotationMask(inst.MB, inst.ME); - const u32 prerotate_mask = std::rotr(mask, inst.SH); - const bool simple_mask = mask == 0xff || mask == 0xffff; - const bool simple_prerotate_mask = prerotate_mask == 0xff || prerotate_mask == 0xffff; - // In case of a merged branch, track whether or not we've set flags. - // If not, we need to do a test later to get them. - bool needs_test = true; - // If we know the high bit can't be set, we can avoid doing a sign extend for flag storage. 
- bool needs_sext = true; - int mask_size = inst.ME - inst.MB + 1; + RCOpArg Rs = gpr.UseNoImm(s, RCMode::Read); + RCX64Reg Ra = gpr.Bind(a, RCMode::Write); + RegCache::Realize(Rs, Ra); - if (simple_mask && !(inst.SH & (mask_size - 1)) && !gpr.IsBound(s)) + if (a != s && left_shift && Rs.IsSimpleReg() && inst.SH <= 3) { - // optimized case: byte/word extract from m_ppc_state - - // Note: If a == s, calling Realize(Ra) will allocate a host register for Rs, - // so we have to get mem_source from Rs before calling Realize(Ra) - - RCOpArg Rs = gpr.Use(s, RCMode::Read); - RegCache::Realize(Rs); - OpArg mem_source = Rs.Location(); + LEA(32, Ra, MScaled(Rs.GetSimpleReg(), SCALE_1 << inst.SH, 0)); + } + // optimized case: byte/word extract plus rotate + else if (simple_prerotate_mask && !left_shift) + { + MOVZX(32, prerotate_mask == 0xff ? 8 : 16, Ra, Rs); if (inst.SH) - mem_source.AddMemOffset((32 - inst.SH) >> 3); - Rs.Unlock(); + ROL(32, Ra, Imm8(inst.SH)); + needs_sext = (mask & 0x80000000) != 0; + } + // Use BEXTR where possible: Only AMD implements this in one uop + else if (field_extract && cpu_info.bBMI1 && cpu_info.vendor == CPUVendor::AMD) + { + MOV(32, R(RSCRATCH), Imm32((mask_size << 8) | (32 - inst.SH))); + BEXTR(32, Ra, Rs, RSCRATCH); + needs_sext = false; + } + else if (left_shift) + { + if (a != s) + MOV(32, Ra, Rs); - RCX64Reg Ra = gpr.Bind(a, RCMode::Write); - RegCache::Realize(Ra); - MOVZX(32, mask_size, Ra, mem_source); + SHL(32, Ra, Imm8(inst.SH)); + } + else if (right_shift) + { + if (a != s) + MOV(32, Ra, Rs); + SHR(32, Ra, Imm8(inst.MB)); needs_sext = false; } else { - RCOpArg Rs = gpr.Use(s, RCMode::Read); - RCX64Reg Ra = gpr.Bind(a, RCMode::Write); - RegCache::Realize(Rs, Ra); + RotateLeft(32, Ra, Rs, inst.SH); - if (a != s && left_shift && Rs.IsSimpleReg() && inst.SH <= 3) + if (!(inst.MB == 0 && inst.ME == 31)) { - LEA(32, Ra, MScaled(Rs.GetSimpleReg(), SCALE_1 << inst.SH, 0)); - } - // optimized case: byte/word extract plus rotate - else if 
(simple_prerotate_mask && !left_shift) - { - MOVZX(32, prerotate_mask == 0xff ? 8 : 16, Ra, Rs); - if (inst.SH) - ROL(32, Ra, Imm8(inst.SH)); - needs_sext = (mask & 0x80000000) != 0; - } - // Use BEXTR where possible: Only AMD implements this in one uop - else if (field_extract && cpu_info.bBMI1 && cpu_info.vendor == CPUVendor::AMD) - { - MOV(32, R(RSCRATCH), Imm32((mask_size << 8) | (32 - inst.SH))); - BEXTR(32, Ra, Rs, RSCRATCH); - needs_sext = false; - } - else if (left_shift) - { - if (a != s) - MOV(32, Ra, Rs); - - SHL(32, Ra, Imm8(inst.SH)); - } - else if (right_shift) - { - if (a != s) - MOV(32, Ra, Rs); - - SHR(32, Ra, Imm8(inst.MB)); - needs_sext = false; - } - else - { - RotateLeft(32, Ra, Rs, inst.SH); - - if (!(inst.MB == 0 && inst.ME == 31)) - { - // we need flags if we're merging the branch - if (inst.Rc && CheckMergedBranch(0)) - AND(32, Ra, Imm32(mask)); - else - AndWithMask(Ra, mask); - needs_sext = inst.MB == 0; - needs_test = false; - } + // we need flags if we're merging the branch + if (inst.Rc && CheckMergedBranch(0)) + AND(32, Ra, Imm32(mask)); + else + AndWithMask(Ra, mask); + needs_sext = inst.MB == 0; + needs_test = false; } } - - if (inst.Rc) - ComputeRC(a, needs_test, needs_sext); } + + if (inst.Rc) + ComputeRC(a, needs_test, needs_sext); } void Jit64::rlwimix(UGeckoInstruction inst) @@ -2162,135 +1997,118 @@ void Jit64::rlwimix(UGeckoInstruction inst) int s = inst.RS; const u32 mask = MakeRotationMask(inst.MB, inst.ME); + const bool left_shift = mask == 0U - (1U << inst.SH); + const bool right_shift = mask == (1U << inst.SH) - 1; + bool needs_test = false; - if (gpr.IsImm(a, s)) + if (mask == 0 || (a == s && inst.SH == 0)) { - gpr.SetImmediate32(a, (gpr.Imm32(a) & ~mask) | (std::rotl(gpr.Imm32(s), inst.SH) & mask)); - if (inst.Rc) - ComputeRC(a); + needs_test = true; } - else if (gpr.IsImm(s) && mask == 0xFFFFFFFF) + else if (mask == 0xFFFFFFFF) { - gpr.SetImmediate32(a, std::rotl(gpr.Imm32(s), inst.SH)); - - if (inst.Rc) - 
ComputeRC(a); + RCOpArg Rs = gpr.Use(s, RCMode::Read); + RCX64Reg Ra = gpr.Bind(a, RCMode::Write); + RegCache::Realize(Rs, Ra); + RotateLeft(32, Ra, Rs, inst.SH); + needs_test = true; } - else + else if (gpr.IsImm(s)) { - const bool left_shift = mask == 0U - (1U << inst.SH); - const bool right_shift = mask == (1U << inst.SH) - 1; - bool needs_test = false; + RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite); + RegCache::Realize(Ra); + AndWithMask(Ra, ~mask); + OR(32, Ra, Imm32(std::rotl(gpr.Imm32(s), inst.SH) & mask)); + } + else if (gpr.IsImm(a)) + { + const u32 maskA = gpr.Imm32(a) & ~mask; - if (mask == 0 || (a == s && inst.SH == 0)) + RCOpArg Rs = gpr.Use(s, RCMode::Read); + RCX64Reg Ra = gpr.Bind(a, RCMode::Write); + RegCache::Realize(Rs, Ra); + + if (inst.SH == 0) { - needs_test = true; + MOV(32, Ra, Rs); + AndWithMask(Ra, mask); } - else if (mask == 0xFFFFFFFF) + else if (left_shift) { - RCOpArg Rs = gpr.Use(s, RCMode::Read); - RCX64Reg Ra = gpr.Bind(a, RCMode::Write); - RegCache::Realize(Rs, Ra); - RotateLeft(32, Ra, Rs, inst.SH); - needs_test = true; + MOV(32, Ra, Rs); + SHL(32, Ra, Imm8(inst.SH)); } - else if (gpr.IsImm(s)) + else if (right_shift) { - RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite); - RegCache::Realize(Ra); - AndWithMask(Ra, ~mask); - OR(32, Ra, Imm32(std::rotl(gpr.Imm32(s), inst.SH) & mask)); - } - else if (gpr.IsImm(a)) - { - const u32 maskA = gpr.Imm32(a) & ~mask; - - RCOpArg Rs = gpr.Use(s, RCMode::Read); - RCX64Reg Ra = gpr.Bind(a, RCMode::Write); - RegCache::Realize(Rs, Ra); - - if (inst.SH == 0) - { - MOV(32, Ra, Rs); - AndWithMask(Ra, mask); - } - else if (left_shift) - { - MOV(32, Ra, Rs); - SHL(32, Ra, Imm8(inst.SH)); - } - else if (right_shift) - { - MOV(32, Ra, Rs); - SHR(32, Ra, Imm8(32 - inst.SH)); - } - else - { - RotateLeft(32, Ra, Rs, inst.SH); - AndWithMask(Ra, mask); - } - - if (maskA) - OR(32, Ra, Imm32(maskA)); - else - needs_test = true; - } - else if (inst.SH) - { - // TODO: perhaps consider pinsrb or abuse of AH - RCOpArg 
Rs = gpr.Use(s, RCMode::Read); - RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite); - RegCache::Realize(Rs, Ra); - - if (left_shift) - { - MOV(32, R(RSCRATCH), Rs); - SHL(32, R(RSCRATCH), Imm8(inst.SH)); - } - else if (right_shift) - { - MOV(32, R(RSCRATCH), Rs); - SHR(32, R(RSCRATCH), Imm8(32 - inst.SH)); - } - else - { - RotateLeft(32, RSCRATCH, Rs, inst.SH); - } - - if (mask == 0xFF || mask == 0xFFFF) - { - MOV(mask == 0xFF ? 8 : 16, Ra, R(RSCRATCH)); - needs_test = true; - } - else - { - if (!left_shift && !right_shift) - AndWithMask(RSCRATCH, mask); - AndWithMask(Ra, ~mask); - OR(32, Ra, R(RSCRATCH)); - } + MOV(32, Ra, Rs); + SHR(32, Ra, Imm8(32 - inst.SH)); } else { - RCX64Reg Rs = gpr.Bind(s, RCMode::Read); - RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite); - RegCache::Realize(Rs, Ra); - - if (mask == 0xFF || mask == 0xFFFF) - { - MOV(mask == 0xFF ? 8 : 16, Ra, Rs); - needs_test = true; - } - else - { - XOR(32, Ra, Rs); - AndWithMask(Ra, ~mask); - XOR(32, Ra, Rs); - } + RotateLeft(32, Ra, Rs, inst.SH); + AndWithMask(Ra, mask); } - if (inst.Rc) - ComputeRC(a, needs_test); + + if (maskA) + OR(32, Ra, Imm32(maskA)); + else + needs_test = true; } + else if (inst.SH) + { + // TODO: perhaps consider pinsrb or abuse of AH + RCOpArg Rs = gpr.Use(s, RCMode::Read); + RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite); + RegCache::Realize(Rs, Ra); + + if (left_shift) + { + MOV(32, R(RSCRATCH), Rs); + SHL(32, R(RSCRATCH), Imm8(inst.SH)); + } + else if (right_shift) + { + MOV(32, R(RSCRATCH), Rs); + SHR(32, R(RSCRATCH), Imm8(32 - inst.SH)); + } + else + { + RotateLeft(32, RSCRATCH, Rs, inst.SH); + } + + if (mask == 0xFF || mask == 0xFFFF) + { + MOV(mask == 0xFF ? 
8 : 16, Ra, R(RSCRATCH)); + needs_test = true; + } + else + { + if (!left_shift && !right_shift) + AndWithMask(RSCRATCH, mask); + AndWithMask(Ra, ~mask); + OR(32, Ra, R(RSCRATCH)); + } + } + else + { + RCX64Reg Rs = gpr.Bind(s, RCMode::Read); + RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite); + RegCache::Realize(Rs, Ra); + + if (mask == 0xFF || mask == 0xFFFF) + { + MOV(mask == 0xFF ? 8 : 16, Ra, Rs); + needs_test = true; + } + else + { + XOR(32, Ra, Rs); + AndWithMask(Ra, ~mask); + XOR(32, Ra, Rs); + } + } + if (inst.Rc) + ComputeRC(a, needs_test); } void Jit64::rlwnmx(UGeckoInstruction inst) @@ -2300,11 +2118,7 @@ void Jit64::rlwnmx(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, s = inst.RS; const u32 mask = MakeRotationMask(inst.MB, inst.ME); - if (gpr.IsImm(b, s)) - { - gpr.SetImmediate32(a, std::rotl(gpr.Imm32(s), gpr.Imm32(b) & 0x1F) & mask); - } - else if (gpr.IsImm(b)) + if (gpr.IsImm(b)) { u32 amount = gpr.Imm32(b) & 0x1f; RCX64Reg Ra = gpr.Bind(a, RCMode::Write); @@ -2350,15 +2164,8 @@ void Jit64::negx(UGeckoInstruction inst) int a = inst.RA; int d = inst.RD; - if (gpr.IsImm(a)) { - gpr.SetImmediate32(d, ~(gpr.Imm32(a)) + 1); - if (inst.OE) - GenerateConstantOverflow(gpr.Imm32(d) == 0x80000000); - } - else - { - RCOpArg Ra = gpr.Use(a, RCMode::Read); + RCOpArg Ra = gpr.UseNoImm(a, RCMode::Read); RCX64Reg Rd = gpr.Bind(d, RCMode::Write); RegCache::Realize(Ra, Rd); @@ -2380,12 +2187,7 @@ void Jit64::srwx(UGeckoInstruction inst) int b = inst.RB; int s = inst.RS; - if (gpr.IsImm(b, s)) - { - u32 amount = gpr.Imm32(b); - gpr.SetImmediate32(a, (amount & 0x20) ? 0 : (gpr.Imm32(s) >> (amount & 0x1f))); - } - else if (gpr.IsImm(b)) + if (gpr.IsImm(b)) { u32 amount = gpr.Imm32(b); if (amount & 0x20) @@ -2442,14 +2244,7 @@ void Jit64::slwx(UGeckoInstruction inst) int b = inst.RB; int s = inst.RS; - if (gpr.IsImm(b, s)) - { - u32 amount = gpr.Imm32(b); - gpr.SetImmediate32(a, (amount & 0x20) ? 
0 : gpr.Imm32(s) << (amount & 0x1f)); - if (inst.Rc) - ComputeRC(a); - } - else if (gpr.IsImm(b)) + if (gpr.IsImm(b)) { u32 amount = gpr.Imm32(b); if (amount & 0x20) @@ -2473,12 +2268,6 @@ void Jit64::slwx(UGeckoInstruction inst) if (inst.Rc) ComputeRC(a); } - else if (gpr.IsImm(s) && gpr.Imm32(s) == 0) - { - gpr.SetImmediate32(a, 0); - if (inst.Rc) - ComputeRC(a); - } else if (cpu_info.bBMI2) { RCX64Reg Ra = gpr.Bind(a, RCMode::Write); @@ -2532,22 +2321,7 @@ void Jit64::srawx(UGeckoInstruction inst) int b = inst.RB; int s = inst.RS; - if (gpr.IsImm(b, s)) - { - s32 i = gpr.SImm32(s), amount = gpr.SImm32(b); - if (amount & 0x20) - { - gpr.SetImmediate32(a, i & 0x80000000 ? 0xFFFFFFFF : 0); - FinalizeCarry(i & 0x80000000 ? true : false); - } - else - { - amount &= 0x1F; - gpr.SetImmediate32(a, i >> amount); - FinalizeCarry(amount != 0 && i < 0 && (u32(i) << (32 - amount))); - } - } - else if (gpr.IsImm(b)) + if (gpr.IsImm(b)) { u32 amount = gpr.Imm32(b); RCX64Reg Ra = gpr.Bind(a, RCMode::Write); @@ -2583,11 +2357,6 @@ void Jit64::srawx(UGeckoInstruction inst) FinalizeCarry(CC_NZ); } } - else if (gpr.IsImm(s) && gpr.Imm32(s) == 0) - { - gpr.SetImmediate32(a, 0); - FinalizeCarry(false); - } else if (cpu_info.bBMI2) { RCX64Reg Ra = gpr.Bind(a, RCMode::Write); @@ -2652,13 +2421,7 @@ void Jit64::srawix(UGeckoInstruction inst) int s = inst.RS; int amount = inst.SH; - if (gpr.IsImm(s)) - { - s32 imm = gpr.SImm32(s); - gpr.SetImmediate32(a, imm >> amount); - FinalizeCarry(amount != 0 && imm < 0 && (u32(imm) << (32 - amount))); - } - else if (amount != 0) + if (amount != 0) { RCX64Reg Ra = gpr.Bind(a, RCMode::Write); RCOpArg Rs = gpr.Use(s, RCMode::Read); @@ -2716,14 +2479,9 @@ void Jit64::cntlzwx(UGeckoInstruction inst) int s = inst.RS; bool needs_test = false; - if (gpr.IsImm(s)) - { - gpr.SetImmediate32(a, static_cast(std::countl_zero(gpr.Imm32(s)))); - } - else { RCX64Reg Ra = gpr.Bind(a, RCMode::Write); - RCOpArg Rs = gpr.Use(s, RCMode::Read); + RCOpArg Rs = 
gpr.UseNoImm(s, RCMode::Read); RegCache::Realize(Ra, Rs); if (cpu_info.bLZCNT) diff --git a/Source/Core/Core/PowerPC/Jit64/RegCache/CachedReg.h b/Source/Core/Core/PowerPC/Jit64/RegCache/CachedReg.h index acf5480abb7..99a6e472254 100644 --- a/Source/Core/Core/PowerPC/Jit64/RegCache/CachedReg.h +++ b/Source/Core/Core/PowerPC/Jit64/RegCache/CachedReg.h @@ -16,111 +16,79 @@ using preg_t = size_t; class PPCCachedReg { public: - enum class LocationType - { - /// Value is currently at its default location - Default, - /// Value is not stored anywhere because we know it won't be read before the next write - Discarded, - /// Value is currently bound to a x64 register - Bound, - /// Value is known as an immediate and has not been written back to its default location - Immediate, - /// Value is known as an immediate and is already present at its default location - SpeculativeImmediate, - }; - PPCCachedReg() = default; - explicit PPCCachedReg(Gen::OpArg default_location_) - : default_location(default_location_), location(default_location_) + explicit PPCCachedReg(Gen::OpArg default_location) : m_default_location(default_location) {} + + Gen::OpArg GetDefaultLocation() const { return m_default_location; } + + Gen::X64Reg GetHostRegister() const { + ASSERT(m_in_host_register); + return m_host_register; } - const std::optional& Location() const { return location; } + bool IsInDefaultLocation() const { return m_in_default_location; } + bool IsInHostRegister() const { return m_in_host_register; } - LocationType GetLocationType() const + void SetFlushed(bool maintain_host_register) { - if (!location.has_value()) - return LocationType::Discarded; - - if (!away) - { - ASSERT(!revertable); - - if (location->IsImm()) - return LocationType::SpeculativeImmediate; - - ASSERT(*location == default_location); - return LocationType::Default; - } - - ASSERT(location->IsImm() || location->IsSimpleReg()); - return location->IsImm() ? 
LocationType::Immediate : LocationType::Bound; + ASSERT(!m_revertable); + if (!maintain_host_register) + m_in_host_register = false; + m_in_default_location = true; } - bool IsAway() const { return away; } - bool IsDiscarded() const { return !location.has_value(); } - bool IsBound() const { return GetLocationType() == LocationType::Bound; } - - void SetBoundTo(Gen::X64Reg xreg) + void SetInHostRegister(Gen::X64Reg xreg, bool dirty) { - away = true; - location = Gen::R(xreg); + if (dirty) + m_in_default_location = false; + m_in_host_register = true; + m_host_register = xreg; } + void SetDirty() { m_in_default_location = false; } + void SetDiscarded() { - ASSERT(!revertable); - away = false; - location = std::nullopt; + ASSERT(!m_revertable); + m_in_default_location = false; + m_in_host_register = false; } - void SetFlushed() - { - ASSERT(!revertable); - away = false; - location = default_location; - } - - void SetToImm32(u32 imm32, bool dirty = true) - { - away |= dirty; - location = Gen::Imm32(imm32); - } - - bool IsRevertable() const { return revertable; } + bool IsRevertable() const { return m_revertable; } void SetRevertable() { - ASSERT(IsBound()); - revertable = true; + ASSERT(m_in_host_register); + m_revertable = true; } void SetRevert() { - ASSERT(revertable); - revertable = false; - SetFlushed(); + ASSERT(m_revertable); + m_revertable = false; + SetFlushed(false); } void SetCommit() { - ASSERT(revertable); - revertable = false; + ASSERT(m_revertable); + m_revertable = false; } - bool IsLocked() const { return locked > 0; } - void Lock() { locked++; } + bool IsLocked() const { return m_locked > 0; } + void Lock() { m_locked++; } void Unlock() { ASSERT(IsLocked()); - locked--; + m_locked--; } private: - Gen::OpArg default_location{}; - std::optional location{}; - bool away = false; // value not in source register - bool revertable = false; - size_t locked = 0; + Gen::OpArg m_default_location{}; + Gen::X64Reg m_host_register{}; + bool m_in_default_location = 
true; + bool m_in_host_register = false; + bool m_revertable = false; + size_t m_locked = 0; }; class X64CachedReg @@ -128,25 +96,20 @@ class X64CachedReg public: preg_t Contents() const { return ppcReg; } - void SetBoundTo(preg_t ppcReg_, bool dirty_) + void SetBoundTo(preg_t ppcReg_) { free = false; ppcReg = ppcReg_; - dirty = dirty_; } void Unbind() { ppcReg = static_cast(Gen::INVALID_REG); free = true; - dirty = false; } bool IsFree() const { return free && !locked; } - bool IsDirty() const { return dirty; } - void MakeDirty() { dirty = true; } - bool IsLocked() const { return locked > 0; } void Lock() { locked++; } void Unlock() @@ -158,7 +121,6 @@ public: private: preg_t ppcReg = static_cast(Gen::INVALID_REG); bool free = true; - bool dirty = false; size_t locked = 0; }; diff --git a/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp b/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp index df210f80c5b..3e089ffe7dd 100644 --- a/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp +++ b/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp @@ -13,16 +13,59 @@ FPURegCache::FPURegCache(Jit64& jit) : RegCache{jit} { } -void FPURegCache::StoreRegister(preg_t preg, const OpArg& new_loc) +bool FPURegCache::IsImm(preg_t preg) const { - ASSERT_MSG(DYNA_REC, m_regs[preg].IsBound(), "Unbound register - {}", preg); - m_emitter->MOVAPD(new_loc, m_regs[preg].Location()->GetSimpleReg()); + return false; +} + +u32 FPURegCache::Imm32(preg_t preg) const +{ + ASSERT_MSG(DYNA_REC, false, "FPURegCache doesn't support immediates"); + return 0; +} + +s32 FPURegCache::SImm32(preg_t preg) const +{ + ASSERT_MSG(DYNA_REC, false, "FPURegCache doesn't support immediates"); + return 0; +} + +OpArg FPURegCache::R(preg_t preg) const +{ + if (m_regs[preg].IsInHostRegister()) + { + return ::Gen::R(m_regs[preg].GetHostRegister()); + } + else + { + ASSERT_MSG(DYNA_REC, m_regs[preg].IsInDefaultLocation(), "FPR {} missing!", preg); + return m_regs[preg].GetDefaultLocation(); + } 
+} + +void FPURegCache::StoreRegister(preg_t preg, const OpArg& new_loc, + IgnoreDiscardedRegisters ignore_discarded_registers) +{ + if (m_regs[preg].IsInHostRegister()) + { + m_emitter->MOVAPD(new_loc, m_regs[preg].GetHostRegister()); + } + else + { + ASSERT_MSG(DYNA_REC, ignore_discarded_registers != IgnoreDiscardedRegisters::No, + "FPR {} not in host register", preg); + } } void FPURegCache::LoadRegister(preg_t preg, X64Reg new_loc) { - ASSERT_MSG(DYNA_REC, !m_regs[preg].IsDiscarded(), "Discarded register - {}", preg); - m_emitter->MOVAPD(new_loc, m_regs[preg].Location().value()); + ASSERT_MSG(DYNA_REC, m_regs[preg].IsInDefaultLocation(), "FPR {} not in default location", preg); + m_emitter->MOVAPD(new_loc, m_regs[preg].GetDefaultLocation()); +} + +void FPURegCache::DiscardImm(preg_t preg) +{ + // FPURegCache doesn't support immediates, so no need to do anything } std::span FPURegCache::GetAllocationOrder() const diff --git a/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.h b/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.h index f7d81663b6d..76cad940aec 100644 --- a/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.h +++ b/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.h @@ -12,10 +12,17 @@ class FPURegCache final : public RegCache public: explicit FPURegCache(Jit64& jit); + bool IsImm(preg_t preg) const override; + u32 Imm32(preg_t preg) const override; + s32 SImm32(preg_t preg) const override; + protected: + Gen::OpArg R(preg_t preg) const override; Gen::OpArg GetDefaultLocation(preg_t preg) const override; - void StoreRegister(preg_t preg, const Gen::OpArg& newLoc) override; + void StoreRegister(preg_t preg, const Gen::OpArg& newLoc, + IgnoreDiscardedRegisters ignore_discarded_registers) override; void LoadRegister(preg_t preg, Gen::X64Reg newLoc) override; + void DiscardImm(preg_t preg) override; std::span GetAllocationOrder() const override; BitSet32 GetRegUtilization() const override; BitSet32 CountRegsIn(preg_t preg, u32 lookahead) const 
override; diff --git a/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.cpp b/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.cpp index ca30e15784a..a740d76e3dc 100644 --- a/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.cpp +++ b/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.cpp @@ -13,16 +13,76 @@ GPRRegCache::GPRRegCache(Jit64& jit) : RegCache{jit} { } -void GPRRegCache::StoreRegister(preg_t preg, const OpArg& new_loc) +bool GPRRegCache::IsImm(preg_t preg) const { - ASSERT_MSG(DYNA_REC, !m_regs[preg].IsDiscarded(), "Discarded register - {}", preg); - m_emitter->MOV(32, new_loc, m_regs[preg].Location().value()); + return m_jit.GetConstantPropagation().HasGPR(preg); +} + +u32 GPRRegCache::Imm32(preg_t preg) const +{ + ASSERT(m_jit.GetConstantPropagation().HasGPR(preg)); + return m_jit.GetConstantPropagation().GetGPR(preg); +} + +s32 GPRRegCache::SImm32(preg_t preg) const +{ + ASSERT(m_jit.GetConstantPropagation().HasGPR(preg)); + return m_jit.GetConstantPropagation().GetGPR(preg); +} + +OpArg GPRRegCache::R(preg_t preg) const +{ + if (m_regs[preg].IsInHostRegister()) + { + return ::Gen::R(m_regs[preg].GetHostRegister()); + } + else if (m_jit.GetConstantPropagation().HasGPR(preg)) + { + return ::Gen::Imm32(m_jit.GetConstantPropagation().GetGPR(preg)); + } + else + { + ASSERT_MSG(DYNA_REC, m_regs[preg].IsInDefaultLocation(), "GPR {} missing!", preg); + return m_regs[preg].GetDefaultLocation(); + } +} + +void GPRRegCache::StoreRegister(preg_t preg, const OpArg& new_loc, + IgnoreDiscardedRegisters ignore_discarded_registers) +{ + if (m_regs[preg].IsInHostRegister()) + { + m_emitter->MOV(32, new_loc, ::Gen::R(m_regs[preg].GetHostRegister())); + } + else if (m_jit.GetConstantPropagation().HasGPR(preg)) + { + m_emitter->MOV(32, new_loc, ::Gen::Imm32(m_jit.GetConstantPropagation().GetGPR(preg))); + } + else + { + ASSERT_MSG(DYNA_REC, ignore_discarded_registers != IgnoreDiscardedRegisters::No, + "GPR {} not in host register or constant propagation", 
preg); + } } void GPRRegCache::LoadRegister(preg_t preg, X64Reg new_loc) { - ASSERT_MSG(DYNA_REC, !m_regs[preg].IsDiscarded(), "Discarded register - {}", preg); - m_emitter->MOV(32, ::Gen::R(new_loc), m_regs[preg].Location().value()); + const JitCommon::ConstantPropagation& constant_propagation = m_jit.GetConstantPropagation(); + if (constant_propagation.HasGPR(preg)) + { + m_emitter->MOV(32, ::Gen::R(new_loc), ::Gen::Imm32(constant_propagation.GetGPR(preg))); + } + else + { + ASSERT_MSG(DYNA_REC, m_regs[preg].IsInDefaultLocation(), "GPR {} not in default location", + preg); + m_emitter->MOV(32, ::Gen::R(new_loc), m_regs[preg].GetDefaultLocation()); + } +} + +void GPRRegCache::DiscardImm(preg_t preg) +{ + m_jit.GetConstantPropagation().ClearGPR(preg); } OpArg GPRRegCache::GetDefaultLocation(preg_t preg) const @@ -48,8 +108,9 @@ void GPRRegCache::SetImmediate32(preg_t preg, u32 imm_value, bool dirty) { // "dirty" can be false to avoid redundantly flushing an immediate when // processing speculative constants. 
- DiscardRegContentsIfCached(preg); - m_regs[preg].SetToImm32(imm_value, dirty); + if (dirty) + DiscardRegister(preg); + m_jit.GetConstantPropagation().SetGPR(preg, imm_value); } BitSet32 GPRRegCache::GetRegUtilization() const diff --git a/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.h b/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.h index 60985e19607..9c0b394bad9 100644 --- a/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.h +++ b/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.h @@ -11,12 +11,20 @@ class GPRRegCache final : public RegCache { public: explicit GPRRegCache(Jit64& jit); + + bool IsImm(preg_t preg) const override; + u32 Imm32(preg_t preg) const override; + s32 SImm32(preg_t preg) const override; + void SetImmediate32(preg_t preg, u32 imm_value, bool dirty = true); protected: + Gen::OpArg R(preg_t preg) const override; Gen::OpArg GetDefaultLocation(preg_t preg) const override; - void StoreRegister(preg_t preg, const Gen::OpArg& new_loc) override; + void StoreRegister(preg_t preg, const Gen::OpArg& new_loc, + IgnoreDiscardedRegisters ignore_discarded_registers) override; void LoadRegister(preg_t preg, Gen::X64Reg new_loc) override; + void DiscardImm(preg_t preg) override; std::span GetAllocationOrder() const override; BitSet32 GetRegUtilization() const override; BitSet32 CountRegsIn(preg_t preg, u32 lookahead) const override; diff --git a/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.cpp b/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.cpp index d59d4f0a01d..2a787f31207 100644 --- a/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.cpp +++ b/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.cpp @@ -136,7 +136,7 @@ bool RCOpArg::IsImm() const { if (const preg_t* preg = std::get_if(&contents)) { - return rc->R(*preg).IsImm(); + return rc->IsImm(*preg); } else if (std::holds_alternative(contents)) { @@ -149,7 +149,7 @@ s32 RCOpArg::SImm32() const { if (const preg_t* preg = std::get_if(&contents)) { - return 
rc->R(*preg).SImm32(); + return rc->SImm32(*preg); } else if (const u32* imm = std::get_if(&contents)) { @@ -163,7 +163,7 @@ u32 RCOpArg::Imm32() const { if (const preg_t* preg = std::get_if(&contents)) { - return rc->R(*preg).Imm32(); + return rc->Imm32(*preg); } else if (const u32* imm = std::get_if(&contents)) { @@ -297,25 +297,16 @@ bool RegCache::SanityCheck() const { for (size_t i = 0; i < m_regs.size(); i++) { - switch (m_regs[i].GetLocationType()) - { - case PPCCachedReg::LocationType::Default: - case PPCCachedReg::LocationType::Discarded: - case PPCCachedReg::LocationType::SpeculativeImmediate: - case PPCCachedReg::LocationType::Immediate: - break; - case PPCCachedReg::LocationType::Bound: + if (m_regs[i].IsInHostRegister()) { if (m_regs[i].IsLocked() || m_regs[i].IsRevertable()) return false; - Gen::X64Reg xr = m_regs[i].Location()->GetSimpleReg(); + Gen::X64Reg xr = m_regs[i].GetHostRegister(); if (m_xregs[xr].IsLocked()) return false; if (m_xregs[xr].Contents() != i) return false; - break; - } } } return true; @@ -379,13 +370,7 @@ void RegCache::Discard(BitSet32 pregs) ASSERT_MSG(DYNA_REC, !m_regs[i].IsRevertable(), "Register transaction is in progress for {}!", i); - if (m_regs[i].IsBound()) - { - X64Reg xr = RX(i); - m_xregs[xr].Unbind(); - } - - m_regs[i].SetDiscarded(); + DiscardRegister(i); } } @@ -401,25 +386,7 @@ void RegCache::Flush(BitSet32 pregs, IgnoreDiscardedRegisters ignore_discarded_r ASSERT_MSG(DYNA_REC, !m_regs[i].IsRevertable(), "Register transaction is in progress for {}!", i); - switch (m_regs[i].GetLocationType()) - { - case PPCCachedReg::LocationType::Default: - break; - case PPCCachedReg::LocationType::Discarded: - ASSERT_MSG(DYNA_REC, ignore_discarded_registers != IgnoreDiscardedRegisters::No, - "Attempted to flush discarded PPC reg {}", i); - break; - case PPCCachedReg::LocationType::SpeculativeImmediate: - // We can have a cached value without a host register through speculative constants. 
- // It must be cleared when flushing, otherwise it may be out of sync with PPCSTATE, - // if PPCSTATE is modified externally (e.g. fallback to interpreter). - m_regs[i].SetFlushed(); - break; - case PPCCachedReg::LocationType::Bound: - case PPCCachedReg::LocationType::Immediate: - StoreFromRegister(i); - break; - } + StoreFromRegister(i, FlushMode::Full, ignore_discarded_registers); } } @@ -427,9 +394,9 @@ void RegCache::Reset(BitSet32 pregs) { for (preg_t i : pregs) { - ASSERT_MSG(DYNA_REC, !m_regs[i].IsAway(), + ASSERT_MSG(DYNA_REC, !m_regs[i].IsInHostRegister(), "Attempted to reset a loaded register (did you mean to flush it?)"); - m_regs[i].SetFlushed(); + m_regs[i].SetFlushed(false); } } @@ -465,7 +432,7 @@ void RegCache::PreloadRegisters(BitSet32 to_preload) { if (NumFreeRegisters() < 2) return; - if (!R(preg).IsImm()) + if (!IsImm(preg)) BindToRegister(preg, true, false); } } @@ -492,84 +459,68 @@ void RegCache::FlushX(X64Reg reg) } } -void RegCache::DiscardRegContentsIfCached(preg_t preg) +void RegCache::DiscardRegister(preg_t preg) { - if (m_regs[preg].IsBound()) + if (m_regs[preg].IsInHostRegister()) { - X64Reg xr = m_regs[preg].Location()->GetSimpleReg(); + X64Reg xr = m_regs[preg].GetHostRegister(); m_xregs[xr].Unbind(); - m_regs[preg].SetFlushed(); } + + m_regs[preg].SetDiscarded(); } void RegCache::BindToRegister(preg_t i, bool doLoad, bool makeDirty) { - if (!m_regs[i].IsBound()) + if (!m_regs[i].IsInHostRegister()) { X64Reg xr = GetFreeXReg(); - ASSERT_MSG(DYNA_REC, !m_xregs[xr].IsDirty(), "Xreg {} already dirty", Common::ToUnderlying(xr)); ASSERT_MSG(DYNA_REC, !m_xregs[xr].IsLocked(), "GetFreeXReg returned locked register"); ASSERT_MSG(DYNA_REC, !m_regs[i].IsRevertable(), "Invalid transaction state"); - m_xregs[xr].SetBoundTo(i, makeDirty || m_regs[i].IsAway()); + m_xregs[xr].SetBoundTo(i); if (doLoad) - { - ASSERT_MSG(DYNA_REC, !m_regs[i].IsDiscarded(), "Attempted to load a discarded value"); LoadRegister(i, xr); - } ASSERT_MSG(DYNA_REC, - 
std::ranges::none_of( - m_regs, [xr](const auto& l) { return l.has_value() && l->IsSimpleReg(xr); }, - &PPCCachedReg::Location), + std::ranges::none_of(m_regs, + [xr](const auto& r) { + return r.IsInHostRegister() && r.GetHostRegister() == xr; + }), "Xreg {} already bound", Common::ToUnderlying(xr)); - m_regs[i].SetBoundTo(xr); + m_regs[i].SetInHostRegister(xr, makeDirty); } else { // reg location must be simplereg; memory locations // and immediates are taken care of above. if (makeDirty) - m_xregs[RX(i)].MakeDirty(); + m_regs[i].SetDirty(); } + if (makeDirty) + DiscardImm(i); + ASSERT_MSG(DYNA_REC, !m_xregs[RX(i)].IsLocked(), "WTF, this reg ({} -> {}) should have been flushed", i, Common::ToUnderlying(RX(i))); } -void RegCache::StoreFromRegister(preg_t i, FlushMode mode) +void RegCache::StoreFromRegister(preg_t i, FlushMode mode, + IgnoreDiscardedRegisters ignore_discarded_registers) { // When a transaction is in progress, allowing the store would overwrite the old value. ASSERT_MSG(DYNA_REC, !m_regs[i].IsRevertable(), "Register transaction on {} is in progress!", i); - bool doStore = false; + if (!m_regs[i].IsInDefaultLocation()) + StoreRegister(i, GetDefaultLocation(i), ignore_discarded_registers); - switch (m_regs[i].GetLocationType()) - { - case PPCCachedReg::LocationType::Default: - case PPCCachedReg::LocationType::Discarded: - case PPCCachedReg::LocationType::SpeculativeImmediate: - return; - case PPCCachedReg::LocationType::Bound: - { - X64Reg xr = RX(i); - doStore = m_xregs[xr].IsDirty(); - if (mode == FlushMode::Full) - m_xregs[xr].Unbind(); - break; - } - case PPCCachedReg::LocationType::Immediate: - doStore = true; - break; - } + if (mode == FlushMode::Full && m_regs[i].IsInHostRegister()) + m_xregs[m_regs[i].GetHostRegister()].Unbind(); - if (doStore) - StoreRegister(i, GetDefaultLocation(i)); - if (mode == FlushMode::Full) - m_regs[i].SetFlushed(); + m_regs[i].SetFlushed(mode != FlushMode::Full); } X64Reg RegCache::GetFreeXReg() @@ -634,7 +585,7 @@ 
float RegCache::ScoreRegister(X64Reg xreg) const // bias a bit against dirty registers. Testing shows that a bias of 2 seems roughly // right: 3 causes too many extra clobbers, while 1 saves very few clobbers relative // to the number of extra stores it causes. - if (m_xregs[xreg].IsDirty()) + if (!m_regs[preg].IsInDefaultLocation()) score += 2; // If the register isn't actually needed in a physical register for a later instruction, @@ -655,16 +606,10 @@ float RegCache::ScoreRegister(X64Reg xreg) const return score; } -const OpArg& RegCache::R(preg_t preg) const -{ - ASSERT_MSG(DYNA_REC, !m_regs[preg].IsDiscarded(), "Discarded register - {}", preg); - return m_regs[preg].Location().value(); -} - X64Reg RegCache::RX(preg_t preg) const { - ASSERT_MSG(DYNA_REC, m_regs[preg].IsBound(), "Unbound register - {}", preg); - return m_regs[preg].Location()->GetSimpleReg(); + ASSERT_MSG(DYNA_REC, m_regs[preg].IsInHostRegister(), "Not in host register - {}", preg); + return m_regs[preg].GetHostRegister(); } void RegCache::Lock(preg_t preg) @@ -720,29 +665,23 @@ void RegCache::Realize(preg_t preg) return; } - switch (m_regs[preg].GetLocationType()) + if (IsImm(preg)) { - case PPCCachedReg::LocationType::Default: - if (kill_mem) - { - do_bind(); - return; - } - m_constraints[preg].Realized(RCConstraint::RealizedLoc::Mem); - return; - case PPCCachedReg::LocationType::Discarded: - case PPCCachedReg::LocationType::Bound: - do_bind(); - return; - case PPCCachedReg::LocationType::Immediate: - case PPCCachedReg::LocationType::SpeculativeImmediate: if (dirty || kill_imm) - { do_bind(); - return; - } - m_constraints[preg].Realized(RCConstraint::RealizedLoc::Imm); - break; + else + m_constraints[preg].Realized(RCConstraint::RealizedLoc::Imm); + } + else if (!m_regs[preg].IsInHostRegister()) + { + if (kill_mem) + do_bind(); + else + m_constraints[preg].Realized(RCConstraint::RealizedLoc::Mem); + } + else + { + do_bind(); } } diff --git 
a/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.h b/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.h index 3677d2b42b7..0a7ab3836d2 100644 --- a/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.h +++ b/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.h @@ -157,12 +157,14 @@ public: bool IsImm(Args... pregs) const { static_assert(sizeof...(pregs) > 0); - return (R(pregs).IsImm() && ...); + return (IsImm(preg_t(pregs)) && ...); } - u32 Imm32(preg_t preg) const { return R(preg).Imm32(); } - s32 SImm32(preg_t preg) const { return R(preg).SImm32(); } - bool IsBound(preg_t preg) const { return m_regs[preg].IsBound(); } + virtual bool IsImm(preg_t preg) const = 0; + virtual u32 Imm32(preg_t preg) const = 0; + virtual s32 SImm32(preg_t preg) const = 0; + + bool IsBound(preg_t preg) const { return m_regs[preg].IsInHostRegister(); } RCOpArg Use(preg_t preg, RCMode mode); RCOpArg UseNoImm(preg_t preg, RCMode mode); @@ -191,8 +193,10 @@ protected: friend class RCForkGuard; virtual Gen::OpArg GetDefaultLocation(preg_t preg) const = 0; - virtual void StoreRegister(preg_t preg, const Gen::OpArg& new_loc) = 0; + virtual void StoreRegister(preg_t preg, const Gen::OpArg& new_loc, + IgnoreDiscardedRegisters ignore_discarded_registers) = 0; virtual void LoadRegister(preg_t preg, Gen::X64Reg new_loc) = 0; + virtual void DiscardImm(preg_t preg) = 0; virtual std::span GetAllocationOrder() const = 0; @@ -200,16 +204,18 @@ protected: virtual BitSet32 CountRegsIn(preg_t preg, u32 lookahead) const = 0; void FlushX(Gen::X64Reg reg); - void DiscardRegContentsIfCached(preg_t preg); + void DiscardRegister(preg_t preg); void BindToRegister(preg_t preg, bool doLoad = true, bool makeDirty = true); - void StoreFromRegister(preg_t preg, FlushMode mode = FlushMode::Full); + void StoreFromRegister( + preg_t preg, FlushMode mode = FlushMode::Full, + IgnoreDiscardedRegisters ignore_discarded_registers = IgnoreDiscardedRegisters::No); Gen::X64Reg GetFreeXReg(); int NumFreeRegisters() 
const; float ScoreRegister(Gen::X64Reg xreg) const; - const Gen::OpArg& R(preg_t preg) const; + virtual Gen::OpArg R(preg_t preg) const = 0; Gen::X64Reg RX(preg_t preg) const; void Lock(preg_t preg); diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 1eda45c58e3..37bfc4c24ac 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -33,6 +33,7 @@ #include "Core/PatchEngine.h" #include "Core/PowerPC/Interpreter/Interpreter.h" #include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" +#include "Core/PowerPC/JitCommon/ConstantPropagation.h" #include "Core/PowerPC/JitInterface.h" #include "Core/PowerPC/PowerPC.h" #include "Core/System.h" @@ -278,6 +279,9 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst) fpr.ResetRegisters(js.op->GetFregsOut()); gpr.ResetCRRegisters(js.op->crOut); + // We must also update constant propagation + m_constant_propagation.ClearGPRs(js.op->regsOut); + if (js.op->opinfo->flags & FL_SET_MSR) EmitUpdateMembase(); @@ -1169,6 +1173,8 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) gpr.Start(js.gpa); fpr.Start(js.fpa); + m_constant_propagation.Clear(); + if (!js.noSpeculativeConstantsAddresses.contains(js.blockStart)) { IntializeSpeculativeConstants(); @@ -1341,9 +1347,38 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) FlushCarry(); gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); - } + m_constant_propagation.Clear(); - CompileInstruction(op); + CompileInstruction(op); + } + else + { + const JitCommon::ConstantPropagationResult constant_propagation_result = + m_constant_propagation.EvaluateInstruction(op.inst, opinfo->flags); + + if (!constant_propagation_result.instruction_fully_executed) + CompileInstruction(op); + + m_constant_propagation.Apply(constant_propagation_result); + + if (constant_propagation_result.gpr >= 0) + { + // Mark the GPR as dirty in 
the register cache + gpr.SetImmediate(constant_propagation_result.gpr, constant_propagation_result.gpr_value); + } + + if (constant_propagation_result.instruction_fully_executed) + { + if (constant_propagation_result.carry) + ComputeCarry(*constant_propagation_result.carry); + + if (constant_propagation_result.overflow) + GenerateConstantOverflow(*constant_propagation_result.overflow); + + if (constant_propagation_result.compute_rc) + ComputeRC0(constant_propagation_result.gpr_value); + } + } js.fpr_is_store_safe = op.fprIsStoreSafeAfterInst; diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 77ba6d0bb35..e98f950d42f 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -16,6 +16,7 @@ #include "Core/PowerPC/JitArm64/JitArm64Cache.h" #include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" #include "Core/PowerPC/JitArmCommon/BackPatch.h" +#include "Core/PowerPC/JitCommon/ConstantPropagation.h" #include "Core/PowerPC/JitCommon/JitAsmCommon.h" #include "Core/PowerPC/JitCommon/JitBase.h" #include "Core/PowerPC/PPCAnalyst.h" @@ -35,6 +36,8 @@ public: void Init() override; void Shutdown() override; + JitCommon::ConstantPropagation& GetConstantPropagation() { return m_constant_propagation; } + JitBaseBlockCache* GetBlockCache() override { return &blocks; } bool IsInCodeSpace(const u8* ptr) const { return IsInSpace(ptr); } bool HandleFault(uintptr_t access_address, SContext* ctx) override; @@ -376,13 +379,14 @@ protected: void ComputeRC0(Arm64Gen::ARM64Reg reg); void ComputeRC0(u32 imm); + void GenerateConstantOverflow(bool overflow); void ComputeCarry(Arm64Gen::ARM64Reg reg); // reg must contain 0 or 1 void ComputeCarry(bool carry); void ComputeCarry(); void LoadCarry(); void FlushCarry(); - void reg_imm(u32 d, u32 a, u32 value, u32 (*do_op)(u32, u32), + void reg_imm(u32 d, u32 a, u32 value, void (ARM64XEmitter::*op)(Arm64Gen::ARM64Reg, Arm64Gen::ARM64Reg, u64, 
Arm64Gen::ARM64Reg), bool Rc = false); @@ -396,6 +400,8 @@ protected: Arm64GPRCache gpr; Arm64FPRCache fpr; + JitCommon::ConstantPropagation m_constant_propagation; + JitArm64BlockCache blocks{*this}; Arm64Gen::ARM64FloatEmitter m_float_emit; diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 087a4cd9077..dd01d6dbddb 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -39,6 +39,25 @@ void JitArm64::ComputeRC0(u32 imm) MOVI2R(gpr.CR(0), s64(s32(imm))); } +void JitArm64::GenerateConstantOverflow(bool overflow) +{ + ARM64Reg WA = gpr.GetReg(); + + if (overflow) + { + MOVI2R(WA, XER_OV_MASK | XER_SO_MASK); + STRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_so_ov)); + } + else + { + LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_so_ov)); + AND(WA, WA, LogicalImm(~XER_OV_MASK, GPRSize::B32)); + STRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_so_ov)); + } + + gpr.Unlock(WA); +} + void JitArm64::ComputeCarry(ARM64Reg reg) { js.carryFlag = CarryFlag::InPPCState; @@ -136,41 +155,17 @@ void JitArm64::FlushCarry() js.carryFlag = CarryFlag::InPPCState; } -void JitArm64::reg_imm(u32 d, u32 a, u32 value, u32 (*do_op)(u32, u32), +void JitArm64::reg_imm(u32 d, u32 a, u32 value, void (ARM64XEmitter::*op)(ARM64Reg, ARM64Reg, u64, ARM64Reg), bool Rc) { - if (gpr.IsImm(a)) + gpr.BindToRegister(d, d == a); { - gpr.SetImmediate(d, do_op(gpr.GetImm(a), value)); - if (Rc) - ComputeRC0(gpr.GetImm(d)); + auto WA = gpr.GetScopedReg(); + (this->*op)(gpr.R(d), gpr.R(a), value, WA); } - else - { - gpr.BindToRegister(d, d == a); - { - auto WA = gpr.GetScopedReg(); - (this->*op)(gpr.R(d), gpr.R(a), value, WA); - } - if (Rc) - ComputeRC0(gpr.R(d)); - } -} - -static constexpr u32 BitOR(u32 a, u32 b) -{ - return a | b; -} - -static constexpr u32 BitAND(u32 a, u32 b) -{ - return a & b; -} - -static constexpr u32 
BitXOR(u32 a, u32 b) -{ - return a ^ b; + if (Rc) + ComputeRC0(gpr.R(d)); } void JitArm64::arith_imm(UGeckoInstruction inst) @@ -184,34 +179,21 @@ void JitArm64::arith_imm(UGeckoInstruction inst) case 24: // ori case 25: // oris { - // check for nop - if (a == s && inst.UIMM == 0) - { - // NOP - return; - } - const u32 immediate = inst.OPCD == 24 ? inst.UIMM : inst.UIMM << 16; - reg_imm(a, s, immediate, BitOR, &ARM64XEmitter::ORRI2R); + reg_imm(a, s, immediate, &ARM64XEmitter::ORRI2R); break; } case 28: // andi - reg_imm(a, s, inst.UIMM, BitAND, &ARM64XEmitter::ANDI2R, true); + reg_imm(a, s, inst.UIMM, &ARM64XEmitter::ANDI2R, true); break; case 29: // andis - reg_imm(a, s, inst.UIMM << 16, BitAND, &ARM64XEmitter::ANDI2R, true); + reg_imm(a, s, inst.UIMM << 16, &ARM64XEmitter::ANDI2R, true); break; case 26: // xori case 27: // xoris { - if (a == s && inst.UIMM == 0) - { - // NOP - return; - } - const u32 immediate = inst.OPCD == 26 ? inst.UIMM : inst.UIMM << 16; - reg_imm(a, s, immediate, BitXOR, &ARM64XEmitter::EORI2R); + reg_imm(a, s, immediate, &ARM64XEmitter::EORI2R); break; } } @@ -231,17 +213,10 @@ void JitArm64::addix(UGeckoInstruction inst) if (a) { - if (gpr.IsImm(a)) - { - gpr.SetImmediate(d, gpr.GetImm(a) + imm); - } - else - { - gpr.BindToRegister(d, d == a); + gpr.BindToRegister(d, d == a); - auto WA = gpr.GetScopedReg(); - ADDI2R(gpr.R(d), gpr.R(a), imm, WA); - } + auto WA = gpr.GetScopedReg(); + ADDI2R(gpr.R(d), gpr.R(a), imm, WA); } else { @@ -256,29 +231,7 @@ void JitArm64::boolX(UGeckoInstruction inst) JITDISABLE(bJITIntegerOff); int a = inst.RA, s = inst.RS, b = inst.RB; - if (gpr.IsImm(s) && gpr.IsImm(b)) - { - if (inst.SUBOP10 == 28) // andx - gpr.SetImmediate(a, (u32)gpr.GetImm(s) & (u32)gpr.GetImm(b)); - else if (inst.SUBOP10 == 476) // nandx - gpr.SetImmediate(a, ~((u32)gpr.GetImm(s) & (u32)gpr.GetImm(b))); - else if (inst.SUBOP10 == 60) // andcx - gpr.SetImmediate(a, (u32)gpr.GetImm(s) & (~(u32)gpr.GetImm(b))); - else if (inst.SUBOP10 == 
444) // orx - gpr.SetImmediate(a, (u32)gpr.GetImm(s) | (u32)gpr.GetImm(b)); - else if (inst.SUBOP10 == 124) // norx - gpr.SetImmediate(a, ~((u32)gpr.GetImm(s) | (u32)gpr.GetImm(b))); - else if (inst.SUBOP10 == 412) // orcx - gpr.SetImmediate(a, (u32)gpr.GetImm(s) | (~(u32)gpr.GetImm(b))); - else if (inst.SUBOP10 == 316) // xorx - gpr.SetImmediate(a, (u32)gpr.GetImm(s) ^ (u32)gpr.GetImm(b)); - else if (inst.SUBOP10 == 284) // eqvx - gpr.SetImmediate(a, ~((u32)gpr.GetImm(s) ^ (u32)gpr.GetImm(b))); - - if (inst.Rc) - ComputeRC0(gpr.GetImm(a)); - } - else if (s == b) + if (s == b) { if ((inst.SUBOP10 == 28 /* andx */) || (inst.SUBOP10 == 444 /* orx */)) { @@ -523,14 +476,7 @@ void JitArm64::addx(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, d = inst.RD; - if (gpr.IsImm(a) && gpr.IsImm(b)) - { - s32 i = (s32)gpr.GetImm(a), j = (s32)gpr.GetImm(b); - gpr.SetImmediate(d, i + j); - if (inst.Rc) - ComputeRC0(gpr.GetImm(d)); - } - else if (gpr.IsImm(a) || gpr.IsImm(b)) + if (gpr.IsImm(a) || gpr.IsImm(b)) { int imm_reg = gpr.IsImm(a) ? a : b; int in_reg = gpr.IsImm(a) ? b : a; @@ -560,19 +506,10 @@ void JitArm64::extsXx(UGeckoInstruction inst) int a = inst.RA, s = inst.RS; int size = inst.SUBOP10 == 922 ? 16 : 8; - if (gpr.IsImm(s)) - { - gpr.SetImmediate(a, (u32)(s32)(size == 16 ? 
(s16)gpr.GetImm(s) : (s8)gpr.GetImm(s))); - if (inst.Rc) - ComputeRC0(gpr.GetImm(a)); - } - else - { - gpr.BindToRegister(a, a == s); - SBFM(gpr.R(a), gpr.R(s), 0, size - 1); - if (inst.Rc) - ComputeRC0(gpr.R(a)); - } + gpr.BindToRegister(a, a == s); + SBFM(gpr.R(a), gpr.R(s), 0, size - 1); + if (inst.Rc) + ComputeRC0(gpr.R(a)); } void JitArm64::cntlzwx(UGeckoInstruction inst) @@ -582,19 +519,10 @@ void JitArm64::cntlzwx(UGeckoInstruction inst) int a = inst.RA; int s = inst.RS; - if (gpr.IsImm(s)) - { - gpr.SetImmediate(a, static_cast(std::countl_zero(gpr.GetImm(s)))); - if (inst.Rc) - ComputeRC0(gpr.GetImm(a)); - } - else - { - gpr.BindToRegister(a, a == s); - CLZ(gpr.R(a), gpr.R(s)); - if (inst.Rc) - ComputeRC0(gpr.R(a)); - } + gpr.BindToRegister(a, a == s); + CLZ(gpr.R(a), gpr.R(s)); + if (inst.Rc) + ComputeRC0(gpr.R(a)); } void JitArm64::negx(UGeckoInstruction inst) @@ -606,19 +534,10 @@ void JitArm64::negx(UGeckoInstruction inst) FALLBACK_IF(inst.OE); - if (gpr.IsImm(a)) - { - gpr.SetImmediate(d, ~((u32)gpr.GetImm(a)) + 1); - if (inst.Rc) - ComputeRC0(gpr.GetImm(d)); - } - else - { - gpr.BindToRegister(d, d == a); - SUB(gpr.R(d), ARM64Reg::WSP, gpr.R(a)); - if (inst.Rc) - ComputeRC0(gpr.R(d)); - } + gpr.BindToRegister(d, d == a); + SUB(gpr.R(d), ARM64Reg::WSP, gpr.R(a)); + if (inst.Rc) + ComputeRC0(gpr.R(d)); } void JitArm64::cmp(UGeckoInstruction inst) @@ -800,15 +719,7 @@ void JitArm64::cmpli(UGeckoInstruction inst) void JitArm64::rlwinmx_internal(UGeckoInstruction inst, u32 sh) { u32 a = inst.RA, s = inst.RS; - const u32 mask = MakeRotationMask(inst.MB, inst.ME); - if (gpr.IsImm(inst.RS)) - { - gpr.SetImmediate(a, std::rotl(gpr.GetImm(s), sh) & mask); - if (inst.Rc) - ComputeRC0(gpr.GetImm(a)); - return; - } if (mask == 0) { @@ -899,17 +810,7 @@ void JitArm64::srawix(UGeckoInstruction inst) int amount = inst.SH; bool inplace_carry = CanMergeNextInstructions(1) && js.op[1].wantsCAInFlags; - if (gpr.IsImm(s)) - { - s32 imm = (s32)gpr.GetImm(s); - 
gpr.SetImmediate(a, imm >> amount); - - ComputeCarry(amount != 0 && (imm < 0) && (u32(imm) << (32 - amount))); - - if (inst.Rc) - ComputeRC0(gpr.GetImm(a)); - } - else if (amount == 0) + if (amount == 0) { gpr.BindToRegister(a, a == s); ARM64Reg RA = gpr.R(a); @@ -970,42 +871,21 @@ void JitArm64::addic(UGeckoInstruction inst) int a = inst.RA, d = inst.RD; bool rc = inst.OPCD == 13; s32 simm = inst.SIMM_16; - u32 imm = (u32)simm; - if (gpr.IsImm(a)) + gpr.BindToRegister(d, d == a); { - u32 i = gpr.GetImm(a); - gpr.SetImmediate(d, i + imm); - - bool has_carry = Interpreter::Helper_Carry(i, imm); - ComputeCarry(has_carry); - if (rc) - ComputeRC0(gpr.GetImm(d)); + auto WA = gpr.GetScopedReg(); + CARRY_IF_NEEDED(ADDI2R, ADDSI2R, gpr.R(d), gpr.R(a), simm, WA); } - else - { - gpr.BindToRegister(d, d == a); - { - auto WA = gpr.GetScopedReg(); - CARRY_IF_NEEDED(ADDI2R, ADDSI2R, gpr.R(d), gpr.R(a), simm, WA); - } - ComputeCarry(); - if (rc) - ComputeRC0(gpr.R(d)); - } + ComputeCarry(); + if (rc) + ComputeRC0(gpr.R(d)); } bool JitArm64::MultiplyImmediate(u32 imm, int a, int d, bool rc) { - if (imm == 0) - { - // Multiplication by zero (0). - gpr.SetImmediate(d, 0); - if (rc) - ComputeRC0(gpr.GetImm(d)); - } - else if (imm == 1) + if (imm == 1) { // Multiplication by one (1). if (d != a) @@ -1072,12 +952,7 @@ void JitArm64::mulli(UGeckoInstruction inst) int a = inst.RA, d = inst.RD; - if (gpr.IsImm(a)) - { - s32 i = (s32)gpr.GetImm(a); - gpr.SetImmediate(d, i * inst.SIMM_16); - } - else if (MultiplyImmediate((u32)(s32)inst.SIMM_16, a, d, false)) + if (MultiplyImmediate((u32)(s32)inst.SIMM_16, a, d, false)) { // Code is generated inside MultiplyImmediate, nothing to be done here. 
} @@ -1102,15 +977,8 @@ void JitArm64::mullwx(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, d = inst.RD; - if (gpr.IsImm(a) && gpr.IsImm(b)) - { - s32 i = (s32)gpr.GetImm(a), j = (s32)gpr.GetImm(b); - gpr.SetImmediate(d, i * j); - if (inst.Rc) - ComputeRC0(gpr.GetImm(d)); - } - else if ((gpr.IsImm(a) && MultiplyImmediate(gpr.GetImm(a), b, d, inst.Rc)) || - (gpr.IsImm(b) && MultiplyImmediate(gpr.GetImm(b), a, d, inst.Rc))) + if ((gpr.IsImm(a) && MultiplyImmediate(gpr.GetImm(a), b, d, inst.Rc)) || + (gpr.IsImm(b) && MultiplyImmediate(gpr.GetImm(b), a, d, inst.Rc))) { // Code is generated inside MultiplyImmediate, nothing to be done here. } @@ -1130,22 +998,12 @@ void JitArm64::mulhwx(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, d = inst.RD; - if (gpr.IsImm(a) && gpr.IsImm(b)) - { - s32 i = (s32)gpr.GetImm(a), j = (s32)gpr.GetImm(b); - gpr.SetImmediate(d, (u32)((u64)(((s64)i * (s64)j)) >> 32)); - if (inst.Rc) - ComputeRC0(gpr.GetImm(d)); - } - else - { - gpr.BindToRegister(d, d == a || d == b); - SMULL(EncodeRegTo64(gpr.R(d)), gpr.R(a), gpr.R(b)); - LSR(EncodeRegTo64(gpr.R(d)), EncodeRegTo64(gpr.R(d)), 32); + gpr.BindToRegister(d, d == a || d == b); + SMULL(EncodeRegTo64(gpr.R(d)), gpr.R(a), gpr.R(b)); + LSR(EncodeRegTo64(gpr.R(d)), EncodeRegTo64(gpr.R(d)), 32); - if (inst.Rc) - ComputeRC0(gpr.R(d)); - } + if (inst.Rc) + ComputeRC0(gpr.R(d)); } void JitArm64::mulhwux(UGeckoInstruction inst) @@ -1155,22 +1013,12 @@ void JitArm64::mulhwux(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, d = inst.RD; - if (gpr.IsImm(a) && gpr.IsImm(b)) - { - u32 i = gpr.GetImm(a), j = gpr.GetImm(b); - gpr.SetImmediate(d, (u32)(((u64)i * (u64)j) >> 32)); - if (inst.Rc) - ComputeRC0(gpr.GetImm(d)); - } - else - { - gpr.BindToRegister(d, d == a || d == b); - UMULL(EncodeRegTo64(gpr.R(d)), gpr.R(a), gpr.R(b)); - LSR(EncodeRegTo64(gpr.R(d)), EncodeRegTo64(gpr.R(d)), 32); + gpr.BindToRegister(d, d == a || d == b); + UMULL(EncodeRegTo64(gpr.R(d)), gpr.R(a), gpr.R(b)); + 
LSR(EncodeRegTo64(gpr.R(d)), EncodeRegTo64(gpr.R(d)), 32); - if (inst.Rc) - ComputeRC0(gpr.R(d)); - } + if (inst.Rc) + ComputeRC0(gpr.R(d)); } void JitArm64::addzex(UGeckoInstruction inst) @@ -1274,26 +1122,10 @@ void JitArm64::subfx(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, d = inst.RD; - if (a == b) - { - gpr.SetImmediate(d, 0); - if (inst.Rc) - ComputeRC0(gpr.GetImm(d)); - } - else if (gpr.IsImm(a) && gpr.IsImm(b)) - { - u32 i = gpr.GetImm(a), j = gpr.GetImm(b); - gpr.SetImmediate(d, j - i); - if (inst.Rc) - ComputeRC0(gpr.GetImm(d)); - } - else - { - gpr.BindToRegister(d, d == a || d == b); - SUB(gpr.R(d), gpr.R(b), gpr.R(a)); - if (inst.Rc) - ComputeRC0(gpr.R(d)); - } + gpr.BindToRegister(d, d == a || d == b); + SUB(gpr.R(d), gpr.R(b), gpr.R(a)); + if (inst.Rc) + ComputeRC0(gpr.R(d)); } void JitArm64::subfex(UGeckoInstruction inst) @@ -1435,17 +1267,7 @@ void JitArm64::subfcx(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, d = inst.RD; - if (gpr.IsImm(a) && gpr.IsImm(b)) - { - u32 a_imm = gpr.GetImm(a), b_imm = gpr.GetImm(b); - - gpr.SetImmediate(d, b_imm - a_imm); - ComputeCarry(a_imm == 0 || Interpreter::Helper_Carry(b_imm, 0u - a_imm)); - - if (inst.Rc) - ComputeRC0(gpr.GetImm(d)); - } - else if (gpr.IsImm(a, 0)) + if (gpr.IsImm(a, 0)) { if (d != b) { @@ -1559,44 +1381,34 @@ void JitArm64::subfic(UGeckoInstruction inst) int a = inst.RA, d = inst.RD; s32 imm = inst.SIMM_16; - if (gpr.IsImm(a)) - { - u32 a_imm = gpr.GetImm(a); + const bool will_read = d == a; + gpr.BindToRegister(d, will_read); + ARM64Reg RD = gpr.R(d); - gpr.SetImmediate(d, imm - a_imm); - ComputeCarry(a_imm == 0 || Interpreter::Helper_Carry(imm, 0u - a_imm)); + if (imm == -1) + { + // d = -1 - a = ~a + MVN(RD, gpr.R(a)); + // CA is always set in this case + ComputeCarry(true); } else { - const bool will_read = d == a; - gpr.BindToRegister(d, will_read); - ARM64Reg RD = gpr.R(d); + const bool is_zero = imm == 0; - if (imm == -1) + // d = imm - a { - // d = -1 - a = ~a - 
MVN(RD, gpr.R(a)); - // CA is always set in this case - ComputeCarry(true); - } - else - { - const bool is_zero = imm == 0; - - // d = imm - a + Arm64GPRCache::ScopedARM64Reg WA(ARM64Reg::WZR); + if (!is_zero) { - Arm64GPRCache::ScopedARM64Reg WA(ARM64Reg::WZR); - if (!is_zero) - { - WA = will_read ? gpr.GetScopedReg() : Arm64GPRCache::ScopedARM64Reg(RD); - MOVI2R(WA, imm); - } - - CARRY_IF_NEEDED(SUB, SUBS, RD, WA, gpr.R(a)); + WA = will_read ? gpr.GetScopedReg() : Arm64GPRCache::ScopedARM64Reg(RD); + MOVI2R(WA, imm); } - ComputeCarry(); + CARRY_IF_NEEDED(SUB, SUBS, RD, WA, gpr.R(a)); } + + ComputeCarry(); } } @@ -1726,25 +1538,12 @@ void JitArm64::addcx(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, d = inst.RD; - if (gpr.IsImm(a) && gpr.IsImm(b)) - { - u32 i = gpr.GetImm(a), j = gpr.GetImm(b); - gpr.SetImmediate(d, i + j); + gpr.BindToRegister(d, d == a || d == b); + CARRY_IF_NEEDED(ADD, ADDS, gpr.R(d), gpr.R(a), gpr.R(b)); - bool has_carry = Interpreter::Helper_Carry(i, j); - ComputeCarry(has_carry); - if (inst.Rc) - ComputeRC0(gpr.GetImm(d)); - } - else - { - gpr.BindToRegister(d, d == a || d == b); - CARRY_IF_NEEDED(ADD, ADDS, gpr.R(d), gpr.R(a), gpr.R(b)); - - ComputeCarry(); - if (inst.Rc) - ComputeRC0(gpr.R(d)); - } + ComputeCarry(); + if (inst.Rc) + ComputeRC0(gpr.R(d)); } void JitArm64::divwux(UGeckoInstruction inst) @@ -1755,15 +1554,7 @@ void JitArm64::divwux(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, d = inst.RD; - if (gpr.IsImm(a) && gpr.IsImm(b)) - { - u32 i = gpr.GetImm(a), j = gpr.GetImm(b); - gpr.SetImmediate(d, j == 0 ? 
0 : i / j); - - if (inst.Rc) - ComputeRC0(gpr.GetImm(d)); - } - else if (gpr.IsImm(b)) + if (gpr.IsImm(b)) { const u32 divisor = gpr.GetImm(b); @@ -1834,28 +1625,7 @@ void JitArm64::divwx(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, d = inst.RD; - if (gpr.IsImm(a) && gpr.IsImm(b)) - { - s32 imm_a = gpr.GetImm(a); - s32 imm_b = gpr.GetImm(b); - u32 imm_d; - if (imm_b == 0 || (static_cast(imm_a) == 0x80000000 && imm_b == -1)) - { - if (imm_a < 0) - imm_d = 0xFFFFFFFF; - else - imm_d = 0; - } - else - { - imm_d = static_cast(imm_a / imm_b); - } - gpr.SetImmediate(d, imm_d); - - if (inst.Rc) - ComputeRC0(imm_d); - } - else if (gpr.IsImm(a, 0)) + if (gpr.IsImm(a, 0)) { // Zero divided by anything is always zero gpr.SetImmediate(d, 0); @@ -2029,21 +1799,7 @@ void JitArm64::slwx(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, s = inst.RS; - if (gpr.IsImm(b) && gpr.IsImm(s)) - { - u32 i = gpr.GetImm(s), j = gpr.GetImm(b); - gpr.SetImmediate(a, (j & 0x20) ? 0 : i << (j & 0x1F)); - - if (inst.Rc) - ComputeRC0(gpr.GetImm(a)); - } - else if (gpr.IsImm(s, 0)) - { - gpr.SetImmediate(a, 0); - if (inst.Rc) - ComputeRC0(0); - } - else if (gpr.IsImm(b)) + if (gpr.IsImm(b)) { u32 i = gpr.GetImm(b); if (i & 0x20) @@ -2080,15 +1836,7 @@ void JitArm64::srwx(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, s = inst.RS; - if (gpr.IsImm(b) && gpr.IsImm(s)) - { - u32 i = gpr.GetImm(s), amount = gpr.GetImm(b); - gpr.SetImmediate(a, (amount & 0x20) ? 0 : i >> (amount & 0x1F)); - - if (inst.Rc) - ComputeRC0(gpr.GetImm(a)); - } - else if (gpr.IsImm(b)) + if (gpr.IsImm(b)) { u32 amount = gpr.GetImm(b); if (amount & 0x20) @@ -2123,34 +1871,7 @@ void JitArm64::srawx(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, s = inst.RS; - if (gpr.IsImm(b) && gpr.IsImm(s)) - { - s32 i = gpr.GetImm(s), amount = gpr.GetImm(b); - if (amount & 0x20) - { - gpr.SetImmediate(a, i & 0x80000000 ? 0xFFFFFFFF : 0); - ComputeCarry(i & 0x80000000 ? 
true : false); - } - else - { - amount &= 0x1F; - gpr.SetImmediate(a, i >> amount); - ComputeCarry(amount != 0 && i < 0 && (u32(i) << (32 - amount))); - } - - if (inst.Rc) - ComputeRC0(gpr.GetImm(a)); - return; - } - else if (gpr.IsImm(s, 0)) - { - gpr.SetImmediate(a, 0); - ComputeCarry(false); - if (inst.Rc) - ComputeRC0(0); - return; - } - else if (gpr.IsImm(b)) + if (gpr.IsImm(b)) { int amount = gpr.GetImm(b); @@ -2254,74 +1975,64 @@ void JitArm64::rlwimix(UGeckoInstruction inst) const u32 width = inst.ME - inst.MB + 1; const u32 rot_dist = inst.SH ? 32 - inst.SH : 0; - if (gpr.IsImm(a) && gpr.IsImm(s)) + if (mask == 0 || (a == s && inst.SH == 0)) { - u32 res = (gpr.GetImm(a) & ~mask) | (std::rotl(gpr.GetImm(s), inst.SH) & mask); - gpr.SetImmediate(a, res); - if (inst.Rc) - ComputeRC0(res); + // Do Nothing } - else + else if (mask == 0xFFFFFFFF) { - if (mask == 0 || (a == s && inst.SH == 0)) - { - // Do Nothing - } - else if (mask == 0xFFFFFFFF) - { - if (inst.SH || a != s) - gpr.BindToRegister(a, a == s); + if (inst.SH || a != s) + gpr.BindToRegister(a, a == s); - if (inst.SH) - ROR(gpr.R(a), gpr.R(s), rot_dist); - else if (a != s) - MOV(gpr.R(a), gpr.R(s)); - } - else if (lsb == 0 && inst.MB <= inst.ME && rot_dist + width <= 32) + if (inst.SH) + ROR(gpr.R(a), gpr.R(s), rot_dist); + else if (a != s) + MOV(gpr.R(a), gpr.R(s)); + } + else if (lsb == 0 && inst.MB <= inst.ME && rot_dist + width <= 32) + { + // Destination is in least significant position + // No mask inversion + // Source field pre-rotation is contiguous + gpr.BindToRegister(a, true); + BFXIL(gpr.R(a), gpr.R(s), rot_dist, width); + } + else if (inst.SH == 0 && inst.MB <= inst.ME) + { + // No rotation + // No mask inversion + gpr.BindToRegister(a, true); + auto WA = gpr.GetScopedReg(); + UBFX(WA, gpr.R(s), lsb, width); + BFI(gpr.R(a), WA, lsb, width); + } + else if (inst.SH && inst.MB <= inst.ME) + { + // No mask inversion + gpr.BindToRegister(a, true); + if ((rot_dist + lsb) % 32 == 0) { - // 
Destination is in least significant position - // No mask inversion - // Source field pre-rotation is contiguous - gpr.BindToRegister(a, true); - BFXIL(gpr.R(a), gpr.R(s), rot_dist, width); - } - else if (inst.SH == 0 && inst.MB <= inst.ME) - { - // No rotation - // No mask inversion - gpr.BindToRegister(a, true); - auto WA = gpr.GetScopedReg(); - UBFX(WA, gpr.R(s), lsb, width); - BFI(gpr.R(a), WA, lsb, width); - } - else if (inst.SH && inst.MB <= inst.ME) - { - // No mask inversion - gpr.BindToRegister(a, true); - if ((rot_dist + lsb) % 32 == 0) - { - BFI(gpr.R(a), gpr.R(s), lsb, width); - } - else - { - auto WA = gpr.GetScopedReg(); - ROR(WA, gpr.R(s), (rot_dist + lsb) % 32); - BFI(gpr.R(a), WA, lsb, width); - } + BFI(gpr.R(a), gpr.R(s), lsb, width); } else { - gpr.BindToRegister(a, true); - ARM64Reg RA = gpr.R(a); auto WA = gpr.GetScopedReg(); - const u32 inverted_mask = ~mask; - - AND(WA, gpr.R(s), LogicalImm(std::rotl(mask, rot_dist), GPRSize::B32)); - AND(RA, RA, LogicalImm(inverted_mask, GPRSize::B32)); - ORR(RA, RA, WA, ArithOption(WA, ShiftType::ROR, rot_dist)); + ROR(WA, gpr.R(s), (rot_dist + lsb) % 32); + BFI(gpr.R(a), WA, lsb, width); } - - if (inst.Rc) - ComputeRC0(gpr.R(a)); } + else + { + gpr.BindToRegister(a, true); + ARM64Reg RA = gpr.R(a); + auto WA = gpr.GetScopedReg(); + const u32 inverted_mask = ~mask; + + AND(WA, gpr.R(s), LogicalImm(std::rotl(mask, rot_dist), GPRSize::B32)); + AND(RA, RA, LogicalImm(inverted_mask, GPRSize::B32)); + ORR(RA, RA, WA, ArithOption(WA, ShiftType::ROR, rot_dist)); + } + + if (inst.Rc) + ComputeRC0(gpr.R(a)); } diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp index 20a86c0389d..8ba7a6a3be4 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp @@ -114,8 +114,7 @@ void Arm64RegCache::FlushMostStaleRegister() const auto& reg = m_guest_registers[i]; const u32 last_used 
= reg.GetLastUsed(); - if (last_used > most_stale_amount && reg.GetType() != RegType::NotLoaded && - reg.GetType() != RegType::Discarded && reg.GetType() != RegType::Immediate) + if (last_used > most_stale_amount && reg.IsInHostRegister()) { most_stale_preg = i; most_stale_amount = last_used; @@ -137,12 +136,6 @@ void Arm64RegCache::DiscardRegister(size_t preg) UnlockRegister(host_reg); } -// GPR Cache -constexpr size_t GUEST_GPR_COUNT = 32; -constexpr size_t GUEST_CR_COUNT = 8; -constexpr size_t GUEST_GPR_OFFSET = 0; -constexpr size_t GUEST_CR_OFFSET = GUEST_GPR_COUNT; - Arm64GPRCache::Arm64GPRCache() : Arm64RegCache(GUEST_GPR_COUNT + GUEST_CR_COUNT) { } @@ -151,6 +144,19 @@ void Arm64GPRCache::Start(PPCAnalyst::BlockRegStats& stats) { } +// Returns if a register is set as an immediate. Only valid for guest GPRs. +bool Arm64GPRCache::IsImm(size_t preg) const +{ + return m_jit->GetConstantPropagation().HasGPR(preg); +} + +// Gets the immediate that a register is set to. Only valid for guest GPRs. 
+u32 Arm64GPRCache::GetImm(size_t preg) const +{ + ASSERT(m_jit->GetConstantPropagation().HasGPR(preg)); + return m_jit->GetConstantPropagation().GetGPR(preg); +} + bool Arm64GPRCache::IsCallerSaved(ARM64Reg reg) const { return ARM64XEmitter::CALLER_SAVED_GPRS[DecodeReg(reg)]; @@ -192,11 +198,12 @@ void Arm64GPRCache::FlushRegister(size_t index, FlushMode mode, ARM64Reg tmp_reg GuestRegInfo guest_reg = GetGuestByIndex(index); OpArg& reg = guest_reg.reg; size_t bitsize = guest_reg.bitsize; + const bool is_gpr = index >= GUEST_GPR_OFFSET && index < GUEST_GPR_OFFSET + GUEST_GPR_COUNT; - if (reg.GetType() == RegType::Register) + if (reg.IsInHostRegister()) { ARM64Reg host_reg = reg.GetReg(); - if (reg.IsDirty()) + if (!reg.IsInPPCState()) m_emit->STR(IndexType::Unsigned, host_reg, PPC_REG, u32(guest_reg.ppc_offset)); if (mode == FlushMode::All) @@ -205,11 +212,12 @@ void Arm64GPRCache::FlushRegister(size_t index, FlushMode mode, ARM64Reg tmp_reg reg.Flush(); } } - else if (reg.GetType() == RegType::Immediate) + else if (is_gpr && IsImm(index - GUEST_GPR_OFFSET)) { - if (reg.IsDirty()) + if (!reg.IsInPPCState()) { - if (!reg.GetImm()) + const u32 imm = GetImm(index - GUEST_GPR_OFFSET); + if (imm == 0) { m_emit->STR(IndexType::Unsigned, bitsize == 64 ? ARM64Reg::ZR : ARM64Reg::WZR, PPC_REG, u32(guest_reg.ppc_offset)); @@ -231,7 +239,7 @@ void Arm64GPRCache::FlushRegister(size_t index, FlushMode mode, ARM64Reg tmp_reg const ARM64Reg encoded_tmp_reg = bitsize != 64 ? 
tmp_reg : EncodeRegTo64(tmp_reg); - m_emit->MOVI2R(encoded_tmp_reg, reg.GetImm()); + m_emit->MOVI2R(encoded_tmp_reg, imm); m_emit->STR(IndexType::Unsigned, encoded_tmp_reg, PPC_REG, u32(guest_reg.ppc_offset)); if (allocated_tmp_reg) @@ -250,10 +258,10 @@ void Arm64GPRCache::FlushRegisters(BitSet32 regs, FlushMode mode, ARM64Reg tmp_r for (auto iter = regs.begin(); iter != regs.end(); ++iter) { const int i = *iter; - + OpArg& reg = m_guest_registers[GUEST_GPR_OFFSET + i]; ASSERT_MSG(DYNA_REC, - ignore_discarded_registers != IgnoreDiscardedRegisters::No || - m_guest_registers[GUEST_GPR_OFFSET + i].GetType() != RegType::Discarded, + ignore_discarded_registers != IgnoreDiscardedRegisters::No || reg.IsInPPCState() || + reg.IsInHostRegister() || IsImm(i), "Attempted to flush discarded register"); if (i + 1 < int(GUEST_GPR_COUNT) && regs[i + 1]) @@ -261,27 +269,27 @@ void Arm64GPRCache::FlushRegisters(BitSet32 regs, FlushMode mode, ARM64Reg tmp_r // We've got two guest registers in a row to store OpArg& reg1 = m_guest_registers[GUEST_GPR_OFFSET + i]; OpArg& reg2 = m_guest_registers[GUEST_GPR_OFFSET + i + 1]; - const bool reg1_imm = reg1.GetType() == RegType::Immediate; - const bool reg2_imm = reg2.GetType() == RegType::Immediate; - const bool reg1_zero = reg1_imm && reg1.GetImm() == 0; - const bool reg2_zero = reg2_imm && reg2.GetImm() == 0; + const bool reg1_imm = IsImm(i); + const bool reg2_imm = IsImm(i + 1); + const bool reg1_zero = reg1_imm && GetImm(i) == 0; + const bool reg2_zero = reg2_imm && GetImm(i + 1) == 0; const bool flush_all = mode == FlushMode::All; - if (reg1.IsDirty() && reg2.IsDirty() && - (reg1.GetType() == RegType::Register || (reg1_imm && (reg1_zero || flush_all))) && - (reg2.GetType() == RegType::Register || (reg2_imm && (reg2_zero || flush_all)))) + if (!reg1.IsInPPCState() && !reg2.IsInPPCState() && + (reg1.IsInHostRegister() || (reg1_imm && (reg1_zero || flush_all))) && + (reg2.IsInHostRegister() || (reg2_imm && (reg2_zero || flush_all)))) { 
const size_t ppc_offset = GetGuestByIndex(i).ppc_offset; if (ppc_offset <= 252) { - ARM64Reg RX1 = reg1_zero ? ARM64Reg::WZR : R(GetGuestByIndex(i)); - ARM64Reg RX2 = reg2_zero ? ARM64Reg::WZR : R(GetGuestByIndex(i + 1)); + ARM64Reg RX1 = reg1_zero ? ARM64Reg::WZR : BindForRead(i); + ARM64Reg RX2 = reg2_zero ? ARM64Reg::WZR : BindForRead(i + 1); m_emit->STP(IndexType::Signed, RX1, RX2, PPC_REG, u32(ppc_offset)); if (flush_all) { - if (!reg1_zero) - UnlockRegister(EncodeRegTo32(RX1)); - if (!reg2_zero) - UnlockRegister(EncodeRegTo32(RX2)); + if (reg1.IsInHostRegister()) + UnlockRegister(reg1.GetReg()); + if (reg2.IsInHostRegister()) + UnlockRegister(reg2.GetReg()); reg1.Flush(); reg2.Flush(); } @@ -300,9 +308,10 @@ void Arm64GPRCache::FlushCRRegisters(BitSet8 regs, FlushMode mode, ARM64Reg tmp_ { for (int i : regs) { + OpArg& reg = m_guest_registers[GUEST_CR_OFFSET + i]; ASSERT_MSG(DYNA_REC, - ignore_discarded_registers != IgnoreDiscardedRegisters::No || - m_guest_registers[GUEST_CR_OFFSET + i].GetType() != RegType::Discarded, + ignore_discarded_registers != IgnoreDiscardedRegisters::No || reg.IsInPPCState() || + reg.IsInHostRegister(), "Attempted to flush discarded register"); FlushRegister(GUEST_CR_OFFSET + i, mode, tmp_reg); @@ -335,94 +344,89 @@ void Arm64GPRCache::Flush(FlushMode mode, ARM64Reg tmp_reg, FlushCRRegisters(BitSet8(0xFF), mode, tmp_reg, ignore_discarded_registers); } -ARM64Reg Arm64GPRCache::R(const GuestRegInfo& guest_reg) +ARM64Reg Arm64GPRCache::BindForRead(size_t index) { + GuestRegInfo guest_reg = GetGuestByIndex(index); OpArg& reg = guest_reg.reg; size_t bitsize = guest_reg.bitsize; + const bool is_gpr = index >= GUEST_GPR_OFFSET && index < GUEST_GPR_OFFSET + GUEST_GPR_COUNT; IncrementAllUsed(); reg.ResetLastUsed(); - switch (reg.GetType()) + if (reg.IsInHostRegister()) { - case RegType::Register: // already in a reg return reg.GetReg(); - case RegType::Immediate: // Is an immediate + } + else if (is_gpr && IsImm(index - GUEST_GPR_OFFSET)) { 
ARM64Reg host_reg = bitsize != 64 ? GetReg() : EncodeRegTo64(GetReg()); - m_emit->MOVI2R(host_reg, reg.GetImm()); + m_emit->MOVI2R(host_reg, GetImm(index - GUEST_GPR_OFFSET)); reg.Load(host_reg); return host_reg; } - break; - case RegType::Discarded: - ASSERT_MSG(DYNA_REC, false, "Attempted to read discarded register"); - break; - case RegType::NotLoaded: // Register isn't loaded at /all/ + else // Register isn't loaded at /all/ { - // This is a bit annoying. We try to keep these preloaded as much as possible - // This can also happen on cases where PPCAnalyst isn't feeing us proper register usage - // statistics + ASSERT_MSG(DYNA_REC, reg.IsInPPCState(), "Attempted to read discarded register"); ARM64Reg host_reg = bitsize != 64 ? GetReg() : EncodeRegTo64(GetReg()); reg.Load(host_reg); reg.SetDirty(false); m_emit->LDR(IndexType::Unsigned, host_reg, PPC_REG, u32(guest_reg.ppc_offset)); return host_reg; } - break; - default: - ERROR_LOG_FMT(DYNA_REC, "Invalid OpArg Type!"); - break; - } - // We've got an issue if we end up here - return ARM64Reg::INVALID_REG; } -void Arm64GPRCache::SetImmediate(const GuestRegInfo& guest_reg, u32 imm, bool dirty) +void Arm64GPRCache::SetImmediateInternal(size_t index, u32 imm, bool dirty) { + GuestRegInfo guest_reg = GetGuestByIndex(index); OpArg& reg = guest_reg.reg; - if (reg.GetType() == RegType::Register) + if (reg.IsInHostRegister()) UnlockRegister(EncodeRegTo32(reg.GetReg())); - reg.LoadToImm(imm); + reg.Discard(); reg.SetDirty(dirty); + m_jit->GetConstantPropagation().SetGPR(index - GUEST_GPR_OFFSET, imm); } -void Arm64GPRCache::BindToRegister(const GuestRegInfo& guest_reg, bool will_read, bool will_write) +void Arm64GPRCache::BindForWrite(size_t index, bool will_read, bool will_write) { + GuestRegInfo guest_reg = GetGuestByIndex(index); OpArg& reg = guest_reg.reg; const size_t bitsize = guest_reg.bitsize; + const bool is_gpr = index >= GUEST_GPR_OFFSET && index < GUEST_GPR_OFFSET + GUEST_GPR_COUNT; reg.ResetLastUsed(); - const 
RegType reg_type = reg.GetType(); - if (reg_type == RegType::NotLoaded || reg_type == RegType::Discarded) + if (!reg.IsInHostRegister()) { - const ARM64Reg host_reg = bitsize != 64 ? GetReg() : EncodeRegTo64(GetReg()); - reg.Load(host_reg); - reg.SetDirty(will_write); - if (will_read) + if (is_gpr && IsImm(index - GUEST_GPR_OFFSET)) { - ASSERT_MSG(DYNA_REC, reg_type != RegType::Discarded, "Attempted to load a discarded value"); - m_emit->LDR(IndexType::Unsigned, host_reg, PPC_REG, u32(guest_reg.ppc_offset)); + const ARM64Reg host_reg = bitsize != 64 ? GetReg() : EncodeRegTo64(GetReg()); + if (will_read || !will_write) + { + // TODO: Emitting this instruction when (!will_read && !will_write) would be unnecessary if + // we had some way to indicate to Flush that the immediate value should be written to + // ppcState even though there is a host register allocated + m_emit->MOVI2R(host_reg, GetImm(index - GUEST_GPR_OFFSET)); + } + reg.Load(host_reg); + } + else + { + ASSERT_MSG(DYNA_REC, !will_read || reg.IsInPPCState(), "Attempted to load a discarded value"); + const ARM64Reg host_reg = bitsize != 64 ? GetReg() : EncodeRegTo64(GetReg()); + reg.Load(host_reg); + reg.SetDirty(will_write); + if (will_read) + m_emit->LDR(IndexType::Unsigned, host_reg, PPC_REG, u32(guest_reg.ppc_offset)); + return; } } - else if (reg_type == RegType::Immediate) - { - const ARM64Reg host_reg = bitsize != 64 ? 
GetReg() : EncodeRegTo64(GetReg()); - if (will_read || !will_write) - { - // TODO: Emitting this instruction when (!will_read && !will_write) would be unnecessary if we - // had some way to indicate to Flush that the immediate value should be written to ppcState - // even though there is a host register allocated - m_emit->MOVI2R(host_reg, reg.GetImm()); - } - reg.Load(host_reg); - if (will_write) - reg.SetDirty(true); - } - else if (will_write) + + if (will_write) { reg.SetDirty(true); + if (is_gpr) + m_jit->GetConstantPropagation().ClearGPR(index - GUEST_GPR_OFFSET); } } @@ -484,7 +488,7 @@ BitSet32 Arm64GPRCache::GetDirtyGPRs() const for (size_t i = 0; i < GUEST_GPR_COUNT; ++i) { const OpArg& arg = m_guest_registers[GUEST_GPR_OFFSET + i]; - registers[i] = arg.GetType() != RegType::NotLoaded && arg.IsDirty(); + registers[i] = !arg.IsInPPCState(); } return registers; } @@ -494,7 +498,7 @@ void Arm64GPRCache::FlushByHost(ARM64Reg host_reg, ARM64Reg tmp_reg) for (size_t i = 0; i < m_guest_registers.size(); ++i) { const OpArg& reg = m_guest_registers[i]; - if (reg.GetType() == RegType::Register && DecodeReg(reg.GetReg()) == DecodeReg(host_reg)) + if (reg.IsInHostRegister() && DecodeReg(reg.GetReg()) == DecodeReg(host_reg)) { FlushRegister(i, FlushMode::All, tmp_reg); return; @@ -514,17 +518,17 @@ void Arm64FPRCache::Flush(FlushMode mode, ARM64Reg tmp_reg, { for (size_t i = 0; i < m_guest_registers.size(); ++i) { - const RegType reg_type = m_guest_registers[i].GetType(); - - if (reg_type == RegType::Discarded) - { - ASSERT_MSG(DYNA_REC, ignore_discarded_registers != IgnoreDiscardedRegisters::No, - "Attempted to flush discarded register"); - } - else if (reg_type != RegType::NotLoaded && reg_type != RegType::Immediate) + if (m_guest_registers[i].IsInHostRegister()) { FlushRegister(i, mode, tmp_reg); } + else + { + ASSERT_MSG(DYNA_REC, + ignore_discarded_registers != IgnoreDiscardedRegisters::No || + m_guest_registers[i].IsInPPCState(), + "Attempted to flush discarded 
register"); + } } } @@ -533,9 +537,32 @@ ARM64Reg Arm64FPRCache::R(size_t preg, RegType type) OpArg& reg = m_guest_registers[preg]; IncrementAllUsed(); reg.ResetLastUsed(); + + if (!reg.IsInHostRegister()) + { + ASSERT_MSG(DYNA_REC, reg.IsInPPCState(), "Attempted to read discarded register"); + + ARM64Reg host_reg = GetReg(); + u32 load_size; + if (type == RegType::Register) + { + load_size = 128; + reg.Load(host_reg, RegType::Register); + } + else + { + load_size = 64; + reg.Load(host_reg, RegType::LowerPair); + } + reg.SetDirty(false); + m_float_emit->LDR(load_size, IndexType::Unsigned, host_reg, PPC_REG, + static_cast(PPCSTATE_OFF_PS0(preg))); + return host_reg; + } + ARM64Reg host_reg = reg.GetReg(); - switch (reg.GetType()) + switch (reg.GetFPRType()) { case RegType::Single: { @@ -618,28 +645,6 @@ ARM64Reg Arm64FPRCache::R(size_t preg, RegType type) } return host_reg; } - case RegType::Discarded: - ASSERT_MSG(DYNA_REC, false, "Attempted to read discarded register"); - break; - case RegType::NotLoaded: // Register isn't loaded at /all/ - { - host_reg = GetReg(); - u32 load_size; - if (type == RegType::Register) - { - load_size = 128; - reg.Load(host_reg, RegType::Register); - } - else - { - load_size = 64; - reg.Load(host_reg, RegType::LowerPair); - } - reg.SetDirty(false); - m_float_emit->LDR(load_size, IndexType::Unsigned, host_reg, PPC_REG, - static_cast(PPCSTATE_OFF_PS0(preg))); - return host_reg; - } default: DEBUG_ASSERT_MSG(DYNA_REC, false, "Invalid OpArg Type!"); break; @@ -655,16 +660,17 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type, bool set_dirty) IncrementAllUsed(); reg.ResetLastUsed(); - // Only the lower value will be overwritten, so we must be extra careful to store PSR1 if dirty. - if (reg.IsDirty() && (type == RegType::LowerPair || type == RegType::LowerPairSingle)) + // If PS1 is dirty, but the caller wants a RegType with only PS0, we must write PS1 to m_ppc_state + // now so the contents of PS1 aren't lost. 
+ if (!reg.IsInPPCState() && (type == RegType::LowerPair || type == RegType::LowerPairSingle)) { - // We must *not* change host_reg as this register might still be in use. So it's fine to - // store this register, but it's *not* fine to convert it to double. So for double conversion, - // a temporary register needs to be used. + // We must *not* modify host_reg, as the current guest instruction might want to read its old + // value before overwriting it. So it's fine to store this register, but it's *not* fine to + // convert it to double in place. For double conversion, a temporary register needs to be used. ARM64Reg host_reg = reg.GetReg(); ARM64Reg flush_reg = host_reg; - switch (reg.GetType()) + switch (reg.GetFPRType()) { case RegType::Single: // For a store-safe register, conversion is just one instruction regardless of whether @@ -706,8 +712,8 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type, bool set_dirty) // Store PSR1 (which is equal to PSR0) in memory. m_float_emit->STR(64, IndexType::Unsigned, flush_reg, PPC_REG, static_cast(PPCSTATE_OFF_PS1(preg))); - reg.Load(host_reg, reg.GetType() == RegType::DuplicatedSingle ? RegType::LowerPairSingle : - RegType::LowerPair); + reg.Load(host_reg, reg.GetFPRType() == RegType::DuplicatedSingle ? RegType::LowerPairSingle : + RegType::LowerPair); break; default: // All other types doesn't store anything in PSR1. @@ -718,7 +724,7 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type, bool set_dirty) Unlock(flush_reg); } - if (reg.GetType() == RegType::NotLoaded || reg.GetType() == RegType::Discarded) + if (!reg.IsInHostRegister()) { // If not loaded at all, just alloc a new one. 
reg.Load(GetReg(), type); @@ -782,10 +788,8 @@ void Arm64FPRCache::FlushByHost(ARM64Reg host_reg, ARM64Reg tmp_reg) for (size_t i = 0; i < m_guest_registers.size(); ++i) { const OpArg& reg = m_guest_registers[i]; - const RegType reg_type = reg.GetType(); - if (reg_type != RegType::NotLoaded && reg_type != RegType::Discarded && - reg_type != RegType::Immediate && reg.GetReg() == host_reg) + if (reg.IsInHostRegister() && reg.GetReg() == host_reg) { FlushRegister(i, FlushMode::All, tmp_reg); return; @@ -802,8 +806,8 @@ bool Arm64FPRCache::IsTopHalfUsed(ARM64Reg reg) const { for (const OpArg& r : m_guest_registers) { - if (r.GetReg() != ARM64Reg::INVALID_REG && DecodeReg(r.GetReg()) == DecodeReg(reg)) - return r.GetType() == RegType::Register; + if (r.IsInHostRegister() && DecodeReg(r.GetReg()) == DecodeReg(reg)) + return r.GetFPRType() == RegType::Register; } return false; @@ -813,8 +817,8 @@ void Arm64FPRCache::FlushRegister(size_t preg, FlushMode mode, ARM64Reg tmp_reg) { OpArg& reg = m_guest_registers[preg]; const ARM64Reg host_reg = reg.GetReg(); - const bool dirty = reg.IsDirty(); - RegType type = reg.GetType(); + const bool dirty = !reg.IsInPPCState(); + RegType type = reg.GetFPRType(); bool allocated_tmp_reg = false; if (tmp_reg != ARM64Reg::INVALID_REG) @@ -921,7 +925,7 @@ BitSet32 Arm64FPRCache::GetCallerSavedUsed() const bool Arm64FPRCache::IsSingle(size_t preg, bool lower_only) const { - const RegType type = m_guest_registers[preg].GetType(); + const RegType type = m_guest_registers[preg].GetFPRType(); return type == RegType::Single || type == RegType::DuplicatedSingle || (lower_only && type == RegType::LowerPairSingle); } @@ -929,18 +933,18 @@ bool Arm64FPRCache::IsSingle(size_t preg, bool lower_only) const void Arm64FPRCache::FixSinglePrecision(size_t preg) { OpArg& reg = m_guest_registers[preg]; + if (!reg.IsInHostRegister()) + return; + ARM64Reg host_reg = reg.GetReg(); - switch (reg.GetType()) + if (reg.GetFPRType() == RegType::Duplicated) // only PS0 
needs to be converted { - case RegType::Duplicated: // only PS0 needs to be converted m_float_emit->FCVT(32, 64, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); reg.Load(host_reg, RegType::DuplicatedSingle); - break; - case RegType::Register: // PS0 and PS1 need to be converted + } + else if (reg.GetFPRType() == RegType::Register) // PS0 and PS1 need to be converted + { m_float_emit->FCVTN(32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); reg.Load(host_reg, RegType::Single); - break; - default: - break; } } diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h index 990b2dcee79..d547ee9d4ee 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h @@ -60,16 +60,12 @@ static_assert(PPCSTATE_OFF(xer_so_ov) < 4096, "STRB can't store xer_so_ov!"); enum class RegType { - NotLoaded, - Discarded, // Reg is not loaded because we know it won't be read before the next write - Register, // Reg type is register - Immediate, // Reg is really a IMM - LowerPair, // Only the lower pair of a paired register - Duplicated, // The lower reg is the same as the upper one (physical upper doesn't actually have - // the duplicated value) - Single, // Both registers are loaded as single - LowerPairSingle, // Only the lower pair of a paired register, as single - DuplicatedSingle, // The lower one contains both registers, as single + Register, // PS0 and PS1, each 64-bit + LowerPair, // PS0 only, 64-bit + Duplicated, // PS0 and PS1 are identical, host register only stores one lane (64-bit) + Single, // PS0 and PS1, each 32-bit + LowerPairSingle, // PS0 only, 32-bit + DuplicatedSingle, // PS0 and PS1 are identical, host register only stores one lane (32-bit) }; enum class FlushMode : bool @@ -92,26 +88,21 @@ class OpArg public: OpArg() = default; - RegType GetType() const { return m_type; } + RegType GetFPRType() const { return 
m_fpr_type; } Arm64Gen::ARM64Reg GetReg() const { return m_reg; } - u32 GetImm() const { return m_value; } - void Load(Arm64Gen::ARM64Reg reg, RegType type = RegType::Register) + void Load(Arm64Gen::ARM64Reg reg, RegType format = RegType::Register) { - m_type = type; m_reg = reg; - } - void LoadToImm(u32 imm) - { - m_type = RegType::Immediate; - m_value = imm; - - m_reg = Arm64Gen::ARM64Reg::INVALID_REG; + m_fpr_type = format; + m_in_host_register = true; } void Discard() { // Invalidate any previous information - m_type = RegType::Discarded; m_reg = Arm64Gen::ARM64Reg::INVALID_REG; + m_fpr_type = RegType::Register; + m_in_ppc_state = false; + m_in_host_register = false; // Arbitrarily large value that won't roll over on a lot of increments m_last_used = 0xFFFF; @@ -119,8 +110,10 @@ public: void Flush() { // Invalidate any previous information - m_type = RegType::NotLoaded; m_reg = Arm64Gen::ARM64Reg::INVALID_REG; + m_fpr_type = RegType::Register; + m_in_ppc_state = true; + m_in_host_register = false; // Arbitrarily large value that won't roll over on a lot of increments m_last_used = 0xFFFF; @@ -129,20 +122,18 @@ public: u32 GetLastUsed() const { return m_last_used; } void ResetLastUsed() { m_last_used = 0; } void IncrementLastUsed() { ++m_last_used; } - void SetDirty(bool dirty) { m_dirty = dirty; } - bool IsDirty() const { return m_dirty; } + void SetDirty(bool dirty) { m_in_ppc_state = !dirty; } + bool IsInPPCState() const { return m_in_ppc_state; } + bool IsInHostRegister() const { return m_in_host_register; } private: - // For REG_REG - RegType m_type = RegType::NotLoaded; // store type Arm64Gen::ARM64Reg m_reg = Arm64Gen::ARM64Reg::INVALID_REG; // host register we are in - - // For REG_IMM - u32 m_value = 0; // IMM value + RegType m_fpr_type = RegType::Register; // for FPRs only u32 m_last_used = 0; - bool m_dirty = false; + bool m_in_ppc_state = true; + bool m_in_host_register = false; }; class HostReg @@ -328,22 +319,22 @@ public: // Returns a guest GPR 
inside of a host register. // Will dump an immediate to the host register as well. - Arm64Gen::ARM64Reg R(size_t preg) { return R(GetGuestGPR(preg)); } + Arm64Gen::ARM64Reg R(size_t preg) { return BindForRead(GUEST_GPR_OFFSET + preg); } // Returns a guest CR inside of a host register. - Arm64Gen::ARM64Reg CR(size_t preg) { return R(GetGuestCR(preg)); } + Arm64Gen::ARM64Reg CR(size_t preg) { return BindForRead(GUEST_CR_OFFSET + preg); } // Set a register to an immediate. Only valid for guest GPRs. void SetImmediate(size_t preg, u32 imm, bool dirty = true) { - SetImmediate(GetGuestGPR(preg), imm, dirty); + SetImmediateInternal(GUEST_GPR_OFFSET + preg, imm, dirty); } - // Returns if a register is set as an immediate. Only valid for guest GPRs. - bool IsImm(size_t preg) const { return GetGuestGPROpArg(preg).GetType() == RegType::Immediate; } + // Returns whether a register is set as an immediate. Only valid for guest GPRs. + bool IsImm(size_t preg) const; // Gets the immediate that a register is set to. Only valid for guest GPRs. - u32 GetImm(size_t preg) const { return GetGuestGPROpArg(preg).GetImm(); } + u32 GetImm(size_t preg) const; bool IsImm(size_t preg, u32 imm) const { return IsImm(preg) && GetImm(preg) == imm; } @@ -374,14 +365,14 @@ public: // flushed. Just remember to call this function again with will_write = true after the Flush call. void BindToRegister(size_t preg, bool will_read, bool will_write = true) { - BindToRegister(GetGuestGPR(preg), will_read, will_write); + BindForWrite(GUEST_GPR_OFFSET + preg, will_read, will_write); } // Binds a guest CR to a host register, optionally loading its value. // The description of BindToRegister above applies to this function as well. 
void BindCRToRegister(size_t preg, bool will_read, bool will_write = true) { - BindToRegister(GetGuestCR(preg), will_read, will_write); + BindForWrite(GUEST_CR_OFFSET + preg, will_read, will_write); } BitSet32 GetCallerSavedUsed() const override; @@ -428,14 +419,19 @@ private: GuestRegInfo GetGuestCR(size_t preg); GuestRegInfo GetGuestByIndex(size_t index); - Arm64Gen::ARM64Reg R(const GuestRegInfo& guest_reg); - void SetImmediate(const GuestRegInfo& guest_reg, u32 imm, bool dirty); - void BindToRegister(const GuestRegInfo& guest_reg, bool will_read, bool will_write = true); + Arm64Gen::ARM64Reg BindForRead(size_t index); + void SetImmediateInternal(size_t index, u32 imm, bool dirty); + void BindForWrite(size_t index, bool will_read, bool will_write = true); void FlushRegisters(BitSet32 regs, FlushMode mode, Arm64Gen::ARM64Reg tmp_reg, IgnoreDiscardedRegisters ignore_discarded_registers); void FlushCRRegisters(BitSet8 regs, FlushMode mode, Arm64Gen::ARM64Reg tmp_reg, IgnoreDiscardedRegisters ignore_discarded_registers); + + static constexpr size_t GUEST_GPR_COUNT = 32; + static constexpr size_t GUEST_CR_COUNT = 8; + static constexpr size_t GUEST_GPR_OFFSET = 0; + static constexpr size_t GUEST_CR_OFFSET = GUEST_GPR_COUNT; }; class Arm64FPRCache : public Arm64RegCache @@ -451,9 +447,9 @@ public: // Returns a guest register inside of a host register // Will dump an immediate to the host register as well - Arm64Gen::ARM64Reg R(size_t preg, RegType type); + Arm64Gen::ARM64Reg R(size_t preg, RegType format); - Arm64Gen::ARM64Reg RW(size_t preg, RegType type, bool set_dirty = true); + Arm64Gen::ARM64Reg RW(size_t preg, RegType format, bool set_dirty = true); BitSet32 GetCallerSavedUsed() const override; diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp new file mode 100644 index 00000000000..b632cd3fc4e --- /dev/null +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp @@ -0,0 
+1,538 @@ +// Copyright 2023 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "Core/PowerPC/JitCommon/ConstantPropagation.h" + +#include + +#include "Core/PowerPC/Gekko.h" +#include "Core/PowerPC/Interpreter/Interpreter.h" +#include "Core/PowerPC/PPCTables.h" + +namespace JitCommon +{ +static constexpr u32 BitOR(u32 a, u32 b) +{ + return a | b; +} + +static constexpr u32 BitAND(u32 a, u32 b) +{ + return a & b; +} + +static constexpr u32 BitXOR(u32 a, u32 b) +{ + return a ^ b; +} + +ConstantPropagationResult ConstantPropagation::EvaluateInstruction(UGeckoInstruction inst, + u64 flags) const +{ + switch (inst.OPCD) + { + case 7: // mulli + return EvaluateMulImm(inst); + case 8: // subfic + return EvaluateSubImmCarry(inst); + case 12: // addic + case 13: // addic. + return EvaluateAddImmCarry(inst); + case 14: // addi + case 15: // addis + return EvaluateAddImm(inst); + case 20: // rlwimix + return EvaluateRlwimix(inst); + case 21: // rlwinmx + return EvaluateRlwinmxRlwnmx(inst, inst.SH); + case 23: // rlwnmx + if (HasGPR(inst.RB)) + return EvaluateRlwinmxRlwnmx(inst, GetGPR(inst.RB) & 0x1F); + else + return {}; + case 24: // ori + case 25: // oris + return EvaluateBitwiseImm(inst, BitOR); + case 26: // xori + case 27: // xoris + return EvaluateBitwiseImm(inst, BitXOR); + case 28: // andi + case 29: // andis + return EvaluateBitwiseImm(inst, BitAND); + case 31: + return EvaluateTable31(inst, flags); + default: + return {}; + } +} + +ConstantPropagationResult ConstantPropagation::EvaluateMulImm(UGeckoInstruction inst) const +{ + if (inst.SIMM_16 == 0) + return ConstantPropagationResult(inst.RD, 0); + + if (!HasGPR(inst.RA)) + return {}; + + return ConstantPropagationResult(inst.RD, m_gpr_values[inst.RA] * inst.SIMM_16); +} + +ConstantPropagationResult ConstantPropagation::EvaluateSubImmCarry(UGeckoInstruction inst) const +{ + if (!HasGPR(inst.RA)) + return {}; + + const u32 a = GetGPR(inst.RA); + const u32 imm = s32(inst.SIMM_16); + + 
ConstantPropagationResult result(inst.RD, imm - a); + result.carry = imm >= a; + return result; +} + +ConstantPropagationResult ConstantPropagation::EvaluateAddImm(UGeckoInstruction inst) const +{ + const s32 immediate = inst.OPCD & 1 ? inst.SIMM_16 << 16 : inst.SIMM_16; + + if (inst.RA == 0) + return ConstantPropagationResult(inst.RD, immediate); + + if (!HasGPR(inst.RA)) + return {}; + + return ConstantPropagationResult(inst.RD, m_gpr_values[inst.RA] + immediate); +} + +ConstantPropagationResult ConstantPropagation::EvaluateAddImmCarry(UGeckoInstruction inst) const +{ + if (!HasGPR(inst.RA)) + return {}; + + const u32 a = m_gpr_values[inst.RA]; + const bool rc = inst.OPCD & 1; + + ConstantPropagationResult result(inst.RD, a + inst.SIMM_16, rc); + result.carry = Interpreter::Helper_Carry(a, inst.SIMM_16); + return result; +} + +ConstantPropagationResult ConstantPropagation::EvaluateRlwimix(UGeckoInstruction inst) const +{ + if (!HasGPR(inst.RS)) + return {}; + + const u32 mask = MakeRotationMask(inst.MB, inst.ME); + if (mask == 0xFFFFFFFF) + return ConstantPropagationResult(inst.RA, std::rotl(GetGPR(inst.RS), inst.SH), inst.Rc); + + if (!HasGPR(inst.RA)) + return {}; + + return ConstantPropagationResult( + inst.RA, (GetGPR(inst.RA) & ~mask) | (std::rotl(GetGPR(inst.RS), inst.SH) & mask), inst.Rc); +} + +ConstantPropagationResult ConstantPropagation::EvaluateRlwinmxRlwnmx(UGeckoInstruction inst, + u32 shift) const +{ + if (!HasGPR(inst.RS)) + return {}; + + const u32 mask = MakeRotationMask(inst.MB, inst.ME); + return ConstantPropagationResult(inst.RA, std::rotl(GetGPR(inst.RS), shift) & mask, inst.Rc); +} + +ConstantPropagationResult ConstantPropagation::EvaluateBitwiseImm(UGeckoInstruction inst, + u32 (*do_op)(u32, u32)) const +{ + const bool is_and = do_op == &BitAND; + const u32 immediate = inst.OPCD & 1 ? 
inst.UIMM << 16 : inst.UIMM;
+
+  if (inst.UIMM == 0 && !is_and && inst.RA == inst.RS)
+    return DO_NOTHING;
+
+  if (!HasGPR(inst.RS))
+    return {};
+
+  return ConstantPropagationResult(inst.RA, do_op(m_gpr_values[inst.RS], immediate), is_and);
+}
+
+ConstantPropagationResult ConstantPropagation::EvaluateTable31(UGeckoInstruction inst,
+                                                               u64 flags) const
+{
+  if (flags & FL_IN_B)
+  {
+    if (flags & FL_OUT_D)
+    {
+      // input a, b -> output d
+      return EvaluateTable31AB(inst, flags);
+    }
+    else
+    {
+      // input s, b -> output a
+      return EvaluateTable31SB(inst);
+    }
+  }
+  else
+  {
+    switch (inst.SUBOP10)
+    {
+    case 104:  // negx
+    case 616:  // negox
+      // input a -> output d
+      return EvaluateTable31Negx(inst, flags);
+    default:
+      // input s -> output a
+      return EvaluateTable31S(inst);
+    }
+  }
+}
+
+ConstantPropagationResult ConstantPropagation::EvaluateTable31Negx(UGeckoInstruction inst,
+                                                                   u64 flags) const
+{
+  if (!HasGPR(inst.RA))
+    return {};
+
+  const s64 out = -s64(s32(GetGPR(inst.RA)));
+
+  ConstantPropagationResult result(inst.RD, u32(out), inst.Rc);
+  if (flags & FL_SET_OE)
+    result.overflow = (out != s64(s32(out)));
+  return result;
+}
+
+ConstantPropagationResult ConstantPropagation::EvaluateTable31S(UGeckoInstruction inst) const
+{
+  if (!HasGPR(inst.RS))
+    return {};
+
+  std::optional<bool> carry;
+  u32 a;
+  const u32 s = GetGPR(inst.RS);
+
+  switch (inst.SUBOP10)
+  {
+  case 26:  // cntlzwx
+    a = std::countl_zero(s);
+    break;
+  case 824:  // srawix
+    a = s32(s) >> inst.SH;
+    carry = inst.SH != 0 && s32(s) < 0 && (s << (32 - inst.SH));
+    break;
+  case 922:  // extshx
+    a = s32(s16(s));
+    break;
+  case 954:  // extsbx
+    a = s32(s8(s));
+    break;
+  default:
+    return {};
+  }
+
+  ConstantPropagationResult result(ConstantPropagationResult(inst.RA, a, inst.Rc));
+  result.carry = carry;
+  return result;
+}
+
+ConstantPropagationResult ConstantPropagation::EvaluateTable31AB(UGeckoInstruction inst,
+                                                                 u64 flags) const
+{
+  const bool has_a = HasGPR(inst.RA);
+  const bool has_b =
HasGPR(inst.RB); + if (!has_a || !has_b) + { + if (has_a) + return EvaluateTable31ABOneRegisterKnown(inst, flags, GetGPR(inst.RA), false); + else if (has_b) + return EvaluateTable31ABOneRegisterKnown(inst, flags, GetGPR(inst.RB), true); + else if (inst.RA == inst.RB) + return EvaluateTable31ABIdenticalRegisters(inst, flags); + else + return {}; + } + + u64 d; + s64 d_overflow; + const u32 a = GetGPR(inst.RA); + const u32 b = GetGPR(inst.RB); + + switch (inst.SUBOP10) + { + case 8: // subfcx + case 40: // subfx + case 520: // subfcox + case 552: // subfox + d = u64(u32(~a)) + u64(b) + 1; + d_overflow = s64(s32(b)) - s64(s32(a)); + break; + case 10: // addcx + case 522: // addcox + case 266: // addx + case 778: // addox + d = u64(a) + u64(b); + d_overflow = s64(s32(a)) + s64(s32(b)); + break; + case 11: // mulhwux + d = d_overflow = (u64(a) * u64(b)) >> 32; + break; + case 75: // mulhwx + d = d_overflow = u64(s64(s32(a)) * s64(s32(b))) >> 32; + break; + case 235: // mullwx + case 747: // mullwox + d = d_overflow = s64(s32(a)) * s64(s32(b)); + break; + case 459: // divwux + case 971: // divwuox + d = d_overflow = b == 0 ? 0x1'0000'0000 : u64(a / b); + break; + case 491: // divwx + case 1003: // divwox + d = d_overflow = b == 0 || (a == 0x80000000 && b == 0xFFFFFFFF) ? + (s32(a) < 0 ? 
0xFFFFFFFF : 0x1'0000'0000) : + s32(a) / s32(b); + break; + default: + return {}; + } + + ConstantPropagationResult result(inst.RD, u32(d), inst.Rc); + if (flags & FL_SET_CA) + result.carry = (d >> 32 != 0); + if (flags & FL_SET_OE) + result.overflow = (d_overflow != s64(s32(d_overflow))); + return result; +} + +ConstantPropagationResult +ConstantPropagation::EvaluateTable31ABOneRegisterKnown(UGeckoInstruction inst, u64 flags, u32 value, + bool known_reg_is_b) const +{ + switch (inst.SUBOP10) + { + case 11: // mulhwux + case 75: // mulhwx + case 235: // mullwx + case 747: // mullwox + if (value == 0) + { + ConstantPropagationResult result(inst.RD, 0, inst.Rc); + if (flags & FL_SET_OE) + result.overflow = false; + return result; + } + break; + case 459: // divwux + case 971: // divwuox + if (known_reg_is_b && value == 0) + { + ConstantPropagationResult result(inst.RD, 0, inst.Rc); + if (flags & FL_SET_OE) + result.overflow = true; + return result; + } + [[fallthrough]]; + case 491: // divwx + case 1003: // divwox + if (!known_reg_is_b && value == 0 && !(flags & FL_SET_OE)) + { + return ConstantPropagationResult(inst.RD, 0, inst.Rc); + } + break; + } + + return {}; +} + +ConstantPropagationResult +ConstantPropagation::EvaluateTable31ABIdenticalRegisters(UGeckoInstruction inst, u64 flags) const +{ + switch (inst.SUBOP10) + { + case 8: // subfcx + case 40: // subfx + case 520: // subfcox + case 552: // subfox + { + ConstantPropagationResult result(inst.RD, 0, inst.Rc); + if (flags & FL_SET_CA) + result.carry = true; + if (flags & FL_SET_OE) + result.overflow = false; + return result; + } + default: + return {}; + } +} + +ConstantPropagationResult ConstantPropagation::EvaluateTable31SB(UGeckoInstruction inst) const +{ + const bool has_s = HasGPR(inst.RS); + const bool has_b = HasGPR(inst.RB); + if (!has_s || !has_b) + { + if (has_s) + return EvaluateTable31SBOneRegisterKnown(inst, GetGPR(inst.RS), false); + else if (has_b) + return 
EvaluateTable31SBOneRegisterKnown(inst, GetGPR(inst.RB), true); + else if (inst.RS == inst.RB) + return EvaluateTable31SBIdenticalRegisters(inst); + else + return {}; + } + + u32 a; + const u32 s = GetGPR(inst.RS); + const u32 b = GetGPR(inst.RB); + + switch (inst.SUBOP10) + { + case 24: // slwx + a = u32(u64(s) << (b & 0x3f)); + break; + case 28: // andx + a = s & b; + break; + case 60: // andcx + a = s & (~b); + break; + case 124: // norx + a = ~(s | b); + break; + case 284: // eqvx + a = ~(s ^ b); + break; + case 316: // xorx + a = s ^ b; + break; + case 412: // orcx + a = s | (~b); + break; + case 444: // orx + a = s | b; + break; + case 476: // nandx + a = ~(s & b); + break; + case 536: // srwx + a = u32(u64(s) >> (b & 0x3f)); + break; + case 792: // srawx + { + const u64 temp = (s64(s32(s)) << 32) >> (b & 0x3f); + a = u32(temp >> 32); + + ConstantPropagationResult result(inst.RA, a, inst.Rc); + result.carry = (temp & a) != 0; + return result; + } + default: + return {}; + } + + return ConstantPropagationResult(inst.RA, a, inst.Rc); +} + +ConstantPropagationResult +ConstantPropagation::EvaluateTable31SBOneRegisterKnown(UGeckoInstruction inst, u32 value, + bool known_reg_is_b) const +{ + u32 a; + + switch (inst.SUBOP10) + { + case 24: // slwx + case 536: // srwx + if (!known_reg_is_b && value == 0) + a = 0; + else if (known_reg_is_b && (value & 0x20)) + a = 0; + else + return {}; + break; + case 60: // andcx + if (known_reg_is_b) + value = ~value; + [[fallthrough]]; + case 28: // andx + if (value == 0) + a = 0; + else + return {}; + break; + case 124: // norx + if (value == 0xFFFFFFFF) + a = 0; + else + return {}; + break; + case 412: // orcx + if (known_reg_is_b) + value = ~value; + [[fallthrough]]; + case 444: // orx + if (value == 0xFFFFFFFF) + a = 0xFFFFFFFF; + else + return {}; + break; + case 476: // nandx + if (value == 0) + a = 0xFFFFFFFF; + else + return {}; + break; + case 792: // srawx + if (!known_reg_is_b && value == 0) + { + 
ConstantPropagationResult result(inst.RA, 0, inst.Rc); + result.carry = false; + return result; + } + else + { + return {}; + } + break; + default: + return {}; + } + + return ConstantPropagationResult(inst.RA, a, inst.Rc); +} + +ConstantPropagationResult +ConstantPropagation::EvaluateTable31SBIdenticalRegisters(UGeckoInstruction inst) const +{ + u32 a; + + switch (inst.SUBOP10) + { + case 60: // andcx + a = 0; + break; + case 284: // eqvx + a = 0xFFFFFFFF; + break; + case 316: // xorx + a = 0; + break; + case 412: // orcx + a = 0xFFFFFFFF; + break; + default: + return {}; + } + + return ConstantPropagationResult(inst.RA, a, inst.Rc); +} + +void ConstantPropagation::Apply(ConstantPropagationResult result) +{ + if (result.gpr >= 0) + SetGPR(result.gpr, result.gpr_value); +} + +} // namespace JitCommon diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h new file mode 100644 index 00000000000..ffbf543ed6d --- /dev/null +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h @@ -0,0 +1,113 @@ +// Copyright 2023 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "Common/BitSet.h" +#include "Common/CommonTypes.h" +#include "Core/PowerPC/PowerPC.h" + +#include +#include +#include + +namespace JitCommon +{ +struct ConstantPropagationResult final +{ + constexpr ConstantPropagationResult() = default; + + constexpr ConstantPropagationResult(s8 gpr_, u32 gpr_value_, bool compute_rc_ = false) + : gpr_value(gpr_value_), gpr(gpr_), instruction_fully_executed(true), compute_rc(compute_rc_) + { + } + + // If gpr is non-negative, this is the value the instruction writes to that GPR. + u32 gpr_value = 0; + + // If the instruction couldn't be evaluated or doesn't output to a GPR, this is -1. + // Otherwise, this is the GPR that the instruction writes to. 
+  s8 gpr = -1;
+
+  // Whether the instruction was able to be fully evaluated with no side effects unaccounted for,
+  // or in other words, whether the JIT can skip emitting code for this instruction.
+  bool instruction_fully_executed = false;
+
+  // If true, CR0 needs to be set based on gpr_value.
+  bool compute_rc = false;
+
+  // If not std::nullopt, the instruction writes this to the carry flag.
+  std::optional<bool> carry = std::nullopt;
+
+  // If not std::nullopt, the instruction writes this to the overflow flag.
+  std::optional<bool> overflow = std::nullopt;
+};
+
+class ConstantPropagation final
+{
+public:
+  ConstantPropagationResult EvaluateInstruction(UGeckoInstruction inst, u64 flags) const;
+
+  void Apply(ConstantPropagationResult result);
+
+  template <typename... Args>
+  bool HasGPR(Args... gprs) const
+  {
+    return HasGPRs(BitSet32{static_cast<int>(gprs)...});
+  }
+
+  bool HasGPRs(BitSet32 gprs) const { return (m_gpr_values_known & gprs) == gprs; }
+
+  u32 GetGPR(size_t gpr) const { return m_gpr_values[gpr]; }
+
+  void SetGPR(size_t gpr, u32 value)
+  {
+    m_gpr_values_known[gpr] = true;
+    m_gpr_values[gpr] = value;
+  }
+
+  template <typename... Args>
+  void ClearGPR(Args... gprs)
+  {
+    ClearGPRs(BitSet32{static_cast<int>(gprs)...});
+  }
+
+  void ClearGPRs(BitSet32 gprs) { m_gpr_values_known &= ~gprs; }
+
+  void Clear() { m_gpr_values_known = BitSet32{}; }
+
+private:
+  ConstantPropagationResult EvaluateMulImm(UGeckoInstruction inst) const;
+  ConstantPropagationResult EvaluateSubImmCarry(UGeckoInstruction inst) const;
+  ConstantPropagationResult EvaluateAddImm(UGeckoInstruction inst) const;
+  ConstantPropagationResult EvaluateAddImmCarry(UGeckoInstruction inst) const;
+  ConstantPropagationResult EvaluateRlwimix(UGeckoInstruction inst) const;
+  ConstantPropagationResult EvaluateRlwinmxRlwnmx(UGeckoInstruction inst, u32 shift) const;
+  ConstantPropagationResult EvaluateBitwiseImm(UGeckoInstruction inst,
+                                               u32 (*do_op)(u32, u32)) const;
+  ConstantPropagationResult EvaluateTable31(UGeckoInstruction inst, u64 flags) const;
+  ConstantPropagationResult EvaluateTable31Negx(UGeckoInstruction inst, u64 flags) const;
+  ConstantPropagationResult EvaluateTable31S(UGeckoInstruction inst) const;
+  ConstantPropagationResult EvaluateTable31AB(UGeckoInstruction inst, u64 flags) const;
+  ConstantPropagationResult EvaluateTable31ABOneRegisterKnown(UGeckoInstruction inst, u64 flags,
+                                                              u32 value, bool known_reg_is_b) const;
+  ConstantPropagationResult EvaluateTable31ABIdenticalRegisters(UGeckoInstruction inst,
+                                                                u64 flags) const;
+  ConstantPropagationResult EvaluateTable31SB(UGeckoInstruction inst) const;
+  ConstantPropagationResult EvaluateTable31SBOneRegisterKnown(UGeckoInstruction inst, u32 value,
+                                                              bool known_reg_is_b) const;
+  ConstantPropagationResult EvaluateTable31SBIdenticalRegisters(UGeckoInstruction inst) const;
+
+  static constexpr ConstantPropagationResult DO_NOTHING = [] {
+    ConstantPropagationResult result;
+    result.instruction_fully_executed = true;
+    return result;
+  }();
+
+  static constexpr size_t GPR_COUNT = 32;
+
+  std::array<u32, GPR_COUNT> m_gpr_values;
+  BitSet32 m_gpr_values_known{};
+};
+
+} // namespace JitCommon
diff --git 
a/Source/Core/DolphinLib.props b/Source/Core/DolphinLib.props index 17e44675ac2..3cfac5838c4 100644 --- a/Source/Core/DolphinLib.props +++ b/Source/Core/DolphinLib.props @@ -455,6 +455,7 @@ + @@ -1139,6 +1140,7 @@ +