From f9601dc38c78151fe1a8f95128a07fc821e50664 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Tue, 22 Aug 2023 17:44:35 +0200 Subject: [PATCH] Jit: Extract immediate handling to separate ConstantPropagation class Restructuring things in this way brings two immediate benefits: * Code is deduplicated between Jit64 and JitArm64. * Materializing an immediate value in a register no longer results in us forgetting what the immediate value was. As a more long-term benefit, this lets us also run constant propagation as part of PPCAnalyst, which could let us do cool stuff in the future like statically determining whether a conditional branch will be taken. But I have nothing concrete planned for that right now. --- Source/Core/Core/CMakeLists.txt | 2 + Source/Core/Core/PowerPC/Jit64/Jit.cpp | 60 ++++++++++--- Source/Core/Core/PowerPC/Jit64/Jit.h | 3 + Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 37 +++++++- Source/Core/Core/PowerPC/JitArm64/Jit.h | 3 + .../PowerPC/JitCommon/ConstantPropagation.cpp | 19 ++++ .../PowerPC/JitCommon/ConstantPropagation.h | 86 +++++++++++++++++++ Source/Core/DolphinLib.props | 2 + 8 files changed, 199 insertions(+), 13 deletions(-) create mode 100644 Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp create mode 100644 Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt index c898c8f693e..7fc503e2f97 100644 --- a/Source/Core/Core/CMakeLists.txt +++ b/Source/Core/Core/CMakeLists.txt @@ -508,6 +508,8 @@ add_library(core PowerPC/Interpreter/Interpreter_Tables.cpp PowerPC/Interpreter/Interpreter.cpp PowerPC/Interpreter/Interpreter.h + PowerPC/JitCommon/ConstantPropagation.cpp + PowerPC/JitCommon/ConstantPropagation.h PowerPC/JitCommon/DivUtils.cpp PowerPC/JitCommon/DivUtils.h PowerPC/JitCommon/JitAsmCommon.cpp diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index 377b4388fb4..6c161bd8b4f 100644 --- 
a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -42,6 +42,7 @@ #include "Core/PowerPC/Jit64Common/Jit64Constants.h" #include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h" #include "Core/PowerPC/Jit64Common/TrampolineCache.h" +#include "Core/PowerPC/JitCommon/ConstantPropagation.h" #include "Core/PowerPC/JitInterface.h" #include "Core/PowerPC/MMU.h" #include "Core/PowerPC/PPCAnalyst.h" @@ -921,6 +922,8 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) gpr.Start(); fpr.Start(); + m_constant_propagation.Clear(); + js.downcountAmount = 0; js.skipInstructions = 0; js.carryFlag = CarryFlag::InPPCState; @@ -1105,21 +1108,56 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) { gpr.Flush(); fpr.Flush(); + m_constant_propagation.Clear(); + + CompileInstruction(op); } else { - // If we have an input register that is going to be used again, load it pre-emptively, - // even if the instruction doesn't strictly need it in a register, to avoid redundant - // loads later. Of course, don't do this if we're already out of registers. - // As a bit of a heuristic, make sure we have at least one register left over for the - // output, which needs to be bound in the actual instruction compilation. - // TODO: make this smarter in the case that we're actually register-starved, i.e. - // prioritize the more important registers. - gpr.PreloadRegisters(op.regsIn & op.gprInUse & ~op.gprDiscardable); - fpr.PreloadRegisters(op.fregsIn & op.fprInXmm & ~op.fprDiscardable); - } + const JitCommon::ConstantPropagationResult constant_propagation_result = + m_constant_propagation.EvaluateInstruction(op.inst); - CompileInstruction(op); + if (!constant_propagation_result.instruction_fully_executed) + { + if (!bJITRegisterCacheOff) + { + // If we have an input register that is going to be used again, load it pre-emptively, + // even if the instruction doesn't strictly need it in a register, to avoid redundant + // loads later. 
Of course, don't do this if we're already out of registers. + // As a bit of a heuristic, make sure we have at least one register left over for the + // output, which needs to be bound in the actual instruction compilation. + // TODO: make this smarter in the case that we're actually register-starved, i.e. + // prioritize the more important registers. + gpr.PreloadRegisters(op.regsIn & op.gprInUse & ~op.gprDiscardable); + fpr.PreloadRegisters(op.fregsIn & op.fprInXmm & ~op.fprDiscardable); + } + + CompileInstruction(op); + + m_constant_propagation.ClearGPRs(op.regsOut); + } + + m_constant_propagation.Apply(constant_propagation_result); + + if (constant_propagation_result.gpr >= 0) + { + gpr.SetImmediate32(constant_propagation_result.gpr, + constant_propagation_result.gpr_value); + } + + if (constant_propagation_result.instruction_fully_executed) + { + if (constant_propagation_result.carry) + FinalizeCarry(*constant_propagation_result.carry); + + if (constant_propagation_result.overflow) + GenerateConstantOverflow(*constant_propagation_result.overflow); + + // FinalizeImmediateRC is called last, because it may trigger branch merging + if (constant_propagation_result.compute_rc) + FinalizeImmediateRC(constant_propagation_result.gpr_value); + } + } js.fpr_is_store_safe = op.fprIsStoreSafeAfterInst; diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index 5ce409cbaf1..189f0c2b4bb 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -31,6 +31,7 @@ #include "Core/PowerPC/Jit64Common/BlockCache.h" #include "Core/PowerPC/Jit64Common/Jit64AsmCommon.h" #include "Core/PowerPC/Jit64Common/TrampolineCache.h" +#include "Core/PowerPC/JitCommon/ConstantPropagation.h" #include "Core/PowerPC/JitCommon/JitBase.h" #include "Core/PowerPC/JitCommon/JitCache.h" @@ -289,6 +290,8 @@ private: GPRRegCache gpr{*this}; FPURegCache fpr{*this}; + JitCommon::ConstantPropagation m_constant_propagation; + 
Jit64AsmRoutineManager asm_routines{*this}; HyoutaUtilities::RangeSizeSet m_free_ranges_near; diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 1eda45c58e3..f968ef5bdf1 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -33,6 +33,7 @@ #include "Core/PatchEngine.h" #include "Core/PowerPC/Interpreter/Interpreter.h" #include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" +#include "Core/PowerPC/JitCommon/ConstantPropagation.h" #include "Core/PowerPC/JitInterface.h" #include "Core/PowerPC/PowerPC.h" #include "Core/System.h" @@ -1169,6 +1170,8 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) gpr.Start(js.gpa); fpr.Start(js.fpa); + m_constant_propagation.Clear(); + if (!js.noSpeculativeConstantsAddresses.contains(js.blockStart)) { IntializeSpeculativeConstants(); @@ -1341,9 +1344,39 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) FlushCarry(); gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); - } + m_constant_propagation.Clear(); - CompileInstruction(op); + CompileInstruction(op); + } + else + { + const JitCommon::ConstantPropagationResult constant_propagation_result = + m_constant_propagation.EvaluateInstruction(op.inst); + + if (!constant_propagation_result.instruction_fully_executed) + { + CompileInstruction(op); + + m_constant_propagation.ClearGPRs(op.regsOut); + } + + m_constant_propagation.Apply(constant_propagation_result); + + if (constant_propagation_result.gpr >= 0) + gpr.SetImmediate(constant_propagation_result.gpr, constant_propagation_result.gpr_value); + + if (constant_propagation_result.instruction_fully_executed) + { + if (constant_propagation_result.carry) + ComputeCarry(*constant_propagation_result.carry); + + if (constant_propagation_result.overflow) + GenerateConstantOverflow(*constant_propagation_result.overflow); + + if (constant_propagation_result.compute_rc) + 
ComputeRC0(constant_propagation_result.gpr_value); + } + } js.fpr_is_store_safe = op.fprIsStoreSafeAfterInst; diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 2d14d634dfa..df44b9b6793 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -16,6 +16,7 @@ #include "Core/PowerPC/JitArm64/JitArm64Cache.h" #include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" #include "Core/PowerPC/JitArmCommon/BackPatch.h" +#include "Core/PowerPC/JitCommon/ConstantPropagation.h" #include "Core/PowerPC/JitCommon/JitAsmCommon.h" #include "Core/PowerPC/JitCommon/JitBase.h" #include "Core/PowerPC/PPCAnalyst.h" @@ -397,6 +398,8 @@ protected: Arm64GPRCache gpr; Arm64FPRCache fpr; + JitCommon::ConstantPropagation m_constant_propagation; + JitArm64BlockCache blocks{*this}; Arm64Gen::ARM64FloatEmitter m_float_emit; diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp new file mode 100644 index 00000000000..b4afeed5b7d --- /dev/null +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp @@ -0,0 +1,19 @@ +// Copyright 2023 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "Core/PowerPC/JitCommon/ConstantPropagation.h" + +namespace JitCommon +{ +ConstantPropagationResult ConstantPropagation::EvaluateInstruction(UGeckoInstruction inst) const +{ + return {}; +} + +void ConstantPropagation::Apply(ConstantPropagationResult result) +{ + if (result.gpr >= 0) + SetGPR(result.gpr, result.gpr_value); +} + +} // namespace JitCommon diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h new file mode 100644 index 00000000000..2a24b9e7105 --- /dev/null +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h @@ -0,0 +1,86 @@ +// Copyright 2023 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later 
+ +#pragma once + +#include "Common/BitSet.h" +#include "Common/CommonTypes.h" +#include "Core/PowerPC/PowerPC.h" + +#include <array> +#include <cstddef> +#include <optional> + +namespace JitCommon +{ +struct ConstantPropagationResult final +{ + constexpr ConstantPropagationResult() = default; + + constexpr ConstantPropagationResult(s8 gpr_, u32 gpr_value_, bool compute_rc_ = false) + : gpr_value(gpr_value_), gpr(gpr_), instruction_fully_executed(true), compute_rc(compute_rc_) + { + } + + // If gpr is non-negative, this is the value the instruction writes to that GPR. + u32 gpr_value = 0; + + // If the instruction couldn't be evaluated or doesn't output to a GPR, this is -1. + // Otherwise, this is the GPR that the instruction writes to. + s8 gpr = -1; + + // Whether the instruction was able to be fully evaluated with no side effects unaccounted for, + // or in other words, whether the JIT can skip emitting code for this instruction. + bool instruction_fully_executed = false; + + // If true, CR0 needs to be set based on gpr_value. + bool compute_rc = false; + + // If not std::nullopt, the instruction writes this to the carry flag. + std::optional<bool> carry = std::nullopt; + + // If not std::nullopt, the instruction writes this to the overflow flag. + std::optional<bool> overflow = std::nullopt; +}; + +class ConstantPropagation final +{ +public: + ConstantPropagationResult EvaluateInstruction(UGeckoInstruction inst) const; + + void Apply(ConstantPropagationResult result); + + template <typename... Args> + bool HasGPR(Args... gprs) const + { + return HasGPRs(BitSet32{static_cast<int>(gprs)...}); + } + + bool HasGPRs(BitSet32 gprs) const { return (m_gpr_values_known & gprs) == gprs; } + + u32 GetGPR(size_t gpr) const { return m_gpr_values[gpr]; } + + void SetGPR(size_t gpr, u32 value) + { + m_gpr_values_known[gpr] = true; + m_gpr_values[gpr] = value; + } + + template <typename... Args> + void ClearGPR(Args... 
gprs) + { + ClearGPRs(BitSet32{static_cast<int>(gprs)...}); + } + + void ClearGPRs(BitSet32 gprs) { m_gpr_values_known &= ~gprs; } + + void Clear() { m_gpr_values_known = BitSet32{}; } + +private: + static constexpr size_t GPR_COUNT = 32; + + std::array<u32, GPR_COUNT> m_gpr_values; + BitSet32 m_gpr_values_known{}; +}; + +} // namespace JitCommon diff --git a/Source/Core/DolphinLib.props b/Source/Core/DolphinLib.props index 17e44675ac2..3cfac5838c4 100644 --- a/Source/Core/DolphinLib.props +++ b/Source/Core/DolphinLib.props @@ -455,6 +455,7 @@ + @@ -1139,6 +1140,7 @@ +