From addededecf65f6c7531930f1d345d86bab772aca Mon Sep 17 00:00:00 2001
From: JosJuice
Date: Sun, 28 Sep 2025 19:20:07 +0200
Subject: [PATCH] JitArm64: Always use double precision for inaccurate FMA

When we're emulating single-precision FMA using an FMA instruction,
there's no precision benefit from using a double-precision instruction,
assuming all inputs are single-precision. But when we're emulating
single-precision FMA using separate multiplication and addition
instructions, there is.

This change increases the precision of inaccurate FMA to the same level
as Jit64, which matters since the only reason we have the inaccurate FMA
mode is for sync compatibility with Jit64.
---
 .../JitArm64/JitArm64_FloatingPoint.cpp | 19 ++++++++---------
 .../Core/PowerPC/JitArm64/JitArm64_Paired.cpp | 21 ++++++++++---------
 2 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp
index 4f66b947026..e03cb50c2f6 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp
@@ -75,26 +75,25 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
   const u32 d = inst.FD;
   const u32 op5 = inst.SUBOP5;
 
-  const bool use_c = op5 >= 25;  // fmul and all kind of fmaddXX
+  const bool use_c = op5 >= 25;  // fmul and all kinds of fmaddXX
   const bool use_b = op5 != 25;  // fmul uses no B
   const bool fma = use_b && use_c;
   const bool negate_result = (op5 & ~0x1) == 30;
 
   const bool output_is_single = inst.OPCD == 59;
-  const bool inaccurate_fma = op5 > 25 && !Config::Get(Config::SESSION_USE_FMA);
-  const bool round_c = use_c && output_is_single && !js.op->fprIsSingle[inst.FC];
+  const bool inaccurate_fma = fma && !Config::Get(Config::SESSION_USE_FMA);
+  const bool round_c = use_c && output_is_single && !js.op->fprIsSingle[c];
 
   const auto inputs_are_singles_func = [&] {
     return fpr.IsSingle(a, true) && (!use_b || fpr.IsSingle(b, true)) &&
            (!use_c || fpr.IsSingle(c, true));
   };
-  const bool inputs_are_singles = inputs_are_singles_func();
 
-  const bool single = inputs_are_singles && output_is_single;
+  const bool single = inputs_are_singles_func() && output_is_single && !inaccurate_fma;
   const RegType type = single ? RegType::LowerPairSingle : RegType::LowerPair;
-  const RegType type_out =
-      output_is_single ? (inputs_are_singles ? RegType::DuplicatedSingle : RegType::Duplicated) :
-                         RegType::LowerPair;
+  const RegType type_out = output_is_single ?
+                               (single ? RegType::DuplicatedSingle : RegType::Duplicated) :
+                               RegType::LowerPair;
   const auto reg_encoder = single ? EncodeRegToSingle : EncodeRegToDouble;
 
   const ARM64Reg VA = reg_encoder(fpr.R(a, type));
@@ -109,7 +108,7 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
   ARM64Reg rounded_c_reg = VC;
   if (round_c)
   {
-    ASSERT_MSG(DYNA_REC, !inputs_are_singles, "Tried to apply 25-bit precision to single");
+    ASSERT_MSG(DYNA_REC, !single, "Tried to apply 25-bit precision to single");
 
     V0Q = fpr.GetScopedReg();
     rounded_c_reg = reg_encoder(V0Q);
@@ -249,7 +248,7 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
 
   if (output_is_single)
   {
-    ASSERT_MSG(DYNA_REC, inputs_are_singles == inputs_are_singles_func(),
+    ASSERT_MSG(DYNA_REC, single == inputs_are_singles_func(),
                "Register allocation turned singles into doubles in the middle of fp_arith");
 
     fpr.FixSinglePrecision(d);
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp
index 283944dd8bf..e1f3f096629 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp
@@ -94,16 +94,17 @@ void JitArm64::ps_arith(UGeckoInstruction inst)
   const bool negate_result = (op5 & ~0x1) == 30;
   const bool msub = op5 == 28 || op5 == 30;
 
-  const auto singles_func = [&] {
+  const bool inaccurate_fma = fma && !Config::Get(Config::SESSION_USE_FMA);
+  const bool round_c = use_c && !js.op->fprIsSingle[c];
+
+  const auto inputs_are_singles_func = [&] {
     return fpr.IsSingle(a) && (!use_b || fpr.IsSingle(b)) && (!use_c || fpr.IsSingle(c));
   };
-  const bool singles = singles_func();
 
-  const bool inaccurate_fma = !Config::Get(Config::SESSION_USE_FMA);
-  const bool round_c = use_c && !js.op->fprIsSingle[inst.FC];
-  const RegType type = singles ? RegType::Single : RegType::Register;
-  const u8 size = singles ? 32 : 64;
-  const auto reg_encoder = singles ? EncodeRegToDouble : EncodeRegToQuad;
+  const bool single = inputs_are_singles_func() && !inaccurate_fma;
+  const RegType type = single ? RegType::Single : RegType::Register;
+  const u8 size = single ? 32 : 64;
+  const auto reg_encoder = single ? EncodeRegToDouble : EncodeRegToQuad;
 
   const ARM64Reg VA = reg_encoder(fpr.R(a, type));
   const ARM64Reg VB = use_b ? reg_encoder(fpr.R(b, type)) : ARM64Reg::INVALID_REG;
@@ -118,7 +119,7 @@ void JitArm64::ps_arith(UGeckoInstruction inst)
   ARM64Reg rounded_c_reg = VC;
   if (round_c)
   {
-    ASSERT_MSG(DYNA_REC, !singles, "Tried to apply 25-bit precision to single");
+    ASSERT_MSG(DYNA_REC, !single, "Tried to apply 25-bit precision to single");
 
     V0Q = fpr.GetScopedReg();
     rounded_c_reg = reg_encoder(V0Q);
@@ -234,7 +235,7 @@ void JitArm64::ps_arith(UGeckoInstruction inst)
   FixupBranch nan_fixup;
   if (m_accurate_nans)
   {
-    const ARM64Reg nan_temp_reg = singles ? EncodeRegToSingle(V0Q) : EncodeRegToDouble(V0Q);
+    const ARM64Reg nan_temp_reg = single ? EncodeRegToSingle(V0Q) : EncodeRegToDouble(V0Q);
     const ARM64Reg nan_temp_reg_paired = reg_encoder(V0Q);
 
     // Check if we need to handle NaNs
@@ -300,7 +301,7 @@ void JitArm64::ps_arith(UGeckoInstruction inst)
     SetJumpTarget(nan_fixup);
   }
 
-  ASSERT_MSG(DYNA_REC, singles == singles_func(),
+  ASSERT_MSG(DYNA_REC, single == inputs_are_singles_func(),
              "Register allocation turned singles into doubles in the middle of ps_arith");
 
   fpr.FixSinglePrecision(d);