JitArm64: Always use double precision for inaccurate FMA

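When we're emulating single-precision FMA using an FMA instruction,
there's no precision benefit from using a double-precision instruction,
assuming all inputs are single-precision. But when we're emulating
single-precision FMA using separate multiplication and addition
instructions, there is a benefit: the product of two single-precision
values is exactly representable in double precision, so the
multiplication step no longer introduces an intermediate rounding error
before the addition.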

This change increases the precision of inaccurate FMA to the same level
as Jit64, which matters since the only reason we have the inaccurate
FMA mode is for sync compatibility with Jit64.
This commit is contained in:
JosJuice 2025-09-28 19:20:07 +02:00
parent 373e35ed5b
commit addededecf
2 changed files with 20 additions and 20 deletions

View File

@ -75,26 +75,25 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
const u32 d = inst.FD;
const u32 op5 = inst.SUBOP5;
const bool use_c = op5 >= 25; // fmul and all kind of fmaddXX
const bool use_c = op5 >= 25; // fmul and all kinds of fmaddXX
const bool use_b = op5 != 25; // fmul uses no B
const bool fma = use_b && use_c;
const bool negate_result = (op5 & ~0x1) == 30;
const bool output_is_single = inst.OPCD == 59;
const bool inaccurate_fma = op5 > 25 && !Config::Get(Config::SESSION_USE_FMA);
const bool round_c = use_c && output_is_single && !js.op->fprIsSingle[inst.FC];
const bool inaccurate_fma = fma && !Config::Get(Config::SESSION_USE_FMA);
const bool round_c = use_c && output_is_single && !js.op->fprIsSingle[c];
const auto inputs_are_singles_func = [&] {
return fpr.IsSingle(a, true) && (!use_b || fpr.IsSingle(b, true)) &&
(!use_c || fpr.IsSingle(c, true));
};
const bool inputs_are_singles = inputs_are_singles_func();
const bool single = inputs_are_singles && output_is_single;
const bool single = inputs_are_singles_func() && output_is_single && !inaccurate_fma;
const RegType type = single ? RegType::LowerPairSingle : RegType::LowerPair;
const RegType type_out =
output_is_single ? (inputs_are_singles ? RegType::DuplicatedSingle : RegType::Duplicated) :
RegType::LowerPair;
const RegType type_out = output_is_single ?
(single ? RegType::DuplicatedSingle : RegType::Duplicated) :
RegType::LowerPair;
const auto reg_encoder = single ? EncodeRegToSingle : EncodeRegToDouble;
const ARM64Reg VA = reg_encoder(fpr.R(a, type));
@ -109,7 +108,7 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
ARM64Reg rounded_c_reg = VC;
if (round_c)
{
ASSERT_MSG(DYNA_REC, !inputs_are_singles, "Tried to apply 25-bit precision to single");
ASSERT_MSG(DYNA_REC, !single, "Tried to apply 25-bit precision to single");
V0Q = fpr.GetScopedReg();
rounded_c_reg = reg_encoder(V0Q);
@ -249,7 +248,7 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
if (output_is_single)
{
ASSERT_MSG(DYNA_REC, inputs_are_singles == inputs_are_singles_func(),
ASSERT_MSG(DYNA_REC, single == inputs_are_singles_func(),
"Register allocation turned singles into doubles in the middle of fp_arith");
fpr.FixSinglePrecision(d);

View File

@ -94,16 +94,17 @@ void JitArm64::ps_arith(UGeckoInstruction inst)
const bool negate_result = (op5 & ~0x1) == 30;
const bool msub = op5 == 28 || op5 == 30;
const auto singles_func = [&] {
const bool inaccurate_fma = fma && !Config::Get(Config::SESSION_USE_FMA);
const bool round_c = use_c && !js.op->fprIsSingle[c];
const auto inputs_are_singles_func = [&] {
return fpr.IsSingle(a) && (!use_b || fpr.IsSingle(b)) && (!use_c || fpr.IsSingle(c));
};
const bool singles = singles_func();
const bool inaccurate_fma = !Config::Get(Config::SESSION_USE_FMA);
const bool round_c = use_c && !js.op->fprIsSingle[inst.FC];
const RegType type = singles ? RegType::Single : RegType::Register;
const u8 size = singles ? 32 : 64;
const auto reg_encoder = singles ? EncodeRegToDouble : EncodeRegToQuad;
const bool single = inputs_are_singles_func() && !inaccurate_fma;
const RegType type = single ? RegType::Single : RegType::Register;
const u8 size = single ? 32 : 64;
const auto reg_encoder = single ? EncodeRegToDouble : EncodeRegToQuad;
const ARM64Reg VA = reg_encoder(fpr.R(a, type));
const ARM64Reg VB = use_b ? reg_encoder(fpr.R(b, type)) : ARM64Reg::INVALID_REG;
@ -118,7 +119,7 @@ void JitArm64::ps_arith(UGeckoInstruction inst)
ARM64Reg rounded_c_reg = VC;
if (round_c)
{
ASSERT_MSG(DYNA_REC, !singles, "Tried to apply 25-bit precision to single");
ASSERT_MSG(DYNA_REC, !single, "Tried to apply 25-bit precision to single");
V0Q = fpr.GetScopedReg();
rounded_c_reg = reg_encoder(V0Q);
@ -234,7 +235,7 @@ void JitArm64::ps_arith(UGeckoInstruction inst)
FixupBranch nan_fixup;
if (m_accurate_nans)
{
const ARM64Reg nan_temp_reg = singles ? EncodeRegToSingle(V0Q) : EncodeRegToDouble(V0Q);
const ARM64Reg nan_temp_reg = single ? EncodeRegToSingle(V0Q) : EncodeRegToDouble(V0Q);
const ARM64Reg nan_temp_reg_paired = reg_encoder(V0Q);
// Check if we need to handle NaNs
@ -300,7 +301,7 @@ void JitArm64::ps_arith(UGeckoInstruction inst)
SetJumpTarget(nan_fixup);
}
ASSERT_MSG(DYNA_REC, singles == singles_func(),
ASSERT_MSG(DYNA_REC, single == inputs_are_singles_func(),
"Register allocation turned singles into doubles in the middle of ps_arith");
fpr.FixSinglePrecision(d);