mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2026-02-04 21:45:59 +00:00
JitArm64: Reduce register pressure for inaccurate FMA with accurate NaNs
If result_reg is already set to a temporary register instead of VD because of accurate NaNs, the inaccurate FMA path can reuse that same temporary, so there's no need to allocate a second temporary register for it.
This commit is contained in:
parent
84261cfc23
commit
caad84c636
@@ -103,7 +103,6 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
|
||||
|
||||
{
|
||||
Arm64FPRCache::ScopedARM64Reg V0Q = ARM64Reg::INVALID_REG;
|
||||
Arm64FPRCache::ScopedARM64Reg V1Q = ARM64Reg::INVALID_REG;
|
||||
|
||||
ARM64Reg rounded_c_reg = VC;
|
||||
if (round_c)
|
||||
@@ -115,21 +114,22 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
|
||||
Force25BitPrecision(rounded_c_reg, VC);
|
||||
}
|
||||
|
||||
ARM64Reg inaccurate_fma_reg = VD;
|
||||
if (fma && inaccurate_fma && VD == VB)
|
||||
{
|
||||
if (V0Q == ARM64Reg::INVALID_REG)
|
||||
V0Q = fpr.GetScopedReg();
|
||||
inaccurate_fma_reg = reg_encoder(V0Q);
|
||||
}
|
||||
|
||||
ARM64Reg result_reg = VD;
|
||||
ARM64Reg inaccurate_fma_reg = VD;
|
||||
const bool preserve_d =
|
||||
m_accurate_nans && (VD == VA || (use_b && VD == VB) || (use_c && VD == VC));
|
||||
if (preserve_d)
|
||||
{
|
||||
V1Q = fpr.GetScopedReg();
|
||||
result_reg = reg_encoder(V1Q);
|
||||
if (V0Q == ARM64Reg::INVALID_REG)
|
||||
V0Q = fpr.GetScopedReg();
|
||||
result_reg = reg_encoder(V0Q);
|
||||
inaccurate_fma_reg = reg_encoder(V0Q);
|
||||
}
|
||||
else if (fma && inaccurate_fma && VD == VB)
|
||||
{
|
||||
if (V0Q == ARM64Reg::INVALID_REG)
|
||||
V0Q = fpr.GetScopedReg();
|
||||
inaccurate_fma_reg = reg_encoder(V0Q);
|
||||
}
|
||||
|
||||
switch (op5)
|
||||
|
||||
@@ -120,40 +120,48 @@ void JitArm64::ps_arith(UGeckoInstruction inst)
|
||||
if (round_c)
|
||||
{
|
||||
ASSERT_MSG(DYNA_REC, !single, "Tried to apply 25-bit precision to single");
|
||||
|
||||
V0Q = fpr.GetScopedReg();
|
||||
rounded_c_reg = reg_encoder(V0Q);
|
||||
Force25BitPrecision(rounded_c_reg, VC);
|
||||
}
|
||||
|
||||
ARM64Reg inaccurate_fma_reg = VD;
|
||||
if (fma && inaccurate_fma && VD == VB)
|
||||
{
|
||||
if (V0Q == ARM64Reg::INVALID_REG)
|
||||
V0Q = fpr.GetScopedReg();
|
||||
inaccurate_fma_reg = reg_encoder(V0Q);
|
||||
}
|
||||
|
||||
ARM64Reg result_reg = VD;
|
||||
ARM64Reg inaccurate_fma_reg = VD;
|
||||
const bool need_accurate_fma_reg =
|
||||
fma && !inaccurate_fma && (msub || VD != VB) && (VD == VA || VD == rounded_c_reg);
|
||||
const bool preserve_d =
|
||||
m_accurate_nans && (VD == VA || (use_b && VD == VB) || (use_c && VD == VC));
|
||||
if (need_accurate_fma_reg || preserve_d)
|
||||
{
|
||||
V1Q = fpr.GetScopedReg();
|
||||
result_reg = reg_encoder(V1Q);
|
||||
if (V0Q == ARM64Reg::INVALID_REG)
|
||||
V0Q = fpr.GetScopedReg();
|
||||
result_reg = reg_encoder(V0Q);
|
||||
inaccurate_fma_reg = reg_encoder(V0Q);
|
||||
|
||||
if (need_accurate_fma_reg && round_c)
|
||||
{
|
||||
V1Q = fpr.GetScopedReg();
|
||||
rounded_c_reg = reg_encoder(V1Q);
|
||||
}
|
||||
}
|
||||
else if (fma && inaccurate_fma && VD == VB)
|
||||
{
|
||||
if (V0Q == ARM64Reg::INVALID_REG)
|
||||
V0Q = fpr.GetScopedReg();
|
||||
inaccurate_fma_reg = reg_encoder(V0Q);
|
||||
}
|
||||
|
||||
if (m_accurate_nans)
|
||||
{
|
||||
if (V0Q == ARM64Reg::INVALID_REG)
|
||||
V0Q = fpr.GetScopedReg();
|
||||
if (V1Q == ARM64Reg::INVALID_REG)
|
||||
V1Q = fpr.GetScopedReg();
|
||||
|
||||
if (duplicated_c || VD == result_reg)
|
||||
V2Q = fpr.GetScopedReg();
|
||||
}
|
||||
|
||||
if (round_c)
|
||||
Force25BitPrecision(rounded_c_reg, VC);
|
||||
|
||||
switch (op5)
|
||||
{
|
||||
case 12: // ps_muls0: d = a * c.ps0
|
||||
@@ -235,8 +243,8 @@ void JitArm64::ps_arith(UGeckoInstruction inst)
|
||||
FixupBranch nan_fixup;
|
||||
if (m_accurate_nans)
|
||||
{
|
||||
const ARM64Reg nan_temp_reg = single ? EncodeRegToSingle(V0Q) : EncodeRegToDouble(V0Q);
|
||||
const ARM64Reg nan_temp_reg_paired = reg_encoder(V0Q);
|
||||
const ARM64Reg nan_temp_reg = single ? EncodeRegToSingle(V1Q) : EncodeRegToDouble(V1Q);
|
||||
const ARM64Reg nan_temp_reg_paired = reg_encoder(V1Q);
|
||||
|
||||
// Check if we need to handle NaNs
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user