From 0505ade96b6b2f536ded090658c6e39ac1e71429 Mon Sep 17 00:00:00 2001 From: GitHubProUser67 <127040195+GitHubProUser67@users.noreply.github.com> Date: Sun, 20 Apr 2025 11:54:44 +0200 Subject: [PATCH 1/4] This commit implements software floating-point support in PCSX2's interpreters.nit specification. This work is a combination of several efforts and research done prior. Credits: - https://www.gregorygaines.com/blog/emulating-ps2-floating-point-nums-ieee-754-diffs-part-1/ - https://github.com/GitHubProUser67/MultiServer3/tree/main/BackendServices/PS2FloatLibrary - https://github.com/Goatman13/pcsx2/tree/accurate_int_add_sub - PCSX2 Team for their help and support in this massive journey. Fixes Codacy warnings. --- common/BitUtils.h | 18 + pcsx2-qt/Settings/AdvancedSettingsWidget.cpp | 10 + pcsx2-qt/Settings/AdvancedSettingsWidget.ui | 679 +++++++++++++ pcsx2/CMakeLists.txt | 2 + pcsx2/Config.h | 23 + pcsx2/FPU.cpp | 332 +++++-- pcsx2/PS2Float.cpp | 969 +++++++++++++++++++ pcsx2/PS2Float.h | 123 +++ pcsx2/Pcsx2Config.cpp | 13 + pcsx2/VU.h | 4 +- pcsx2/VUflags.cpp | 51 +- pcsx2/VUflags.h | 6 + pcsx2/VUops.cpp | 702 ++++++++++---- pcsx2/pcsx2.vcxproj | 2 + pcsx2/pcsx2.vcxproj.filters | 8 + 15 files changed, 2690 insertions(+), 252 deletions(-) create mode 100644 pcsx2/PS2Float.cpp create mode 100644 pcsx2/PS2Float.h diff --git a/common/BitUtils.h b/common/BitUtils.h index 4d12ba2b07..536b6a5f16 100644 --- a/common/BitUtils.h +++ b/common/BitUtils.h @@ -28,6 +28,19 @@ static inline int _BitScanReverse(unsigned long* const Index, const unsigned lon namespace Common { + static constexpr s8 msb[256] = { + -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 
6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7}; + + static constexpr s32 normalizeAmounts[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16, 24, 24, 24, 24, 24, 24, 24}; + template static constexpr __fi bool IsAligned(T value, unsigned int alignment) { @@ -71,6 +84,11 @@ namespace Common return Common::AlignUpPow2(size, __pagesize); } + __fi static s32 BitScanReverse8(s32 b) + { + return msb[b]; + } + __fi static u32 CountLeadingSignBits(s32 n) { // If the sign bit is 1, we invert the bits to 0 for count-leading-zero. diff --git a/pcsx2-qt/Settings/AdvancedSettingsWidget.cpp b/pcsx2-qt/Settings/AdvancedSettingsWidget.cpp index 0aeb64ff30..be7e5bdb0a 100644 --- a/pcsx2-qt/Settings/AdvancedSettingsWidget.cpp +++ b/pcsx2-qt/Settings/AdvancedSettingsWidget.cpp @@ -46,6 +46,16 @@ AdvancedSettingsWidget::AdvancedSettingsWidget(SettingsWindow* settings_dialog, connect(m_ui.vu0ClampMode, QOverload::of(&QComboBox::currentIndexChanged), [this](int index) { setClampingMode(0, index); }); connect(m_ui.vu1ClampMode, QOverload::of(&QComboBox::currentIndexChanged), [this](int index) { setClampingMode(1, index); }); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.eeSoftAddSub, "EmuCore/CPU/Recompiler", "fpuSoftAddSub", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.eeSoftMulDiv, "EmuCore/CPU/Recompiler", "fpuSoftMulDiv", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.eeSoftSqrt, "EmuCore/CPU/Recompiler", "fpuSoftSqrt", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu0SoftAddSub, "EmuCore/CPU/Recompiler", 
"vu0SoftAddSub", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu0SoftMulDiv, "EmuCore/CPU/Recompiler", "vu0SoftMulDiv", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu0SoftSqrt, "EmuCore/CPU/Recompiler", "vu0SoftSqrt", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu1SoftAddSub, "EmuCore/CPU/Recompiler", "vu1SoftAddSub", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu1SoftMulDiv, "EmuCore/CPU/Recompiler", "vu1SoftMulDiv", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu1SoftSqrt, "EmuCore/CPU/Recompiler", "vu1SoftSqrt", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.iopRecompiler, "EmuCore/CPU/Recompiler", "EnableIOP", true); SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.gameFixes, "EmuCore", "EnableGameFixes", true); diff --git a/pcsx2-qt/Settings/AdvancedSettingsWidget.ui b/pcsx2-qt/Settings/AdvancedSettingsWidget.ui index 2ab29a2e92..0ff4431b92 100644 --- a/pcsx2-qt/Settings/AdvancedSettingsWidget.ui +++ b/pcsx2-qt/Settings/AdvancedSettingsWidget.ui @@ -19,6 +19,685 @@ true + + + + 0 + 0 + 790 + 1317 + + + + + 0 + + + 0 + + + 0 + + + + + Changing these options may cause games to become non-functional. Modify at your own risk, the PCSX2 team will not provide support for configurations with these settings changed. 
+ + + true + + + + + + + EmotionEngine (MIPS-IV) + + + + + + Rounding Mode: + + + + + + + + Nearest + + + + + Negative + + + + + Positive + + + + + Chop/Zero (Default) + + + + + + + + Clamping Mode: + + + + + + + + Nearest (Default) + + + + + Negative + + + + + Positive + + + + + Chop/Zero + + + + + + + + + + Wait Loop Detection + + + + + + + Enable Recompiler + + + + + + + Enable Fast Memory Access + + + + + + + Enable Cache (Slow) + + + + + + + INTC Spin Detection + + + + + + + Pause On TLB Miss + + + + + + + Enable 128MB RAM (Dev Console) + + + + + + + + + + None + + + + + Normal (Default) + + + + + Extra + Preserve Sign + + + + + Full + + + + + + + + Division Rounding Mode: + + + + + + + Software Float + + + + + + Multiplication/Division + + + + + + + Addition/Subtraction + + + + + + + Square Root + + + + + + + + + + + + + Vector Units (VU) + + + + + + VU1 Rounding Mode: + + + + + + + + Nearest + + + + + Negative + + + + + Positive + + + + + Chop/Zero (Default) + + + + + + + + VU1 Clamping Mode: + + + + + + + VU0 Rounding Mode: + + + + + + + VU1 Software Float + + + + + + Multiplication/Division + + + + + + + Addition/Subtraction + + + + + + + Float Square Root + + + + + + + + + + VU0 Software Float + + + + + + Multiplication/Division + + + + + + + Addition/Subtraction + + + + + + + Square Root + + + + + + + + + + + Nearest + + + + + Negative + + + + + Positive + + + + + Chop/Zero (Default) + + + + + + + + + None + + + + + Normal (Default) + + + + + Extra + + + + + Extra + Preserve Sign + + + + + + + + + + mVU Flag Hack + + + + + + + Enable VU1 Recompiler + + + + + + + Enable VU0 Recompiler (Micro Mode) + + + + + + + Enable Instant VU1 + + + + + + + + + VU0 Clamping Mode: + + + + + + + + None + + + + + Normal (Default) + + + + + Extra + + + + + Extra + Preserve Sign + + + + + + + + + + + I/O Processor (IOP, MIPS-I) + + + + + + Enable Recompiler + + + + + + + + + + Game Settings + + + + + + Enable Game Fixes + + + + + + + Enable Compatibility Patches + + + + + + 
+ + + + Savestate Settings + + + + + + Save State On Shutdown + + + + + + + + Low (Fast) + + + + + Medium (Recommended) + + + + + High + + + + + Very High (Slow, Not Recommended) + + + + + + + + + Uncompressed + + + + + Deflate64 + + + + + Zstandard + + + + + LZMA2 + + + + + + + + Compression Level: + + + + + + + Create Save State Backups + + + + + + + Compression Method: + + + + + + + Use Save State Selector + + + + + + + + + + Frame Rate Control + + + + + + hz + + + 10.000000000000000 + + + 300.000000000000000 + + + 0.010000000000000 + + + + + + + hz + + + 10.000000000000000 + + + 300.000000000000000 + + + 0.010000000000000 + + + + + + + PAL Frame Rate: + + + + + + + NTSC Frame Rate: + + + + + + + + + + PINE Settings + + + + + + + 0 + 0 + + + + + + + + + 0 + 0 + + + + Slot: + + + + + + + Enable + + + + + + + + + + Qt::Orientation::Vertical + + + + 20 + 3 + + + + + + diff --git a/pcsx2/CMakeLists.txt b/pcsx2/CMakeLists.txt index c66a27eeec..8c35a0db7e 100644 --- a/pcsx2/CMakeLists.txt +++ b/pcsx2/CMakeLists.txt @@ -93,6 +93,7 @@ set(pcsx2Sources MTGS.cpp MTVU.cpp Patch.cpp + PS2Float.cpp Pcsx2Config.cpp PerformanceMetrics.cpp PrecompiledHeader.cpp @@ -173,6 +174,7 @@ set(pcsx2Headers MTVU.h Memory.h MemoryTypes.h + PS2Float.h Patch.h PerformanceMetrics.h PrecompiledHeader.h diff --git a/pcsx2/Config.h b/pcsx2/Config.h index a447934501..c7b6440e50 100644 --- a/pcsx2/Config.h +++ b/pcsx2/Config.h @@ -621,17 +621,32 @@ struct Pcsx2Config vu0SignOverflow : 1, vu0Underflow : 1; + bool + vu0SoftAddSub : 1, + vu0SoftMulDiv : 1, + vu0SoftSqrt : 1; + bool vu1Overflow : 1, vu1ExtraOverflow : 1, vu1SignOverflow : 1, vu1Underflow : 1; + bool + vu1SoftAddSub : 1, + vu1SoftMulDiv : 1, + vu1SoftSqrt : 1; + bool fpuOverflow : 1, fpuExtraOverflow : 1, fpuFullMode : 1; + bool + fpuSoftAddSub : 1, + fpuSoftMulDiv : 1, + fpuSoftSqrt : 1; + bool EnableEECache : 1; bool @@ -1472,11 +1487,19 @@ namespace EmuFolders #define CHECK_VU_SIGN_OVERFLOW(vunum) (((vunum) == 0) ? 
EmuConfig.Cpu.Recompiler.vu0SignOverflow : EmuConfig.Cpu.Recompiler.vu1SignOverflow) #define CHECK_VU_UNDERFLOW(vunum) (((vunum) == 0) ? EmuConfig.Cpu.Recompiler.vu0Underflow : EmuConfig.Cpu.Recompiler.vu1Underflow) +#define CHECK_VU_SOFT_ADDSUB(vunum) (((vunum) == 0) ? EmuConfig.Cpu.Recompiler.vu0SoftAddSub : EmuConfig.Cpu.Recompiler.vu1SoftAddSub) +#define CHECK_VU_SOFT_MULDIV(vunum) (((vunum) == 0) ? EmuConfig.Cpu.Recompiler.vu0SoftMulDiv : EmuConfig.Cpu.Recompiler.vu1SoftMulDiv) +#define CHECK_VU_SOFT_SQRT(vunum) (((vunum) == 0) ? EmuConfig.Cpu.Recompiler.vu0SoftSqrt : EmuConfig.Cpu.Recompiler.vu1SoftSqrt) + #define CHECK_FPU_OVERFLOW (EmuConfig.Cpu.Recompiler.fpuOverflow) #define CHECK_FPU_EXTRA_OVERFLOW (EmuConfig.Cpu.Recompiler.fpuExtraOverflow) // If enabled, Operands are checked for infinities before being used in the FPU recs #define CHECK_FPU_EXTRA_FLAGS 1 // Always enabled now // Sets D/I flags on FPU instructions #define CHECK_FPU_FULL (EmuConfig.Cpu.Recompiler.fpuFullMode) +#define CHECK_FPU_SOFT_ADDSUB (EmuConfig.Cpu.Recompiler.fpuSoftAddSub) +#define CHECK_FPU_SOFT_MULDIV (EmuConfig.Cpu.Recompiler.fpuSoftMulDiv) +#define CHECK_FPU_SOFT_SQRT (EmuConfig.Cpu.Recompiler.fpuSoftSqrt) + //------------ EE Recompiler defines - Comment to disable a recompiler --------------- #define SHIFT_RECOMPILE // Speed majorly reduced if disabled diff --git a/pcsx2/FPU.cpp b/pcsx2/FPU.cpp index e40e35a0c8..aeab43ea67 100644 --- a/pcsx2/FPU.cpp +++ b/pcsx2/FPU.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-3.0+ #include "Common.h" +#include "PS2Float.h" #include @@ -89,6 +90,19 @@ bool checkUnderflow(u32& xReg, u32 cFlagsToSet) { return false; } +bool checkOverflowUnderflowSoft(PS2Float xReg, u32 cFlagsToSet, bool oflw) /* oflw=true checks xReg.of, oflw=false checks xReg.uf; on a hit ORs cFlagsToSet into _ContVal_ and returns true */ +{ + if ((oflw && xReg.of) || (!oflw && xReg.uf)) + { + _ContVal_ |= (cFlagsToSet); + return true; + } + else if (cFlagsToSet & (oflw ? FPUflagO : FPUflagU)) /* no hit: clear the cause flag matching this check (previously always tested FPUflagO, so U was never cleared) */ + _ContVal_ &= oflw ? 
~FPUflagO : ~FPUflagU; + + return false; +} + __fi u32 fp_max(u32 a, u32 b) { return ((s32)a < 0 && (s32)b < 0) ? std::min(a, b) : std::max(a, b); @@ -115,6 +129,22 @@ bool checkDivideByZero(u32& xReg, u32 yDivisorReg, u32 zDividendReg, u32 cFlagsT return false; } +bool checkDivideByZeroInvalidSoft(PS2Float xReg, u32 cFlagsToSet1, u32 cFlagsToSet2) +{ + if (xReg.dz) + { + _ContVal_ |= cFlagsToSet1; + return true; + } + else if (xReg.iv) + { + _ContVal_ |= cFlagsToSet2; + return true; + } + + return false; +} + /* Clears the "Cause Flags" of the Control/Status Reg The "EE Core Users Manual" implies that all the Cause flags are cleared every instruction... But, the "EE Core Instruction Set Manual" says that only certain Cause Flags are cleared @@ -138,7 +168,7 @@ bool checkDivideByZero(u32& xReg, u32 yDivisorReg, u32 zDividendReg, u32 cFlagsT #else // Used for Comparing; This compares if the floats are exactly the same. #define C_cond_S(cond) { \ - _ContVal_ = ( fpuDouble(_FsValUl_) cond fpuDouble(_FtValUl_) ) ? \ + _ContVal_ = (fpuCompareFull(_FsValUl_) cond fpuCompareFull(_FtValUl_)) ? 
\ ( _ContVal_ | FPUflagC ) : \ ( _ContVal_ & ~FPUflagC ); \ } @@ -182,21 +212,81 @@ float fpuDouble(u32 f) } } +static s32 fpuCompareFull(u32 f) +{ + if (!(f & 0x7f800000)) + f = 0; + // If f is negative, flip the non-sign bits so integer compares work like fp compares + if (f & 0x80000000) + f ^= 0x7fffffff; + return static_cast(f); +} + +static __fi PS2Float fpuAccurateAdd(u32 a, u32 b) +{ + return PS2Float(a).Add(PS2Float(b)); +} + +static __fi PS2Float fpuAccurateSub(u32 a, u32 b) +{ + return PS2Float(a).Sub(PS2Float(b)); +} + +static __fi PS2Float fpuAccurateMul(u32 a, u32 b) +{ + return PS2Float(a).Mul(PS2Float(b)); +} + +static __fi PS2Float fpuAccurateDiv(u32 a, u32 b) +{ + return PS2Float(a).Div(PS2Float(b)); +} + +static __fi PS2Float fpuAccurateMulAdd(u32 a, u32 b, u32 c) +{ + return PS2Float(a).MulAdd(PS2Float(b), PS2Float(c)); +} + +static __fi PS2Float fpuAccurateMulSub(u32 a, u32 b, u32 c) +{ + return PS2Float(a).MulSub(PS2Float(b), PS2Float(c)); +} + void ABS_S() { _FdValUl_ = _FsValUl_ & 0x7fffffff; clearFPUFlags( FPUflagO | FPUflagU ); } void ADD_S() { - _FdValf_ = fpuDouble( _FsValUl_ ) + fpuDouble( _FtValUl_ ); - if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; - checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); + if (CHECK_FPU_SOFT_ADDSUB) + { + PS2Float addres = fpuAccurateAdd(_FsValUl_, _FtValUl_); + _FdValUl_ = addres.raw; + if (checkOverflowUnderflowSoft(addres, FPUflagO | FPUflagSO, true)) return; + checkOverflowUnderflowSoft(addres, FPUflagU | FPUflagSU, false); + } + else + { + _FdValf_ = fpuDouble( _FsValUl_ ) + fpuDouble( _FtValUl_ ); + if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; + checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); + } } void ADDA_S() { - _FAValf_ = fpuDouble( _FsValUl_ ) + fpuDouble( _FtValUl_ ); - if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; - checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); + if (CHECK_FPU_SOFT_ADDSUB) + { + PS2Float addres = fpuAccurateAdd(_FsValUl_, 
_FtValUl_); + _FAValUl_ = addres.raw; + if (checkOverflowUnderflowSoft(addres, FPUflagO | FPUflagSO, true)) return; + checkOverflowUnderflowSoft(addres, FPUflagU | FPUflagSU, false); + } + else + { + _FAValf_ = fpuDouble( _FsValUl_ ) + fpuDouble( _FtValUl_ ); + if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; + checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); + } } void BC1F() { @@ -248,38 +338,75 @@ void CTC1() { } void CVT_S() { - _FdValf_ = (float)_FsValSl_; + if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) { _FdValUl_ = PS2Float::Itof(0, _FsValSl_).raw; } + else + { + _FdValf_ = (float)_FsValSl_; + _FdValf_ = fpuDouble(_FdValUl_); + } } void CVT_W() { - if ( ( _FsValUl_ & 0x7F800000 ) <= 0x4E800000 ) { _FdValSl_ = (s32)_FsValf_; } + if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) { _FdValSl_ = PS2Float::Ftoi(0, _FsValUl_); } + else if ( ( _FsValUl_ & 0x7F800000 ) <= 0x4E800000 ) { _FdValSl_ = (s32)_FsValf_; } else if ( ( _FsValUl_ & 0x80000000 ) == 0 ) { _FdValUl_ = 0x7fffffff; } else { _FdValUl_ = 0x80000000; } } void DIV_S() { - if (checkDivideByZero( _FdValUl_, _FtValUl_, _FsValUl_, FPUflagD | FPUflagSD, FPUflagI | FPUflagSI)) return; - _FdValf_ = fpuDouble( _FsValUl_ ) / fpuDouble( _FtValUl_ ); - if (checkOverflow( _FdValUl_, 0)) return; - checkUnderflow( _FdValUl_, 0); + if (CHECK_FPU_SOFT_MULDIV) + { + PS2Float divres = fpuAccurateDiv(_FsValUl_, _FtValUl_); + _FdValUl_ = divres.raw; + if (checkDivideByZeroInvalidSoft(divres, FPUflagD | FPUflagSD, FPUflagI | FPUflagSI)) return; + if (checkOverflowUnderflowSoft(divres, FPUflagO | FPUflagSO, true)) return; + checkOverflowUnderflowSoft(divres, FPUflagU | FPUflagSU, false); + } + else + { + if (checkDivideByZero( _FdValUl_, _FtValUl_, _FsValUl_, FPUflagD | FPUflagSD, FPUflagI | FPUflagSI)) return; + _FdValf_ = fpuDouble( _FsValUl_ ) / fpuDouble( _FtValUl_ ); + if (checkOverflow( _FdValUl_, 0)) return; + checkUnderflow( _FdValUl_, 0); + } } -/* 
The Instruction Set manual has an overly complicated way of +/* The Instruction Set manual has an overly complicated way of determining the flags that are set. Hopefully this shorter method provides a similar outcome and is faster. (cottonvibes) */ void MADD_S() { - FPRreg temp; - temp.f = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); - _FdValf_ = fpuDouble( _FAValUl_ ) + fpuDouble( temp.UL ); - if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; - checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); + if (CHECK_FPU_SOFT_ADDSUB && CHECK_FPU_SOFT_MULDIV) + { + PS2Float fmacres = fpuAccurateMulAdd(_FAValUl_, _FsValUl_, _FtValUl_); + _FdValUl_ = fmacres.raw; + if (checkOverflowUnderflowSoft(fmacres, FPUflagO | FPUflagSO, true)) return; + checkOverflowUnderflowSoft(fmacres, FPUflagU | FPUflagSU, false); + } + else + { + FPRreg temp; + temp.f = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); + _FdValf_ = fpuDouble( _FAValUl_ ) + fpuDouble( temp.UL ); + if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; + checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); + } } void MADDA_S() { - _FAValf_ += fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); - if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; - checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); + if (CHECK_FPU_SOFT_ADDSUB && CHECK_FPU_SOFT_MULDIV) + { + PS2Float fmacres = fpuAccurateMulAdd(_FAValUl_, _FsValUl_, _FtValUl_); + _FAValUl_ = fmacres.raw; + if (checkOverflowUnderflowSoft(fmacres, FPUflagO | FPUflagSO, true)) return; + checkOverflowUnderflowSoft(fmacres, FPUflagU | FPUflagSU, false); + } + else + { + _FAValf_ += fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); + if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; + checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); + } } void MAX_S() { @@ -302,17 +429,37 @@ void MOV_S() { } void MSUB_S() { - FPRreg temp; - temp.f = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); - _FdValf_ = fpuDouble( _FAValUl_ ) - fpuDouble( temp.UL ); - if 
(checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; - checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); + if (CHECK_FPU_SOFT_ADDSUB && CHECK_FPU_SOFT_MULDIV) + { + PS2Float fmacres = fpuAccurateMulSub(_FAValUl_, _FsValUl_, _FtValUl_); + _FdValUl_ = fmacres.raw; + if (checkOverflowUnderflowSoft(fmacres, FPUflagO | FPUflagSO, true)) return; + checkOverflowUnderflowSoft(fmacres, FPUflagU | FPUflagSU, false); + } + else + { + FPRreg temp; + temp.f = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); + _FdValf_ = fpuDouble( _FAValUl_ ) - fpuDouble( temp.UL ); + if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; + checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); + } } void MSUBA_S() { - _FAValf_ -= fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); - if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; - checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); + if (CHECK_FPU_SOFT_ADDSUB && CHECK_FPU_SOFT_MULDIV) + { + PS2Float fmacres = fpuAccurateMulSub(_FAValUl_, _FsValUl_, _FtValUl_); + _FAValUl_ = fmacres.raw; + if (checkOverflowUnderflowSoft(fmacres, FPUflagO | FPUflagSO, true)) return; + checkOverflowUnderflowSoft(fmacres, FPUflagU | FPUflagSU, false); + } + else + { + _FAValf_ -= fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); + if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; + checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); + } } void MTC1() { @@ -320,15 +467,35 @@ void MTC1() { } void MUL_S() { - _FdValf_ = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); - if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; - checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); + if (CHECK_FPU_SOFT_MULDIV) + { + PS2Float mulres = fpuAccurateMul(_FsValUl_, _FtValUl_); + _FdValUl_ = mulres.raw; + if (checkOverflowUnderflowSoft(mulres, FPUflagO | FPUflagSO, true)) return; + checkOverflowUnderflowSoft(mulres, FPUflagU | FPUflagSU, false); + } + else + { + _FdValf_ = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); + if (checkOverflow( _FdValUl_, 
FPUflagO | FPUflagSO)) return; + checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); + } } void MULA_S() { - _FAValf_ = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); - if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; - checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); + if (CHECK_FPU_SOFT_MULDIV) + { + PS2Float mulres = fpuAccurateMul(_FsValUl_, _FtValUl_); + _FAValUl_ = mulres.raw; + if (checkOverflowUnderflowSoft(mulres, FPUflagO | FPUflagSO, true)) return; + checkOverflowUnderflowSoft(mulres, FPUflagU | FPUflagSU, false); + } + else + { + _FAValf_ = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); + if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; + checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); + } } void NEG_S() { @@ -337,47 +504,90 @@ void NEG_S() { } void RSQRT_S() { - FPRreg temp; clearFPUFlags(FPUflagD | FPUflagI); - if ( ( _FtValUl_ & 0x7F800000 ) == 0 ) { // Ft is zero (Denormals are Zero) - _ContVal_ |= FPUflagD | FPUflagSD; - _FdValUl_ = ( _FtValUl_ & 0x80000000 ) | posFmax; - return; + if (CHECK_FPU_SOFT_SQRT) + { + PS2Float rsqrtres = PS2Float(_FsValUl_).Rsqrt(_FtValUl_); + _FdValUl_ = rsqrtres.raw; + if (checkDivideByZeroInvalidSoft(rsqrtres, FPUflagD | FPUflagSD, FPUflagI | FPUflagSI)) return; + if (checkOverflowUnderflowSoft(rsqrtres, FPUflagO | FPUflagSO, true)) return; + checkOverflowUnderflowSoft(rsqrtres, FPUflagU | FPUflagSU, false); } - else if ( _FtValUl_ & 0x80000000 ) { // Ft is negative - _ContVal_ |= FPUflagI | FPUflagSI; - temp.f = sqrt( fabs( fpuDouble( _FtValUl_ ) ) ); - _FdValf_ = fpuDouble( _FsValUl_ ) / fpuDouble( temp.UL ); - } - else { _FdValf_ = fpuDouble( _FsValUl_ ) / sqrt( fpuDouble( _FtValUl_ ) ); } // Ft is positive and not zero + else + { + FPRreg temp; - if (checkOverflow( _FdValUl_, 0)) return; - checkUnderflow( _FdValUl_, 0); + if ( ( _FtValUl_ & 0x7F800000 ) == 0 ) { // Ft is zero (Denormals are Zero) + _ContVal_ |= FPUflagD | FPUflagSD; + _FdValUl_ = ( _FtValUl_ & 0x80000000 ) | posFmax; + 
return; + } + else if ( _FtValUl_ & 0x80000000 ) { // Ft is negative + _ContVal_ |= FPUflagI | FPUflagSI; + temp.f = sqrt( fabs( fpuDouble( _FtValUl_ ) ) ); + _FdValf_ = fpuDouble( _FsValUl_ ) / fpuDouble( temp.UL ); + } + else { _FdValf_ = fpuDouble( _FsValUl_ ) / sqrt( fpuDouble( _FtValUl_ ) ); } // Ft is positive and not zero + + if (checkOverflow( _FdValUl_, 0)) return; + checkUnderflow( _FdValUl_, 0); + } } void SQRT_S() { clearFPUFlags(FPUflagI | FPUflagD); - if ( ( _FtValUl_ & 0x7F800000 ) == 0 ) // If Ft = +/-0 - _FdValUl_ = _FtValUl_ & 0x80000000;// result is 0 - else if ( _FtValUl_ & 0x80000000 ) { // If Ft is Negative + if (CHECK_FPU_SOFT_SQRT) + { + PS2Float sqrtres = PS2Float(_FtValUl_).Sqrt(); + _FdValUl_ = sqrtres.raw; + if (checkDivideByZeroInvalidSoft(sqrtres, FPUflagD | FPUflagSD, FPUflagI | FPUflagSI)) return; + if (checkOverflowUnderflowSoft(sqrtres, FPUflagO | FPUflagSO, true)) return; + checkOverflowUnderflowSoft(sqrtres, FPUflagU | FPUflagSU, false); + } + else if ((_FtValUl_ & 0x7F800000) == 0) // If Ft = +/-0 + _FdValUl_ = _FtValUl_ & 0x80000000; // result is 0 + else if (_FtValUl_ & 0x80000000) + { + // If Ft is Negative _ContVal_ |= FPUflagI | FPUflagSI; - _FdValf_ = sqrt( fabs( fpuDouble( _FtValUl_ ) ) ); - } else - _FdValf_ = sqrt( fpuDouble( _FtValUl_ ) ); // If Ft is Positive + _FdValf_ = sqrt(fabs(fpuDouble(_FtValUl_))); + } + else + _FdValf_ = sqrt(fpuDouble(_FtValUl_)); // If Ft is Positive } void SUB_S() { - _FdValf_ = fpuDouble( _FsValUl_ ) - fpuDouble( _FtValUl_ ); - if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; - checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); + if (CHECK_FPU_SOFT_ADDSUB) + { + PS2Float subres = fpuAccurateSub(_FsValUl_, _FtValUl_); + _FdValUl_ = subres.raw; + if (checkOverflowUnderflowSoft(subres, FPUflagO | FPUflagSO, true)) return; + checkOverflowUnderflowSoft(subres, FPUflagU | FPUflagSU, false); + } + else + { + _FdValf_ = fpuDouble( _FsValUl_ ) - fpuDouble( _FtValUl_ ); + if 
(checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; + checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); + } } void SUBA_S() { - _FAValf_ = fpuDouble( _FsValUl_ ) - fpuDouble( _FtValUl_ ); - if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; - checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); + if (CHECK_FPU_SOFT_ADDSUB) + { + PS2Float subres = fpuAccurateSub(_FsValUl_, _FtValUl_); + _FAValUl_ = subres.raw; + if (checkOverflowUnderflowSoft(subres, FPUflagO | FPUflagSO, true)) return; + checkOverflowUnderflowSoft(subres, FPUflagU | FPUflagSU, false); + } + else + { + _FAValf_ = fpuDouble( _FsValUl_ ) - fpuDouble( _FtValUl_ ); + if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; + checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); + } } } // End Namespace COP1 diff --git a/pcsx2/PS2Float.cpp b/pcsx2/PS2Float.cpp new file mode 100644 index 0000000000..590d10d360 --- /dev/null +++ b/pcsx2/PS2Float.cpp @@ -0,0 +1,969 @@ +// SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team +// SPDX-License-Identifier: GPL-3.0+ + +#include +#include +#include +#include +#include +#include +#include +#include "common/Pcsx2Defs.h" +#include "common/BitUtils.h" +#include "PS2Float.h" +#include "Common.h" + +//**************************************************************** +// Radix Divisor +// Algorithm reference: DOI 10.1109/ARITH.1995.465363 +//**************************************************************** + +struct CSAResult +{ + u32 sum; + u32 carry; +}; + +static struct CSAResult CSA(u32 a, u32 b, u32 c) +{ + u32 u = a ^ b; + u32 h = (a & b) | (u & c); + u32 l = u ^ c; + return {l, h << 1}; +} + +static s32 quotientSelect(struct CSAResult current) +{ + // Note: Decimal point is between bits 24 and 25 + u32 mask = (1 << 24) - 1; // Bit 23 needs to be or'd in instead of added + s32 test = ((current.sum & ~mask) + current.carry) | (current.sum & mask); + if (test >= 1 << 23) + { // test >= 0.25 + return 1; + } + else if (test < (s32)(~0u << 24)) + { // test < 
-0.5 + return -1; + } + else + { + return 0; + } +} + +static u32 mantissa(u32 x) +{ + return (x & 0x7fffff) | 0x800000; +} + +static u32 exponent(u32 x) +{ + return (x >> 23) & 0xff; +} + +//**************************************************************** +// Booth Multiplier +//**************************************************************** + +struct BoothRecode +{ + u32 data; + u32 negate; +}; + +struct AddResult +{ + u32 lo; + u32 hi; +}; + +static BoothRecode Booth(u32 a, u32 b, u32 bit) +{ + u32 test = (bit ? b >> (bit * 2 - 1) : b << 1) & 7; + a <<= (bit * 2); + a += (test == 3 || test == 4) ? a : 0; + u32 neg = (test >= 4 && test <= 6) ? ~0u : 0; + u32 pos = 1 << (bit * 2); + a ^= (neg & -pos); + a &= (test >= 1 && test <= 6) ? ~0u : 0; + return {a, neg & pos}; +} + +static AddResult Add3(u32 a, u32 b, u32 c) +{ + u32 u = a ^ b; + return {u ^ c, ((u & c) | (a & b)) << 1}; +} + +static u64 MulMantissa(u32 a, u32 b) +{ + u64 full = static_cast(a) * static_cast(b); + BoothRecode b0 = Booth(a, b, 0); + BoothRecode b1 = Booth(a, b, 1); + BoothRecode b2 = Booth(a, b, 2); + BoothRecode b3 = Booth(a, b, 3); + BoothRecode b4 = Booth(a, b, 4); + BoothRecode b5 = Booth(a, b, 5); + BoothRecode b6 = Booth(a, b, 6); + BoothRecode b7 = Booth(a, b, 7); + + // First cycle + AddResult t0 = Add3(b1.data, b2.data, b3.data); + AddResult t1 = Add3(b4.data & ~0x7ffu, b5.data & ~0xfffu, b6.data); + // A few adds get skipped, squeeze them back in + t1.hi |= b6.negate | (b5.data & 0x800); + b7.data |= (b5.data & 0x400) + b5.negate; + + // Second cycle + AddResult t2 = Add3(b0.data, t0.lo, t0.hi); + AddResult t3 = Add3(b7.data, t1.lo, t1.hi); + + // Third cycle + AddResult t4 = Add3(t2.hi, t3.lo, t3.hi); + + // Fourth cycle + AddResult t5 = Add3(t2.lo, t4.lo, t4.hi); + + // Discard bits and sum + t5.hi += b7.negate; + t5.lo &= ~0x7fffu; + t5.hi &= ~0x7fffu; + u32 ps2lo = t5.lo + t5.hi; + return full - ((ps2lo ^ full) & 0x8000); +} + 
+//**************************************************************** +// Float Processor +//**************************************************************** + +PS2Float::PS2Float(s32 value) + : raw((u32)value) +{} + +PS2Float::PS2Float(u32 value) + : raw(value) +{} + +PS2Float::PS2Float(float value) + : raw(std::bit_cast(value)) +{} + +PS2Float::PS2Float(bool sign, u8 exponent, u32 mantissa) + : raw((sign ? 1u : 0u) << 31 | + (u32)(exponent << MANTISSA_BITS) | + (mantissa & 0x7FFFFF)) +{} + +PS2Float PS2Float::Max() +{ + return PS2Float(MAX_FLOATING_POINT_VALUE); +} + +PS2Float PS2Float::Min() +{ + return PS2Float(MIN_FLOATING_POINT_VALUE); +} + +PS2Float PS2Float::One() +{ + return PS2Float(ONE); +} + +PS2Float PS2Float::MinOne() +{ + return PS2Float(MIN_ONE); +} + +PS2Float PS2Float::Add(PS2Float addend) +{ + if (IsDenormalized() || addend.IsDenormalized()) + { + bool sign = DetermineAdditionOperationSign(*this, addend); + + if (IsDenormalized() && !addend.IsDenormalized()) + return PS2Float(sign, addend.Exponent(), addend.Mantissa()); + else if (!IsDenormalized() && addend.IsDenormalized()) + return PS2Float(sign, Exponent(), Mantissa()); + else if (IsDenormalized() && addend.IsDenormalized()) + return PS2Float(sign, 0, 0); + else + Console.Error("Both numbers are not denormalized"); + + return PS2Float(0); + } + + u32 a = raw; + u32 b = addend.raw; + + //exponent difference + s32 exp_diff = Exponent() - addend.Exponent(); + + //diff = 1 .. 24, expt < expd + if (exp_diff > 0 && exp_diff < 25) + { + exp_diff = exp_diff - 1; + b = (MIN_FLOATING_POINT_VALUE << exp_diff) & b; + } + + //diff = -24 .. 
-1 , expd < expt + else if (exp_diff < 0 && exp_diff > -25) + { + exp_diff = -exp_diff; + exp_diff = exp_diff - 1; + a = a & (MIN_FLOATING_POINT_VALUE << exp_diff); + } + + return PS2Float(a).DoAdd(PS2Float(b)); +} + +PS2Float PS2Float::Sub(PS2Float subtrahend) +{ + if (IsDenormalized() || subtrahend.IsDenormalized()) + { + bool sign = DetermineSubtractionOperationSign(*this, subtrahend); + + if (IsDenormalized() && !subtrahend.IsDenormalized()) + return PS2Float(sign, subtrahend.Exponent(), subtrahend.Mantissa()); + else if (!IsDenormalized() && subtrahend.IsDenormalized()) + return PS2Float(sign, Exponent(), Mantissa()); + else if (IsDenormalized() && subtrahend.IsDenormalized()) + return PS2Float(sign, 0, 0); + else + Console.Error("Both numbers are not denormalized"); + + return PS2Float(0); + } + + u32 a = raw; + u32 b = subtrahend.raw; + + //exponent difference + s32 exp_diff = Exponent() - subtrahend.Exponent(); + + //diff = 1 .. 24, expt < expd + if (exp_diff > 0 && exp_diff < 25) + { + exp_diff = exp_diff - 1; + b = (MIN_FLOATING_POINT_VALUE << exp_diff) & b; + } + + //diff = -24 .. -1 , expd < expt + else if (exp_diff < 0 && exp_diff > -25) + { + exp_diff = -exp_diff; + exp_diff = exp_diff - 1; + a = a & (MIN_FLOATING_POINT_VALUE << exp_diff); + } + + return PS2Float(a).DoAdd(PS2Float(b).Negate()); +} + +PS2Float PS2Float::Mul(PS2Float mulend) +{ + if (IsDenormalized() || mulend.IsDenormalized() || IsZero() || mulend.IsZero()) + return PS2Float(DetermineMultiplicationDivisionOperationSign(*this, mulend), 0, 0); + + return DoMul(mulend); +} + +PS2Float PS2Float::MulAdd(PS2Float opsend, PS2Float optend) +{ + PS2Float mulres = opsend.Mul(optend); + PS2Float addres = Add(mulres); + u32 rawres = addres.raw; + bool oflw = addres.of; + bool uflw = addres.uf; + DetermineMacException(3, raw, of, mulres.of, mulres.Sign() ? 
1 : 0, rawres, oflw, uflw); + PS2Float result = PS2Float(rawres); + result.of = oflw; + result.uf = uflw; + return result; +} + +PS2Float PS2Float::MulAddAcc(PS2Float opsend, PS2Float optend) +{ + PS2Float mulres = opsend.Mul(optend); + PS2Float addres = Add(mulres); + u32 rawres = addres.raw; + bool oflw = addres.of; + bool uflw = addres.uf; + DetermineMacException(8, raw, of, mulres.of, mulres.Sign() ? 1 : 0, rawres, oflw, uflw); + raw = rawres; + of = oflw; + PS2Float result = PS2Float(rawres); + result.of = oflw; + result.uf = uflw; + return result; +} + +PS2Float PS2Float::MulSub(PS2Float opsend, PS2Float optend) +{ + PS2Float mulres = opsend.Mul(optend); + PS2Float subres = Sub(mulres); + u32 rawres = subres.raw; + bool oflw = subres.of; + bool uflw = subres.uf; + DetermineMacException(4, raw, of, mulres.of, mulres.Sign() ? 1 : 0, rawres, oflw, uflw); + PS2Float result = PS2Float(rawres); + result.of = oflw; + result.uf = uflw; + return result; +} + +PS2Float PS2Float::MulSubAcc(PS2Float opsend, PS2Float optend) +{ + PS2Float mulres = opsend.Mul(optend); + PS2Float subres = Sub(mulres); + u32 rawres = subres.raw; + bool oflw = subres.of; + bool uflw = subres.uf; + DetermineMacException(9, raw, of, mulres.of, mulres.Sign() ? 1 : 0, rawres, oflw, uflw); + raw = rawres; + of = oflw; + PS2Float result = PS2Float(rawres); + result.of = oflw; + result.uf = uflw; + return result; +} + +PS2Float PS2Float::Div(PS2Float divend) +{ + u32 a = raw; + u32 b = divend.raw; + if (((a & 0x7F800000) == 0) && ((b & 0x7F800000) != 0)) + { + u32 floatResult = 0; + floatResult &= PS2Float::MAX_FLOATING_POINT_VALUE; + floatResult |= (u32)(((s32)(b >> 31) != (s32)(a >> 31)) ? 1 : 0 & 1) << 31; + return PS2Float(floatResult); + } + if (((a & 0x7F800000) != 0) && ((b & 0x7F800000) == 0)) + { + u32 floatResult = PS2Float::MAX_FLOATING_POINT_VALUE; + floatResult &= PS2Float::MAX_FLOATING_POINT_VALUE; + floatResult |= (u32)(((s32)(b >> 31) != (s32)(a >> 31)) ? 
1 : 0 & 1) << 31; + PS2Float result = PS2Float(floatResult); + result.dz = true; + return result; + } + if (((a & 0x7F800000) == 0) && ((b & 0x7F800000) == 0)) + { + u32 floatResult = PS2Float::MAX_FLOATING_POINT_VALUE; + floatResult &= PS2Float::MAX_FLOATING_POINT_VALUE; + floatResult |= (u32)(((s32)(b >> 31) != (s32)(a >> 31)) ? 1 : 0 & 1) << 31; + PS2Float result = PS2Float(floatResult); + result.iv = true; + return result; + } + u32 am = mantissa(a) << 2; + u32 bm = mantissa(b) << 2; + struct CSAResult current = {am, 0}; + u32 quotient = 0; + int quotientBit = 1; + for (int i = 0; i < 25; i++) + { + quotient = (quotient << 1) + quotientBit; + u32 add = quotientBit > 0 ? ~bm : quotientBit < 0 ? bm : 0; + current.carry += quotientBit > 0; + struct CSAResult csa = CSA(current.sum, current.carry, add); + quotientBit = quotientSelect(quotientBit ? csa : current); + current.sum = csa.sum << 1; + current.carry = csa.carry << 1; + } + u32 sign = ((a ^ b) & 0x80000000); + u32 Dvdtexp = exponent(a); + u32 Dvsrexp = exponent(b); + s32 cexp = Dvdtexp - Dvsrexp + 126; + if (quotient >= (1 << 24)) + { + cexp += 1; + quotient >>= 1; + } + if (Dvdtexp == 0 && Dvsrexp == 0) + { + PS2Float result = PS2Float(sign | PS2Float::MAX_FLOATING_POINT_VALUE); + result.iv = true; + return result; + } + else if (Dvdtexp == 0 || Dvsrexp != 0) + { + if (Dvdtexp == 0 && Dvsrexp != 0) { return PS2Float(sign); } + } + else + { + PS2Float result = PS2Float(sign | PS2Float::MAX_FLOATING_POINT_VALUE); + result.dz = true; + return result; + } + if (cexp > 255) + { + PS2Float result = PS2Float(sign | PS2Float::MAX_FLOATING_POINT_VALUE); + result.of = true; + return result; + } + else if (cexp < 1) + { + PS2Float result = PS2Float(sign); + result.uf = true; + return result; + } + return (quotient & 0x7fffff) | (cexp << 23) | sign; +} + +PS2Float PS2Float::Sqrt() +{ + u32 a = raw; + if ((a & 0x7F800000) == 0) + { + PS2Float result = PS2Float(0); + result.iv = ((a >> 31) & 1) != 0; + return result; + } 
+ u32 m = mantissa(a) << 1; + if (!(a & 0x800000)) // If exponent is odd after subtracting bias of 127 + m <<= 1; + struct CSAResult current = {m, 0}; + u32 quotient = 0; + s32 quotientBit = 1; + for (s32 i = 0; i < 25; i++) + { + // Adding n to quotient adds n * (2*quotient + n) to quotient^2 + // (which is what we need to subtract from the remainder) + u32 adjust = quotient + (quotientBit << (24 - i)); + quotient += quotientBit << (25 - i); + u32 add = quotientBit > 0 ? ~adjust : quotientBit < 0 ? adjust : 0; + current.carry += quotientBit > 0; + struct CSAResult csa = CSA(current.sum, current.carry, add); + quotientBit = quotientSelect(quotientBit ? csa : current); + current.sum = csa.sum << 1; + current.carry = csa.carry << 1; + } + s32 Dvdtexp = exponent(a); + if (Dvdtexp == 0) + return PS2Float(0); + Dvdtexp = (Dvdtexp + 127) >> 1; + PS2Float result = PS2Float(((quotient >> 2) & 0x7fffff) | (Dvdtexp << 23)); + if (Sign()) + { + if (result.Sign()) + result = result.Negate(); + result.iv = true; + } + return result; +} + +PS2Float PS2Float::Rsqrt(PS2Float other) +{ + PS2Float sqrt = PS2Float(false, other.Exponent(), other.Mantissa()).Sqrt(); + PS2Float div = Div(sqrt); + PS2Float result = PS2Float(div.raw); + result.dz = sqrt.dz || div.dz; + result.iv = sqrt.iv || div.iv; + result.of = div.of; + result.uf = div.uf; + return result; +} + +PS2Float PS2Float::ELENG(PS2Float y, PS2Float z) +{ + PS2Float ACC = Mul(*this); + ACC.MulAddAcc(y, y); + PS2Float p = ACC.MulAdd(z, z); + return p.Sqrt(); +} + +PS2Float PS2Float::ERCPR() +{ + return PS2Float(ONE).Div(*this); +} + +PS2Float PS2Float::ERLENG(PS2Float y, PS2Float z) +{ + PS2Float ACC = Mul(*this); + ACC.MulAddAcc(y, y); + PS2Float p = ACC.MulAdd(z, z); + p = PS2Float(ONE).Rsqrt(p); + return p; +} + +PS2Float PS2Float::ERSADD(PS2Float y, PS2Float z) +{ + PS2Float ACC = Mul(*this); + ACC.MulAddAcc(y, y); + PS2Float p = ACC.MulAdd(z, z); + p = PS2Float(ONE).Div(p); + return p; +} + +PS2Float PS2Float::ESQRT() +{ + 
return Sqrt(); +} + +PS2Float PS2Float::ESQUR() +{ + return Mul(*this); +} + +PS2Float PS2Float::ESUM(PS2Float y, PS2Float z, PS2Float w) +{ + PS2Float ACC = Mul(PS2Float(ONE)); + ACC.MulAddAcc(y, PS2Float(ONE)); + ACC.MulAddAcc(z, PS2Float(ONE)); + return ACC.MulAdd(w, PS2Float(ONE)); +} + +PS2Float PS2Float::ERSQRT() +{ + return PS2Float(ONE).Rsqrt(*this); +} + +PS2Float PS2Float::ESADD(PS2Float y, PS2Float z) +{ + PS2Float ACC = Mul(*this); + ACC.MulAddAcc(y, y); + return ACC.MulAdd(z, z); +} + +PS2Float PS2Float::EEXP() +{ + float consts[6] = {0.249998688697815f, 0.031257584691048f, 0.002591371303424f, + 0.000171562001924f, 0.000005430199963f, 0.000000690600018f}; + + PS2Float tmp1 = Mul(*this); + PS2Float ACC = Mul(PS2Float(consts[0])); + PS2Float tmp2 = tmp1.Mul(*this); + ACC.MulAddAcc(tmp1, PS2Float(consts[1])); + tmp1 = tmp2.Mul(*this); + ACC.MulAddAcc(tmp2, PS2Float(consts[2])); + tmp2 = tmp1.Mul(*this); + ACC.MulAddAcc(tmp1, PS2Float(consts[3])); + tmp1 = tmp2.Mul(*this); + ACC.MulAddAcc(tmp2, PS2Float(consts[4])); + ACC.MulAddAcc(PS2Float(ONE), PS2Float(ONE)); + PS2Float p = ACC.MulAdd(tmp1, PS2Float(consts[5])); + p = p.Mul(p); + p = p.Mul(p); + p = PS2Float(ONE).Div(p); + + return p; +} + +PS2Float PS2Float::EATAN() +{ + float eatanconst[9] = {0.999999344348907f, -0.333298563957214f, 0.199465364217758f, -0.13085337519646f, + 0.096420042216778f, -0.055909886956215f, 0.021861229091883f, -0.004054057877511f, + 0.785398185253143f}; + + PS2Float tmp1 = Add(PS2Float(ONE)); + PS2Float tmp2 = Sub(PS2Float(ONE)); + *this = tmp2.Div(tmp1); + PS2Float tmp3 = Mul(*this); + PS2Float ACC = PS2Float(eatanconst[0]).Mul(*this); + tmp1 = tmp3.Mul(*this); + tmp2 = tmp1.Mul(tmp3); + ACC.MulAddAcc(tmp1, PS2Float(eatanconst[1])); + tmp1 = tmp2.Mul(tmp3); + ACC.MulAddAcc(tmp2, PS2Float(eatanconst[2])); + tmp2 = tmp1.Mul(tmp3); + ACC.MulAddAcc(tmp1, PS2Float(eatanconst[3])); + tmp1 = tmp2.Mul(tmp3); + ACC.MulAddAcc(tmp2, PS2Float(eatanconst[4])); + tmp2 = tmp1.Mul(tmp3); + 
ACC.MulAddAcc(tmp1, PS2Float(eatanconst[5])); + tmp1 = tmp2.Mul(tmp3); + ACC.MulAddAcc(tmp2, PS2Float(eatanconst[6])); + ACC.MulAddAcc(PS2Float(ONE), PS2Float(eatanconst[8])); + + return ACC.MulAdd(tmp1, PS2Float(eatanconst[7])); +} + +PS2Float PS2Float::ESIN() +{ + float sinconsts[5] = {1.0f, -0.166666567325592f, 0.008333025500178f, -0.000198074136279f, 0.000002601886990f}; + + PS2Float tmp3 = Mul(*this); + PS2Float ACC = Mul(PS2Float(sinconsts[0])); + PS2Float tmp1 = tmp3.Mul(*this); + PS2Float tmp2 = tmp1.Mul(tmp3); + ACC.MulAddAcc(tmp1, PS2Float(sinconsts[1])); + tmp1 = tmp2.Mul(tmp3); + ACC.MulAddAcc(tmp2, PS2Float(sinconsts[2])); + tmp2 = tmp1.Mul(tmp3); + ACC.MulAddAcc(tmp1, PS2Float(sinconsts[3])); + + return ACC.MulAdd(tmp2, PS2Float(sinconsts[4])); +} + +bool PS2Float::IsDenormalized() +{ + return Exponent() == 0; +} + +bool PS2Float::IsZero() +{ + return Abs() == 0; +} + +u32 PS2Float::Abs() +{ + return (raw & MAX_FLOATING_POINT_VALUE); +} + +PS2Float PS2Float::Negate() +{ + return PS2Float(raw ^ SIGNMASK); +} + +s32 PS2Float::CompareTo(PS2Float other) +{ + s32 selfTwoComplementVal = (s32)Abs(); + if (Sign()) + selfTwoComplementVal = -selfTwoComplementVal; + + s32 otherTwoComplementVal = (s32)other.Abs(); + if (other.Sign()) + otherTwoComplementVal = -otherTwoComplementVal; + + if (selfTwoComplementVal < otherTwoComplementVal) + return -1; + else if (selfTwoComplementVal == otherTwoComplementVal) + return 0; + else + return 1; +} + +s32 PS2Float::CompareOperands(PS2Float other) +{ + u32 selfTwoComplementVal = Abs(); + u32 otherTwoComplementVal = other.Abs(); + + if (selfTwoComplementVal < otherTwoComplementVal) + return -1; + else if (selfTwoComplementVal == otherTwoComplementVal) + return 0; + else + return 1; +} + +double PS2Float::ToDouble() +{ + return std::bit_cast(((u64)Sign() << 63) | ((((u64)Exponent() - BIAS) + 1023ULL) << 52) | ((u64)Mantissa() << 29)); +} + +std::string PS2Float::ToString() +{ + double res = ToDouble(); + + u32 value = raw; + 
std::ostringstream oss; + oss << std::fixed << std::setprecision(6); + + if (IsDenormalized()) + { + oss << "Denormalized(" << res << ")"; + } + else if (value == MAX_FLOATING_POINT_VALUE) + { + oss << "Fmax(" << res << ")"; + } + else if (value == MIN_FLOATING_POINT_VALUE) + { + oss << "-Fmax(" << res << ")"; + } + else + { + oss << "PS2Float(" << res << ")"; + } + + return oss.str(); +} + +PS2Float PS2Float::DoAdd(PS2Float other) +{ + u8 selfExponent = Exponent(); + s32 resExponent = selfExponent - other.Exponent(); + + if (resExponent < 0) + return other.DoAdd(*this); + else if (resExponent >= 25) + return *this; + + const u8 roundingMultiplier = 6; + + // http://graphics.stanford.edu/~seander/bithacks.html#ConditionalNegate + u32 sign1 = (u32)((s32)raw >> 31); + s32 selfMantissa = (s32)(((Mantissa() | 0x800000) ^ sign1) - sign1); + u32 sign2 = (u32)((s32)other.raw >> 31); + s32 otherMantissa = (s32)(((other.Mantissa() | 0x800000) ^ sign2) - sign2); + + // PS2 multiply by 2 before doing the Math here. + s32 man = (selfMantissa << roundingMultiplier) + ((otherMantissa << roundingMultiplier) >> resExponent); + s32 absMan = abs(man); + if (absMan == 0) + return PS2Float(0); + + // Remove from exponent the PS2 Multiplier value. + s32 rawExp = selfExponent - roundingMultiplier; + + s32 amount = Common::normalizeAmounts[Common::CountLeadingSignBits(absMan)]; + rawExp -= amount; + absMan <<= amount; + + s32 msbIndex = Common::BitScanReverse8(absMan >> MANTISSA_BITS); + rawExp += msbIndex; + absMan >>= msbIndex; + + if (rawExp > 255) + { + PS2Float result = man < 0 ? 
Min() : Max(); + result.of = true; + return result; + } + else if (rawExp < 1) + { + PS2Float result = PS2Float(man < 0, 0, 0); + result.uf = true; + return result; + } + + return PS2Float(((u32)man & SIGNMASK) | (u32)rawExp << MANTISSA_BITS | ((u32)absMan & 0x7FFFFF)); +} + +PS2Float PS2Float::DoMul(PS2Float other) +{ + u8 selfExponent = Exponent(); + u8 otherExponent = other.Exponent(); + u32 selfMantissa = Mantissa() | 0x800000; + u32 otherMantissa = other.Mantissa() | 0x800000; + u32 sign = (raw ^ other.raw) & SIGNMASK; + + s32 resExponent = selfExponent + otherExponent - 127; + u32 resMantissa = (u32)(MulMantissa(selfMantissa, otherMantissa) >> MANTISSA_BITS); + + if (resMantissa > 0xFFFFFF) + { + resMantissa >>= 1; + resExponent++; + } + + if (resExponent > 255) + { + PS2Float result = PS2Float(sign | MAX_FLOATING_POINT_VALUE); + result.of = true; + return result; + } + else if (resExponent < 1) + { + PS2Float result = PS2Float(sign); + result.uf = true; + return result; + } + + return PS2Float(sign | (u32)(resExponent << MANTISSA_BITS) | (resMantissa & 0x7FFFFF)); +} + +PS2Float PS2Float::Itof(s32 complement, s32 f1) +{ + if (f1 == 0) + return PS2Float(0); + + s32 resExponent; + + bool negative = f1 < 0; + + if (f1 == -2147483648) + { + if (complement <= 0) + // special case + return PS2Float(0xcf000000); + else + f1 = 2147483647; + } + + s32 u = std::abs(f1); + + s32 shifts; + + s32 lzcnt = Common::CountLeadingSignBits(u); + if (lzcnt < 8) + { + s32 count = 8 - lzcnt; + u >>= count; + shifts = -count; + } + else + { + s32 count = lzcnt - 8; + u <<= count; + shifts = count; + } + + resExponent = BIAS + MANTISSA_BITS - shifts - complement; + + if (resExponent >= 158) + return negative ? 
PS2Float(0xcf000000) : PS2Float(0x4f000000); + else if (resExponent >= 0) + return PS2Float(negative, (u8)resExponent, (u32)u); + + return PS2Float(0); +} + +s32 PS2Float::Ftoi(s32 complement, u32 f1) +{ + u32 a, result; + + a = f1; + if ((f1 & 0x7F800000) == 0) + result = 0; + else + { + complement = (s32)(f1 >> MANTISSA_BITS & 0xFF) + complement; + f1 &= 0x7FFFFF; + f1 |= 0x800000; + if (complement < 158) + { + if (complement > 126) + { + f1 = (f1 << 7) >> (31 - ((u8)complement - 126)); + if ((s32)a < 0) + f1 = ~f1 + 1; + result = f1; + } + else + result = 0; + } + else if ((s32)a < 0) + result = SIGNMASK; + else + result = MAX_FLOATING_POINT_VALUE; + } + + return (s32)result; +} + +u8 PS2Float::Clip(u32 f1, u32 f2, bool& cplus, bool& cminus) +{ + bool resultPlus = false; + bool resultMinus = false; + u32 a; + + if ((f1 & 0x7F800000) == 0) + { + f1 &= 0xFF800000; + } + + a = f1; + + if ((f2 & 0x7F800000) == 0) + { + f2 &= 0xFF800000; + } + + f1 = f1 & MAX_FLOATING_POINT_VALUE; + f2 = f2 & MAX_FLOATING_POINT_VALUE; + + if ((-1 < (int)a) && (f2 < f1)) + resultPlus = true; + + cplus = resultPlus; + + if (((int)a < 0) && (f2 < f1)) + resultMinus = true; + + cminus = resultMinus; + + return 0; +} + +bool PS2Float::DetermineMultiplicationDivisionOperationSign(PS2Float a, PS2Float b) +{ + return a.Sign() ^ b.Sign(); +} + +bool PS2Float::DetermineAdditionOperationSign(PS2Float a, PS2Float b) +{ + if (a.IsZero() && b.IsZero()) + { + if (!a.Sign() || !b.Sign()) + return false; + else if (a.Sign() && b.Sign()) + return true; + else + Console.Error("Unhandled addition operation flags"); + } + + return a.CompareOperands(b) >= 0 ? a.Sign() : b.Sign(); +} + +bool PS2Float::DetermineSubtractionOperationSign(PS2Float a, PS2Float b) +{ + if (a.IsZero() && b.IsZero()) + { + if (!a.Sign() || b.Sign()) + return false; + else if (a.Sign() && !b.Sign()) + return true; + else + Console.Error("Unhandled subtraction operation flags"); + } + + return a.CompareOperands(b) >= 0 ? 
a.Sign() : !b.Sign(); +} + +u8 PS2Float::DetermineMacException(u8 mode, u32 acc, bool acc_oflw, bool moflw, s32 msign, u32& addsubres, bool& oflw, bool& uflw) +{ + bool roundToMax; + + if ((mode == 3) || (mode == 8)) + roundToMax = msign == 0; + else + { + if ((mode != 4) && (mode != 9)) + { + Console.Error("Unhandled MacFlag operation flags"); + return 1; + } + + roundToMax = msign != 0; + } + + if (!acc_oflw) + { + if (moflw) + { + if (roundToMax) + { + addsubres = MAX_FLOATING_POINT_VALUE; + uflw = false; + oflw = true; + } + else + { + addsubres = MIN_FLOATING_POINT_VALUE; + uflw = false; + oflw = true; + } + } + } + else if (!moflw) + { + addsubres = acc; + uflw = false; + oflw = true; + } + else if (roundToMax) + { + addsubres = MAX_FLOATING_POINT_VALUE; + uflw = false; + oflw = true; + } + else + { + addsubres = MIN_FLOATING_POINT_VALUE; + uflw = false; + oflw = true; + } + + return 0; +} diff --git a/pcsx2/PS2Float.h b/pcsx2/PS2Float.h new file mode 100644 index 0000000000..5744f60e1a --- /dev/null +++ b/pcsx2/PS2Float.h @@ -0,0 +1,123 @@ +// SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team +// SPDX-License-Identifier: GPL-3.0+ + +#pragma once + +class PS2Float +{ +public: + static constexpr u8 BIAS = 127; + static constexpr u8 MANTISSA_BITS = 23; + static constexpr u32 SIGNMASK = 0x80000000; + static constexpr u32 MAX_FLOATING_POINT_VALUE = 0x7FFFFFFF; + static constexpr u32 MIN_FLOATING_POINT_VALUE = 0xFFFFFFFF; + static constexpr u32 ONE = 0x3F800000; + static constexpr u32 MIN_ONE = 0xBF800000; + + bool dz = false; + bool iv = false; + bool of = false; + bool uf = false; + + u32 raw; + + constexpr u32 Mantissa() const { return raw & 0x7FFFFF; } + constexpr u8 Exponent() const { return (raw >> 23) & 0xFF; } + constexpr bool Sign() const { return ((raw >> 31) & 1) != 0; } + + PS2Float(s32 value); + + PS2Float(u32 value); + + PS2Float(float value); + + PS2Float(bool sign, u8 exponent, u32 mantissa); + + static PS2Float Max(); + + static PS2Float Min(); + + 
static PS2Float One(); + + static PS2Float MinOne(); + + static PS2Float Itof(s32 complement, s32 f1); + + static s32 Ftoi(s32 complement, u32 f1); + + static u8 Clip(u32 f1, u32 f2, bool& cplus, bool& cminus); + + PS2Float Add(PS2Float addend); + + PS2Float Sub(PS2Float subtrahend); + + PS2Float Mul(PS2Float mulend); + + PS2Float MulAdd(PS2Float opsend, PS2Float optend); + + PS2Float MulAddAcc(PS2Float opsend, PS2Float optend); + + PS2Float MulSub(PS2Float opsend, PS2Float optend); + + PS2Float MulSubAcc(PS2Float opsend, PS2Float optend); + + PS2Float Div(PS2Float divend); + + PS2Float Sqrt(); + + PS2Float Rsqrt(PS2Float other); + + PS2Float ELENG(PS2Float y, PS2Float z); + + PS2Float ERCPR(); + + PS2Float ERLENG(PS2Float y, PS2Float z); + + PS2Float ERSADD(PS2Float y, PS2Float z); + + PS2Float ESQRT(); + + PS2Float ESQUR(); + + PS2Float ESUM(PS2Float y, PS2Float z, PS2Float w); + + PS2Float ERSQRT(); + + PS2Float ESADD(PS2Float y, PS2Float z); + + PS2Float EEXP(); + + PS2Float EATAN(); + + PS2Float ESIN(); + + bool IsDenormalized(); + + bool IsZero(); + + u32 Abs(); + + PS2Float Negate(); + + s32 CompareTo(PS2Float other); + + s32 CompareOperands(PS2Float other); + + double ToDouble(); + + std::string ToString(); + +protected: +private: + PS2Float DoAdd(PS2Float other); + + PS2Float DoMul(PS2Float other); + + static bool DetermineMultiplicationDivisionOperationSign(PS2Float a, PS2Float b); + + static bool DetermineAdditionOperationSign(PS2Float a, PS2Float b); + + static bool DetermineSubtractionOperationSign(PS2Float a, PS2Float b); + + static u8 DetermineMacException(u8 mode, u32 acc, bool acc_oflw, bool moflw, s32 msign, u32& addsubres, bool& oflw, bool& uflw); +}; diff --git a/pcsx2/Pcsx2Config.cpp b/pcsx2/Pcsx2Config.cpp index 13cd12578e..9320b5d1d4 100644 --- a/pcsx2/Pcsx2Config.cpp +++ b/pcsx2/Pcsx2Config.cpp @@ -537,14 +537,27 @@ void Pcsx2Config::RecompilerOptions::LoadSave(SettingsWrapper& wrap) SettingsWrapBitBool(vu0ExtraOverflow); 
SettingsWrapBitBool(vu0SignOverflow); SettingsWrapBitBool(vu0Underflow); + + SettingsWrapBitBool(vu0SoftAddSub); + SettingsWrapBitBool(vu0SoftMulDiv); + SettingsWrapBitBool(vu0SoftSqrt); + SettingsWrapBitBool(vu1Overflow); SettingsWrapBitBool(vu1ExtraOverflow); SettingsWrapBitBool(vu1SignOverflow); SettingsWrapBitBool(vu1Underflow); + SettingsWrapBitBool(vu1SoftAddSub); + SettingsWrapBitBool(vu1SoftMulDiv); + SettingsWrapBitBool(vu1SoftSqrt); + SettingsWrapBitBool(fpuOverflow); SettingsWrapBitBool(fpuExtraOverflow); SettingsWrapBitBool(fpuFullMode); + + SettingsWrapBitBool(fpuSoftAddSub); + SettingsWrapBitBool(fpuSoftMulDiv); + SettingsWrapBitBool(fpuSoftSqrt); } u32 Pcsx2Config::RecompilerOptions::GetEEClampMode() const diff --git a/pcsx2/VU.h b/pcsx2/VU.h index 15d944c90b..f81ea9b52d 100644 --- a/pcsx2/VU.h +++ b/pcsx2/VU.h @@ -149,8 +149,8 @@ struct alignas(16) VURegs alignas(16) u32 micro_macflags[4]; alignas(16) u32 micro_clipflags[4]; alignas(16) u32 micro_statusflags[4]; - // MAC/Status flags -- these are used by interpreters but are kind of hacky - // and shouldn't be relied on for any useful/valid info. Would like to move them out of + // MAC/Status flags -- these are used by interpreters but are kind of hacky without soft floats + // and shouldn't be relied on for any useful/valid info without using soft floats. Would like to move them out of // this struct eventually. u32 macflag; u32 statusflag; diff --git a/pcsx2/VUflags.cpp b/pcsx2/VUflags.cpp index dd5921a69d..f5506d4ef2 100644 --- a/pcsx2/VUflags.cpp +++ b/pcsx2/VUflags.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-3.0+ #include "Common.h" +#include "PS2Float.h" #include #include @@ -12,10 +13,10 @@ /* NEW FLAGS */ //By asadr. 
Thnkx F|RES :p /*****************************************/ -static __ri u32 VU_MAC_UPDATE( int shift, VURegs * VU, float f ) +static __ri u32 VU_MAC_UPDATE( s32 shift, VURegs* VU, float f) { u32 v = *(u32*)&f; - int exp = (v >> 23) & 0xff; + s32 exp = (v >> 23) & 0xff; u32 s = v & 0x80000000; if (s) @@ -46,6 +47,32 @@ static __ri u32 VU_MAC_UPDATE( int shift, VURegs * VU, float f ) } } +static __ri u32 VU_MAC_UPDATE(s32 shift, VURegs* VU, PS2Float f) +{ + u32 v = f.raw; + + if (v & PS2Float::SIGNMASK) + VU->macflag |= 0x0010 << shift; + else + VU->macflag &= ~(0x0010 << shift); + + if (f.IsZero()) + { + VU->macflag = (VU->macflag & ~(0x1100 << shift)) | (0x0001 << shift); + return v; + } + else if (f.uf) { VU->macflag = (VU->macflag & ~(0x1000 << shift)) | (0x0101 << shift); } + else if (f.of) { VU->macflag = (VU->macflag & ~(0x0101 << shift)) | (0x1000 << shift); } + else { VU->macflag = (VU->macflag & ~(0x1101 << shift)); } + + return v; +} + +__fi bool IsOverflowSet(VURegs* VU, s32 shift) +{ + return (VU->macflag & (0x1000 << shift)); +} + __fi u32 VU_MACx_UPDATE(VURegs * VU, float x) { return VU_MAC_UPDATE(3, VU, x); @@ -66,6 +93,26 @@ __fi u32 VU_MACw_UPDATE(VURegs * VU, float w) return VU_MAC_UPDATE(0, VU, w); } +__fi u32 VU_MACx_UPDATE(VURegs* VU, PS2Float x) +{ + return VU_MAC_UPDATE(3, VU, x); +} + +__fi u32 VU_MACy_UPDATE(VURegs* VU, PS2Float y) +{ + return VU_MAC_UPDATE(2, VU, y); +} + +__fi u32 VU_MACz_UPDATE(VURegs* VU, PS2Float z) +{ + return VU_MAC_UPDATE(1, VU, z); +} + +__fi u32 VU_MACw_UPDATE(VURegs* VU, PS2Float w) +{ + return VU_MAC_UPDATE(0, VU, w); +} + __fi void VU_MACx_CLEAR(VURegs * VU) { VU->macflag&= ~(0x1111<<3); diff --git a/pcsx2/VUflags.h b/pcsx2/VUflags.h index 3ac149d5fe..75c9eade7f 100644 --- a/pcsx2/VUflags.h +++ b/pcsx2/VUflags.h @@ -3,11 +3,17 @@ #pragma once #include "VU.h" +#include "PS2Float.h" +extern bool IsOverflowSet(VURegs* VU, s32 shift); extern u32 VU_MACx_UPDATE(VURegs * VU, float x); extern u32 VU_MACy_UPDATE(VURegs 
* VU, float y); extern u32 VU_MACz_UPDATE(VURegs * VU, float z); extern u32 VU_MACw_UPDATE(VURegs * VU, float w); +extern u32 VU_MACx_UPDATE(VURegs* VU, PS2Float x); +extern u32 VU_MACy_UPDATE(VURegs* VU, PS2Float y); +extern u32 VU_MACz_UPDATE(VURegs* VU, PS2Float z); +extern u32 VU_MACw_UPDATE(VURegs* VU, PS2Float w); extern void VU_MACx_CLEAR(VURegs * VU); extern void VU_MACy_CLEAR(VURegs * VU); extern void VU_MACz_CLEAR(VURegs * VU); diff --git a/pcsx2/VUops.cpp b/pcsx2/VUops.cpp index 05fa9a4855..b4fae0cac8 100644 --- a/pcsx2/VUops.cpp +++ b/pcsx2/VUops.cpp @@ -3,6 +3,7 @@ #include "Common.h" #include "VUops.h" +#include "PS2Float.h" #include "GS.h" #include "Gif_Unit.h" #include "MTVU.h" @@ -462,34 +463,48 @@ static __fi float vuDouble(u32 f) } #endif -static __fi float vuADD_TriAceHack(u32 a, u32 b) +static __fi PS2Float vuAccurateAdd(u32 a, u32 b) { - // On VU0 TriAce Games use ADDi and expects these bit-perfect results: - //if (a == 0xb3e2a619 && b == 0x42546666) return vuDouble(0x42546666); - //if (a == 0x8b5b19e9 && b == 0xc7f079b3) return vuDouble(0xc7f079b3); - //if (a == 0x4b1ed4a8 && b == 0x43a02666) return vuDouble(0x4b1ed5e7); - //if (a == 0x7d1ca47b && b == 0x42f23333) return vuDouble(0x7d1ca47b); + return PS2Float(a).Add(PS2Float(b)); +} - // In the 3rd case, some other rounding error is giving us incorrect - // operands ('a' is wrong); and therefor an incorrect result. - // We're getting: 0x4b1ed4a8 + 0x43a02666 = 0x4b1ed5e8 - // We should be getting: 0x4b1ed4a7 + 0x43a02666 = 0x4b1ed5e7 - // microVU gets the correct operands and result. The interps likely - // don't get it due to rounding towards nearest in other calculations. 
+static __fi PS2Float vuAccurateSub(u32 a, u32 b) +{ + return PS2Float(a).Sub(PS2Float(b)); +} - // microVU uses something like this to get TriAce games working, - // but VU interpreters don't seem to need it currently: +static __fi PS2Float vuAccurateMul(u32 a, u32 b) +{ + return PS2Float(a).Mul(PS2Float(b)); +} - // Update Sept 2021, now the interpreters don't suck, they do - Refraction - s32 aExp = (a >> 23) & 0xff; - s32 bExp = (b >> 23) & 0xff; - if (aExp - bExp >= 25) b &= 0x80000000; - if (aExp - bExp <=-25) a &= 0x80000000; - float ret = vuDouble(a) + vuDouble(b); - //DevCon.WriteLn("aExp = %d, bExp = %d", aExp, bExp); - //DevCon.WriteLn("0x%08x + 0x%08x = 0x%08x", a, b, (u32&)ret); - //DevCon.WriteLn("%f + %f = %f", vuDouble(a), vuDouble(b), ret); - return ret; +static __fi PS2Float vuAccurateDiv(u32 a, u32 b) +{ + return PS2Float(a).Div(PS2Float(b)); +} + +static __fi PS2Float vuAccurateMulAdd(u32 a, u32 b, u32 c) +{ + return PS2Float(a).MulAdd(PS2Float(b), PS2Float(c)); +} + +static __fi PS2Float vuAccurateMulSub(u32 a, u32 b, u32 c) +{ + return PS2Float(a).MulSub(PS2Float(b), PS2Float(c)); +} + +static __fi PS2Float vuAccurateMulAddAcc(u32 a, u32 b, u32 c, bool oflw) +{ + PS2Float acc = PS2Float(a); + acc.of = oflw; // carry the caller's overflow state into acc without replacing its value + return acc.MulAddAcc(PS2Float(b), PS2Float(c)); +} + +static __fi PS2Float vuAccurateMulSubAcc(u32 a, u32 b, u32 c, bool oflw) +{ + PS2Float acc = PS2Float(a); + acc.of = oflw; // carry the caller's overflow state into acc without replacing its value + return acc.MulSubAcc(PS2Float(b), PS2Float(c)); } template @@ -549,34 +564,55 @@ static __fi void applyBinaryMACOpBroadcast(VURegs* VU, u32 bc) VU_STAT_UPDATE(VU); } +template +static __fi void applyAccurateBinaryMACOp(VURegs* VU) +{ + VECTOR* dst = _getDst(VU); + if (_X) { dst->i.x = VU_MACx_UPDATE(VU, Fn(VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y) { dst->i.y = VU_MACy_UPDATE(VU, 
Fn(VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z) { dst->i.z = VU_MACz_UPDATE(VU, Fn(VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else 
VU_MACz_CLEAR(VU); + if (_W) { dst->i.w = VU_MACw_UPDATE(VU, Fn(VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); +} + +template +static __fi void applyAccurateBinaryMACOpBroadcast(VURegs* VU, u32 bc) +{ + VECTOR* dst = _getDst(VU); + if (_X) { dst->i.x = VU_MACx_UPDATE(VU, Fn(VU->VF[_Fs_].i.x, bc)); } else VU_MACx_CLEAR(VU); + if (_Y) { dst->i.y = VU_MACy_UPDATE(VU, Fn(VU->VF[_Fs_].i.y, bc)); } else VU_MACy_CLEAR(VU); + if (_Z) { dst->i.z = VU_MACz_UPDATE(VU, Fn(VU->VF[_Fs_].i.z, bc)); } else VU_MACz_CLEAR(VU); + if (_W) { dst->i.w = VU_MACw_UPDATE(VU, Fn(VU->VF[_Fs_].i.w, bc)); } else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); +} + static __fi float _vuOpADD(u32 fs, u32 ft) { return vuDouble(fs) + vuDouble(ft); } +static __fi PS2Float _vuAccurateOpADD(u32 fs, u32 ft) +{ + return PS2Float(fs).Add(PS2Float(ft)); +} + static __fi void _vuADD(VURegs* VU) { - applyBinaryMACOp<_vuOpADD, MACOpDst::Fd>(VU); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) + applyAccurateBinaryMACOp<_vuAccurateOpADD, MACOpDst::Fd>(VU); + else + applyBinaryMACOp<_vuOpADD, MACOpDst::Fd>(VU); } static __fi void vuADDbc(VURegs* VU, u32 bc) { - applyBinaryMACOpBroadcast<_vuOpADD, MACOpDst::Fd>(VU, bc); -} - -static __fi void vuADDbc_addsubhack(VURegs* VU, u32 bc) -{ - if (CHECK_VUADDSUBHACK) - applyBinaryMACOpBroadcast(VU, bc); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 
1 : 0)) + applyAccurateBinaryMACOpBroadcast<_vuAccurateOpADD, MACOpDst::Fd>(VU, bc); else applyBinaryMACOpBroadcast<_vuOpADD, MACOpDst::Fd>(VU, bc); } -static __fi void _vuADDi(VURegs* VU) -{ - vuADDbc_addsubhack(VU, VU->VI[REG_I].UL); -} - +static __fi void _vuADDi(VURegs* VU) { vuADDbc(VU, VU->VI[REG_I].UL); } static __fi void _vuADDq(VURegs* VU) { vuADDbc(VU, VU->VI[REG_Q].UL); } static __fi void _vuADDx(VURegs* VU) { vuADDbc(VU, VU->VF[_Ft_].i.x); } static __fi void _vuADDy(VURegs* VU) { vuADDbc(VU, VU->VF[_Ft_].i.y); } @@ -585,12 +621,18 @@ static __fi void _vuADDw(VURegs* VU) { vuADDbc(VU, VU->VF[_Ft_].i.w); } static __fi void _vuADDA(VURegs* VU) { - applyBinaryMACOp<_vuOpADD, MACOpDst::Acc>(VU); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) + applyAccurateBinaryMACOp<_vuAccurateOpADD, MACOpDst::Acc>(VU); + else + applyBinaryMACOp<_vuOpADD, MACOpDst::Acc>(VU); } static __fi void vuADDAbc(VURegs* VU, u32 bc) { - applyBinaryMACOpBroadcast<_vuOpADD, MACOpDst::Acc>(VU, bc); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) + applyAccurateBinaryMACOpBroadcast<_vuAccurateOpADD, MACOpDst::Acc>(VU, bc); + else + applyBinaryMACOpBroadcast<_vuOpADD, MACOpDst::Acc>(VU, bc); } static __fi void _vuADDAi(VURegs* VU) { vuADDAbc(VU, VU->VI[REG_I].UL); } @@ -605,14 +647,25 @@ static __fi float _vuOpSUB(u32 fs, u32 ft) return vuDouble(fs) - vuDouble(ft); } +static __fi PS2Float _vuAccurateOpSUB(u32 fs, u32 ft) +{ + return PS2Float(fs).Sub(PS2Float(ft)); +} + static __fi void _vuSUB(VURegs* VU) { - applyBinaryMACOp<_vuOpSUB, MACOpDst::Fd>(VU); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) + applyAccurateBinaryMACOp<_vuAccurateOpSUB, MACOpDst::Fd>(VU); + else + applyBinaryMACOp<_vuOpSUB, MACOpDst::Fd>(VU); } static __fi void vuSUBbc(VURegs* VU, u32 bc) { - applyBinaryMACOpBroadcast<_vuOpSUB, MACOpDst::Fd>(VU, bc); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 
1 : 0)) + applyAccurateBinaryMACOpBroadcast<_vuAccurateOpSUB, MACOpDst::Fd>(VU, bc); + else + applyBinaryMACOpBroadcast<_vuOpSUB, MACOpDst::Fd>(VU, bc); } static __fi void _vuSUBi(VURegs* VU) { vuSUBbc(VU, VU->VI[REG_I].UL); } @@ -624,12 +677,18 @@ static __fi void _vuSUBw(VURegs* VU) { vuSUBbc(VU, VU->VF[_Ft_].i.w); } static __fi void _vuSUBA(VURegs* VU) { - applyBinaryMACOp<_vuOpSUB, MACOpDst::Acc>(VU); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) + applyAccurateBinaryMACOp<_vuAccurateOpSUB, MACOpDst::Acc>(VU); + else + applyBinaryMACOp<_vuOpSUB, MACOpDst::Acc>(VU); } static __fi void vuSUBAbc(VURegs* VU, u32 bc) { - applyBinaryMACOpBroadcast<_vuOpSUB, MACOpDst::Acc>(VU, bc); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) + applyAccurateBinaryMACOpBroadcast<_vuAccurateOpSUB, MACOpDst::Acc>(VU, bc); + else + applyBinaryMACOpBroadcast<_vuOpSUB, MACOpDst::Acc>(VU, bc); } static __fi void _vuSUBAi(VURegs* VU) { vuSUBAbc(VU, VU->VI[REG_I].UL); } @@ -644,14 +703,25 @@ static __fi float _vuOpMUL(u32 fs, u32 ft) return vuDouble(fs) * vuDouble(ft); } +static __fi PS2Float _vuAccurateOpMUL(u32 fs, u32 ft) +{ + return PS2Float(fs).Mul(PS2Float(ft)); +} + static __fi void _vuMUL(VURegs* VU) { - applyBinaryMACOp<_vuOpMUL, MACOpDst::Fd>(VU); + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + applyAccurateBinaryMACOp<_vuAccurateOpMUL, MACOpDst::Fd>(VU); + else + applyBinaryMACOp<_vuOpMUL, MACOpDst::Fd>(VU); } static __fi void vuMULbc(VURegs* VU, u32 bc) { - applyBinaryMACOpBroadcast<_vuOpMUL, MACOpDst::Fd>(VU, bc); + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 
1 : 0)) + applyAccurateBinaryMACOpBroadcast<_vuAccurateOpMUL, MACOpDst::Fd>(VU, bc); + else + applyBinaryMACOpBroadcast<_vuOpMUL, MACOpDst::Fd>(VU, bc); } static __fi void _vuMULi(VURegs* VU) { vuMULbc(VU, VU->VI[REG_I].UL); } @@ -664,12 +734,18 @@ static __fi void _vuMULw(VURegs* VU) { vuMULbc(VU, VU->VF[_Ft_].i.w); } static __fi void _vuMULA(VURegs* VU) { - applyBinaryMACOp<_vuOpMUL, MACOpDst::Acc>(VU); + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + applyAccurateBinaryMACOp<_vuAccurateOpMUL, MACOpDst::Acc>(VU); + else + applyBinaryMACOp<_vuOpMUL, MACOpDst::Acc>(VU); } static __fi void vuMULAbc(VURegs* VU, u32 bc) { - applyBinaryMACOpBroadcast<_vuOpMUL, MACOpDst::Acc>(VU, bc); + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + applyAccurateBinaryMACOpBroadcast<_vuAccurateOpMUL, MACOpDst::Acc>(VU, bc); + else + applyBinaryMACOpBroadcast<_vuOpMUL, MACOpDst::Acc>(VU, bc); } static __fi void _vuMULAi(VURegs* VU) { vuMULAbc(VU, VU->VI[REG_I].UL); } @@ -701,19 +777,81 @@ static __fi void applyTernaryMACOpBroadcast(VURegs* VU, u32 bc) VU_STAT_UPDATE(VU); } +template +static __fi void applyAccurateTernaryMACOp(VURegs* VU) +{ + VECTOR* dst = _getDst(VU); + if (_X) { dst->i.x = VU_MACx_UPDATE(VU, Fn(VU->ACC.i.x, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y) { dst->i.y = VU_MACy_UPDATE(VU, Fn(VU->ACC.i.y, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z) { dst->i.z = VU_MACz_UPDATE(VU, Fn(VU->ACC.i.z, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W) { dst->i.w = VU_MACw_UPDATE(VU, Fn(VU->ACC.i.w, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); +} + +template +static __fi void applyAccurateTernaryMACOpBroadcast(VURegs* VU, u32 bc) +{ + VECTOR* dst = _getDst(VU); + if (_X) { dst->i.x = VU_MACx_UPDATE(VU, Fn(VU->ACC.i.x, VU->VF[_Fs_].i.x, bc)); } else VU_MACx_CLEAR(VU); + if (_Y) { dst->i.y = VU_MACy_UPDATE(VU, Fn(VU->ACC.i.y, VU->VF[_Fs_].i.y, bc)); } else 
VU_MACy_CLEAR(VU); + if (_Z) { dst->i.z = VU_MACz_UPDATE(VU, Fn(VU->ACC.i.z, VU->VF[_Fs_].i.z, bc)); } else VU_MACz_CLEAR(VU); + if (_W) { dst->i.w = VU_MACw_UPDATE(VU, Fn(VU->ACC.i.w, VU->VF[_Fs_].i.w, bc)); } else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); +} + +template +static __fi void applyAccurateAccumulatorTernaryMACOp(VURegs* VU) +{ + VECTOR* dst = _getDst(VU); + if (_X) { dst->i.x = VU_MACx_UPDATE(VU, Fn(VU->ACC.i.x, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, IsOverflowSet(VU, 3))); } else VU_MACx_CLEAR(VU); + if (_Y) { dst->i.y = VU_MACy_UPDATE(VU, Fn(VU->ACC.i.y, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, IsOverflowSet(VU, 2))); } else VU_MACy_CLEAR(VU); + if (_Z) { dst->i.z = VU_MACz_UPDATE(VU, Fn(VU->ACC.i.z, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, IsOverflowSet(VU, 1))); } else VU_MACz_CLEAR(VU); + if (_W) { dst->i.w = VU_MACw_UPDATE(VU, Fn(VU->ACC.i.w, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, IsOverflowSet(VU, 0))); } else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); +} + +template +static __fi void applyAccurateAccumulatorTernaryMACOpBroadcast(VURegs* VU, u32 bc) +{ + VECTOR* dst = _getDst(VU); + if (_X) { dst->i.x = VU_MACx_UPDATE(VU, Fn(VU->ACC.i.x, VU->VF[_Fs_].i.x, bc, IsOverflowSet(VU, 3))); } else VU_MACx_CLEAR(VU); + if (_Y) { dst->i.y = VU_MACy_UPDATE(VU, Fn(VU->ACC.i.y, VU->VF[_Fs_].i.y, bc, IsOverflowSet(VU, 2))); } else VU_MACy_CLEAR(VU); + if (_Z) { dst->i.z = VU_MACz_UPDATE(VU, Fn(VU->ACC.i.z, VU->VF[_Fs_].i.z, bc, IsOverflowSet(VU, 1))); } else VU_MACz_CLEAR(VU); + if (_W) { dst->i.w = VU_MACw_UPDATE(VU, Fn(VU->ACC.i.w, VU->VF[_Fs_].i.w, bc, IsOverflowSet(VU, 0))); } else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); +} + static __fi float _vuOpMADD(u32 acc, u32 fs, u32 ft) { return vuDouble(acc) + vuDouble(fs) * vuDouble(ft); } +static __fi PS2Float _vuAccurateOpMADD(u32 acc, u32 fs, u32 ft) +{ + return PS2Float(acc).MulAdd(PS2Float(fs), PS2Float(ft)); +} + +static __fi PS2Float _vuAccurateOpMADDA(u32 acc, u32 fs, u32 ft, bool oflw) +{ + PS2Float accfloat = 
PS2Float(acc); + accfloat.of = oflw; + return accfloat.MulAddAcc(PS2Float(fs), PS2Float(ft)); +} + static __fi void _vuMADD(VURegs* VU) { - applyTernaryMACOp<_vuOpMADD, MACOpDst::Fd>(VU); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + applyAccurateTernaryMACOp<_vuAccurateOpMADD, MACOpDst::Fd>(VU); + else + applyTernaryMACOp<_vuOpMADD, MACOpDst::Fd>(VU); } static __fi void vuMADDbc(VURegs* VU, u32 bc) { - applyTernaryMACOpBroadcast<_vuOpMADD, MACOpDst::Fd>(VU, bc); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + applyAccurateTernaryMACOpBroadcast<_vuAccurateOpMADD, MACOpDst::Fd>(VU, bc); + else + applyTernaryMACOpBroadcast<_vuOpMADD, MACOpDst::Fd>(VU, bc); } static __fi void _vuMADDi(VURegs* VU) { vuMADDbc(VU, VU->VI[REG_I].UL); } @@ -725,12 +863,18 @@ static __fi void _vuMADDw(VURegs* VU) { vuMADDbc(VU, VU->VF[_Ft_].i.w); } static __fi void _vuMADDA(VURegs* VU) { - applyTernaryMACOp<_vuOpMADD, MACOpDst::Acc>(VU); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + applyAccurateAccumulatorTernaryMACOp<_vuAccurateOpMADDA, MACOpDst::Acc>(VU); + else + applyTernaryMACOp<_vuOpMADD, MACOpDst::Acc>(VU); } static __fi void vuMADDAbc(VURegs* VU, u32 bc) { - applyTernaryMACOpBroadcast<_vuOpMADD, MACOpDst::Acc>(VU, bc); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 
1 : 0)) + applyAccurateAccumulatorTernaryMACOpBroadcast<_vuAccurateOpMADDA, MACOpDst::Acc>(VU, bc); + else + applyTernaryMACOpBroadcast<_vuOpMADD, MACOpDst::Acc>(VU, bc); } static __fi void _vuMADDAi(VURegs* VU) { vuMADDAbc(VU, VU->VI[REG_I].UL); } @@ -745,14 +889,32 @@ static __fi float _vuOpMSUB(u32 acc, u32 fs, u32 ft) return vuDouble(acc) - vuDouble(fs) * vuDouble(ft); } +static __fi PS2Float _vuAccurateOpMSUB(u32 acc, u32 fs, u32 ft) +{ + return PS2Float(acc).MulSub(PS2Float(fs), PS2Float(ft)); +} + +static __fi PS2Float _vuAccurateOpMSUBA(u32 acc, u32 fs, u32 ft, bool oflw) +{ + PS2Float accfloat = PS2Float(acc); + accfloat.of = oflw; + return accfloat.MulSubAcc(PS2Float(fs), PS2Float(ft)); +} + static __fi void _vuMSUB(VURegs* VU) { - applyTernaryMACOp<_vuOpMSUB, MACOpDst::Fd>(VU); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + applyAccurateTernaryMACOp<_vuAccurateOpMSUB, MACOpDst::Fd>(VU); + else + applyTernaryMACOp<_vuOpMSUB, MACOpDst::Fd>(VU); } static __fi void vuMSUBbc(VURegs* VU, u32 bc) { - applyTernaryMACOpBroadcast<_vuOpMSUB, MACOpDst::Fd>(VU, bc); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + applyAccurateTernaryMACOpBroadcast<_vuAccurateOpMSUB, MACOpDst::Fd>(VU, bc); + else + applyTernaryMACOpBroadcast<_vuOpMSUB, MACOpDst::Fd>(VU, bc); } static __fi void _vuMSUBi(VURegs* VU) { vuMSUBbc(VU, VU->VI[REG_I].UL); } @@ -764,12 +926,18 @@ static __fi void _vuMSUBw(VURegs* VU) { vuMSUBbc(VU, VU->VF[_Ft_].i.w); } static __fi void _vuMSUBA(VURegs* VU) { - applyTernaryMACOp<_vuOpMSUB, MACOpDst::Acc>(VU); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 
1 : 0)) + applyAccurateAccumulatorTernaryMACOp<_vuAccurateOpMSUBA, MACOpDst::Acc>(VU); + else + applyTernaryMACOp<_vuOpMSUB, MACOpDst::Acc>(VU); } static __fi void vuMSUBAbc(VURegs* VU, u32 bc) { - applyTernaryMACOpBroadcast<_vuOpMSUB, MACOpDst::Acc>(VU, bc); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + applyAccurateAccumulatorTernaryMACOpBroadcast<_vuAccurateOpMSUBA, MACOpDst::Acc>(VU, bc); + else + applyTernaryMACOpBroadcast<_vuOpMSUB, MACOpDst::Acc>(VU, bc); } static __fi void _vuMSUBAi(VURegs* VU) { vuMSUBAbc(VU, VU->VI[REG_I].UL); } @@ -840,32 +1008,55 @@ static __fi void _vuMINIw(VURegs* VU) { applyMinMaxBroadcast(VU, VU->VF[ static __fi void _vuOPMULA(VURegs* VU) { - VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.z)); - VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.x)); - VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.y)); + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 
1 : 0)) + { + VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z)); + VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x)); + VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y)); + } + else + { + VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.z)); + VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.x)); + VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.y)); + } VU_STAT_UPDATE(VU); } static __fi void _vuOPMSUB(VURegs* VU) { VECTOR* dst; - float ftx, fty, ftz; - float fsx, fsy, fsz; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftx = vuDouble(VU->VF[_Ft_].i.x); - fty = vuDouble(VU->VF[_Ft_].i.y); - ftz = vuDouble(VU->VF[_Ft_].i.z); - fsx = vuDouble(VU->VF[_Fs_].i.x); - fsy = vuDouble(VU->VF[_Fs_].i.y); - fsz = vuDouble(VU->VF[_Fs_].i.z); + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 
1 : 0)) + { + u32 ftx = VU->VF[_Ft_].i.x; + u32 fty = VU->VF[_Ft_].i.y; + u32 ftz = VU->VF[_Ft_].i.z; + u32 fsx = VU->VF[_Fs_].i.x; + u32 fsy = VU->VF[_Fs_].i.y; + u32 fsz = VU->VF[_Fs_].i.z; - dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - fsy * ftz); - dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - fsz * ftx); - dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - fsx * fty); + dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMulSub(VU->ACC.i.x, fsy, ftz)); + dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMulSub(VU->ACC.i.y, fsz, ftx)); + dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMulSub(VU->ACC.i.z, fsx, fty)); + } + else + { + float ftx = vuDouble(VU->VF[_Ft_].i.x); + float fty = vuDouble(VU->VF[_Ft_].i.y); + float ftz = vuDouble(VU->VF[_Ft_].i.z); + float fsx = vuDouble(VU->VF[_Fs_].i.x); + float fsy = vuDouble(VU->VF[_Fs_].i.y); + float fsz = vuDouble(VU->VF[_Fs_].i.z); + + dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - fsy * ftz); + dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - fsz * ftx); + dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - fsx * fty); + } VU_STAT_UPDATE(VU); } @@ -930,57 +1121,45 @@ static __fi void _vuCLIP(VURegs* VU) static __fi void _vuDIV(VURegs* VU) { - float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); - float fs = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - - VU->statusflag &= ~0x30; - - if (ft == 0.0) + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 
1 : 0)) { - if (fs == 0.0) - VU->statusflag |= 0x10; - else - VU->statusflag |= 0x20; + PS2Float ft = PS2Float(VU->VF[_Ft_].UL[_Ftf_]); + PS2Float fs = PS2Float(VU->VF[_Fs_].UL[_Fsf_]); - if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ - (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) - VU->q.UL = 0xFF7FFFFF; + VU->statusflag &= ~0x30; + + if (ft.IsZero()) + { + if (fs.IsZero()) + VU->statusflag |= 0x10; + else + VU->statusflag |= 0x20; + + if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ + (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) + VU->q.UL = PS2Float::MIN_FLOATING_POINT_VALUE; + else + VU->q.UL = PS2Float::MAX_FLOATING_POINT_VALUE; + } else - VU->q.UL = 0x7F7FFFFF; + { + VU->q.UL = fs.Div(ft).raw; + } } else { - VU->q.F = fs / ft; - VU->q.F = vuDouble(VU->q.UL); - } -} + float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); + float fs = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); -static __fi void _vuSQRT(VURegs* VU) -{ - float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); + VU->statusflag &= ~0x30; - VU->statusflag &= ~0x30; - - if (ft < 0.0) - VU->statusflag |= 0x10; - VU->q.F = sqrt(fabs(ft)); - VU->q.F = vuDouble(VU->q.UL); -} - -static __fi void _vuRSQRT(VURegs* VU) -{ - float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); - float fs = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - float temp; - - VU->statusflag &= ~0x30; - - if (ft == 0.0) - { - VU->statusflag |= 0x20; - - if (fs != 0) + if (ft == 0.0) { + if (fs == 0.0) + VU->statusflag |= 0x10; + else + VU->statusflag |= 0x20; + if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) VU->q.UL = 0xFF7FFFFF; @@ -989,25 +1168,117 @@ static __fi void _vuRSQRT(VURegs* VU) } else { - if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ - (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) - VU->q.UL = 0x80000000; - else - VU->q.UL = 0; + VU->q.F = fs / ft; + VU->q.F = vuDouble(VU->q.UL); + } + } +} +static __fi void _vuSQRT(VURegs* VU) +{ + if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 
1 : 0)) + { + PS2Float ft = PS2Float(VU->VF[_Ft_].UL[_Ftf_]); + + VU->statusflag &= ~0x30; + + if (ft.ToDouble() < 0.0) VU->statusflag |= 0x10; + VU->q.UL = PS2Float(ft).Sqrt().raw; + } + else + { + float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); + + VU->statusflag &= ~0x30; + + if (ft < 0.0) + VU->statusflag |= 0x10; + VU->q.F = sqrt(fabs(ft)); + VU->q.F = vuDouble(VU->q.UL); + } +} + +static __fi void _vuRSQRT(VURegs* VU) +{ + if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) + { + PS2Float ft = PS2Float(VU->VF[_Ft_].UL[_Ftf_]); + PS2Float fs = PS2Float(VU->VF[_Fs_].UL[_Fsf_]); + + VU->statusflag &= ~0x30; + + if (ft.IsZero()) + { + VU->statusflag |= 0x20; + + if (!fs.IsZero()) + { + if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ + (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) + VU->q.UL = PS2Float::MIN_FLOATING_POINT_VALUE; + else + VU->q.UL = PS2Float::MAX_FLOATING_POINT_VALUE; + } + else + { + if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ + (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) + VU->q.UL = 0x80000000; + else + VU->q.UL = 0; + + VU->statusflag |= 0x10; + } + } + else + { + if (ft.ToDouble() < 0.0) + VU->statusflag |= 0x10; + + VU->q.UL = fs.Rsqrt(PS2Float(ft)).raw; } } else { - if (ft < 0.0) - { - VU->statusflag |= 0x10; - } + float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); + float fs = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); + float temp; - temp = sqrt(fabs(ft)); - VU->q.F = fs / temp; - VU->q.F = vuDouble(VU->q.UL); + VU->statusflag &= ~0x30; + + if (ft == 0.0) + { + VU->statusflag |= 0x20; + + if (fs != 0) + { + if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ + (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) + VU->q.UL = 0xFF7FFFFF; + else + VU->q.UL = 0x7F7FFFFF; + } + else + { + if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ + (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) + VU->q.UL = 0x80000000; + else + VU->q.UL = 0; + + VU->statusflag |= 0x10; + } + } + else + { + if (ft < 0.0) + VU->statusflag |= 0x10; + + temp = sqrt(fabs(ft)); + VU->q.F = fs / temp; + VU->q.F = vuDouble(VU->q.UL); + } } } @@ 
-1651,45 +1922,61 @@ static __ri void _vuWAITP(VURegs* VU) static __ri void _vuESADD(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z); + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].i.x).ESADD(PS2Float(VU->VF[_Fs_].i.y), PS2Float(VU->VF[_Fs_].i.z)).raw; } + else + { + float p = vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z); - VU->p.F = p; + VU->p.F = p; + } } static __ri void _vuERSADD(VURegs* VU) { - float p = (vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x)) + (vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y)) + (vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z)); + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].i.x).ERSADD(PS2Float(VU->VF[_Fs_].i.y), PS2Float(VU->VF[_Fs_].i.z)).raw; } + else + { + float p = (vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x)) + (vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y)) + (vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z)); - if (p != 0.0) - p = 1.0f / p; + if (p != 0.0) + p = 1.0f / p; - VU->p.F = p; + VU->p.F = p; + } } static __ri void _vuELENG(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z); - - if (p >= 0) + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 
1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].i.x).ELENG(PS2Float(VU->VF[_Fs_].i.y), PS2Float(VU->VF[_Fs_].i.z)).raw; } + else { - p = sqrt(p); + float p = vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z); + + if (p >= 0) + { + p = sqrt(p); + } + VU->p.F = p; } - VU->p.F = p; } static __ri void _vuERLENG(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z); - - if (p >= 0) + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].i.x).ERLENG(PS2Float(VU->VF[_Fs_].i.y), PS2Float(VU->VF[_Fs_].i.z)).raw; } + else { - p = sqrt(p); - if (p != 0) + float p = vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z); + + if (p >= 0) { - p = 1.0f / p; + p = sqrt(p); + if (p != 0) + { + p = 1.0f / p; + } } + VU->p.F = p; } - VU->p.F = p; } @@ -1709,99 +1996,140 @@ static __ri float _vuCalculateEATAN(float inputvalue) { return result; } +static __ri PS2Float _vuCalculateAccurateEATAN(PS2Float inputvalue) +{ + return inputvalue.EATAN(); +} + static __ri void _vuEATAN(VURegs* VU) { - float p = _vuCalculateEATAN(vuDouble(VU->VF[_Fs_].UL[_Fsf_])); - VU->p.F = p; + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) { VU->p.UL = _vuCalculateAccurateEATAN(PS2Float(VU->VF[_Fs_].UL[_Fsf_])).raw; } + else + { + float p = _vuCalculateEATAN(vuDouble(VU->VF[_Fs_].UL[_Fsf_])); + VU->p.F = p; + } } static __ri void _vuEATANxy(VURegs* VU) { - float p = 0; - if (vuDouble(VU->VF[_Fs_].i.x) != 0) + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 
1 : 0)) { VU->p.UL = _vuCalculateAccurateEATAN(PS2Float(VU->VF[_Fs_].i.y).Div(PS2Float(VU->VF[_Fs_].i.x))).raw; } + else { - p = _vuCalculateEATAN(vuDouble(VU->VF[_Fs_].i.y) / vuDouble(VU->VF[_Fs_].i.x)); + float p = 0; + if (vuDouble(VU->VF[_Fs_].i.x) != 0) + { + p = _vuCalculateEATAN(vuDouble(VU->VF[_Fs_].i.y) / vuDouble(VU->VF[_Fs_].i.x)); + } + VU->p.F = p; } - VU->p.F = p; } static __ri void _vuEATANxz(VURegs* VU) { - float p = 0; - if (vuDouble(VU->VF[_Fs_].i.x) != 0) + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) { VU->p.UL = _vuCalculateAccurateEATAN(PS2Float(VU->VF[_Fs_].i.z).Div(PS2Float(VU->VF[_Fs_].i.x))).raw; } + else { - p = _vuCalculateEATAN(vuDouble(VU->VF[_Fs_].i.z) / vuDouble(VU->VF[_Fs_].i.x)); + float p = 0; + if (vuDouble(VU->VF[_Fs_].i.x) != 0) + { + p = _vuCalculateEATAN(vuDouble(VU->VF[_Fs_].i.z) / vuDouble(VU->VF[_Fs_].i.x)); + } + VU->p.F = p; } - VU->p.F = p; } static __ri void _vuESUM(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) + vuDouble(VU->VF[_Fs_].i.w); - VU->p.F = p; + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].i.x).ESUM(PS2Float(VU->VF[_Fs_].i.y), PS2Float(VU->VF[_Fs_].i.z), PS2Float(VU->VF[_Fs_].i.w)).raw; } + else + { + float p = vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) + vuDouble(VU->VF[_Fs_].i.w); + VU->p.F = p; + } } static __ri void _vuERCPR(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - - if (p != 0) + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].UL[_Fsf_]).ERCPR().raw; } + else { - p = 1.0 / p; - } + float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - VU->p.F = p; + if (p != 0) + { + p = 1.0 / p; + } + + VU->p.F = p; + } } static __ri void _vuESQRT(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - - if (p >= 0) + if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 
1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].UL[_Fsf_]).ESQRT().raw; } + else { - p = sqrt(p); - } + float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - VU->p.F = p; + if (p >= 0) + { + p = sqrt(p); + } + + VU->p.F = p; + } } static __ri void _vuERSQRT(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - - if (p >= 0) + if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].UL[_Fsf_]).ERSQRT().raw; } + else { - p = sqrt(p); - if (p) - { - p = 1.0f / p; - } - } + float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - VU->p.F = p; + if (p >= 0) + { + p = sqrt(p); + if (p) + { + p = 1.0f / p; + } + } + + VU->p.F = p; + } } static __ri void _vuESIN(VURegs* VU) { - float sinconsts[5] = {1.0f, -0.166666567325592f, 0.008333025500178f, -0.000198074136279f, 0.000002601886990f}; - float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].UL[_Fsf_]).ESIN().raw; } + else + { + float sinconsts[5] = {1.0f, -0.166666567325592f, 0.008333025500178f, -0.000198074136279f, 0.000002601886990f}; + float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - p = (sinconsts[0] * p) + (sinconsts[1] * pow(p, 3)) + (sinconsts[2] * pow(p, 5)) + (sinconsts[3] * pow(p, 7)) + (sinconsts[4] * pow(p, 9)); - VU->p.F = vuDouble(*(u32*)&p); + p = (sinconsts[0] * p) + (sinconsts[1] * pow(p, 3)) + (sinconsts[2] * pow(p, 5)) + (sinconsts[3] * pow(p, 7)) + (sinconsts[4] * pow(p, 9)); + VU->p.F = vuDouble(*(u32*)&p); + } } static __ri void _vuEEXP(VURegs* VU) { - float consts[6] = {0.249998688697815f, 0.031257584691048f, 0.002591371303424f, - 0.000171562001924f, 0.000005430199963f, 0.000000690600018f}; - float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 
1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].UL[_Fsf_]).EEXP().raw; } + else + { + float consts[6] = {0.249998688697815f, 0.031257584691048f, 0.002591371303424f, + 0.000171562001924f, 0.000005430199963f, 0.000000690600018f}; + float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - p = 1.0f + (consts[0] * p) + (consts[1] * pow(p, 2)) + (consts[2] * pow(p, 3)) + (consts[3] * pow(p, 4)) + (consts[4] * pow(p, 5)) + (consts[5] * pow(p, 6)); - p = pow(p, 4); - p = vuDouble(*(u32*)&p); - p = 1 / p; + p = 1.0f + (consts[0] * p) + (consts[1] * pow(p, 2)) + (consts[2] * pow(p, 3)) + (consts[3] * pow(p, 4)) + (consts[4] * pow(p, 5)) + (consts[5] * pow(p, 6)); + p = pow(p, 4); + p = vuDouble(*(u32*)&p); + p = 1 / p; - VU->p.F = p; + VU->p.F = p; + } } static __ri void _vuXITOP(VURegs* VU) diff --git a/pcsx2/pcsx2.vcxproj b/pcsx2/pcsx2.vcxproj index 1875a886df..c878045d00 100644 --- a/pcsx2/pcsx2.vcxproj +++ b/pcsx2/pcsx2.vcxproj @@ -284,6 +284,7 @@ + @@ -730,6 +731,7 @@ + diff --git a/pcsx2/pcsx2.vcxproj.filters b/pcsx2/pcsx2.vcxproj.filters index 8ede454a21..c2e13652e2 100644 --- a/pcsx2/pcsx2.vcxproj.filters +++ b/pcsx2/pcsx2.vcxproj.filters @@ -289,6 +289,9 @@ {cd8ec519-2196-43f7-86de-7faced2d4296} + + {e244cd3f-4431-4628-a294-d22c9614133b} + @@ -1448,6 +1451,8 @@ System\Ps2\GS\Renderers\Software + + System\Ps2\EmotionEngine\Shared @@ -2410,6 +2415,9 @@ System\Ps2\Iop\SIO\PAD + + System\Ps2\EmotionEngine\Shared + From 9ccca2ba26ffe630d7d9600a0fe950bc42b84016 Mon Sep 17 00:00:00 2001 From: GitHubProUser67 <127040195+GitHubProUser67@users.noreply.github.com> Date: Mon, 9 Jun 2025 21:32:35 +0200 Subject: [PATCH 2/4] [Soft-Float] - Fixes add/sub edge cases for denormals handling. This commit fixes a calculation mistake in add/sub when both numbers are denormals.
--- pcsx2/PS2Float.cpp | 38 ++++++++++++++++---------------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/pcsx2/PS2Float.cpp b/pcsx2/PS2Float.cpp index 590d10d360..840bafc98f 100644 --- a/pcsx2/PS2Float.cpp +++ b/pcsx2/PS2Float.cpp @@ -185,7 +185,14 @@ PS2Float PS2Float::Add(PS2Float addend) else if (!IsDenormalized() && addend.IsDenormalized()) return PS2Float(sign, Exponent(), Mantissa()); else if (IsDenormalized() && addend.IsDenormalized()) - return PS2Float(sign, 0, 0); + { + if (!Sign() || !addend.Sign()) + return PS2Float(false, 0, 0); + else if (Sign() && addend.Sign()) + return PS2Float(true, 0, 0); + else + Console.Error("Unhandled addition operation flags"); + } else Console.Error("Both numbers are not denormalized"); @@ -227,7 +234,14 @@ PS2Float PS2Float::Sub(PS2Float subtrahend) else if (!IsDenormalized() && subtrahend.IsDenormalized()) return PS2Float(sign, Exponent(), Mantissa()); else if (IsDenormalized() && subtrahend.IsDenormalized()) - return PS2Float(sign, 0, 0); + { + if (!Sign() || subtrahend.Sign()) + return PS2Float(false, 0, 0); + else if (Sign() && !subtrahend.Sign()) + return PS2Float(true, 0, 0); + else + Console.Error("Unhandled subtraction operation flags"); + } else Console.Error("Both numbers are not denormalized"); @@ -883,31 +897,11 @@ bool PS2Float::DetermineMultiplicationDivisionOperationSign(PS2Float a, PS2Float bool PS2Float::DetermineAdditionOperationSign(PS2Float a, PS2Float b) { - if (a.IsZero() && b.IsZero()) - { - if (!a.Sign() || !b.Sign()) - return false; - else if (a.Sign() && b.Sign()) - return true; - else - Console.Error("Unhandled addition operation flags"); - } - return a.CompareOperands(b) >= 0 ? 
a.Sign() : b.Sign(); } bool PS2Float::DetermineSubtractionOperationSign(PS2Float a, PS2Float b) { - if (a.IsZero() && b.IsZero()) - { - if (!a.Sign() || b.Sign()) - return false; - else if (a.Sign() && !b.Sign()) - return true; - else - Console.Error("Unhandled subtraction operation flags"); - } - return a.CompareOperands(b) >= 0 ? a.Sign() : !b.Sign(); } From cf64b86c1afe9a2eaa60941dda0d7a1b477cb242 Mon Sep 17 00:00:00 2001 From: GitHubProUser67 <127040195+GitHubProUser67@users.noreply.github.com> Date: Mon, 29 Sep 2025 20:27:52 +0200 Subject: [PATCH 3/4] [Soft-Float] - Merges the softloat-ui branch from: https://github.com/TheLastRar/pcsx2/tree/softfoat-ui I am sorry for the manual merging, hopefully credits are enough in the commit title. --- common/SettingsWrapper.cpp | 20 + common/SettingsWrapper.h | 2 + pcsx2-qt/Settings/AdvancedSettingsWidget.cpp | 12 +- pcsx2-qt/Settings/AdvancedSettingsWidget.ui | 783 +++---------------- pcsx2/Config.h | 37 +- pcsx2/FPU.cpp | 22 +- pcsx2/Pcsx2Config.cpp | 21 +- pcsx2/VUops.cpp | 58 +- 8 files changed, 196 insertions(+), 759 deletions(-) diff --git a/common/SettingsWrapper.cpp b/common/SettingsWrapper.cpp index f11b397f89..da3775ba39 100644 --- a/common/SettingsWrapper.cpp +++ b/common/SettingsWrapper.cpp @@ -23,6 +23,26 @@ SettingsWrapper::SettingsWrapper(SettingsInterface& si) { } +int SettingsWrapper::EntryFlagsBitfield(const char* section, const char* var_prefix, int value, const std::pair* entries, const int defvalue /*= 0.0*/) +{ + std::string name; + int outval = 0; + + int i = 0; + while (entries[i].second != nullptr) + { + name.assign(var_prefix); + name.append(entries[i].second); + const int bit = entries[i].first; + const bool def = defvalue & bit; + bool val = value & bit; + Entry(section, name.c_str(), val, def); + outval |= val ? 
bit : 0; + i++; + } + return outval; +} + SettingsLoadWrapper::SettingsLoadWrapper(SettingsInterface& si) : SettingsWrapper(si) { diff --git a/common/SettingsWrapper.h b/common/SettingsWrapper.h index 1f174385eb..dbaff271c8 100644 --- a/common/SettingsWrapper.h +++ b/common/SettingsWrapper.h @@ -9,6 +9,7 @@ #include "common/SmallString.h" #include +#include // Helper class which loads or saves depending on the derived class. class SettingsWrapper @@ -29,6 +30,7 @@ public: // This special form of Entry is provided for bitfields, which cannot be passed by reference. virtual bool EntryBitBool(const char* section, const char* var, bool value, const bool defvalue = false) = 0; virtual int EntryBitfield(const char* section, const char* var, int value, const int defvalue = 0) = 0; + int EntryFlagsBitfield(const char* section, const char* var_prefix, int value, const std::pair* entries, const int defvalue = 0); template void EnumEntry(const char* section, const char* var, T& value, const char* const* enumArray = nullptr, const T defvalue = (T)0) diff --git a/pcsx2-qt/Settings/AdvancedSettingsWidget.cpp b/pcsx2-qt/Settings/AdvancedSettingsWidget.cpp index be7e5bdb0a..73cac5f67b 100644 --- a/pcsx2-qt/Settings/AdvancedSettingsWidget.cpp +++ b/pcsx2-qt/Settings/AdvancedSettingsWidget.cpp @@ -47,14 +47,14 @@ AdvancedSettingsWidget::AdvancedSettingsWidget(SettingsWindow* settings_dialog, connect(m_ui.vu1ClampMode, QOverload::of(&QComboBox::currentIndexChanged), [this](int index) { setClampingMode(1, index); }); SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.eeSoftAddSub, "EmuCore/CPU/Recompiler", "fpuSoftAddSub", false); - SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.eeSoftMulDiv, "EmuCore/CPU/Recompiler", "fpuSoftMulDiv", false); - SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.eeSoftSqrt, "EmuCore/CPU/Recompiler", "fpuSoftSqrt", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.eeSoftMul, "EmuCore/CPU/Recompiler", "fpuSoftMul", false); 
+ SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.eeSoftDivSqrt, "EmuCore/CPU/Recompiler", "fpuSoftDivSqrt", false); SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu0SoftAddSub, "EmuCore/CPU/Recompiler", "vu0SoftAddSub", false); - SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu0SoftMulDiv, "EmuCore/CPU/Recompiler", "vu0SoftMulDiv", false); - SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu0SoftSqrt, "EmuCore/CPU/Recompiler", "vu0SoftSqrt", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu0SoftMul, "EmuCore/CPU/Recompiler", "vu0SoftMul", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu0SoftDivSqrt, "EmuCore/CPU/Recompiler", "vu0SoftDivSqrt", false); SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu1SoftAddSub, "EmuCore/CPU/Recompiler", "vu1SoftAddSub", false); - SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu1SoftMulDiv, "EmuCore/CPU/Recompiler", "vu1SoftMulDiv", false); - SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu1SoftSqrt, "EmuCore/CPU/Recompiler", "vu1SoftSqrt", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu1SoftMul, "EmuCore/CPU/Recompiler", "vu1SoftMul", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu1SoftDivSqrt, "EmuCore/CPU/Recompiler", "vu1SoftDivSqrt", false); SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.iopRecompiler, "EmuCore/CPU/Recompiler", "EnableIOP", true); diff --git a/pcsx2-qt/Settings/AdvancedSettingsWidget.ui b/pcsx2-qt/Settings/AdvancedSettingsWidget.ui index 0ff4431b92..55963ac346 100644 --- a/pcsx2-qt/Settings/AdvancedSettingsWidget.ui +++ b/pcsx2-qt/Settings/AdvancedSettingsWidget.ui @@ -7,7 +7,7 @@ 0 0 700 - 1100 + 1190 @@ -19,685 +19,6 @@ true - - - - 0 - 0 - 790 - 1317 - - - - - 0 - - - 0 - - - 0 - - - - - Changing these options may cause games to become non-functional. Modify at your own risk, the PCSX2 team will not provide support for configurations with these settings changed. 
- - - true - - - - - - - EmotionEngine (MIPS-IV) - - - - - - Rounding Mode: - - - - - - - - Nearest - - - - - Negative - - - - - Positive - - - - - Chop/Zero (Default) - - - - - - - - Clamping Mode: - - - - - - - - Nearest (Default) - - - - - Negative - - - - - Positive - - - - - Chop/Zero - - - - - - - - - - Wait Loop Detection - - - - - - - Enable Recompiler - - - - - - - Enable Fast Memory Access - - - - - - - Enable Cache (Slow) - - - - - - - INTC Spin Detection - - - - - - - Pause On TLB Miss - - - - - - - Enable 128MB RAM (Dev Console) - - - - - - - - - - None - - - - - Normal (Default) - - - - - Extra + Preserve Sign - - - - - Full - - - - - - - - Division Rounding Mode: - - - - - - - Software Float - - - - - - Multiplication/Division - - - - - - - Addition/Subtraction - - - - - - - Square Root - - - - - - - - - - - - - Vector Units (VU) - - - - - - VU1 Rounding Mode: - - - - - - - - Nearest - - - - - Negative - - - - - Positive - - - - - Chop/Zero (Default) - - - - - - - - VU1 Clamping Mode: - - - - - - - VU0 Rounding Mode: - - - - - - - VU1 Software Float - - - - - - Multiplication/Division - - - - - - - Addition/Subtraction - - - - - - - Float Square Root - - - - - - - - - - VU0 Software Float - - - - - - Multiplication/Division - - - - - - - Addition/Subtraction - - - - - - - Square Root - - - - - - - - - - - Nearest - - - - - Negative - - - - - Positive - - - - - Chop/Zero (Default) - - - - - - - - - None - - - - - Normal (Default) - - - - - Extra - - - - - Extra + Preserve Sign - - - - - - - - - - mVU Flag Hack - - - - - - - Enable VU1 Recompiler - - - - - - - Enable VU0 Recompiler (Micro Mode) - - - - - - - Enable Instant VU1 - - - - - - - - - VU0 Clamping Mode: - - - - - - - - None - - - - - Normal (Default) - - - - - Extra - - - - - Extra + Preserve Sign - - - - - - - - - - - I/O Processor (IOP, MIPS-I) - - - - - - Enable Recompiler - - - - - - - - - - Game Settings - - - - - - Enable Game Fixes - - - - - - - Enable Compatibility Patches - - - - - - 
- - - - Savestate Settings - - - - - - Save State On Shutdown - - - - - - - - Low (Fast) - - - - - Medium (Recommended) - - - - - High - - - - - Very High (Slow, Not Recommended) - - - - - - - - - Uncompressed - - - - - Deflate64 - - - - - Zstandard - - - - - LZMA2 - - - - - - - - Compression Level: - - - - - - - Create Save State Backups - - - - - - - Compression Method: - - - - - - - Use Save State Selector - - - - - - - - - - Frame Rate Control - - - - - - hz - - - 10.000000000000000 - - - 300.000000000000000 - - - 0.010000000000000 - - - - - - - hz - - - 10.000000000000000 - - - 300.000000000000000 - - - 0.010000000000000 - - - - - - - PAL Frame Rate: - - - - - - - NTSC Frame Rate: - - - - - - - - - - PINE Settings - - - - - - - 0 - 0 - - - - - - - - - 0 - 0 - - - - Slot: - - - - - - - Enable - - - - - - - - - - Qt::Orientation::Vertical - - - - 20 - 3 - - - - - - @@ -799,7 +120,7 @@ - + @@ -852,6 +173,36 @@ + + + + Software Float + + + + + + Multiplication + + + + + + + Addition/Subtraction + + + + + + + Division/Square Root + + + + + + @@ -861,7 +212,7 @@ Vector Units (VU) - + VU1 Rounding Mode: @@ -892,7 +243,67 @@ - + + + + VU1 Software Float + + + + + + Multiplication + + + + + + + Addition/Subtraction + + + + + + + Division/Square Root + + + + + + + + + + VU0 Software Float + + + + + + Multiplication + + + + + + + Addition/Subtraction + + + + + + + Division/Square Root + + + + + + + @@ -962,14 +373,14 @@ - + VU1 Clamping Mode: - + @@ -993,7 +404,7 @@ - + diff --git a/pcsx2/Config.h b/pcsx2/Config.h index c7b6440e50..d35b753f69 100644 --- a/pcsx2/Config.h +++ b/pcsx2/Config.h @@ -12,6 +12,7 @@ #include #include #include +#include // Macro used for removing some of the redtape involved in defining bitfield/union helpers. 
// @@ -621,10 +622,8 @@ struct Pcsx2Config vu0SignOverflow : 1, vu0Underflow : 1; - bool - vu0SoftAddSub : 1, - vu0SoftMulDiv : 1, - vu0SoftSqrt : 1; + unsigned int + vu0SoftFloat : 3; bool vu1Overflow : 1, @@ -632,20 +631,16 @@ struct Pcsx2Config vu1SignOverflow : 1, vu1Underflow : 1; - bool - vu1SoftAddSub : 1, - vu1SoftMulDiv : 1, - vu1SoftSqrt : 1; + unsigned int + vu1SoftFloat : 3; bool fpuOverflow : 1, fpuExtraOverflow : 1, fpuFullMode : 1; - bool - fpuSoftAddSub : 1, - fpuSoftMulDiv : 1, - fpuSoftSqrt : 1; + unsigned int + fpuSoftFloat : 3; bool EnableEECache : 1; @@ -1487,18 +1482,24 @@ namespace EmuFolders #define CHECK_VU_SIGN_OVERFLOW(vunum) (((vunum) == 0) ? EmuConfig.Cpu.Recompiler.vu0SignOverflow : EmuConfig.Cpu.Recompiler.vu1SignOverflow) #define CHECK_VU_UNDERFLOW(vunum) (((vunum) == 0) ? EmuConfig.Cpu.Recompiler.vu0Underflow : EmuConfig.Cpu.Recompiler.vu1Underflow) -#define CHECK_VU_SOFT_ADDSUB(vunum) (((vunum) == 0) ? EmuConfig.Cpu.Recompiler.vu0SoftAddSub : EmuConfig.Cpu.Recompiler.vu1SoftAddSub) -#define CHECK_VU_SOFT_MULDIV(vunum) (((vunum) == 0) ? EmuConfig.Cpu.Recompiler.vu0SoftMulDiv : EmuConfig.Cpu.Recompiler.vu1SoftMulDiv) -#define CHECK_VU_SOFT_SQRT(vunum) (((vunum) == 0) ? EmuConfig.Cpu.Recompiler.vu0SoftSqrt : EmuConfig.Cpu.Recompiler.vu1SoftSqrt) +#define SOFT_FLOAT_ADDSUB 0x1 +#define SOFT_FLOAT_MUL 0x2 +#define SOFT_FLOAT_DIVSQRT 0x4 + +#define CHECK_VU_SOFT_ADDSUB(vunum) ((((vunum) == 0) ? EmuConfig.Cpu.Recompiler.vu0SoftFloat : EmuConfig.Cpu.Recompiler.vu1SoftFloat) & SOFT_FLOAT_ADDSUB) +#define CHECK_VU_SOFT_MUL(vunum) ((((vunum) == 0) ? EmuConfig.Cpu.Recompiler.vu0SoftFloat : EmuConfig.Cpu.Recompiler.vu1SoftFloat) & SOFT_FLOAT_MUL) +#define CHECK_VU_SOFT_DIVSQRT(vunum) ((((vunum) == 0) ? EmuConfig.Cpu.Recompiler.vu0SoftFloat : EmuConfig.Cpu.Recompiler.vu1SoftFloat) & SOFT_FLOAT_DIVSQRT) +#define CHECK_VU_SOFT(vunum) (((vunum) == 0) ? 
EmuConfig.Cpu.Recompiler.vu0SoftFloat : EmuConfig.Cpu.Recompiler.vu1SoftFloat) #define CHECK_FPU_OVERFLOW (EmuConfig.Cpu.Recompiler.fpuOverflow) #define CHECK_FPU_EXTRA_OVERFLOW (EmuConfig.Cpu.Recompiler.fpuExtraOverflow) // If enabled, Operands are checked for infinities before being used in the FPU recs #define CHECK_FPU_EXTRA_FLAGS 1 // Always enabled now // Sets D/I flags on FPU instructions #define CHECK_FPU_FULL (EmuConfig.Cpu.Recompiler.fpuFullMode) -#define CHECK_FPU_SOFT_ADDSUB (EmuConfig.Cpu.Recompiler.fpuSoftAddSub) -#define CHECK_FPU_SOFT_MULDIV (EmuConfig.Cpu.Recompiler.fpuSoftMulDiv) -#define CHECK_FPU_SOFT_SQRT (EmuConfig.Cpu.Recompiler.fpuSoftSqrt) +#define CHECK_FPU_SOFT_ADDSUB (EmuConfig.Cpu.Recompiler.fpuSoftFloat & SOFT_FLOAT_ADDSUB) +#define CHECK_FPU_SOFT_MUL (EmuConfig.Cpu.Recompiler.fpuSoftFloat & SOFT_FLOAT_MUL) +#define CHECK_FPU_SOFT_DIVSQRT (EmuConfig.Cpu.Recompiler.fpuSoftFloat & SOFT_FLOAT_DIVSQRT) +#define CHECK_FPU_SOFT (EmuConfig.Cpu.Recompiler.fpuSoftFloat) //------------ EE Recompiler defines - Comment to disable a recompiler --------------- diff --git a/pcsx2/FPU.cpp b/pcsx2/FPU.cpp index aeab43ea67..c7a99cd37d 100644 --- a/pcsx2/FPU.cpp +++ b/pcsx2/FPU.cpp @@ -338,7 +338,7 @@ void CTC1() { } void CVT_S() { - if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) { _FdValUl_ = PS2Float::Itof(0, _FsValSl_).raw; } + if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MUL || CHECK_FPU_SOFT_DIVSQRT) { _FdValUl_ = PS2Float::Itof(0, _FsValSl_).raw; } else { _FdValf_ = (float)_FsValSl_; @@ -347,14 +347,14 @@ void CVT_S() { } void CVT_W() { - if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) { _FdValSl_ = PS2Float::Ftoi(0, _FsValUl_); } + if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MUL || CHECK_FPU_SOFT_DIVSQRT) { _FdValSl_ = PS2Float::Ftoi(0, _FsValUl_); } else if ( ( _FsValUl_ & 0x7F800000 ) <= 0x4E800000 ) { _FdValSl_ = (s32)_FsValf_; } else if ( ( _FsValUl_ & 0x80000000 ) == 0 ) { _FdValUl_ = 
0x7fffffff; } else { _FdValUl_ = 0x80000000; } } void DIV_S() { - if (CHECK_FPU_SOFT_MULDIV) + if (CHECK_FPU_SOFT_DIVSQRT) { PS2Float divres = fpuAccurateDiv(_FsValUl_, _FtValUl_); _FdValUl_ = divres.raw; @@ -376,7 +376,7 @@ void DIV_S() { method provides a similar outcome and is faster. (cottonvibes) */ void MADD_S() { - if (CHECK_FPU_SOFT_ADDSUB && CHECK_FPU_SOFT_MULDIV) + if (CHECK_FPU_SOFT_ADDSUB && CHECK_FPU_SOFT_MUL) { PS2Float fmacres = fpuAccurateMulAdd(_FAValUl_, _FsValUl_, _FtValUl_); _FdValUl_ = fmacres.raw; @@ -394,7 +394,7 @@ void MADD_S() { } void MADDA_S() { - if (CHECK_FPU_SOFT_ADDSUB && CHECK_FPU_SOFT_MULDIV) + if (CHECK_FPU_SOFT_ADDSUB && CHECK_FPU_SOFT_MUL) { PS2Float fmacres = fpuAccurateMulAdd(_FAValUl_, _FsValUl_, _FtValUl_); _FAValUl_ = fmacres.raw; @@ -429,7 +429,7 @@ void MOV_S() { } void MSUB_S() { - if (CHECK_FPU_SOFT_ADDSUB && CHECK_FPU_SOFT_MULDIV) + if (CHECK_FPU_SOFT_ADDSUB && CHECK_FPU_SOFT_MUL) { PS2Float fmacres = fpuAccurateMulSub(_FAValUl_, _FsValUl_, _FtValUl_); _FdValUl_ = fmacres.raw; @@ -447,7 +447,7 @@ void MSUB_S() { } void MSUBA_S() { - if (CHECK_FPU_SOFT_ADDSUB && CHECK_FPU_SOFT_MULDIV) + if (CHECK_FPU_SOFT_ADDSUB && CHECK_FPU_SOFT_MUL) { PS2Float fmacres = fpuAccurateMulSub(_FAValUl_, _FsValUl_, _FtValUl_); _FAValUl_ = fmacres.raw; @@ -467,7 +467,7 @@ void MTC1() { } void MUL_S() { - if (CHECK_FPU_SOFT_MULDIV) + if (CHECK_FPU_SOFT_MUL) { PS2Float mulres = fpuAccurateMul(_FsValUl_, _FtValUl_); _FdValUl_ = mulres.raw; @@ -483,7 +483,7 @@ void MUL_S() { } void MULA_S() { - if (CHECK_FPU_SOFT_MULDIV) + if (CHECK_FPU_SOFT_MUL) { PS2Float mulres = fpuAccurateMul(_FsValUl_, _FtValUl_); _FAValUl_ = mulres.raw; @@ -506,7 +506,7 @@ void NEG_S() { void RSQRT_S() { clearFPUFlags(FPUflagD | FPUflagI); - if (CHECK_FPU_SOFT_SQRT) + if (CHECK_FPU_SOFT_DIVSQRT) { PS2Float rsqrtres = PS2Float(_FsValUl_).Rsqrt(_FtValUl_); _FdValUl_ = rsqrtres.raw; @@ -538,7 +538,7 @@ void RSQRT_S() { void SQRT_S() { clearFPUFlags(FPUflagI | FPUflagD); - if 
(CHECK_FPU_SOFT_SQRT) + if (CHECK_FPU_SOFT_DIVSQRT) { PS2Float sqrtres = PS2Float(_FtValUl_).Sqrt(); _FdValUl_ = sqrtres.raw; diff --git a/pcsx2/Pcsx2Config.cpp b/pcsx2/Pcsx2Config.cpp index 9320b5d1d4..d80d29d415 100644 --- a/pcsx2/Pcsx2Config.cpp +++ b/pcsx2/Pcsx2Config.cpp @@ -533,31 +533,34 @@ void Pcsx2Config::RecompilerOptions::LoadSave(SettingsWrapper& wrap) SettingsWrapBitBool(EnableFastmem); SettingsWrapBitBool(PauseOnTLBMiss); + // clang-format off + static constexpr std::pair s_softfloat_variables[] = { + {SOFT_FLOAT_ADDSUB, "SoftAddSub"}, + {SOFT_FLOAT_MUL, "SoftMul"}, + {SOFT_FLOAT_DIVSQRT, "SoftDivSqrt"}, + {0, nullptr}, + }; + // clang-format on + SettingsWrapBitBool(vu0Overflow); SettingsWrapBitBool(vu0ExtraOverflow); SettingsWrapBitBool(vu0SignOverflow); SettingsWrapBitBool(vu0Underflow); - SettingsWrapBitBool(vu0SoftAddSub); - SettingsWrapBitBool(vu0SoftMulDiv); - SettingsWrapBitBool(vu0SoftSqrt); + vu0SoftFloat = wrap.EntryFlagsBitfield(CURRENT_SETTINGS_SECTION, "vu0", vu0SoftFloat, s_softfloat_variables); SettingsWrapBitBool(vu1Overflow); SettingsWrapBitBool(vu1ExtraOverflow); SettingsWrapBitBool(vu1SignOverflow); SettingsWrapBitBool(vu1Underflow); - SettingsWrapBitBool(vu1SoftAddSub); - SettingsWrapBitBool(vu1SoftMulDiv); - SettingsWrapBitBool(vu1SoftSqrt); + vu1SoftFloat = wrap.EntryFlagsBitfield(CURRENT_SETTINGS_SECTION, "vu1", vu1SoftFloat, s_softfloat_variables); SettingsWrapBitBool(fpuOverflow); SettingsWrapBitBool(fpuExtraOverflow); SettingsWrapBitBool(fpuFullMode); - SettingsWrapBitBool(fpuSoftAddSub); - SettingsWrapBitBool(fpuSoftMulDiv); - SettingsWrapBitBool(fpuSoftSqrt); + fpuSoftFloat = wrap.EntryFlagsBitfield(CURRENT_SETTINGS_SECTION, "fpu", fpuSoftFloat, s_softfloat_variables); } u32 Pcsx2Config::RecompilerOptions::GetEEClampMode() const diff --git a/pcsx2/VUops.cpp b/pcsx2/VUops.cpp index b4fae0cac8..503bc235c5 100644 --- a/pcsx2/VUops.cpp +++ b/pcsx2/VUops.cpp @@ -710,7 +710,7 @@ static __fi PS2Float _vuAccurateOpMUL(u32 fs, u32 
ft) static __fi void _vuMUL(VURegs* VU) { - if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + if (CHECK_VU_SOFT_MUL((VU == &VU1) ? 1 : 0)) applyAccurateBinaryMACOp<_vuAccurateOpMUL, MACOpDst::Fd>(VU); else applyBinaryMACOp<_vuOpMUL, MACOpDst::Fd>(VU); @@ -718,7 +718,7 @@ static __fi void _vuMUL(VURegs* VU) static __fi void vuMULbc(VURegs* VU, u32 bc) { - if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + if (CHECK_VU_SOFT_MUL((VU == &VU1) ? 1 : 0)) applyAccurateBinaryMACOpBroadcast<_vuAccurateOpMUL, MACOpDst::Fd>(VU, bc); else applyBinaryMACOpBroadcast<_vuOpMUL, MACOpDst::Fd>(VU, bc); @@ -734,7 +734,7 @@ static __fi void _vuMULw(VURegs* VU) { vuMULbc(VU, VU->VF[_Ft_].i.w); } static __fi void _vuMULA(VURegs* VU) { - if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + if (CHECK_VU_SOFT_MUL((VU == &VU1) ? 1 : 0)) applyAccurateBinaryMACOp<_vuAccurateOpMUL, MACOpDst::Acc>(VU); else applyBinaryMACOp<_vuOpMUL, MACOpDst::Acc>(VU); @@ -742,7 +742,7 @@ static __fi void _vuMULA(VURegs* VU) static __fi void vuMULAbc(VURegs* VU, u32 bc) { - if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + if (CHECK_VU_SOFT_MUL((VU == &VU1) ? 1 : 0)) applyAccurateBinaryMACOpBroadcast<_vuAccurateOpMUL, MACOpDst::Acc>(VU, bc); else applyBinaryMACOpBroadcast<_vuOpMUL, MACOpDst::Acc>(VU, bc); @@ -840,7 +840,7 @@ static __fi PS2Float _vuAccurateOpMADDA(u32 acc, u32 fs, u32 ft, bool oflw) static __fi void _vuMADD(VURegs* VU) { - if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_MUL((VU == &VU1) ? 1 : 0)) applyAccurateTernaryMACOp<_vuAccurateOpMADD, MACOpDst::Fd>(VU); else applyTernaryMACOp<_vuOpMADD, MACOpDst::Fd>(VU); @@ -848,7 +848,7 @@ static __fi void _vuMADD(VURegs* VU) static __fi void vuMADDbc(VURegs* VU, u32 bc) { - if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 
1 : 0) && CHECK_VU_SOFT_MUL((VU == &VU1) ? 1 : 0)) applyAccurateTernaryMACOpBroadcast<_vuAccurateOpMADD, MACOpDst::Fd>(VU, bc); else applyTernaryMACOpBroadcast<_vuOpMADD, MACOpDst::Fd>(VU, bc); @@ -863,7 +863,7 @@ static __fi void _vuMADDw(VURegs* VU) { vuMADDbc(VU, VU->VF[_Ft_].i.w); } static __fi void _vuMADDA(VURegs* VU) { - if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_MUL((VU == &VU1) ? 1 : 0)) applyAccurateAccumulatorTernaryMACOp<_vuAccurateOpMADDA, MACOpDst::Acc>(VU); else applyTernaryMACOp<_vuOpMADD, MACOpDst::Acc>(VU); @@ -871,7 +871,7 @@ static __fi void _vuMADDA(VURegs* VU) static __fi void vuMADDAbc(VURegs* VU, u32 bc) { - if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_MUL((VU == &VU1) ? 1 : 0)) applyAccurateAccumulatorTernaryMACOpBroadcast<_vuAccurateOpMADDA, MACOpDst::Acc>(VU, bc); else applyTernaryMACOpBroadcast<_vuOpMADD, MACOpDst::Acc>(VU, bc); @@ -903,7 +903,7 @@ static __fi PS2Float _vuAccurateOpMSUBA(u32 acc, u32 fs, u32 ft, bool oflw) static __fi void _vuMSUB(VURegs* VU) { - if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_MUL((VU == &VU1) ? 1 : 0)) applyAccurateTernaryMACOp<_vuAccurateOpMSUB, MACOpDst::Fd>(VU); else applyTernaryMACOp<_vuOpMSUB, MACOpDst::Fd>(VU); @@ -911,7 +911,7 @@ static __fi void _vuMSUB(VURegs* VU) static __fi void vuMSUBbc(VURegs* VU, u32 bc) { - if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_MUL((VU == &VU1) ? 
1 : 0)) applyAccurateTernaryMACOpBroadcast<_vuAccurateOpMSUB, MACOpDst::Fd>(VU, bc); else applyTernaryMACOpBroadcast<_vuOpMSUB, MACOpDst::Fd>(VU, bc); @@ -926,7 +926,7 @@ static __fi void _vuMSUBw(VURegs* VU) { vuMSUBbc(VU, VU->VF[_Ft_].i.w); } static __fi void _vuMSUBA(VURegs* VU) { - if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_MUL((VU == &VU1) ? 1 : 0)) applyAccurateAccumulatorTernaryMACOp<_vuAccurateOpMSUBA, MACOpDst::Acc>(VU); else applyTernaryMACOp<_vuOpMSUB, MACOpDst::Acc>(VU); @@ -934,7 +934,7 @@ static __fi void _vuMSUBA(VURegs* VU) static __fi void vuMSUBAbc(VURegs* VU, u32 bc) { - if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_MUL((VU == &VU1) ? 1 : 0)) applyAccurateAccumulatorTernaryMACOpBroadcast<_vuAccurateOpMSUBA, MACOpDst::Acc>(VU, bc); else applyTernaryMACOpBroadcast<_vuOpMSUB, MACOpDst::Acc>(VU, bc); @@ -1008,7 +1008,7 @@ static __fi void _vuMINIw(VURegs* VU) { applyMinMaxBroadcast(VU, VU->VF[ static __fi void _vuOPMULA(VURegs* VU) { - if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + if (CHECK_VU_SOFT_MUL((VU == &VU1) ? 1 : 0)) { VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z)); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x)); @@ -1031,7 +1031,7 @@ static __fi void _vuOPMSUB(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + if (CHECK_VU_SOFT_MUL((VU == &VU1) ? 1 : 0)) { u32 ftx = VU->VF[_Ft_].i.x; u32 fty = VU->VF[_Ft_].i.y; @@ -1121,7 +1121,7 @@ static __fi void _vuCLIP(VURegs* VU) static __fi void _vuDIV(VURegs* VU) { - if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + if (CHECK_VU_SOFT_DIVSQRT((VU == &VU1) ? 
1 : 0)) { PS2Float ft = PS2Float(VU->VF[_Ft_].UL[_Ftf_]); PS2Float fs = PS2Float(VU->VF[_Fs_].UL[_Fsf_]); @@ -1176,7 +1176,7 @@ static __fi void _vuDIV(VURegs* VU) static __fi void _vuSQRT(VURegs* VU) { - if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) + if (CHECK_VU_SOFT_DIVSQRT((VU == &VU1) ? 1 : 0)) { PS2Float ft = PS2Float(VU->VF[_Ft_].UL[_Ftf_]); @@ -1201,7 +1201,7 @@ static __fi void _vuSQRT(VURegs* VU) static __fi void _vuRSQRT(VURegs* VU) { - if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) + if (CHECK_VU_SOFT_DIVSQRT((VU == &VU1) ? 1 : 0)) { PS2Float ft = PS2Float(VU->VF[_Ft_].UL[_Ftf_]); PS2Float fs = PS2Float(VU->VF[_Fs_].UL[_Fsf_]); @@ -1922,7 +1922,7 @@ static __ri void _vuWAITP(VURegs* VU) static __ri void _vuESADD(VURegs* VU) { - if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].i.x).ESADD(PS2Float(VU->VF[_Fs_].i.y), PS2Float(VU->VF[_Fs_].i.z)).raw; } + if (CHECK_VU_SOFT_MUL((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].i.x).ESADD(PS2Float(VU->VF[_Fs_].i.y), PS2Float(VU->VF[_Fs_].i.z)).raw; } else { float p = vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z); @@ -1933,7 +1933,7 @@ static __ri void _vuESADD(VURegs* VU) static __ri void _vuERSADD(VURegs* VU) { - if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].i.x).ERSADD(PS2Float(VU->VF[_Fs_].i.y), PS2Float(VU->VF[_Fs_].i.z)).raw; } + if (CHECK_VU_SOFT_MUL((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_DIVSQRT((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 
1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].i.x).ERSADD(PS2Float(VU->VF[_Fs_].i.y), PS2Float(VU->VF[_Fs_].i.z)).raw; } else { float p = (vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x)) + (vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y)) + (vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z)); @@ -1947,7 +1947,7 @@ static __ri void _vuERSADD(VURegs* VU) static __ri void _vuELENG(VURegs* VU) { - if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].i.x).ELENG(PS2Float(VU->VF[_Fs_].i.y), PS2Float(VU->VF[_Fs_].i.z)).raw; } + if (CHECK_VU_SOFT_MUL((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_DIVSQRT((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].i.x).ELENG(PS2Float(VU->VF[_Fs_].i.y), PS2Float(VU->VF[_Fs_].i.z)).raw; } else { float p = vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z); @@ -1962,7 +1962,7 @@ static __ri void _vuELENG(VURegs* VU) static __ri void _vuERLENG(VURegs* VU) { - if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].i.x).ERLENG(PS2Float(VU->VF[_Fs_].i.y), PS2Float(VU->VF[_Fs_].i.z)).raw; } + if (CHECK_VU_SOFT_MUL((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_DIVSQRT((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].i.x).ERLENG(PS2Float(VU->VF[_Fs_].i.y), PS2Float(VU->VF[_Fs_].i.z)).raw; } else { float p = vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z); @@ -2003,7 +2003,7 @@ static __ri PS2Float _vuCalculateAccurateEATAN(PS2Float inputvalue) static __ri void _vuEATAN(VURegs* VU) { - if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 
1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) { VU->p.UL = _vuCalculateAccurateEATAN(PS2Float(VU->VF[_Fs_].UL[_Fsf_])).raw; } + if (CHECK_VU_SOFT_MUL((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) { VU->p.UL = _vuCalculateAccurateEATAN(PS2Float(VU->VF[_Fs_].UL[_Fsf_])).raw; } else { float p = _vuCalculateEATAN(vuDouble(VU->VF[_Fs_].UL[_Fsf_])); @@ -2013,7 +2013,7 @@ static __ri void _vuEATAN(VURegs* VU) static __ri void _vuEATANxy(VURegs* VU) { - if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) { VU->p.UL = _vuCalculateAccurateEATAN(PS2Float(VU->VF[_Fs_].i.y).Div(PS2Float(VU->VF[_Fs_].i.x))).raw; } + if (CHECK_VU_SOFT_MUL((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_DIVSQRT((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) { VU->p.UL = _vuCalculateAccurateEATAN(PS2Float(VU->VF[_Fs_].i.y).Div(PS2Float(VU->VF[_Fs_].i.x))).raw; } else { float p = 0; @@ -2027,7 +2027,7 @@ static __ri void _vuEATANxy(VURegs* VU) static __ri void _vuEATANxz(VURegs* VU) { - if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) { VU->p.UL = _vuCalculateAccurateEATAN(PS2Float(VU->VF[_Fs_].i.z).Div(PS2Float(VU->VF[_Fs_].i.x))).raw; } + if (CHECK_VU_SOFT_MUL((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_DIVSQRT((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) { VU->p.UL = _vuCalculateAccurateEATAN(PS2Float(VU->VF[_Fs_].i.z).Div(PS2Float(VU->VF[_Fs_].i.x))).raw; } else { float p = 0; @@ -2051,7 +2051,7 @@ static __ri void _vuESUM(VURegs* VU) static __ri void _vuERCPR(VURegs* VU) { - if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].UL[_Fsf_]).ERCPR().raw; } + if (CHECK_VU_SOFT_DIVSQRT((VU == &VU1) ? 
1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].UL[_Fsf_]).ERCPR().raw; } else { float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); @@ -2067,7 +2067,7 @@ static __ri void _vuERCPR(VURegs* VU) static __ri void _vuESQRT(VURegs* VU) { - if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].UL[_Fsf_]).ESQRT().raw; } + if (CHECK_VU_SOFT_DIVSQRT((VU == &VU1) ? 1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].UL[_Fsf_]).ESQRT().raw; } else { float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); @@ -2083,7 +2083,7 @@ static __ri void _vuESQRT(VURegs* VU) static __ri void _vuERSQRT(VURegs* VU) { - if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].UL[_Fsf_]).ERSQRT().raw; } + if (CHECK_VU_SOFT_DIVSQRT((VU == &VU1) ? 1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].UL[_Fsf_]).ERSQRT().raw; } else { float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); @@ -2103,7 +2103,7 @@ static __ri void _vuERSQRT(VURegs* VU) static __ri void _vuESIN(VURegs* VU) { - if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].UL[_Fsf_]).ESIN().raw; } + if (CHECK_VU_SOFT_MUL((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].UL[_Fsf_]).ESIN().raw; } else { float sinconsts[5] = {1.0f, -0.166666567325592f, 0.008333025500178f, -0.000198074136279f, 0.000002601886990f}; @@ -2116,7 +2116,7 @@ static __ri void _vuESIN(VURegs* VU) static __ri void _vuEEXP(VURegs* VU) { - if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].UL[_Fsf_]).EEXP().raw; } + if (CHECK_VU_SOFT_MUL((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_DIVSQRT((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 
1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].UL[_Fsf_]).EEXP().raw; } else { float consts[6] = {0.249998688697815f, 0.031257584691048f, 0.002591371303424f, From ba261eb2a48a15a6c6691d6f80996a0cb4c7ff87 Mon Sep 17 00:00:00 2001 From: GitHubProUser67 <127040195+GitHubProUser67@users.noreply.github.com> Date: Thu, 2 Oct 2025 21:48:07 +0200 Subject: [PATCH 4/4] [Soft-Float] - Fixes Visual Studio compilation. This commit fixes an XML syntax issue in the Visual Studio filters file. --- pcsx2/pcsx2.vcxproj.filters | 1 + 1 file changed, 1 insertion(+) diff --git a/pcsx2/pcsx2.vcxproj.filters b/pcsx2/pcsx2.vcxproj.filters index c2e13652e2..0582162ebc 100644 --- a/pcsx2/pcsx2.vcxproj.filters +++ b/pcsx2/pcsx2.vcxproj.filters @@ -1451,6 +1451,7 @@ System\Ps2\GS\Renderers\Software + System\Ps2\EmotionEngine\Shared