diff --git a/pcsx2-qt/pcsx2-qt.vcxproj b/pcsx2-qt/pcsx2-qt.vcxproj
index daa3566389..29d93a5195 100644
--- a/pcsx2-qt/pcsx2-qt.vcxproj
+++ b/pcsx2-qt/pcsx2-qt.vcxproj
@@ -125,6 +125,9 @@
{a4323327-3f2b-4271-83d9-7f9a3c66b6b2}
+
+ {67d0160c-0fe4-44b9-ac2e-82bbcf4104df}
+
diff --git a/pcsx2/CMakeLists.txt b/pcsx2/CMakeLists.txt
index b13260bb8a..3599be56ef 100644
--- a/pcsx2/CMakeLists.txt
+++ b/pcsx2/CMakeLists.txt
@@ -1659,6 +1659,7 @@ target_link_libraries(PCSX2_FLAGS INTERFACE
chdr-static
libzip::zip
cpuinfo
+ zydis
ZLIB::ZLIB
PkgConfig::SOUNDTOUCH
PNG::PNG
diff --git a/pcsx2/Config.h b/pcsx2/Config.h
index ced2425345..663c04d29e 100644
--- a/pcsx2/Config.h
+++ b/pcsx2/Config.h
@@ -380,10 +380,6 @@ struct Pcsx2Config
fpuExtraOverflow : 1,
fpuFullMode : 1;
- bool
- StackFrameChecks : 1,
- PreBlockCheckEE : 1,
- PreBlockCheckIOP : 1;
bool
EnableEECache : 1;
BITFIELD_END
diff --git a/pcsx2/Pcsx2Config.cpp b/pcsx2/Pcsx2Config.cpp
index bc02b1690c..0ae5f3ae8f 100644
--- a/pcsx2/Pcsx2Config.cpp
+++ b/pcsx2/Pcsx2Config.cpp
@@ -220,10 +220,6 @@ void Pcsx2Config::RecompilerOptions::LoadSave(SettingsWrapper& wrap)
SettingsWrapBitBool(fpuOverflow);
SettingsWrapBitBool(fpuExtraOverflow);
SettingsWrapBitBool(fpuFullMode);
-
- SettingsWrapBitBool(StackFrameChecks);
- SettingsWrapBitBool(PreBlockCheckEE);
- SettingsWrapBitBool(PreBlockCheckIOP);
}
bool Pcsx2Config::CpuOptions::CpusChanged(const CpuOptions& right) const
diff --git a/pcsx2/pcsx2core.vcxproj b/pcsx2/pcsx2core.vcxproj
index 8d80861a12..a00cc59be0 100644
--- a/pcsx2/pcsx2core.vcxproj
+++ b/pcsx2/pcsx2core.vcxproj
@@ -52,6 +52,7 @@
%(AdditionalIncludeDirectories);$(SolutionDir)3rdparty\rapidyaml\rapidyaml\ext\c4core\src\c4\ext\fast_float\include
%(AdditionalIncludeDirectories);$(SolutionDir)3rdparty\rcheevos\rcheevos\include;$(SolutionDir)3rdparty\rainterface
%(AdditionalIncludeDirectories);$(SolutionDir)3rdparty\discord-rpc\include
+ %(AdditionalIncludeDirectories);$(SolutionDir)3rdparty\zydis\include;$(SolutionDir)3rdparty\zydis\dependencies\zycore\include
Async
Use
PrecompiledHeader.h
@@ -65,6 +66,7 @@
PCSX2_CI;%(PreprocessorDefinitions)
_M_SSE=0x401;%(PreprocessorDefinitions)
_M_SSE=0x501;%(PreprocessorDefinitions)
+ ZYCORE_STATIC_DEFINE;ZYDIS_STATIC_DEFINE;%(PreprocessorDefinitions)
NotSet
AdvancedVectorExtensions2
false
@@ -835,4 +837,4 @@
-
\ No newline at end of file
+
diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp
index 5446f3250b..49db0fc8f5 100644
--- a/pcsx2/x86/iR3000A.cpp
+++ b/pcsx2/x86/iR3000A.cpp
@@ -51,6 +51,19 @@
#include "fmt/core.h"
+// #define DUMP_BLOCKS 1
+// #define TRACE_BLOCKS 1
+
+#ifdef DUMP_BLOCKS
+#include "Zydis/Zydis.h"
+#include "Zycore/Format.h"
+#include "Zycore/Status.h"
+#endif
+
+#ifdef TRACE_BLOCKS
+#include <zlib.h> // crc32() and Bytef, used by the register-state hashing in PreBlockCheck
+#endif
+
using namespace x86Emitter;
extern void psxBREAK();
@@ -1266,7 +1279,7 @@ void psxRecompileNextInstruction(int delayslot)
static void PreBlockCheck(u32 blockpc)
{
-#ifdef PCSX2_DEBUG
+#if 0
extern void iDumpPsxRegisters(u32 startpc, u32 temp);
static u32 lastrec = 0;
@@ -1289,6 +1302,37 @@ static void PreBlockCheck(u32 blockpc)
lastrec = blockpc;
}
#endif
+#ifdef TRACE_BLOCKS
+#if 1
+ static FILE* fp = nullptr;
+ static bool fp_opened = false;
+ if (!fp_opened && psxRegs.cycle >= 0)
+ {
+ fp = std::fopen("C:\\Dumps\\comp\\ioplog.txt", "wb");
+ fp_opened = true;
+ }
+ if (fp)
+ {
+ u32 hash = crc32(0, (Bytef*)&psxRegs, offsetof(psxRegisters, pc));
+
+#if 1
+ std::fprintf(fp, "%08X (%u; %08X):", psxRegs.pc, psxRegs.cycle, hash);
+ for (int i = 0; i < 34; i++)
+ {
+ std::fprintf(fp, " %s: %08X", R3000A::disRNameGPR[i], psxRegs.GPR.r[i]);
+ }
+ std::fprintf(fp, "\n");
+#else
+ std::fprintf(fp, "%08X (%u): %08X\n", psxRegs.pc, psxRegs.cycle, hash);
+#endif
+ // std::fflush(fp);
+ }
+#endif
+#if 0
+ if (psxRegs.cycle == 0)
+ __debugbreak();
+#endif
+#endif
}
static void iopRecRecompile(const u32 startpc)
@@ -1352,10 +1396,9 @@ static void iopRecRecompile(const u32 startpc)
xJNZ(iopDispatcherReg);
}
- if (IsDebugBuild)
- {
- xFastCall((void*)PreBlockCheck, psxpc);
- }
+#ifdef TRACE_BLOCKS
+ xFastCall((void*)PreBlockCheck, psxpc);
+#endif
// go until the next branch
i = startpc;
diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp
index 59d0f52880..c624bb28d8 100644
--- a/pcsx2/x86/ix86-32/iR5900-32.cpp
+++ b/pcsx2/x86/ix86-32/iR5900-32.cpp
@@ -50,6 +50,19 @@
// Only for MOVQ workaround.
#include "common/emitter/internal.h"
+//#define DUMP_BLOCKS 1
+//#define TRACE_BLOCKS 1
+
+#ifdef DUMP_BLOCKS
+#include "Zydis/Zydis.h"
+#include "Zycore/Format.h"
+#include "Zycore/Status.h"
+#endif
+
+#ifdef TRACE_BLOCKS
+#include <zlib.h> // crc32() and Bytef, used by the register-state hashing in PreBlockCheck
+#endif
+
using namespace x86Emitter;
using namespace R5900;
@@ -128,6 +141,124 @@ static void ClearRecLUT(BASEBLOCK* base, int count);
static u32 scaleblockcycles();
static void recExitExecution();
+#ifdef TRACE_BLOCKS
+static void pauseAAA() // trace helper: prints "Paused", flushes both standard streams, then traps (MSVC) or sleeps for a second
+{
+	fputs("\nPaused\n", stderr);
+	fflush(stdout);
+	fflush(stderr);
+#ifndef _MSC_VER
+	sleep(1);
+#else
+	__debugbreak();
+#endif
+}
+#endif
+
+#ifdef DUMP_BLOCKS
+static ZydisFormatterFunc s_old_print_address; // fallback printer, called when an address has no symbolic name
+// Absolute-address print hook: emits "<symbol>" for addresses inside known emulator state, else defers to s_old_print_address.
+static ZyanStatus ZydisFormatterPrintAddressAbsolute(const ZydisFormatter* formatter,
+	ZydisFormatterBuffer* buffer, ZydisFormatterContext* context)
+{
+	ZyanU64 address;
+	ZYAN_CHECK(ZydisCalcAbsoluteAddress(context->instruction, context->operand,
+		context->runtime_address, &address));
+	// buf receives the symbolic name; len stays 0 when the address matches none of the regions below.
+	char buf[128];
+	int len = 0; // snprintf() returns int; keeping it signed means an error result can never pass the len > 0 test
+	// A() converts a pointer to an integer so it can be compared with the decoded address.
+#define A(x) ((u64)(x))
+	// Ranges are half-open: [start of this region, start of the next one tested).
+	if (address >= A(eeMem->Main) && address < A(eeMem->Scratch))
+	{
+		len = snprintf(buf, sizeof(buf), "eeMem+0x%08X", static_cast<u32>(address - A(eeMem->Main)));
+	}
+	else if (address >= A(eeMem->Scratch) && address < A(eeMem->ROM))
+	{
+		len = snprintf(buf, sizeof(buf), "eeScratchpad+0x%08X", static_cast<u32>(address - A(eeMem->Scratch)));
+	}
+	else if (address >= A(&cpuRegs.GPR) && address < A(&cpuRegs.HI))
+	{
+		const u32 rel = static_cast<u32>(address - A(&cpuRegs.GPR)); // measured from the same base as the range test; names are indexed in 16-byte steps
+		if ((rel % 16u) != 0)
+			len = snprintf(buf, sizeof(buf), "cpuRegs.GPR.%s+%u", GPR_REG[rel / 16u], rel % 16u);
+		else
+			len = snprintf(buf, sizeof(buf), "cpuRegs.GPR.%s", GPR_REG[rel / 16u]);
+	}
+	else if (address >= A(&cpuRegs.HI) && address < A(&cpuRegs.CP0))
+	{
+		const u32 offset = static_cast<u32>(address - A(&cpuRegs.HI)) % 16u;
+		if (offset != 0)
+			len = snprintf(buf, sizeof(buf), "cpuRegs.%s+%u", (address >= A(&cpuRegs.LO) ? "LO" : "HI"), offset);
+		else
+			len = snprintf(buf, sizeof(buf), "cpuRegs.%s", (address >= A(&cpuRegs.LO) ? "LO" : "HI"));
+	}
+	else if (address == A(&cpuRegs.pc))
+	{
+		len = snprintf(buf, sizeof(buf), "cpuRegs.pc");
+	}
+	else if (address == A(&cpuRegs.cycle))
+	{
+		len = snprintf(buf, sizeof(buf), "cpuRegs.cycle");
+	}
+	else if (address == A(&g_nextEventCycle))
+	{
+		len = snprintf(buf, sizeof(buf), "g_nextEventCycle");
+	}
+	else if (address >= A(fpuRegs.fpr) && address < A(fpuRegs.fprc))
+	{
+		len = snprintf(buf, sizeof(buf), "fpuRegs.f%02u", static_cast<u32>(address - A(fpuRegs.fpr)) / 4u);
+	}
+	else if (address >= A(&VU0.VF[0]) && address < A(&VU0.VI[0]))
+	{
+		const u32 offset = static_cast<u32>(address - A(&VU0.VF[0])) % 16u;
+		if (offset != 0)
+			len = snprintf(buf, sizeof(buf), "VU0.VF[%02u]+%u", static_cast<u32>(address - A(&VU0.VF[0])) / 16u, offset);
+		else
+			len = snprintf(buf, sizeof(buf), "VU0.VF[%02u]", static_cast<u32>(address - A(&VU0.VF[0])) / 16u);
+	}
+	else if (address >= A(&VU0.VI[0]) && address < A(&VU0.ACC))
+	{
+		const u32 offset = static_cast<u32>(address - A(&VU0.VI[0])) % 16u;
+		const u32 vi = static_cast<u32>(address - A(&VU0.VI[0])) / 16u;
+		if (offset != 0)
+			len = snprintf(buf, sizeof(buf), "VU0.%s+%u", COP2_REG_CTL[vi], offset);
+		else
+			len = snprintf(buf, sizeof(buf), "VU0.%s", COP2_REG_CTL[vi]);
+	}
+	else if (address >= A(&VU0.ACC) && address < A(&VU0.q))
+	{
+		const u32 offset = static_cast<u32>(address - A(&VU0.ACC));
+		if (offset != 0)
+			len = snprintf(buf, sizeof(buf), "VU0.ACC+%u", offset);
+		else
+			len = snprintf(buf, sizeof(buf), "VU0.ACC");
+	}
+	else if (address >= A(&VU0.q) && address < A(&VU0.idx))
+	{
+		const u32 offset = static_cast<u32>(address - A(&VU0.q)) % 16u;
+		const char* reg = (address >= A(&VU0.p)) ? "p" : "q";
+		if (offset != 0)
+			len = snprintf(buf, sizeof(buf), "VU0.%s+%u", reg, offset);
+		else
+			len = snprintf(buf, sizeof(buf), "VU0.%s", reg);
+	}
+	// Any other address falls through with len == 0.
+#undef A
+	// A name was produced: append it as a symbol token, wrapped in angle brackets.
+	if (len > 0)
+	{
+		ZYAN_CHECK(ZydisFormatterBufferAppend(buffer, ZYDIS_TOKEN_SYMBOL));
+		ZyanString* string;
+		ZYAN_CHECK(ZydisFormatterBufferGetString(buffer, &string));
+		return ZyanStringAppendFormat(string, "<%s>", buf);
+	}
+	// No name: let s_old_print_address format the operand.
+	return s_old_print_address(formatter, buffer, context);
+}
+#endif
+
void _eeFlushAllUnused()
{
u32 i;
@@ -1653,6 +1784,57 @@ static void PreBlockCheck(u32 blockpc)
lastrec = blockpc;
}*/
+
+#ifdef TRACE_BLOCKS
+#if 0
+ static FILE* fp = nullptr;
+ static bool fp_opened = false;
+ if (!fp_opened && cpuRegs.cycle >= 0)
+ {
+ fp = std::fopen("C:\\Dumps\\comp\\reglog.txt", "wb");
+ fp_opened = true;
+ }
+ if (fp)
+ {
+ u32 hash = crc32(0, (Bytef*)&cpuRegs, offsetof(cpuRegisters, pc));
+ u32 hashf = crc32(0, (Bytef*)&fpuRegs, sizeof(fpuRegisters));
+ u32 hashi = crc32(0, (Bytef*)&VU0, offsetof(VURegs, idx));
+
+#if 1
+ std::fprintf(fp, "%08X (%u; %08X; %08X; %08X):", cpuRegs.pc, cpuRegs.cycle, hash, hashf, hashi);
+ for (int i = 0; i < 34; i++)
+ {
+ std::fprintf(fp, " %s: %08X%08X%08X%08X", R3000A::disRNameGPR[i], cpuRegs.GPR.r[i].UL[3], cpuRegs.GPR.r[i].UL[2], cpuRegs.GPR.r[i].UL[1], cpuRegs.GPR.r[i].UL[0]);
+ }
+#if 1
+ std::fprintf(fp, "\nFPR: CR: %08X ACC: %08X", fpuRegs.fprc[31], fpuRegs.ACC.UL);
+ for (int i = 0; i < 32; i++)
+ std::fprintf(fp, " %08X", fpuRegs.fpr[i].UL);
+#endif
+#if 1
+ std::fprintf(fp, "\nVF: ");
+ for (int i = 0; i < 32; i++)
+ std::fprintf(fp, " %u: %08X %08X %08X %08X", i, VU0.VF[i].UL[0], VU0.VF[i].UL[1], VU0.VF[i].UL[2], VU0.VF[i].UL[3]);
+ std::fprintf(fp, "\nVI: ");
+ for (int i = 0; i < 32; i++)
+ std::fprintf(fp, " %u: %08X", i, VU0.VI[i].UL);
+ std::fprintf(fp, "\nACC: %08X %08X %08X %08X Q: %08X P: %08X", VU0.ACC.UL[0], VU0.ACC.UL[1], VU0.ACC.UL[2], VU0.ACC.UL[3], VU0.q.UL, VU0.p.UL);
+ std::fprintf(fp, " MAC %08X %08X %08X %08X", VU0.micro_macflags[3], VU0.micro_macflags[2], VU0.micro_macflags[1], VU0.micro_macflags[0]);
+ std::fprintf(fp, " CLIP %08X %08X %08X %08X", VU0.micro_clipflags[3], VU0.micro_clipflags[2], VU0.micro_clipflags[1], VU0.micro_clipflags[0]);
+ std::fprintf(fp, " STATUS %08X %08X %08X %08X", VU0.micro_statusflags[3], VU0.micro_statusflags[2], VU0.micro_statusflags[1], VU0.micro_statusflags[0]);
+#endif
+ std::fprintf(fp, "\n");
+#else
+ std::fprintf(fp, "%08X (%u): %08X %08X %08X\n", cpuRegs.pc, cpuRegs.cycle, hash, hashf, hashi);
+#endif
+ // std::fflush(fp);
+ }
+#endif
+#if 0
+ if (cpuRegs.cycle == 0)
+ pauseAAA();
+#endif
+#endif
}
#ifdef PCSX2_DEBUG
@@ -1917,14 +2099,9 @@ static void recRecompile(const u32 startpc)
_initX86regs();
_initXMMregs();
- if (EmuConfig.Cpu.Recompiler.PreBlockCheckEE)
- {
- // per-block dump checks, for debugging purposes.
- // [TODO] : These must be enabled from the GUI or INI to be used, otherwise the
- // code that calls PreBlockCheck will not be generated.
-
- xFastCall((void*)PreBlockCheck, pc);
- }
+#ifdef TRACE_BLOCKS
+ xFastCall((void*)PreBlockCheck, pc);
+#endif
if (EmuConfig.Gamefixes.GoemonTlbHack)
{
@@ -2247,6 +2424,26 @@ StartRecomp:
iDumpBlock(startpc, recPtr);
#endif
+#ifdef DUMP_BLOCKS
+ ZydisDecoder disas_decoder;
+ ZydisDecoderInit(&disas_decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_ADDRESS_WIDTH_64);
+
+ ZydisFormatter disas_formatter;
+ ZydisFormatterInit(&disas_formatter, ZYDIS_FORMATTER_STYLE_INTEL);
+
+ s_old_print_address = (ZydisFormatterFunc)&ZydisFormatterPrintAddressAbsolute;
+ ZydisFormatterSetHook(&disas_formatter, ZYDIS_FORMATTER_FUNC_PRINT_ADDRESS_ABS, (const void**)&s_old_print_address);
+
+ ZydisDecodedInstruction disas_instruction;
+#if 0
+ const bool dump_block = (startpc == 0x00000000);
+#elif 1
+ const bool dump_block = true;
+#else
+ const bool dump_block = false;
+#endif
+#endif
+
// Detect and handle self-modified code
memory_protect_recompiled_code(startpc, (s_nEndBlock - startpc) >> 2);
@@ -2259,7 +2456,35 @@ StartRecomp:
g_pCurInstInfo = s_pInstCache;
while (!g_branch && pc < s_nEndBlock)
{
+#ifdef DUMP_BLOCKS
+		if (dump_block)
+		{
+			std::string disasm;
+			disR5900Fasm(disasm, *(u32*)PSM(pc), pc, false);
+			fprintf(stderr, "Compiling %08X %s\n", pc, disasm.c_str());
+			// Remember x86Ptr before recompiling so only the bytes produced for this instruction are decoded below.
+			const u8* instStart = x86Ptr;
+			recompileNextInstruction(0);
+			// Decode [instStart, x86Ptr) one instruction at a time; the loop ends when the decoder stops reporting success.
+			const u8* instPtr = instStart;
+			ZyanUSize instLength = static_cast<ZyanUSize>(x86Ptr - instStart);
+			while (ZYAN_SUCCESS(ZydisDecoderDecodeBuffer(&disas_decoder, instPtr, instLength, &disas_instruction)))
+			{
+				char buffer[256];
+				if (ZYAN_SUCCESS(ZydisFormatterFormatInstruction(&disas_formatter, &disas_instruction, buffer, sizeof(buffer), (ZyanU64)instPtr)))
+					std::fprintf(stderr, " %016" PRIX64 " %s\n", (u64)instPtr, buffer);
+				// Advance by the decoded length even when formatting failed, so the loop always makes progress.
+				instPtr += disas_instruction.length;
+				instLength -= disas_instruction.length;
+			}
+		}
+		else
+		{
+			recompileNextInstruction(0);
+		}
+#else
recompileNextInstruction(0); // For the love of recursion, batman!
+#endif
}
}
diff --git a/pcsx2/x86/ix86-32/recVTLB.cpp b/pcsx2/x86/ix86-32/recVTLB.cpp
index aa0f729fab..f193416d11 100644
--- a/pcsx2/x86/ix86-32/recVTLB.cpp
+++ b/pcsx2/x86/ix86-32/recVTLB.cpp
@@ -23,6 +23,8 @@
#include "iR5900.h"
#include "common/Perf.h"
+//#define LOG_STORES
+
using namespace vtlb_private;
using namespace x86Emitter;
@@ -76,6 +78,34 @@ using namespace x86Emitter;
*/
+#ifdef LOG_STORES
+static std::FILE* logfile;
+static bool CheckLogFile() // true when the store log is open; opens it on demand, and a failed open is retried on the next call
+{
+	if (logfile != nullptr)
+		return true;
+	return (logfile = std::fopen("C:\\Dumps\\comp\\memlog.bad.txt", "wb")) != nullptr;
+}
+
+static void LogWrite(u32 addr, u64 val) // appends one "addr @ cycle: value" line to the store log and flushes it; no-op if the log cannot be opened
+{
+	if (CheckLogFile())
+	{
+		std::fprintf(logfile, "%08X @ %u: %llx\n", addr, cpuRegs.cycle, static_cast<unsigned long long>(val)); // %llx requires unsigned long long, so convert u64 explicitly
+		std::fflush(logfile);
+	}
+}
+
+static void __vectorcall LogWriteQuad(u32 addr, __m128i val) // logs a 128-bit store as two 64-bit values (element 0, then element 1) and flushes
+{
+	if (CheckLogFile())
+	{
+		std::fprintf(logfile, "%08X @ %u: %llx %llx\n", addr, cpuRegs.cycle, val.m128i_u64[0], val.m128i_u64[1]);
+		std::fflush(logfile);
+	}
+}
+#endif
+
namespace vtlb_private
{
// ------------------------------------------------------------------------
@@ -471,6 +501,38 @@ void vtlb_DynGenReadNonQuad_Const(u32 bits, bool sign, u32 addr_const)
void vtlb_DynGenWrite(u32 sz)
{
+#ifdef LOG_STORES
+ //if (sz != 128)
+ {
+ iFlushCall(FLUSH_FULLVTLB);
+
+ xPUSH(arg1reg);
+ xPUSH(arg2reg);
+ if (sz == 128)
+ {
+ xSUB(rsp, 32 + 32);
+ xMOVAPS(ptr[rsp + 32], xRegisterSSE::GetArgRegister(1, 0));
+ xFastCall((void*)LogWriteQuad);
+ xMOVAPS(xRegisterSSE::GetArgRegister(1, 0), ptr[rsp + 32]);
+ xADD(rsp, 32 + 32);
+ }
+ else
+ {
+ if (sz == 8)
+ xAND(arg2regd, 0xFF);
+ else if (sz == 16)
+ xAND(arg2regd, 0xFFFF);
+ else if (sz == 32)
+ xAND(arg2regd, -1);
+ xSUB(rsp, 32);
+ xFastCall((void*)LogWrite);
+ xADD(rsp, 32);
+ }
+ xPOP(arg2reg);
+ xPOP(arg1reg);
+ }
+#endif
+
u32* writeback = DynGen_PrepRegs();
DynGen_IndirectDispatch(1, sz);
@@ -488,6 +550,39 @@ void vtlb_DynGenWrite_Const(u32 bits, u32 addr_const)
{
EE::Profiler.EmitConstMem(addr_const);
+#ifdef LOG_STORES
+ iFlushCall(FLUSH_FULLVTLB);
+
+ //if (bits != 128)
+ {
+ xPUSH(arg1reg);
+ xPUSH(arg2reg);
+ xMOV(arg1reg, addr_const);
+ if (bits == 128)
+ {
+ xSUB(rsp, 32 + 32);
+ xMOVAPS(ptr[rsp + 32], xRegisterSSE::GetArgRegister(1, 0));
+ xFastCall((void*)LogWriteQuad);
+ xMOVAPS(xRegisterSSE::GetArgRegister(1, 0), ptr[rsp + 32]);
+ xADD(rsp, 32 + 32);
+ }
+ else
+ {
+ if (bits == 8)
+ xAND(arg2regd, 0xFF);
+ else if (bits == 16)
+ xAND(arg2regd, 0xFFFF);
+ else if (bits == 32)
+ xAND(arg2regd, -1);
+ xSUB(rsp, 32);
+ xFastCall((void*)LogWrite);
+ xADD(rsp, 32);
+ }
+ xPOP(arg2reg);
+ xPOP(arg1reg);
+ }
+#endif
+
auto vmv = vtlbdata.vmap[addr_const >> VTLB_PAGE_BITS];
if (!vmv.isHandler(addr_const))
{