From 382910c5205ff1ce05a00bf440cc3df6499f067f Mon Sep 17 00:00:00 2001 From: AlpinDale Date: Mon, 8 Dec 2025 23:45:03 +0430 Subject: [PATCH 01/19] core: add macOS arm64 support --- CMakeDarwinPresets.json | 13 ++++++++ CMakeLists.txt | 8 +++-- CMakePresets.json | 24 ++++++++++++++ src/common/signal_context.cpp | 21 ++++++++++++ src/common/va_ctx.h | 22 ++++++++++++- src/core/libraries/fiber/fiber.cpp | 24 ++++++++++++++ src/core/libraries/kernel/kernel.cpp | 4 +-- .../libraries/kernel/threads/exception.cpp | 13 ++++++++ src/core/libraries/kernel/threads/mutex.cpp | 6 ++++ .../libc_internal/libc_internal_io.cpp | 2 +- src/core/libraries/libs.h | 12 +++++++ src/core/linker.cpp | 20 ++++++++++++ src/core/thread.cpp | 4 +++ .../passes/flatten_extended_userdata_pass.cpp | 32 +++++++++++++------ 14 files changed, 189 insertions(+), 16 deletions(-) diff --git a/CMakeDarwinPresets.json b/CMakeDarwinPresets.json index 10824a66c..8e09600ec 100644 --- a/CMakeDarwinPresets.json +++ b/CMakeDarwinPresets.json @@ -17,6 +17,19 @@ "CMAKE_INSTALL_PREFIX": "${sourceDir}/Build/${presetName}", "CMAKE_OSX_ARCHITECTURES": "x86_64" } + }, + { + "name": "arm64-Clang-Base", + "hidden": true, + "generator": "Ninja", + "binaryDir": "${sourceDir}/Build/${presetName}", + "cacheVariables": { + "CMAKE_C_COMPILER": "/usr/bin/clang", + "CMAKE_CXX_COMPILER": "/usr/bin/clang++", + "CMAKE_INSTALL_PREFIX": "${sourceDir}/Build/${presetName}", + "CMAKE_OSX_ARCHITECTURES": "arm64", + "CMAKE_PREFIX_PATH": "/opt/homebrew" + } } ] } \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 04534ec26..df2905b70 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -550,13 +550,15 @@ set(USBD_LIB src/core/libraries/usbd/usbd.cpp src/core/libraries/usbd/emulated/skylander.h ) -set(FIBER_LIB src/core/libraries/fiber/fiber_context.s - src/core/libraries/fiber/fiber.cpp +set(FIBER_LIB src/core/libraries/fiber/fiber.cpp src/core/libraries/fiber/fiber.h src/core/libraries/fiber/fiber_error.h ) -set_source_files_properties(src/core/libraries/fiber/fiber_context.s PROPERTIES COMPILE_OPTIONS -Wno-unused-command-line-argument) +if(ARCHITECTURE STREQUAL "x86_64") + list(APPEND FIBER_LIB src/core/libraries/fiber/fiber_context.s) + set_source_files_properties(src/core/libraries/fiber/fiber_context.s PROPERTIES COMPILE_OPTIONS -Wno-unused-command-line-argument) +endif() set(VDEC_LIB src/core/libraries/videodec/videodec2_impl.cpp src/core/libraries/videodec/videodec2_impl.h diff --git a/CMakePresets.json b/CMakePresets.json index c34007a34..1914f9895 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -30,6 +30,30 @@ "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } + }, + { + "name": "arm64-Clang-Debug", + "displayName": "Clang ARM64 Debug", + "inherits": ["arm64-Clang-Base"], + "cacheVariables": { + "CMAKE_BUILD_TYPE": "Debug" + } + }, + { + "name": "arm64-Clang-Release", + "displayName": "Clang ARM64 Release", + "inherits": ["arm64-Clang-Base"], + "cacheVariables": { + "CMAKE_BUILD_TYPE": "Release" + } + }, + { + "name": "arm64-Clang-RelWithDebInfo", + "displayName": "Clang ARM64 RelWithDebInfo", + "inherits": ["arm64-Clang-Base"], + "cacheVariables": { + "CMAKE_BUILD_TYPE": "RelWithDebInfo" + } } ] } \ No newline at end of file diff --git a/src/common/signal_context.cpp b/src/common/signal_context.cpp index 112160bc8..929b250ec 100644 --- a/src/common/signal_context.cpp +++ b/src/common/signal_context.cpp @@ -19,14 +19,22 @@ void* GetXmmPointer(void* ctx, u8 index) { case index: \ return 
(void*)(&((EXCEPTION_POINTERS*)ctx)->ContextRecord->Xmm##index.Low) #elif defined(__APPLE__) +#if defined(ARCH_X86_64) #define CASE(index) \ case index: \ return (void*)(&((ucontext_t*)ctx)->uc_mcontext->__fs.__fpu_xmm##index); +#elif defined(ARCH_ARM64) + UNREACHABLE_MSG("XMM registers not available on ARM64"); + return nullptr; +#else +#error "Unsupported architecture" +#endif #else #define CASE(index) \ case index: \ return (void*)(&((ucontext_t*)ctx)->uc_mcontext.fpregs->_xmm[index].element[0]) #endif +#if !defined(ARCH_ARM64) || !defined(__APPLE__) switch (index) { CASE(0); CASE(1); @@ -50,13 +58,20 @@ void* GetXmmPointer(void* ctx, u8 index) { } } #undef CASE +#endif } void* GetRip(void* ctx) { #if defined(_WIN32) return (void*)((EXCEPTION_POINTERS*)ctx)->ContextRecord->Rip; #elif defined(__APPLE__) +#if defined(ARCH_X86_64) return (void*)((ucontext_t*)ctx)->uc_mcontext->__ss.__rip; +#elif defined(ARCH_ARM64) + return (void*)((ucontext_t*)ctx)->uc_mcontext->__ss.__pc; +#else +#error "Unsupported architecture" +#endif #else return (void*)((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RIP]; #endif @@ -66,7 +81,13 @@ void IncrementRip(void* ctx, u64 length) { #if defined(_WIN32) ((EXCEPTION_POINTERS*)ctx)->ContextRecord->Rip += length; #elif defined(__APPLE__) +#if defined(ARCH_X86_64) ((ucontext_t*)ctx)->uc_mcontext->__ss.__rip += length; +#elif defined(ARCH_ARM64) + ((ucontext_t*)ctx)->uc_mcontext->__ss.__pc += length; +#else +#error "Unsupported architecture" +#endif #else ((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RIP] += length; #endif diff --git a/src/common/va_ctx.h b/src/common/va_ctx.h index cffe468ff..f39ab70b4 100644 --- a/src/common/va_ctx.h +++ b/src/common/va_ctx.h @@ -2,9 +2,16 @@ // SPDX-License-Identifier: GPL-2.0-or-later #pragma once -#include +#include "common/arch.h" #include "common/types.h" +#ifdef ARCH_X86_64 +#include +#elif defined(ARCH_ARM64) +#include +#endif + +#ifdef ARCH_X86_64 #define VA_ARGS \ uint64_t rdi, uint64_t rsi, uint64_t rdx, uint64_t rcx, uint64_t r8, uint64_t r9, \ uint64_t overflow_arg_area, __m128 xmm0, __m128 xmm1, __m128 xmm2, __m128 xmm3, \ @@ -30,6 +37,17 @@ (ctx).va_list.gp_offset = offsetof(::Common::VaRegSave, gp); \ (ctx).va_list.fp_offset = offsetof(::Common::VaRegSave, fp); \ (ctx).va_list.overflow_arg_area = &overflow_arg_area; +#elif defined(ARCH_ARM64) +#define VA_ARGS ... 
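+// Editorial note (assumption, not part of the upstream patch): with VA_ARGS reduced
+// to a plain C ellipsis, ARM64 handlers receive host-native varargs, while VA_CTX
+// below hands them a zero-initialized VaCtx. Guest variadic arguments are therefore
+// not actually forwarded on this path yet; a complete port would either capture the
+// host va_list (va_start on the last named parameter) or reconstruct the guest's
+// x86-64 System V register-save layout before calling anything that walks VaList.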
+#define VA_CTX(ctx) \ + alignas(16)::Common::VaCtx ctx{}; \ + (ctx).va_list.reg_save_area = nullptr; \ + (ctx).va_list.gp_offset = 0; \ + (ctx).va_list.fp_offset = 0; \ + (ctx).va_list.overflow_arg_area = nullptr; +#else +#error "Unsupported architecture" +#endif namespace Common { @@ -44,7 +62,9 @@ struct VaList { struct VaRegSave { u64 gp[6]; +#ifdef ARCH_X86_64 __m128 fp[8]; +#endif }; struct VaCtx { diff --git a/src/core/libraries/fiber/fiber.cpp b/src/core/libraries/fiber/fiber.cpp index 2ebfbd244..8e95ca769 100644 --- a/src/core/libraries/fiber/fiber.cpp +++ b/src/core/libraries/fiber/fiber.cpp @@ -3,6 +3,7 @@ #include "fiber.h" +#include "common/arch.h" #include "common/elf_info.h" #include "common/logging/log.h" #include "core/libraries/fiber/fiber_error.h" @@ -23,12 +24,35 @@ OrbisFiberContext* GetFiberContext() { return Core::GetTcbBase()->tcb_fiber; } +#ifdef ARCH_X86_64 extern "C" s32 PS4_SYSV_ABI _sceFiberSetJmp(OrbisFiberContext* ctx) asm("_sceFiberSetJmp"); extern "C" s32 PS4_SYSV_ABI _sceFiberLongJmp(OrbisFiberContext* ctx) asm("_sceFiberLongJmp"); extern "C" void PS4_SYSV_ABI _sceFiberSwitchEntry(OrbisFiberData* data, bool set_fpu) asm("_sceFiberSwitchEntry"); +#elif defined(ARCH_ARM64) +extern "C" s32 PS4_SYSV_ABI _sceFiberSetJmp(OrbisFiberContext* ctx); +extern "C" s32 PS4_SYSV_ABI _sceFiberLongJmp(OrbisFiberContext* ctx); +extern "C" void PS4_SYSV_ABI _sceFiberSwitchEntry(OrbisFiberData* data, + bool set_fpu); +#endif extern "C" void PS4_SYSV_ABI _sceFiberForceQuit(u64 ret) asm("_sceFiberForceQuit"); +#ifdef ARCH_ARM64 +extern "C" s32 PS4_SYSV_ABI _sceFiberSetJmp(OrbisFiberContext* ctx) { + UNREACHABLE_MSG("ARM64 fiber implementation not yet complete"); + return 0; +} + +extern "C" s32 PS4_SYSV_ABI _sceFiberLongJmp(OrbisFiberContext* ctx) { + UNREACHABLE_MSG("ARM64 fiber implementation not yet complete"); + return 0; +} + +extern "C" void PS4_SYSV_ABI _sceFiberSwitchEntry(OrbisFiberData* data, bool set_fpu) { + UNREACHABLE_MSG("ARM64 fiber implementation not yet complete"); +} +#endif + extern "C" void PS4_SYSV_ABI _sceFiberForceQuit(u64 ret) { OrbisFiberContext* g_ctx = GetFiberContext(); g_ctx->return_val = ret; diff --git a/src/core/libraries/kernel/kernel.cpp b/src/core/libraries/kernel/kernel.cpp index 6594bfab2..434526982 100644 --- a/src/core/libraries/kernel/kernel.cpp +++ b/src/core/libraries/kernel/kernel.cpp @@ -318,8 +318,8 @@ void RegisterLib(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("Mv1zUObHvXI", "libkernel", 1, "libkernel", sceKernelGetSystemSwVersion); LIB_FUNCTION("igMefp4SAv0", "libkernel", 1, "libkernel", get_authinfo); LIB_FUNCTION("G-MYv5erXaU", "libkernel", 1, "libkernel", sceKernelGetAppInfo); - LIB_FUNCTION("PfccT7qURYE", "libkernel", 1, "libkernel", kernel_ioctl); - LIB_FUNCTION("wW+k21cmbwQ", "libkernel", 1, "libkernel", kernel_ioctl); + LIB_FUNCTION_VARIADIC("PfccT7qURYE", "libkernel", 1, "libkernel", kernel_ioctl); + LIB_FUNCTION_VARIADIC("wW+k21cmbwQ", "libkernel", 1, "libkernel", kernel_ioctl); LIB_FUNCTION("JGfTMBOdUJo", "libkernel", 1, "libkernel", sceKernelGetFsSandboxRandomWord); LIB_FUNCTION("6xVpy0Fdq+I", "libkernel", 1, "libkernel", _sigprocmask); LIB_FUNCTION("Xjoosiw+XPI", "libkernel", 1, "libkernel", sceKernelUuidCreate); diff --git a/src/core/libraries/kernel/threads/exception.cpp b/src/core/libraries/kernel/threads/exception.cpp index 95ced79c0..094cfb61f 100644 --- a/src/core/libraries/kernel/threads/exception.cpp +++ b/src/core/libraries/kernel/threads/exception.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: 
Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
+#include "common/arch.h"
 #include "common/assert.h"
 #include "core/libraries/kernel/orbis_error.h"
 #include "core/libraries/kernel/threads/exception.h"
@@ -23,6 +24,7 @@ void SigactionHandler(int signum, siginfo_t* inf, ucontext_t* raw_context) {
     if (handler) {
         auto ctx = Ucontext{};
 #ifdef __APPLE__
+#ifdef ARCH_X86_64
         const auto& regs = raw_context->uc_mcontext->__ss;
         ctx.uc_mcontext.mc_r8 = regs.__r8;
         ctx.uc_mcontext.mc_r9 = regs.__r9;
@@ -42,7 +44,13 @@ void SigactionHandler(int signum, siginfo_t* inf, ucontext_t* raw_context) {
         ctx.uc_mcontext.mc_rsp = regs.__rsp;
         ctx.uc_mcontext.mc_fs = regs.__fs;
         ctx.uc_mcontext.mc_gs = regs.__gs;
+#elif defined(ARCH_ARM64)
+        UNREACHABLE_MSG("ARM64 exception handling not yet implemented");
 #else
+#error "Unsupported architecture"
+#endif
+#else
+#ifdef ARCH_X86_64
         const auto& regs = raw_context->uc_mcontext.gregs;
         ctx.uc_mcontext.mc_r8 = regs[REG_R8];
         ctx.uc_mcontext.mc_r9 = regs[REG_R9];
@@ -62,6 +70,11 @@ void SigactionHandler(int signum, siginfo_t* inf, ucontext_t* raw_context) {
         ctx.uc_mcontext.mc_rsp = regs[REG_RSP];
         ctx.uc_mcontext.mc_fs = (regs[REG_CSGSFS] >> 32) & 0xFFFF;
         ctx.uc_mcontext.mc_gs = (regs[REG_CSGSFS] >> 16) & 0xFFFF;
+#elif defined(ARCH_ARM64)
+        UNREACHABLE_MSG("ARM64 exception handling not yet implemented");
+#else
+#error "Unsupported architecture"
+#endif
 #endif
         handler(POSIX_SIGUSR1, &ctx);
     }
diff --git a/src/core/libraries/kernel/threads/mutex.cpp b/src/core/libraries/kernel/threads/mutex.cpp
index 5d97c5dc1..006f86084 100644
--- a/src/core/libraries/kernel/threads/mutex.cpp
+++ b/src/core/libraries/kernel/threads/mutex.cpp
@@ -18,7 +18,13 @@ static std::mutex MutxStaticLock;
 #define THR_ADAPTIVE_MUTEX_INITIALIZER ((PthreadMutex*)1)
 #define THR_MUTEX_DESTROYED ((PthreadMutex*)2)
 
+#ifdef ARCH_X86_64
 #define CPU_SPINWAIT __asm__ volatile("pause")
+#elif defined(ARCH_ARM64)
+#define CPU_SPINWAIT __asm__ volatile("yield")
+#else
+#define CPU_SPINWAIT
+#endif
 
 #define CHECK_AND_INIT_MUTEX                                                                       \
     if (PthreadMutex* m = *mutex; m <= THR_MUTEX_DESTROYED) [[unlikely]] {                         \
diff --git a/src/core/libraries/libc_internal/libc_internal_io.cpp b/src/core/libraries/libc_internal/libc_internal_io.cpp
index 8105b66cc..1677d41a4 100644
--- a/src/core/libraries/libc_internal/libc_internal_io.cpp
+++ b/src/core/libraries/libc_internal/libc_internal_io.cpp
@@ -18,6 +18,6 @@ int PS4_SYSV_ABI internal_snprintf(char* s, size_t n, VA_ARGS) {
     return snprintf_ctx(s, n, &ctx);
 }
 void RegisterlibSceLibcInternalIo(Core::Loader::SymbolsResolver* sym) {
-    LIB_FUNCTION("eLdDw6l0-bU", "libSceLibcInternal", 1, "libSceLibcInternal", internal_snprintf);
+    LIB_FUNCTION_VARIADIC("eLdDw6l0-bU", "libSceLibcInternal", 1, "libSceLibcInternal", internal_snprintf);
 }
 } // namespace Libraries::LibcInternal
\ No newline at end of file
diff --git a/src/core/libraries/libs.h b/src/core/libraries/libs.h
index 1229e6411..b170ad554 100644
--- a/src/core/libraries/libs.h
+++ b/src/core/libraries/libs.h
@@ -19,6 +19,18 @@
         sym->AddSymbol(sr, func);                                                                  \
     }
 
+#define LIB_FUNCTION_VARIADIC(nid, lib, libversion, mod, function)                                 \
+    {                                                                                              \
+        Core::Loader::SymbolResolver sr{};                                                         \
+        sr.name = nid;                                                                             \
+        sr.library = lib;                                                                          \
+        sr.library_version = libversion;                                                           \
+        sr.module = mod;                                                                           \
+        sr.type = Core::Loader::SymbolType::Function;                                              \
+        auto func = reinterpret_cast<u64>(function);                                               \
+        sym->AddSymbol(sr, func);                                                                  \
+    }
+
 #define LIB_OBJ(nid, lib, libversion, mod, obj)                                                    \
     {                                                                                              \
         Core::Loader::SymbolResolver sr{};                                                         \
diff --git 
a/src/core/linker.cpp b/src/core/linker.cpp index ac6b37769..7ac8791ae 100644 --- a/src/core/linker.cpp +++ b/src/core/linker.cpp @@ -49,6 +49,26 @@ static PS4_SYSV_ABI void* RunMainEntry [[noreturn]] (EntryParams* params) { : "rax", "rsi", "rdi"); UNREACHABLE(); } +#elif defined(ARCH_ARM64) +static PS4_SYSV_ABI void* RunMainEntry [[noreturn]] (EntryParams* params) { + void* entry = reinterpret_cast(params->entry_addr); + asm volatile("mov x2, sp\n" + "and x2, x2, #0xFFFFFFFFFFFFFFF0\n" + "sub x2, x2, #8\n" + "mov sp, x2\n" + "ldr x0, [%1, #8]\n" + "sub sp, sp, #16\n" + "str x0, [sp]\n" + "ldr x0, [%1]\n" + "str x0, [sp, #8]\n" + "mov x0, %1\n" + "mov x1, %2\n" + "br %0\n" + : + : "r"(entry), "r"(params), "r"(ProgramExitFunc) + : "x0", "x1", "x2", "memory"); + UNREACHABLE(); +} #endif Linker::Linker() : memory{Memory::Instance()} {} diff --git a/src/core/thread.cpp b/src/core/thread.cpp index 0015f40b9..82fa81867 100644 --- a/src/core/thread.cpp +++ b/src/core/thread.cpp @@ -11,8 +11,10 @@ #include #include #include +#ifdef ARCH_X86_64 #include #endif +#endif namespace Core { @@ -126,8 +128,10 @@ void NativeThread::Exit() { void NativeThread::Initialize() { // Set MXCSR and FPUCW registers to the values used by Orbis. +#ifdef ARCH_X86_64 _mm_setcsr(ORBIS_MXCSR); asm volatile("fldcw %0" : : "m"(ORBIS_FPUCW)); +#endif #if _WIN64 tid = GetCurrentThreadId(); #else diff --git a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp index e1f9f2c5a..e45ecf66c 100644 --- a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp +++ b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp @@ -4,8 +4,11 @@ #include #include +#ifdef ARCH_X86_64 #include #include +#endif +#include "common/arch.h" #include "common/config.h" #include "common/io_file.h" #include "common/logging/log.h" @@ -23,19 +26,27 @@ #include "src/common/arch.h" #include "src/common/decoder.h" +#ifdef ARCH_X86_64 using namespace Xbyak::util; static Xbyak::CodeGenerator g_srt_codegen(32_MB); static const u8* g_srt_codegen_start = nullptr; +#endif namespace Shader { +#ifdef ARCH_X86_64 PFN_SrtWalker RegisterWalkerCode(const u8* ptr, size_t size) { const auto func_addr = (PFN_SrtWalker)g_srt_codegen.getCurr(); g_srt_codegen.db(ptr, size); g_srt_codegen.ready(); return func_addr; } +#else +PFN_SrtWalker RegisterWalkerCode(const u8* ptr, size_t size) { + return nullptr; +} +#endif } // namespace Shader @@ -69,12 +80,12 @@ static void DumpSrtProgram(const Shader::Info& info, const u8* code, size_t code } static bool SrtWalkerSignalHandler(void* context, void* fault_address) { - // Only handle if the fault address is within the SRT code range +#ifdef ARCH_X86_64 const u8* code_start = g_srt_codegen_start; const u8* code_end = code_start + g_srt_codegen.getSize(); const void* code = Common::GetRip(context); if (code < code_start || code >= code_end) { - return false; // Not in SRT code range + return false; } // Patch instruction to zero register @@ -117,6 +128,9 @@ static bool SrtWalkerSignalHandler(void* context, void* fault_address) { LOG_DEBUG(Render_Recompiler, "Patched SRT walker at {}", code); return true; +#else + return false; +#endif } using namespace Shader; @@ -159,6 +173,7 @@ namespace Shader::Optimization { namespace { +#ifdef ARCH_X86_64 static inline void PushPtr(Xbyak::CodeGenerator& c, u32 off_dw) { c.push(rdi); c.mov(rdi, ptr[rdi + (off_dw << 2)]); @@ -169,18 +184,12 @@ static inline void PushPtr(Xbyak::CodeGenerator& 
c, u32 off_dw) { static inline void PopPtr(Xbyak::CodeGenerator& c) { c.pop(rdi); }; - static void VisitPointer(u32 off_dw, IR::Inst* subtree, PassInfo& pass_info, Xbyak::CodeGenerator& c) { PushPtr(c, off_dw); PassInfo::PtrUserList* use_list = pass_info.GetUsesAsPointer(subtree); ASSERT(use_list); - // First copy all the src data from this tree level - // That way, all data that was contiguous in the guest SRT is also contiguous in the - // flattened buffer. - // TODO src and dst are contiguous. Optimize with wider loads/stores - // TODO if this subtree is dynamically indexed, don't compact it (keep it sparse) for (auto [src_off_dw, use] : *use_list) { c.mov(r10d, ptr[rdi + (src_off_dw << 2)]); c.mov(ptr[rsi + (pass_info.dst_off_dw << 2)], r10d); @@ -189,7 +198,6 @@ static void VisitPointer(u32 off_dw, IR::Inst* subtree, PassInfo& pass_info, pass_info.dst_off_dw++; } - // Then visit any children used as pointers for (const auto [src_off_dw, use] : *use_list) { if (pass_info.GetUsesAsPointer(use)) { VisitPointer(src_off_dw, use, pass_info, c); @@ -236,6 +244,10 @@ static void GenerateSrtProgram(Info& info, PassInfo& pass_info) { info.srt_info.flattened_bufsize_dw = pass_info.dst_off_dw; } +#else +static void GenerateSrtProgram(Info& info, PassInfo& pass_info) { +} +#endif }; // namespace @@ -293,7 +305,9 @@ void FlattenExtendedUserdataPass(IR::Program& program) { } } +#ifdef ARCH_X86_64 GenerateSrtProgram(info, pass_info); +#endif // Assign offsets to duplicate readconsts for (IR::Inst* readconst : all_readconsts) { From 6b6c405329e987c30301bed784769012b4bfb88c Mon Sep 17 00:00:00 2001 From: AlpinDale Date: Tue, 9 Dec 2025 00:02:04 +0430 Subject: [PATCH 02/19] nit: undo --- .../ir/passes/flatten_extended_userdata_pass.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp index e45ecf66c..07e004ae6 100644 --- a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp +++ b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp @@ -81,11 +81,12 @@ static void DumpSrtProgram(const Shader::Info& info, const u8* code, size_t code static bool SrtWalkerSignalHandler(void* context, void* fault_address) { #ifdef ARCH_X86_64 + // Only handle if the fault address is within the SRT code range const u8* code_start = g_srt_codegen_start; const u8* code_end = code_start + g_srt_codegen.getSize(); const void* code = Common::GetRip(context); if (code < code_start || code >= code_end) { - return false; + return false; // Not in SRT code range } // Patch instruction to zero register @@ -184,12 +185,18 @@ static inline void PushPtr(Xbyak::CodeGenerator& c, u32 off_dw) { static inline void PopPtr(Xbyak::CodeGenerator& c) { c.pop(rdi); }; + static void VisitPointer(u32 off_dw, IR::Inst* subtree, PassInfo& pass_info, Xbyak::CodeGenerator& c) { PushPtr(c, off_dw); PassInfo::PtrUserList* use_list = pass_info.GetUsesAsPointer(subtree); ASSERT(use_list); + // First copy all the src data from this tree level + // That way, all data that was contiguous in the guest SRT is also contiguous in the + // flattened buffer. + // TODO src and dst are contiguous. 
Optimize with wider loads/stores + // TODO if this subtree is dynamically indexed, don't compact it (keep it sparse) for (auto [src_off_dw, use] : *use_list) { c.mov(r10d, ptr[rdi + (src_off_dw << 2)]); c.mov(ptr[rsi + (pass_info.dst_off_dw << 2)], r10d); @@ -198,6 +205,7 @@ static void VisitPointer(u32 off_dw, IR::Inst* subtree, PassInfo& pass_info, pass_info.dst_off_dw++; } + // Then visit any children used as pointers for (const auto [src_off_dw, use] : *use_list) { if (pass_info.GetUsesAsPointer(use)) { VisitPointer(src_off_dw, use, pass_info, c); From 513bc203dfddaaa0f64cca1571ed68a5d2e4bf4d Mon Sep 17 00:00:00 2001 From: AlpinDale Date: Tue, 9 Dec 2025 00:29:57 +0430 Subject: [PATCH 03/19] some address space work --- src/core/address_space.cpp | 70 ++++++++++++++++++++++++++++++++++---- 1 file changed, 64 insertions(+), 6 deletions(-) diff --git a/src/core/address_space.cpp b/src/core/address_space.cpp index 3f063ea76..bb20ad659 100644 --- a/src/core/address_space.cpp +++ b/src/core/address_space.cpp @@ -20,12 +20,16 @@ #include #endif -#if defined(__APPLE__) && defined(ARCH_X86_64) +#if defined(__APPLE__) && (defined(ARCH_X86_64) || defined(ARCH_ARM64)) // Reserve space for the system address space using a zerofill section. +// Note: These assembly directives are x86_64-specific, but the memory layout constants +// below apply to both x86_64 and ARM64 on macOS. +#if defined(ARCH_X86_64) asm(".zerofill SYSTEM_MANAGED,SYSTEM_MANAGED,__SYSTEM_MANAGED,0x7FFBFC000"); asm(".zerofill SYSTEM_RESERVED,SYSTEM_RESERVED,__SYSTEM_RESERVED,0x7C0004000"); asm(".zerofill USER_AREA,USER_AREA,__USER_AREA,0x5F9000000000"); #endif +#endif namespace Core { @@ -33,7 +37,7 @@ namespace Core { constexpr VAddr SYSTEM_MANAGED_MIN = 0x400000ULL; constexpr VAddr SYSTEM_MANAGED_MAX = 0x7FFFFBFFFULL; constexpr VAddr SYSTEM_RESERVED_MIN = 0x7FFFFC000ULL; -#if defined(__APPLE__) && defined(ARCH_X86_64) +#if defined(__APPLE__) && (defined(ARCH_X86_64) || defined(ARCH_ARM64)) // Commpage ranges from 0xFC0000000 - 0xFFFFFFFFF, so decrease the system reserved maximum. constexpr VAddr SYSTEM_RESERVED_MAX = 0xFBFFFFFFFULL; // GPU-reserved memory ranges from 0x1000000000 - 0x6FFFFFFFFF, so increase the user minimum. @@ -512,11 +516,13 @@ struct AddressSpace::Impl { user_size = UserSize; constexpr int protection_flags = PROT_READ | PROT_WRITE; +#if defined(__APPLE__) && (defined(ARCH_X86_64) || defined(ARCH_ARM64)) + // On macOS (both x86_64 and ARM64), we run into limitations due to the commpage from + // 0xFC0000000 - 0xFFFFFFFFF and the GPU carveout region from 0x1000000000 - 0x6FFFFFFFFF. + // Because this creates gaps in the available virtual memory region, we map memory space + // using three distinct parts. +#if defined(ARCH_X86_64) constexpr int map_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | MAP_FIXED; -#if defined(__APPLE__) && defined(ARCH_X86_64) - // On ARM64 Macs, we run into limitations due to the commpage from 0xFC0000000 - 0xFFFFFFFFF - // and the GPU carveout region from 0x1000000000 - 0x6FFFFFFFFF. Because this creates gaps - // in the available virtual memory region, we map memory space using three distinct parts. 
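+    // Editorial note (inference, not part of the upstream patch): the .zerofill
+    // sections above make the loader reserve these exact ranges at startup, which is
+    // what lets the MAP_FIXED mappings below safely replace them on x86_64. No such
+    // reservation exists yet for the ARM64 path added here, which is why its fixed
+    // mappings can collide with dyld/system allocations and need the fallbacks below.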
system_managed_base = reinterpret_cast(mmap(reinterpret_cast(SYSTEM_MANAGED_MIN), system_managed_size, protection_flags, map_flags, -1, 0)); @@ -525,6 +531,58 @@ struct AddressSpace::Impl { system_reserved_size, protection_flags, map_flags, -1, 0)); user_base = reinterpret_cast( mmap(reinterpret_cast(USER_MIN), user_size, protection_flags, map_flags, -1, 0)); +#elif defined(ARCH_ARM64) + // On ARM64 macOS, MAP_FIXED may not work at these addresses due to system restrictions. + // We need these exact addresses for the PS4 memory layout, so we try multiple approaches. + int map_flags_fixed = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | MAP_FIXED; + + system_managed_base = + reinterpret_cast(mmap(reinterpret_cast(SYSTEM_MANAGED_MIN), + system_managed_size, protection_flags, map_flags_fixed, -1, 0)); + if (system_managed_base == MAP_FAILED) { + // Try without MAP_NORESERVE + int map_flags_noreserve = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED; + system_managed_base = + reinterpret_cast(mmap(reinterpret_cast(SYSTEM_MANAGED_MIN), + system_managed_size, protection_flags, map_flags_noreserve, -1, 0)); + if (system_managed_base == MAP_FAILED) { + LOG_CRITICAL(Kernel_Vmm, "mmap failed for system_managed_base at {}: {}. " + "ARM64 macOS does not allow mapping at address 0x400000. " + "The PS4 memory layout requires exact addresses. " + "Consider using x86_64 mode or implementing address translation for ARM64.", + fmt::ptr(reinterpret_cast(SYSTEM_MANAGED_MIN)), strerror(errno)); + throw std::bad_alloc{}; + } + } + + system_reserved_base = + reinterpret_cast(mmap(reinterpret_cast(SYSTEM_RESERVED_MIN), + system_reserved_size, protection_flags, map_flags_fixed, -1, 0)); + if (system_reserved_base == MAP_FAILED) { + int map_flags_noreserve = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED; + system_reserved_base = + reinterpret_cast(mmap(reinterpret_cast(SYSTEM_RESERVED_MIN), + system_reserved_size, protection_flags, map_flags_noreserve, -1, 0)); + if (system_reserved_base == MAP_FAILED) { + LOG_CRITICAL(Kernel_Vmm, "mmap failed for system_reserved_base at {}: {}", + fmt::ptr(reinterpret_cast(SYSTEM_RESERVED_MIN)), strerror(errno)); + throw std::bad_alloc{}; + } + } + + user_base = reinterpret_cast( + mmap(reinterpret_cast(USER_MIN), user_size, protection_flags, map_flags_fixed, -1, 0)); + if (user_base == MAP_FAILED) { + int map_flags_noreserve = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED; + user_base = reinterpret_cast( + mmap(reinterpret_cast(USER_MIN), user_size, protection_flags, map_flags_noreserve, -1, 0)); + if (user_base == MAP_FAILED) { + LOG_CRITICAL(Kernel_Vmm, "mmap failed for user_base at {}: {}", + fmt::ptr(reinterpret_cast(USER_MIN)), strerror(errno)); + throw std::bad_alloc{}; + } + } +#endif #else const auto virtual_size = system_managed_size + system_reserved_size + user_size; #if defined(ARCH_X86_64) From 1c0617146c2a1e8f2cd8a48061f6d53ffc412822 Mon Sep 17 00:00:00 2001 From: AlpinDale Date: Tue, 9 Dec 2025 00:33:28 +0430 Subject: [PATCH 04/19] flexible address in mmap --- src/core/address_space.cpp | 63 +++++++++++++++----------------------- 1 file changed, 24 insertions(+), 39 deletions(-) diff --git a/src/core/address_space.cpp b/src/core/address_space.cpp index bb20ad659..5327d6fa0 100644 --- a/src/core/address_space.cpp +++ b/src/core/address_space.cpp @@ -532,56 +532,41 @@ struct AddressSpace::Impl { user_base = reinterpret_cast( mmap(reinterpret_cast(USER_MIN), user_size, protection_flags, map_flags, -1, 0)); #elif defined(ARCH_ARM64) - // On ARM64 macOS, MAP_FIXED may not work at these 
addresses due to system restrictions. - // We need these exact addresses for the PS4 memory layout, so we try multiple approaches. - int map_flags_fixed = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | MAP_FIXED; + // On ARM64 macOS, MAP_FIXED doesn't work at low addresses (0x400000) due to system restrictions. + // Map memory wherever possible and use offset calculations. This is a temporary solution + // until proper address translation is implemented for ARM64. + // Note: This means the PS4 virtual addresses won't match host addresses, so instruction + // translation/JIT will need to handle the offset. + constexpr int map_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; + // Map the three regions separately, but let the system choose addresses system_managed_base = - reinterpret_cast(mmap(reinterpret_cast(SYSTEM_MANAGED_MIN), - system_managed_size, protection_flags, map_flags_fixed, -1, 0)); + reinterpret_cast(mmap(nullptr, system_managed_size, protection_flags, map_flags, -1, 0)); if (system_managed_base == MAP_FAILED) { - // Try without MAP_NORESERVE - int map_flags_noreserve = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED; - system_managed_base = - reinterpret_cast(mmap(reinterpret_cast(SYSTEM_MANAGED_MIN), - system_managed_size, protection_flags, map_flags_noreserve, -1, 0)); - if (system_managed_base == MAP_FAILED) { - LOG_CRITICAL(Kernel_Vmm, "mmap failed for system_managed_base at {}: {}. " - "ARM64 macOS does not allow mapping at address 0x400000. " - "The PS4 memory layout requires exact addresses. " - "Consider using x86_64 mode or implementing address translation for ARM64.", - fmt::ptr(reinterpret_cast(SYSTEM_MANAGED_MIN)), strerror(errno)); - throw std::bad_alloc{}; - } + LOG_CRITICAL(Kernel_Vmm, "mmap failed for system_managed_base: {}", strerror(errno)); + throw std::bad_alloc{}; } system_reserved_base = - reinterpret_cast(mmap(reinterpret_cast(SYSTEM_RESERVED_MIN), - system_reserved_size, protection_flags, map_flags_fixed, -1, 0)); + reinterpret_cast(mmap(nullptr, system_reserved_size, protection_flags, map_flags, -1, 0)); if (system_reserved_base == MAP_FAILED) { - int map_flags_noreserve = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED; - system_reserved_base = - reinterpret_cast(mmap(reinterpret_cast(SYSTEM_RESERVED_MIN), - system_reserved_size, protection_flags, map_flags_noreserve, -1, 0)); - if (system_reserved_base == MAP_FAILED) { - LOG_CRITICAL(Kernel_Vmm, "mmap failed for system_reserved_base at {}: {}", - fmt::ptr(reinterpret_cast(SYSTEM_RESERVED_MIN)), strerror(errno)); - throw std::bad_alloc{}; - } + LOG_CRITICAL(Kernel_Vmm, "mmap failed for system_reserved_base: {}", strerror(errno)); + throw std::bad_alloc{}; } user_base = reinterpret_cast( - mmap(reinterpret_cast(USER_MIN), user_size, protection_flags, map_flags_fixed, -1, 0)); + mmap(nullptr, user_size, protection_flags, map_flags, -1, 0)); if (user_base == MAP_FAILED) { - int map_flags_noreserve = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED; - user_base = reinterpret_cast( - mmap(reinterpret_cast(USER_MIN), user_size, protection_flags, map_flags_noreserve, -1, 0)); - if (user_base == MAP_FAILED) { - LOG_CRITICAL(Kernel_Vmm, "mmap failed for user_base at {}: {}", - fmt::ptr(reinterpret_cast(USER_MIN)), strerror(errno)); - throw std::bad_alloc{}; - } + LOG_CRITICAL(Kernel_Vmm, "mmap failed for user_base: {}", strerror(errno)); + throw std::bad_alloc{}; } + + LOG_WARNING(Kernel_Vmm, "ARM64 macOS: Using flexible memory layout. " + "PS4 addresses will be offset from host addresses. 
" + "system_managed: {} (expected {}), system_reserved: {} (expected {}), user: {} (expected {})", + fmt::ptr(system_managed_base), fmt::ptr(reinterpret_cast(SYSTEM_MANAGED_MIN)), + fmt::ptr(system_reserved_base), fmt::ptr(reinterpret_cast(SYSTEM_RESERVED_MIN)), + fmt::ptr(user_base), fmt::ptr(reinterpret_cast(USER_MIN))); #endif #else const auto virtual_size = system_managed_size + system_reserved_size + user_size; @@ -618,7 +603,7 @@ struct AddressSpace::Impl { fmt::ptr(user_base + user_size - 1)); const VAddr system_managed_addr = reinterpret_cast(system_managed_base); - const VAddr system_reserved_addr = reinterpret_cast(system_managed_base); + const VAddr system_reserved_addr = reinterpret_cast(system_reserved_base); const VAddr user_addr = reinterpret_cast(user_base); m_free_regions.insert({system_managed_addr, system_managed_addr + system_managed_size}); m_free_regions.insert({system_reserved_addr, system_reserved_addr + system_reserved_size}); From 26900f9074a04a945ab50403b3ef009c7ce12c69 Mon Sep 17 00:00:00 2001 From: AlpinDale Date: Tue, 9 Dec 2025 00:38:36 +0430 Subject: [PATCH 05/19] address translation --- src/core/address_space.cpp | 63 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/src/core/address_space.cpp b/src/core/address_space.cpp index 5327d6fa0..584824384 100644 --- a/src/core/address_space.cpp +++ b/src/core/address_space.cpp @@ -650,8 +650,32 @@ struct AddressSpace::Impl { const int handle = phys_addr != -1 ? (fd == -1 ? backing_fd : fd) : -1; const off_t host_offset = phys_addr != -1 ? phys_addr : 0; const int flag = phys_addr != -1 ? MAP_SHARED : (MAP_ANONYMOUS | MAP_PRIVATE); + +#if defined(__APPLE__) && defined(ARCH_ARM64) + // On ARM64 macOS, translate PS4 virtual addresses to host addresses + void* host_addr = nullptr; + if (virtual_addr >= SYSTEM_MANAGED_MIN && virtual_addr <= SYSTEM_MANAGED_MAX) { + // System managed region + u64 offset = virtual_addr - SYSTEM_MANAGED_MIN; + host_addr = system_managed_base + offset; + } else if (virtual_addr >= SYSTEM_RESERVED_MIN && virtual_addr <= SYSTEM_RESERVED_MAX) { + // System reserved region + u64 offset = virtual_addr - SYSTEM_RESERVED_MIN; + host_addr = system_reserved_base + offset; + } else if (virtual_addr >= USER_MIN && virtual_addr <= USER_MAX) { + // User region + u64 offset = virtual_addr - USER_MIN; + host_addr = user_base + offset; + } else { + LOG_CRITICAL(Kernel_Vmm, "Invalid virtual address for mapping: {:#x}", virtual_addr); + return MAP_FAILED; + } + + void* ret = mmap(host_addr, size, prot, MAP_FIXED | flag, handle, host_offset); +#else void* ret = mmap(reinterpret_cast(virtual_addr), size, prot, MAP_FIXED | flag, handle, host_offset); +#endif ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno)); return ret; } @@ -671,9 +695,29 @@ struct AddressSpace::Impl { // Free the relevant region. 
m_free_regions.insert({start_address, end_address}); +#if defined(__APPLE__) && defined(ARCH_ARM64) + // On ARM64 macOS, translate PS4 virtual addresses to host addresses + void* host_addr = nullptr; + if (start_address >= SYSTEM_MANAGED_MIN && start_address <= SYSTEM_MANAGED_MAX) { + u64 offset = start_address - SYSTEM_MANAGED_MIN; + host_addr = system_managed_base + offset; + } else if (start_address >= SYSTEM_RESERVED_MIN && start_address <= SYSTEM_RESERVED_MAX) { + u64 offset = start_address - SYSTEM_RESERVED_MIN; + host_addr = system_reserved_base + offset; + } else if (start_address >= USER_MIN && start_address <= USER_MAX) { + u64 offset = start_address - USER_MIN; + host_addr = user_base + offset; + } else { + LOG_CRITICAL(Kernel_Vmm, "Invalid virtual address for unmapping: {:#x}", start_address); + return; + } + void* ret = mmap(host_addr, end_address - start_address, + PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); +#else // Return the adjusted pointers. void* ret = mmap(reinterpret_cast(start_address), end_address - start_address, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); +#endif ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno)); } @@ -690,7 +734,26 @@ struct AddressSpace::Impl { flags |= PROT_EXEC; } #endif +#if defined(__APPLE__) && defined(ARCH_ARM64) + // On ARM64 macOS, translate PS4 virtual addresses to host addresses + void* host_addr = nullptr; + if (virtual_addr >= SYSTEM_MANAGED_MIN && virtual_addr <= SYSTEM_MANAGED_MAX) { + u64 offset = virtual_addr - SYSTEM_MANAGED_MIN; + host_addr = system_managed_base + offset; + } else if (virtual_addr >= SYSTEM_RESERVED_MIN && virtual_addr <= SYSTEM_RESERVED_MAX) { + u64 offset = virtual_addr - SYSTEM_RESERVED_MIN; + host_addr = system_reserved_base + offset; + } else if (virtual_addr >= USER_MIN && virtual_addr <= USER_MAX) { + u64 offset = virtual_addr - USER_MIN; + host_addr = user_base + offset; + } else { + LOG_CRITICAL(Kernel_Vmm, "Invalid virtual address for protection: {:#x}", virtual_addr); + return; + } + int ret = mprotect(host_addr, size, flags); +#else int ret = mprotect(reinterpret_cast(virtual_addr), size, flags); +#endif ASSERT_MSG(ret == 0, "mprotect failed: {}", strerror(errno)); } From c99e312e56f6a230dcb85694dd2f9921f491e1b6 Mon Sep 17 00:00:00 2001 From: AlpinDale Date: Tue, 9 Dec 2025 02:25:58 +0430 Subject: [PATCH 06/19] simple JIT recompiler + tests --- CMakeLists.txt | 39 ++ src/core/address_space.cpp | 74 ++- src/core/address_space.h | 3 + src/core/jit/arm64_codegen.cpp | 562 ++++++++++++++++++++++ src/core/jit/arm64_codegen.h | 130 ++++++ src/core/jit/block_manager.cpp | 94 ++++ src/core/jit/block_manager.h | 49 ++ src/core/jit/calling_convention.cpp | 63 +++ src/core/jit/calling_convention.h | 33 ++ src/core/jit/execution_engine.cpp | 202 ++++++++ src/core/jit/execution_engine.h | 53 +++ src/core/jit/register_mapping.cpp | 123 +++++ src/core/jit/register_mapping.h | 136 ++++++ src/core/jit/simd_translator.cpp | 206 ++++++++ src/core/jit/simd_translator.h | 39 ++ src/core/jit/x86_64_translator.cpp | 701 ++++++++++++++++++++++++++++ src/core/jit/x86_64_translator.h | 80 ++++ src/core/linker.cpp | 26 +- src/core/memory.cpp | 10 + src/core/signals.cpp | 21 +- tests/CMakeLists.txt | 58 +++ tests/main.cpp | 9 + tests/test_arm64_codegen.cpp | 111 +++++ tests/test_block_manager.cpp | 180 +++++++ tests/test_execution_engine.cpp | 49 ++ tests/test_logging_stub.cpp | 25 + tests/test_register_mapping.cpp | 86 ++++ 27 files changed, 3121 insertions(+), 41 deletions(-) 
create mode 100644 src/core/jit/arm64_codegen.cpp create mode 100644 src/core/jit/arm64_codegen.h create mode 100644 src/core/jit/block_manager.cpp create mode 100644 src/core/jit/block_manager.h create mode 100644 src/core/jit/calling_convention.cpp create mode 100644 src/core/jit/calling_convention.h create mode 100644 src/core/jit/execution_engine.cpp create mode 100644 src/core/jit/execution_engine.h create mode 100644 src/core/jit/register_mapping.cpp create mode 100644 src/core/jit/register_mapping.h create mode 100644 src/core/jit/simd_translator.cpp create mode 100644 src/core/jit/simd_translator.h create mode 100644 src/core/jit/x86_64_translator.cpp create mode 100644 src/core/jit/x86_64_translator.h create mode 100644 tests/CMakeLists.txt create mode 100644 tests/main.cpp create mode 100644 tests/test_arm64_codegen.cpp create mode 100644 tests/test_block_manager.cpp create mode 100644 tests/test_execution_engine.cpp create mode 100644 tests/test_logging_stub.cpp create mode 100644 tests/test_register_mapping.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index df2905b70..e3421d138 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,6 +19,8 @@ endif() project(shadPS4 CXX C ASM ${ADDITIONAL_LANGUAGES}) +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + # Forcing PIE makes sure that the base address is high enough so that it doesn't clash with the PS4 memory. if(UNIX AND NOT APPLE) set(CMAKE_POSITION_INDEPENDENT_CODE TRUE) @@ -846,6 +848,24 @@ if (ARCHITECTURE STREQUAL "x86_64") src/core/cpu_patches.h) endif() +if (ARCHITECTURE STREQUAL "arm64") + set(CORE ${CORE} + src/core/jit/arm64_codegen.cpp + src/core/jit/arm64_codegen.h + src/core/jit/register_mapping.cpp + src/core/jit/register_mapping.h + src/core/jit/x86_64_translator.cpp + src/core/jit/x86_64_translator.h + src/core/jit/block_manager.cpp + src/core/jit/block_manager.h + src/core/jit/execution_engine.cpp + src/core/jit/execution_engine.h + src/core/jit/calling_convention.cpp + src/core/jit/calling_convention.h + src/core/jit/simd_translator.cpp + src/core/jit/simd_translator.h) +endif() + set(SHADER_RECOMPILER src/shader_recompiler/profile.h src/shader_recompiler/recompiler.cpp src/shader_recompiler/recompiler.h @@ -1220,3 +1240,22 @@ endif() # Install rules install(TARGETS shadps4 BUNDLE DESTINATION .) + +# Testing +option(BUILD_TESTS "Build test suite" OFF) + +if(BUILD_TESTS) + enable_testing() + + include(FetchContent) + FetchContent_Declare( + googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG v1.17.0 + ) + # For Windows: Prevent overriding the parent project's compiler/linker settings + set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) + FetchContent_MakeAvailable(googletest) + + add_subdirectory(tests) +endif() diff --git a/src/core/address_space.cpp b/src/core/address_space.cpp index 584824384..a82a224a3 100644 --- a/src/core/address_space.cpp +++ b/src/core/address_space.cpp @@ -532,41 +532,44 @@ struct AddressSpace::Impl { user_base = reinterpret_cast( mmap(reinterpret_cast(USER_MIN), user_size, protection_flags, map_flags, -1, 0)); #elif defined(ARCH_ARM64) - // On ARM64 macOS, MAP_FIXED doesn't work at low addresses (0x400000) due to system restrictions. - // Map memory wherever possible and use offset calculations. This is a temporary solution - // until proper address translation is implemented for ARM64. 
- // Note: This means the PS4 virtual addresses won't match host addresses, so instruction + // On ARM64 macOS, MAP_FIXED doesn't work at low addresses (0x400000) due to system + // restrictions. Map memory wherever possible and use offset calculations. This is a + // temporary solution until proper address translation is implemented for ARM64. Note: This + // means the PS4 virtual addresses won't match host addresses, so instruction // translation/JIT will need to handle the offset. constexpr int map_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; - + // Map the three regions separately, but let the system choose addresses - system_managed_base = - reinterpret_cast(mmap(nullptr, system_managed_size, protection_flags, map_flags, -1, 0)); + system_managed_base = reinterpret_cast( + mmap(nullptr, system_managed_size, protection_flags, map_flags, -1, 0)); if (system_managed_base == MAP_FAILED) { LOG_CRITICAL(Kernel_Vmm, "mmap failed for system_managed_base: {}", strerror(errno)); throw std::bad_alloc{}; } - - system_reserved_base = - reinterpret_cast(mmap(nullptr, system_reserved_size, protection_flags, map_flags, -1, 0)); + + system_reserved_base = reinterpret_cast( + mmap(nullptr, system_reserved_size, protection_flags, map_flags, -1, 0)); if (system_reserved_base == MAP_FAILED) { LOG_CRITICAL(Kernel_Vmm, "mmap failed for system_reserved_base: {}", strerror(errno)); throw std::bad_alloc{}; } - - user_base = reinterpret_cast( - mmap(nullptr, user_size, protection_flags, map_flags, -1, 0)); + + user_base = + reinterpret_cast(mmap(nullptr, user_size, protection_flags, map_flags, -1, 0)); if (user_base == MAP_FAILED) { LOG_CRITICAL(Kernel_Vmm, "mmap failed for user_base: {}", strerror(errno)); throw std::bad_alloc{}; } - - LOG_WARNING(Kernel_Vmm, "ARM64 macOS: Using flexible memory layout. " - "PS4 addresses will be offset from host addresses. " - "system_managed: {} (expected {}), system_reserved: {} (expected {}), user: {} (expected {})", - fmt::ptr(system_managed_base), fmt::ptr(reinterpret_cast(SYSTEM_MANAGED_MIN)), - fmt::ptr(system_reserved_base), fmt::ptr(reinterpret_cast(SYSTEM_RESERVED_MIN)), - fmt::ptr(user_base), fmt::ptr(reinterpret_cast(USER_MIN))); + + LOG_WARNING( + Kernel_Vmm, + "ARM64 macOS: Using flexible memory layout. " + "PS4 addresses will be offset from host addresses. " + "system_managed: {} (expected {}), system_reserved: {} (expected {}), user: {} " + "(expected {})", + fmt::ptr(system_managed_base), fmt::ptr(reinterpret_cast(SYSTEM_MANAGED_MIN)), + fmt::ptr(system_reserved_base), fmt::ptr(reinterpret_cast(SYSTEM_RESERVED_MIN)), + fmt::ptr(user_base), fmt::ptr(reinterpret_cast(USER_MIN))); #endif #else const auto virtual_size = system_managed_size + system_reserved_size + user_size; @@ -650,7 +653,7 @@ struct AddressSpace::Impl { const int handle = phys_addr != -1 ? (fd == -1 ? backing_fd : fd) : -1; const off_t host_offset = phys_addr != -1 ? phys_addr : 0; const int flag = phys_addr != -1 ? 
MAP_SHARED : (MAP_ANONYMOUS | MAP_PRIVATE); - + #if defined(__APPLE__) && defined(ARCH_ARM64) // On ARM64 macOS, translate PS4 virtual addresses to host addresses void* host_addr = nullptr; @@ -670,7 +673,7 @@ struct AddressSpace::Impl { LOG_CRITICAL(Kernel_Vmm, "Invalid virtual address for mapping: {:#x}", virtual_addr); return MAP_FAILED; } - + void* ret = mmap(host_addr, size, prot, MAP_FIXED | flag, handle, host_offset); #else void* ret = mmap(reinterpret_cast(virtual_addr), size, prot, MAP_FIXED | flag, @@ -711,8 +714,8 @@ struct AddressSpace::Impl { LOG_CRITICAL(Kernel_Vmm, "Invalid virtual address for unmapping: {:#x}", start_address); return; } - void* ret = mmap(host_addr, end_address - start_address, - PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + void* ret = mmap(host_addr, end_address - start_address, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); #else // Return the adjusted pointers. void* ret = mmap(reinterpret_cast(start_address), end_address - start_address, @@ -853,4 +856,27 @@ boost::icl::interval_set AddressSpace::GetUsableRegions() { #endif } +void* AddressSpace::TranslateAddress(VAddr ps4_addr) const { +#ifdef ARCH_X86_64 + // On x86_64, PS4 addresses are directly mapped, so we can cast them + return reinterpret_cast(ps4_addr); +#elif defined(ARCH_ARM64) && defined(__APPLE__) + // On ARM64 macOS, translate PS4 virtual addresses to host addresses + if (ps4_addr >= SYSTEM_MANAGED_MIN && ps4_addr <= SYSTEM_MANAGED_MAX) { + u64 offset = ps4_addr - SYSTEM_MANAGED_MIN; + return system_managed_base + offset; + } else if (ps4_addr >= SYSTEM_RESERVED_MIN && ps4_addr <= SYSTEM_RESERVED_MAX) { + u64 offset = ps4_addr - SYSTEM_RESERVED_MIN; + return system_reserved_base + offset; + } else if (ps4_addr >= USER_MIN && ps4_addr <= USER_MAX) { + u64 offset = ps4_addr - USER_MIN; + return user_base + offset; + } + return nullptr; +#else + // Generic ARM64 or other platforms + return reinterpret_cast(ps4_addr); +#endif +} + } // namespace Core diff --git a/src/core/address_space.h b/src/core/address_space.h index 5c50039bd..578185e93 100644 --- a/src/core/address_space.h +++ b/src/core/address_space.h @@ -88,6 +88,9 @@ public: // Returns an interval set containing all usable regions. boost::icl::interval_set GetUsableRegions(); + // Translate PS4 virtual address to host address (for ARM64) + void* TranslateAddress(VAddr ps4_addr) const; + private: struct Impl; std::unique_ptr impl; diff --git a/src/core/jit/arm64_codegen.cpp b/src/core/jit/arm64_codegen.cpp new file mode 100644 index 000000000..516240295 --- /dev/null +++ b/src/core/jit/arm64_codegen.cpp @@ -0,0 +1,562 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include "arm64_codegen.h" +#include "common/assert.h" +#include "common/logging/log.h" +#include "common/types.h" +#if defined(__APPLE__) && defined(ARCH_ARM64) +#include +#endif + +namespace Core::Jit { + +static constexpr size_t PAGE_SIZE = 4096; +static constexpr size_t ALIGNMENT = 16; + +static size_t alignUp(size_t value, size_t alignment) { + return (value + alignment - 1) & ~(alignment - 1); +} + +static void* allocateExecutableMemory(size_t size) { + size = alignUp(size, PAGE_SIZE); +#if defined(__APPLE__) && defined(ARCH_ARM64) + // On macOS ARM64: + // 1. Allocate with PROT_READ | PROT_WRITE (no PROT_EXEC initially) + // 2. Use pthread_jit_write_protect_np to allow writing + // 3. 
After writing, use mprotect to add PROT_EXEC + void* ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (ptr == MAP_FAILED) { + LOG_CRITICAL(Core, "Failed to allocate executable memory: {} (errno={})", strerror(errno), + errno); + return nullptr; + } + // Initially disable write protection so we can write code + pthread_jit_write_protect_np(0); + return ptr; +#else + void* ptr = + mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (ptr == MAP_FAILED) { + LOG_CRITICAL(Core, "Failed to allocate executable memory: {}", strerror(errno)); + return nullptr; + } + return ptr; +#endif +} + +Arm64CodeGenerator::Arm64CodeGenerator(size_t buffer_size, void* code_ptr) + : buffer_size(alignUp(buffer_size, PAGE_SIZE)), owns_buffer(code_ptr == nullptr) { + if (code_ptr) { + code_buffer = code_ptr; + this->code_ptr = code_ptr; + } else { + code_buffer = allocateExecutableMemory(buffer_size); + this->code_ptr = code_buffer; + } + if (!code_buffer) { + throw std::bad_alloc(); + } +} + +Arm64CodeGenerator::~Arm64CodeGenerator() { + if (owns_buffer && code_buffer) { + munmap(code_buffer, buffer_size); + } +} + +void Arm64CodeGenerator::reset() { + code_ptr = code_buffer; + fixups.clear(); +} + +void Arm64CodeGenerator::setSize(size_t offset) { + code_ptr = static_cast(code_buffer) + offset; +} + +void Arm64CodeGenerator::emit32(u32 instruction) { +#if defined(__APPLE__) && defined(ARCH_ARM64) + // On macOS ARM64, disable write protection before writing + pthread_jit_write_protect_np(0); +#endif + u8* curr = static_cast(code_ptr); + u8* end = static_cast(code_buffer) + buffer_size; + ASSERT_MSG(curr + 4 <= end, "Code buffer overflow"); + *reinterpret_cast(curr) = instruction; + code_ptr = curr + 4; +#if defined(__APPLE__) && defined(ARCH_ARM64) + // Re-enable write protection after writing + pthread_jit_write_protect_np(1); +#endif +} + +void Arm64CodeGenerator::emit64(u64 instruction) { + emit32(static_cast(instruction)); + emit32(static_cast(instruction >> 32)); +} + +void* Arm64CodeGenerator::allocateCode(size_t size) { + size = alignUp(size, ALIGNMENT); + void* result = code_ptr; + u8* curr = static_cast(code_ptr); + u8* end = static_cast(code_buffer) + buffer_size; + code_ptr = curr + size; + ASSERT_MSG(static_cast(code_ptr) <= end, "Code buffer overflow"); + return result; +} + +void Arm64CodeGenerator::makeExecutable() { + size_t size = getSize(); + size = alignUp(size, PAGE_SIZE); +#if defined(__APPLE__) && defined(ARCH_ARM64) + // On macOS ARM64, re-enable write protection before making executable + pthread_jit_write_protect_np(1); + // Flush instruction cache + __builtin___clear_cache(static_cast(code_buffer), + static_cast(code_buffer) + size); +#endif + if (mprotect(code_buffer, size, PROT_READ | PROT_EXEC) != 0) { + LOG_CRITICAL(Core, "Failed to make code executable: {}", strerror(errno)); + } +} + +// Memory operations +void Arm64CodeGenerator::ldr(int reg, void* addr) { + movz(9, reinterpret_cast(addr) & 0xFFFF); + movk(9, (reinterpret_cast(addr) >> 16) & 0xFFFF, 16); + movk(9, (reinterpret_cast(addr) >> 32) & 0xFFFF, 32); + movk(9, (reinterpret_cast(addr) >> 48) & 0xFFFF, 48); + ldr(reg, 9, 0); +} + +void Arm64CodeGenerator::ldr(int reg, int base_reg, s32 offset) { + if (offset >= 0 && offset < 32768 && (offset % 8 == 0)) { + emit32(0xF9400000 | (reg << 0) | (base_reg << 5) | ((offset / 8) << 10)); + } else { + mov_imm(9, offset); + add(9, base_reg, 9); + ldr(reg, 9, 0); + } +} + +void 
Arm64CodeGenerator::ldrh(int reg, int base_reg, s32 offset) {
+    if (offset >= 0 && offset < 8192 && (offset % 2 == 0)) {
+        // LDRH (unsigned offset): the scaled imm12 field sits at bits [21:10].
+        emit32(0x79400000 | (reg << 0) | (base_reg << 5) | ((offset / 2) << 10));
+    } else {
+        mov_imm(9, offset);
+        add(9, base_reg, 9);
+        ldrh(reg, 9, 0);
+    }
+}
+
+void Arm64CodeGenerator::ldrb(int reg, int base_reg, s32 offset) {
+    if (offset >= 0 && offset < 4096) {
+        emit32(0x39400000 | (reg << 0) | (base_reg << 5) | (offset << 10));
+    } else {
+        mov_imm(9, offset);
+        add(9, base_reg, 9);
+        ldrb(reg, 9, 0);
+    }
+}
+
+void Arm64CodeGenerator::ldp(int reg1, int reg2, int base_reg, s32 offset) {
+    if (offset >= -256 && offset < 256 && (offset % 8 == 0)) {
+        s32 scaled_offset = offset / 8;
+        // imm7 is a 7-bit two's-complement field; mask rather than add a bias.
+        u32 imm7 = static_cast<u32>(scaled_offset) & 0x7F;
+        emit32(0xA9400000 | (reg1 << 0) | (reg2 << 10) | (base_reg << 5) | (imm7 << 15));
+    } else {
+        mov_imm(9, offset);
+        add(9, base_reg, 9);
+        ldp(reg1, reg2, 9, 0);
+    }
+}
+
+void Arm64CodeGenerator::str(int reg, void* addr) {
+    movz(9, reinterpret_cast<u64>(addr) & 0xFFFF);
+    movk(9, (reinterpret_cast<u64>(addr) >> 16) & 0xFFFF, 16);
+    movk(9, (reinterpret_cast<u64>(addr) >> 32) & 0xFFFF, 32);
+    movk(9, (reinterpret_cast<u64>(addr) >> 48) & 0xFFFF, 48);
+    str(reg, 9, 0);
+}
+
+void Arm64CodeGenerator::str(int reg, int base_reg, s32 offset) {
+    if (offset >= 0 && offset < 32768 && (offset % 8 == 0)) {
+        emit32(0xF9000000 | (reg << 0) | (base_reg << 5) | ((offset / 8) << 10));
+    } else {
+        mov_imm(9, offset);
+        add(9, base_reg, 9);
+        str(reg, 9, 0);
+    }
+}
+
+void Arm64CodeGenerator::strh(int reg, int base_reg, s32 offset) {
+    if (offset >= 0 && offset < 8192 && (offset % 2 == 0)) {
+        emit32(0x79000000 | (reg << 0) | (base_reg << 5) | ((offset / 2) << 10));
+    } else {
+        mov_imm(9, offset);
+        add(9, base_reg, 9);
+        strh(reg, 9, 0);
+    }
+}
+
+void Arm64CodeGenerator::strb(int reg, int base_reg, s32 offset) {
+    if (offset >= 0 && offset < 4096) {
+        emit32(0x39000000 | (reg << 0) | (base_reg << 5) | (offset << 10));
+    } else {
+        mov_imm(9, offset);
+        add(9, base_reg, 9);
+        strb(reg, 9, 0);
+    }
+}
+
+void Arm64CodeGenerator::stp(int reg1, int reg2, int base_reg, s32 offset) {
+    if (offset >= -256 && offset < 256 && (offset % 8 == 0)) {
+        s32 scaled_offset = offset / 8;
+        u32 imm7 = static_cast<u32>(scaled_offset) & 0x7F;
+        emit32(0xA9000000 | (reg1 << 0) | (reg2 << 10) | (base_reg << 5) | (imm7 << 15));
+    } else {
+        mov_imm(9, offset);
+        add(9, base_reg, 9);
+        stp(reg1, reg2, 9, 0);
+    }
+}
+
+// Arithmetic operations
+void Arm64CodeGenerator::add(int dst, int src1, int src2) {
+    emit32(0x8B000000 | (dst << 0) | (src1 << 5) | (src2 << 16));
+}
+
+void Arm64CodeGenerator::add_imm(int dst, int src1, s32 imm) {
+    if (imm >= 0 && imm < 4096) {
+        emit32(0x91000000 | (dst << 0) | (src1 << 5) | (imm << 10));
+    } else if (imm < 0 && imm > -4096) {
+        sub_imm(dst, src1, -imm);
+    } else {
+        mov_imm(9, imm);
+        add(dst, src1, 9);
+    }
+}
+
+void Arm64CodeGenerator::sub(int dst, int src1, int src2) {
+    emit32(0xCB000000 | (dst << 0) | (src1 << 5) | (src2 << 16));
+}
+
+void Arm64CodeGenerator::sub_imm(int dst, int src1, s32 imm) {
+    if (imm >= 0 && imm < 4096) {
+        emit32(0xD1000000 | (dst << 0) | (src1 << 5) | (imm << 10));
+    } else if (imm < 0 && imm > -4096) {
+        add_imm(dst, src1, -imm);
+    } else {
+        mov_imm(9, imm);
+        sub(dst, src1, 9);
+    }
+}
+
+void Arm64CodeGenerator::mul(int dst, int src1, int src2) {
+    emit32(0x9B007C00 | (dst << 0) | (src1 << 5) | (src2 << 16));
+}
+
+void Arm64CodeGenerator::sdiv(int dst, int src1, int src2) {
+    emit32(0x9AC00C00 | (dst << 0) | (src1 << 5) | (src2 << 16));
+}
+
+void Arm64CodeGenerator::udiv(int dst, int src1, int src2) {
+    emit32(0x9AC00800 | (dst << 0) | (src1 << 5) | (src2 << 16));
+}
+
+void Arm64CodeGenerator::and_(int dst, int src1, int src2) {
+    emit32(0x8A000000 | (dst << 0) | (src1 << 5) | (src2 << 16));
+}
+
+void Arm64CodeGenerator::and_(int dst, int src1, u64 imm) {
+    // Logical immediates use the bitmask encoding, not a plain 12-bit field;
+    // materialize the constant and fall back to the register form.
+    mov_imm(9, static_cast<s64>(imm));
+    and_(dst, src1, 9);
+}
+
+void Arm64CodeGenerator::orr(int dst, int src1, int src2) {
+    emit32(0xAA000000 | (dst << 0) | (src1 << 5) | (src2 << 16));
+}
+
+void Arm64CodeGenerator::orr(int dst, int src1, u64 imm) {
+    mov_imm(9, static_cast<s64>(imm));
+    orr(dst, src1, 9);
+}
+
+void Arm64CodeGenerator::eor(int dst, int src1, int src2) {
+    emit32(0xCA000000 | (dst << 0) | (src1 << 5) | (src2 << 16));
+}
+
+void Arm64CodeGenerator::eor(int dst, int src1, u64 imm) {
+    mov_imm(9, static_cast<s64>(imm));
+    eor(dst, src1, 9);
+}
+
+void Arm64CodeGenerator::mvn(int dst, int src) {
+    // ORN Xd, XZR, Xm: Rn must be XZR (31).
+    emit32(0xAA2003E0 | (dst << 0) | (src << 16));
+}
+
+void Arm64CodeGenerator::lsl(int dst, int src1, int src2) {
+    emit32(0x9AC02000 | (dst << 0) | (src1 << 5) | (src2 << 16));
+}
+
+void Arm64CodeGenerator::lsl(int dst, int src1, u8 shift) {
+    ASSERT_MSG(shift < 64, "Shift amount must be < 64");
+    // LSL #s is an alias of UBFM Xd, Xn, #((64 - s) % 64), #(63 - s).
+    emit32(0xD3400000 | (dst << 0) | (src1 << 5) | (((64 - shift) & 0x3F) << 16) |
+           ((63 - shift) << 10));
+}
+
+void Arm64CodeGenerator::lsr(int dst, int src1, int src2) {
+    emit32(0x9AC02400 | (dst << 0) | (src1 << 5) | (src2 << 16));
+}
+
+void Arm64CodeGenerator::lsr(int dst, int src1, u8 shift) {
+    ASSERT_MSG(shift < 64, "Shift amount must be < 64");
+    // LSR #s is an alias of UBFM Xd, Xn, #s, #63.
+    emit32(0xD3400000 | (dst << 0) | (src1 << 5) | (shift << 16) | (63 << 10));
+}
+
+void Arm64CodeGenerator::asr(int dst, int src1, int src2) {
+    emit32(0x9AC02800 | (dst << 0) | (src1 << 5) | (src2 << 16));
+}
+
+void Arm64CodeGenerator::asr(int dst, int src1, u8 shift) {
+    ASSERT_MSG(shift < 64, "Shift amount must be < 64");
+    // ASR #s is an alias of SBFM Xd, Xn, #s, #63.
+    emit32(0x93400000 | (dst << 0) | (src1 << 5) | (shift << 16) | (63 << 10));
+}
+
+// Move operations
+void Arm64CodeGenerator::mov(int dst, int src) {
+    if (dst != src) {
+        emit32(0xAA0003E0 | (dst << 0) | (src << 16));
+    }
+}
+
+void Arm64CodeGenerator::mov_imm(int dst, s64 imm) {
+    if (imm >= 0 && imm <= 0xFFFF) {
+        movz(dst, static_cast<u16>(imm));
+    } else if (imm >= -0x10000 && imm < 0) {
+        movn(dst, static_cast<u16>(-imm - 1));
+    } else {
+        movz(dst, imm & 0xFFFF);
+        if ((imm >> 16) & 0xFFFF) {
+            movk(dst, (imm >> 16) & 0xFFFF, 16);
+        }
+        if ((imm >> 32) & 0xFFFF) {
+            movk(dst, (imm >> 32) & 0xFFFF, 32);
+        }
+        if ((imm >> 48) & 0xFFFF) {
+            movk(dst, (imm >> 48) & 0xFFFF, 48);
+        }
+    }
+}
+
+void Arm64CodeGenerator::movz(int dst, u16 imm, u8 shift) {
+    ASSERT_MSG(shift % 16 == 0 && shift < 64, "Shift must be multiple of 16 and < 64");
+    emit32(0xD2800000 | (dst << 0) | (imm << 5) | ((shift / 16) << 21));
+}
+
+void Arm64CodeGenerator::movk(int dst, u16 imm, u8 shift) {
+    ASSERT_MSG(shift % 16 == 0 && shift < 64, "Shift must be multiple of 16 and < 64");
+    emit32(0xF2800000 | (dst << 0) | (imm << 5) | ((shift / 16) << 21));
+}
+
+void Arm64CodeGenerator::movn(int dst, u16 imm, u8 shift) {
+    ASSERT_MSG(shift % 16 == 0 && shift < 64, "Shift must be multiple of 16 and < 64");
+    emit32(0x92800000 | (dst << 0) | (imm << 5) | ((shift / 16) << 21));
+}
+
+// Compare operations
+void Arm64CodeGenerator::cmp(int reg1, int reg2) {
+    emit32(0xEB000000 | (31 << 0) | (reg1 << 5) | (reg2 << 16));
+}
+
+void Arm64CodeGenerator::cmp_imm(int reg, s32 imm) {
+    if (imm >= 0 && imm < 4096) {
+        emit32(0xF1000000 | (31 << 0) | (reg << 5) | (imm << 10));
+    } else {
+        mov_imm(9, imm);
+        cmp(reg, 9);
+    }
+}
+
+void Arm64CodeGenerator::tst(int reg1, int reg2) {
+    emit32(0xEA000000 | (31 << 0) | (reg1 << 5) | (reg2 << 16));
+}
+
+void Arm64CodeGenerator::tst(int reg, u64 imm) {
+    // Same bitmask-immediate caveat as the logical ops: use a scratch register.
+    mov_imm(9, static_cast<s64>(imm));
+    tst(reg, 9);
+}
+
+// Branch operations
+void Arm64CodeGenerator::b(void* target) {
+    s64 offset = reinterpret_cast<s64>(target) - reinterpret_cast<s64>(code_ptr);
+    if (offset >= -0x8000000 && offset < 0x8000000) {
+        s32 imm26 = static_cast<s32>(offset / 4);
+        emit32(0x14000000 | (imm26 & 0x3FFFFFF));
+    } else {
+        movz(9, reinterpret_cast<u64>(target) & 0xFFFF);
+        movk(9, (reinterpret_cast<u64>(target) >> 16) & 0xFFFF, 16);
+        movk(9, (reinterpret_cast<u64>(target) >> 32) & 0xFFFF, 32);
+        movk(9, (reinterpret_cast<u64>(target) >> 48) & 0xFFFF, 48);
+        br(9);
+    }
+}
+
+void Arm64CodeGenerator::b(int condition, void* target) {
+    s64 offset = reinterpret_cast<s64>(target) - reinterpret_cast<s64>(code_ptr);
+    // B.cond only has a 19-bit word offset (+/-1 MiB), unlike B/BL's 26 bits.
+    if (offset >= -0x100000 && offset < 0x100000) {
+        s32 imm19 = static_cast<s32>(offset / 4);
+        emit32(0x54000000 | (condition << 0) | ((imm19 & 0x7FFFF) << 5));
+    } else {
+        // Out of range: load the target and hop over the BR with the inverted
+        // condition (cond ^ 1) instead of emitting a B.cond to itself.
+        movz(9, reinterpret_cast<u64>(target) & 0xFFFF);
+        movk(9, (reinterpret_cast<u64>(target) >> 16) & 0xFFFF, 16);
+        movk(9, (reinterpret_cast<u64>(target) >> 32) & 0xFFFF, 32);
+        movk(9, (reinterpret_cast<u64>(target) >> 48) & 0xFFFF, 48);
+        emit32(0x54000000 | ((condition ^ 1) << 0) | (2 << 5));
+        br(9);
+    }
+}
+
+void Arm64CodeGenerator::bl(void* target) {
+    s64 offset = reinterpret_cast<s64>(target) - reinterpret_cast<s64>(code_ptr);
+    if (offset >= -0x8000000 && offset < 0x8000000) {
+        s32 imm26 = static_cast<s32>(offset / 4);
+        emit32(0x94000000 | (imm26 & 0x3FFFFFF));
+    } else {
+        movz(9, reinterpret_cast<u64>(target) & 0xFFFF);
+        movk(9, (reinterpret_cast<u64>(target) >> 16) & 0xFFFF, 16);
+        movk(9, (reinterpret_cast<u64>(target) >> 32) & 0xFFFF, 32);
+        movk(9, (reinterpret_cast<u64>(target) >> 48) & 0xFFFF, 48);
+        blr(9);
+    }
+}
+
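+// Editorial example (not part of the upstream patch): B/BL carry a signed 26-bit
+// word offset, i.e. +/-(1 << 25) * 4 = +/-128 MiB of reach. A branch 32 MiB forward
+// is offset 0x2000000 bytes = 0x800000 words, well inside the window, while any
+// target past +/-0x8000000 bytes takes the MOVZ/MOVK/BR absolute sequence instead.
+// B.cond above only has 19 bits (+/-1 MiB), hence its tighter range check.
+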
+void Arm64CodeGenerator::br(int reg) {
+    emit32(0xD61F0000 | (reg << 5));
+}
+
+void Arm64CodeGenerator::blr(int reg) {
+    emit32(0xD63F0000 | (reg << 5));
+}
+
+void Arm64CodeGenerator::ret(int reg) {
+    emit32(0xD65F0000 | (reg << 5));
+}
+
+// Conditional branches
+void Arm64CodeGenerator::b_eq(void* target) {
+    b(0, target);
+}
+void Arm64CodeGenerator::b_ne(void* target) {
+    b(1, target);
+}
+void Arm64CodeGenerator::b_lt(void* target) {
+    b(11, target);
+}
+void Arm64CodeGenerator::b_le(void* target) {
+    b(13, target);
+}
+void Arm64CodeGenerator::b_gt(void* target) {
+    b(12, target);
+}
+void Arm64CodeGenerator::b_ge(void* target) {
+    b(10, target);
+}
+void Arm64CodeGenerator::b_lo(void* target) {
+    b(3, target);
+}
+void Arm64CodeGenerator::b_ls(void* target) {
+    b(9, target);
+}
+void Arm64CodeGenerator::b_hi(void* target) {
+    b(8, target);
+}
+void Arm64CodeGenerator::b_hs(void* target) {
+    b(2, target);
+}
+
+// Stack operations
+void Arm64CodeGenerator::push(int reg) {
+    // The immediate forms must be used here: SUB/ADD (immediate) treat
+    // register 31 as SP, whereas the register forms treat it as XZR.
+    sub_imm(31, 31, 16);
+    str(reg, 31, 0);
+}
+
+void Arm64CodeGenerator::push(int reg1, int reg2) {
+    sub_imm(31, 31, 16);
+    stp(reg1, reg2, 31, 0);
+}
+
+void Arm64CodeGenerator::pop(int reg) {
+    ldr(reg, 31, 0);
+    add_imm(31, 31, 16);
+}
+
+void Arm64CodeGenerator::pop(int reg1, int reg2) {
+    ldp(reg1, reg2, 31, 0);
+    add_imm(31, 31, 16);
+}
+
+// System operations
+void Arm64CodeGenerator::nop() {
+    emit32(0xD503201F);
+}
+
+void Arm64CodeGenerator::brk(u16 imm) {
+    emit32(0xD4200000 | (imm << 5));
+}
+
+// NEON/SIMD operations
+void Arm64CodeGenerator::ldr_v(int vreg, int base_reg, s32 offset) {
+    if (offset >= 0 && offset < 4096 && (offset % 16 == 0)) {
+        emit32(0x3DC00000 | (vreg << 0) | (base_reg << 5) | ((offset / 16) << 12));
+    } else {
+        mov_imm(9, offset);
+        add(9, base_reg, 9);
+        ldr_v(vreg, 9, 0);
+    }
+}
+
+void Arm64CodeGenerator::str_v(int vreg, int base_reg, s32 offset) {
+    if (offset >= 0 && offset < 4096 && (offset % 16 == 0)) {
+        emit32(0x3D800000 | (vreg << 0) | (base_reg << 5) | ((offset / 16) << 12));
+    } else {
+        mov_imm(9, offset);
+        add(9, base_reg, 9);
+        str_v(vreg, 9, 0);
+    }
+}
+
+void Arm64CodeGenerator::mov_v(int vdst, int vsrc) {
+    // ORR Vd.16B, Vn.16B, Vn.16B; the source must occupy both Rn and Rm.
+    emit32(0x4EA01C00 | (vdst << 0) | (vsrc << 5) | (vsrc << 16));
+}
+
+void Arm64CodeGenerator::add_v(int vdst, int vsrc1, int vsrc2) {
+    // FADD Vd.4S, Vn.4S, Vm.4S; these helpers back the packed single-precision
+    // SSE ops, so the float forms are emitted rather than integer ADD/SUB/MUL.
+    emit32(0x4E20D400 | (vdst << 0) | (vsrc1 << 5) | (vsrc2 << 16));
+}
+
+void Arm64CodeGenerator::sub_v(int vdst, int vsrc1, int vsrc2) {
+    // FSUB Vd.4S, Vn.4S, Vm.4S
+    emit32(0x4EA0D400 | (vdst << 0) | (vsrc1 << 5) | (vsrc2 << 16));
+}
+
+void Arm64CodeGenerator::mul_v(int vdst, int vsrc1, int vsrc2) {
+    // FMUL Vd.4S, Vn.4S, Vm.4S
+    emit32(0x6E20DC00 | (vdst << 0) | (vsrc1 << 5) | (vsrc2 << 16));
+}
+
+} // namespace Core::Jit
diff --git a/src/core/jit/arm64_codegen.h b/src/core/jit/arm64_codegen.h
new file mode 100644
index 000000000..13107a82b
--- /dev/null
+++ b/src/core/jit/arm64_codegen.h
@@ -0,0 +1,130 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <utility>
+#include <vector>
+#include "common/types.h"
+
+namespace Core::Jit {
+
+class Arm64CodeGenerator {
+public:
+    explicit Arm64CodeGenerator(size_t buffer_size = 64_KB, void* code_ptr = nullptr);
+    ~Arm64CodeGenerator();
+
+    Arm64CodeGenerator(const Arm64CodeGenerator&) = delete;
+    Arm64CodeGenerator& operator=(const Arm64CodeGenerator&) = delete;
+
+    void* getCode() const {
+        return code_buffer;
+    }
+    void* getCurr() const {
+        return code_ptr;
+    }
+    size_t getSize() const {
+        return static_cast<size_t>(static_cast<u8*>(code_ptr) - static_cast<u8*>(code_buffer));
+    }
+
+    void reset();
+    void setSize(size_t offset);
+
+    // Memory operations
+    void ldr(int reg, void* addr);
+    void ldr(int reg, int base_reg, s32 offset = 0);
+    void ldrh(int reg, int base_reg, s32 offset = 0);
+    void ldrb(int reg, int base_reg, s32 offset = 0);
+    void ldp(int reg1, int reg2, int base_reg, s32 offset = 0);
+    void str(int reg, void* addr);
+    void str(int reg, int base_reg, s32 offset = 0);
+    void strh(int reg, int base_reg, s32 offset = 0);
+    void strb(int reg, int base_reg, s32 offset = 0);
+    void stp(int reg1, int reg2, int base_reg, s32 offset = 0);
+
+    // Arithmetic operations
+    void add(int dst, int src1, int src2);
+    void add_imm(int dst, int src1, s32 imm);
+    void sub(int dst, int src1, int src2);
+    void sub_imm(int dst, int src1, s32 imm);
+    void mul(int dst, int src1, int src2);
+    void sdiv(int dst, int src1, int src2);
+    void udiv(int dst, int src1, int src2);
+    void and_(int dst, int src1, int src2);
+    void and_(int dst, int src1, u64 imm);
+    void orr(int dst, int src1, int src2);
+    void orr(int dst, int src1, u64 imm);
+    void eor(int dst, int src1, int src2);
+    void eor(int dst, int src1, u64 imm);
+    void mvn(int dst, int src);
+    void lsl(int dst, int src1, int src2);
+    void lsl(int dst, int src1, u8 shift);
+    void lsr(int dst, int src1, int src2);
+    void lsr(int dst, int src1, u8 shift);
+    void asr(int dst, int src1, int src2);
+    void asr(int dst, int src1, u8 shift);
+
+    // Move operations
+    void mov(int dst, int src);
+    void mov_imm(int dst, s64 imm);
+    void movz(int dst, u16 imm, u8 shift = 0);
+    void movk(int dst, u16 imm, u8 shift = 0);
+    void movn(int dst, u16 imm, u8 shift = 0);
+
+    // Compare operations
+    void cmp(int reg1, int reg2);
+    void cmp_imm(int reg, s32 imm);
+    void tst(int reg1, int reg2);
+    void tst(int reg, u64 imm);
+
+    // Branch operations
+    void b(void* target);
+    void b(int condition, void* target);
+    void bl(void* target);
+    void br(int reg);
+    void blr(int reg);
+    void ret(int reg = 30); // X30 is LR by default
+
+    // Conditional branches
+    void b_eq(void* target);
+    void b_ne(void* target);
+    void b_lt(void* target);
+    void b_le(void* target);
+    void b_gt(void* target);
+    void b_ge(void* target);
+    void b_lo(void* target); // unsigned lower
+    void b_ls(void* target); // unsigned lower or same
+    void b_hi(void* target); // unsigned higher
+    void b_hs(void* target); // unsigned higher or same
+
+    // Stack operations
+    void push(int reg);
+    void push(int reg1, int reg2);
+    void pop(int reg);
+    void pop(int reg1, int reg2);
+
+    // System operations
+    void nop();
+    void brk(u16 imm = 0);
+
+    // NEON/SIMD operations (for XMM registers)
+    void ldr_v(int vreg, int base_reg, s32 offset = 0);
+    void str_v(int vreg, int base_reg, s32 offset = 0);
+    void mov_v(int vdst, int vsrc);
+    void add_v(int vdst, int vsrc1, int vsrc2);
+    void sub_v(int vdst, int vsrc1, int vsrc2);
+    void mul_v(int vdst, int vsrc1, int vsrc2);
+
+private:
+    void emit32(u32 instruction);
+    void emit64(u64 instruction);
+    void* allocateCode(size_t size);
+    void makeExecutable();
+
+    void* code_buffer;
+    void* code_ptr;
+    size_t buffer_size;
+    bool owns_buffer;
+    std::vector<std::pair<void*, u64>> fixups; // (fixup_location, target_address)
+};
+
+} // namespace Core::Jit
diff --git a/src/core/jit/block_manager.cpp b/src/core/jit/block_manager.cpp
new file mode 100644
index 000000000..172a817ca
--- /dev/null
+++ b/src/core/jit/block_manager.cpp
@@ -0,0 +1,94 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "block_manager.h"
+#include "common/logging/log.h"
+
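+// Blocks are keyed by their PS4 virtual start address. Every public method
+// takes the internal mutex; ExecutionEngine::GetPs4AddressForJitCode iterates
+// `blocks` directly and must therefore lock it itself.
+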
+namespace Core::Jit {
+
+BlockManager::BlockManager() = default;
+
+BlockManager::~BlockManager() {
+    Clear();
+}
+
+CodeBlock* BlockManager::GetBlock(VAddr ps4_address) {
+    std::lock_guard lock(mutex);
+    auto it = blocks.find(ps4_address);
+    if (it != blocks.end()) {
+        return it->second.get();
+    }
+    return nullptr;
+}
+
+CodeBlock* BlockManager::CreateBlock(VAddr ps4_address, void* arm64_code, size_t code_size,
+                                     size_t instruction_count) {
+    std::lock_guard lock(mutex);
+
+    auto block = std::make_unique<CodeBlock>(ps4_address, arm64_code, code_size, instruction_count);
+    CodeBlock* result = block.get();
+    blocks[ps4_address] = std::move(block);
+
+    LOG_DEBUG(Core, "Created code block at PS4 address {:#x}, ARM64 code: {}, size: {}",
+              ps4_address, fmt::ptr(arm64_code), code_size);
+
+    return result;
+}
+
+void BlockManager::InvalidateBlock(VAddr ps4_address) {
+    std::lock_guard lock(mutex);
+    blocks.erase(ps4_address);
+    LOG_DEBUG(Core, "Invalidated code block at PS4 address {:#x}", ps4_address);
+}
+
+void BlockManager::InvalidateRange(VAddr start, VAddr end) {
+    std::lock_guard lock(mutex);
+
+    auto it = blocks.begin();
+    while (it != blocks.end()) {
+        VAddr block_addr = it->first;
+        if (block_addr >= start && block_addr < end) {
+            it = blocks.erase(it);
+        } else {
+            auto& deps = it->second->dependencies;
+            bool has_dependency_in_range = false;
+            for (VAddr dep : deps) {
+                if (dep >= start && dep < end) {
+                    has_dependency_in_range = true;
+                    break;
+                }
+            }
+            if (has_dependency_in_range) {
+                it = blocks.erase(it);
+            } else {
+                ++it;
+            }
+        }
+    }
+
+    LOG_DEBUG(Core, "Invalidated code blocks in range {:#x} - {:#x}", start, end);
+}
+
+void BlockManager::AddDependency(VAddr block_address, VAddr dependency) {
+    std::lock_guard lock(mutex);
+    auto it = blocks.find(block_address);
+    if (it != blocks.end()) {
+        it->second->dependencies.insert(dependency);
+    }
+}
+
+void BlockManager::Clear() {
+    std::lock_guard lock(mutex);
+    blocks.clear();
+}
+
+size_t BlockManager::GetTotalCodeSize() const {
+    std::lock_guard lock(mutex);
+    size_t total = 0;
+    for (const auto& [addr, block] : blocks) {
+        total += block->code_size;
+    }
+    return total;
+}
+
+} // namespace Core::Jit
diff --git a/src/core/jit/block_manager.h b/src/core/jit/block_manager.h
new file mode 100644
index 000000000..6e0734b79
--- /dev/null
+++ b/src/core/jit/block_manager.h
@@ -0,0 +1,49 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <memory>
+#include <mutex>
+#include <set>
+#include <unordered_map>
+#include "common/types.h"
+
+namespace Core::Jit {
+
+struct CodeBlock {
+    VAddr ps4_address;
+    void* arm64_code;
+    size_t code_size;
+    size_t instruction_count;
+    std::set<VAddr> dependencies;
+    bool is_linked;
+
+    CodeBlock(VAddr addr, void* code, size_t size, size_t count)
+        : ps4_address(addr), arm64_code(code), code_size(size), instruction_count(count),
+          is_linked(false) {}
+};
+
+class BlockManager {
+public:
+    BlockManager();
+    ~BlockManager();
+
+    CodeBlock* GetBlock(VAddr ps4_address);
+    CodeBlock* CreateBlock(VAddr ps4_address, void* arm64_code, size_t code_size,
+                           size_t instruction_count);
+    void InvalidateBlock(VAddr ps4_address);
+    void InvalidateRange(VAddr start, VAddr end);
+    void AddDependency(VAddr block_address, VAddr dependency);
+    void Clear();
+
+    size_t GetBlockCount() const {
+        return blocks.size();
+    }
+    size_t GetTotalCodeSize() const;
+
+    std::unordered_map<VAddr, std::unique_ptr<CodeBlock>> blocks;
+    mutable std::mutex mutex;
+};
+
+} // namespace Core::Jit
diff --git a/src/core/jit/calling_convention.cpp b/src/core/jit/calling_convention.cpp
new file mode 100644
index 000000000..f7931ae86
--- /dev/null
+++ b/src/core/jit/calling_convention.cpp
@@ -0,0 +1,63 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "calling_convention.h"
+#include "common/assert.h"
+
+namespace Core::Jit {
+
+CallingConvention::CallingConvention(Arm64CodeGenerator& codegen, RegisterMapper& reg_mapper)
+    : codegen(codegen), reg_mapper(reg_mapper) {}
+
+void CallingConvention::PrepareCall(int arg_count, const std::vector<int>& arg_regs) {
+    ASSERT_MSG(arg_count <= MAX_INT_ARGS, "Too many arguments");
+    ASSERT_MSG(arg_regs.size() >= static_cast<size_t>(arg_count), "Not enough argument registers");
+
+    for (int i = 0; i < arg_count && i < MAX_INT_ARGS; i++) {
+        int arm64_arg_reg = i;
+        int x86_arg_reg = arg_regs[i];
+        int mapped_reg = reg_mapper.MapX86_64ToArm64(static_cast<X86_64Register>(x86_arg_reg));
+        if (mapped_reg != arm64_arg_reg) {
+            codegen.mov(arm64_arg_reg, mapped_reg);
+        }
+    }
+}
+
+void CallingConvention::CallFunction(void* function_ptr) {
+    codegen.movz(16, reinterpret_cast<u64>(function_ptr) & 0xFFFF);
+    codegen.movk(16, (reinterpret_cast<u64>(function_ptr) >> 16) & 0xFFFF, 16);
+    codegen.movk(16, (reinterpret_cast<u64>(function_ptr) >> 32) & 0xFFFF, 32);
+    codegen.movk(16, (reinterpret_cast<u64>(function_ptr) >> 48) & 0xFFFF, 48);
+    codegen.blr(16);
+}
+
+void CallingConvention::CallFunction(int reg) {
+    codegen.blr(reg);
+}
+
+void CallingConvention::Return(int return_reg) {
+    if (return_reg >= 0) {
+        int arm64_return = reg_mapper.MapX86_64ToArm64(X86_64Register::RAX);
+        if (return_reg != arm64_return) {
+            codegen.mov(arm64_return, return_reg);
+        }
+    }
+    codegen.ret();
+}
+
+void CallingConvention::SaveCallerSavedRegisters() {
+    saved_registers.clear();
+    for (int i = 0; i < 8; i++) {
+        codegen.push(i);
+        saved_registers.push_back(i);
+    }
+}
+
+void CallingConvention::RestoreCallerSavedRegisters() {
+    for (auto it = saved_registers.rbegin(); it != saved_registers.rend(); ++it) {
+        codegen.pop(*it);
+    }
+    saved_registers.clear();
+}
+
+} // namespace Core::Jit
diff --git a/src/core/jit/calling_convention.h b/src/core/jit/calling_convention.h
new file mode 100644
index 000000000..6f90c92ad
--- /dev/null
+++ b/src/core/jit/calling_convention.h
@@ -0,0 +1,33 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <vector>
+#include "arm64_codegen.h"
+#include "register_mapping.h"
+
+namespace Core::Jit {
+
+class CallingConvention {
+public:
+    explicit CallingConvention(Arm64CodeGenerator& codegen, RegisterMapper& reg_mapper);
+
+    void PrepareCall(int arg_count, const std::vector<int>& arg_regs);
+    void CallFunction(void* function_ptr);
+    void CallFunction(int reg);
+    void Return(int return_reg = -1);
+
+    void SaveCallerSavedRegisters();
+    void RestoreCallerSavedRegisters();
+
+    static constexpr int MAX_INT_ARGS = 8;
+    static constexpr int MAX_FLOAT_ARGS = 8;
+
+private:
+    Arm64CodeGenerator& codegen;
+    RegisterMapper& reg_mapper;
+    std::vector<int> saved_registers;
+};
+
+} // namespace Core::Jit
diff --git a/src/core/jit/execution_engine.cpp b/src/core/jit/execution_engine.cpp
new file mode 100644
index 000000000..768f63392
--- /dev/null
+++ b/src/core/jit/execution_engine.cpp
@@ -0,0 +1,202 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <cerrno>
+#include <cstring>
+#include <sys/mman.h>
+#include "common/decoder.h"
+#include "common/logging/log.h"
+#include "core/memory.h"
"core/memory.h" +#include "execution_engine.h" + +namespace Core::Jit { + +static void* AllocateExecutableMemory(size_t size) { + size = (size + 4095) & ~4095; + void* ptr = + mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (ptr == MAP_FAILED) { + LOG_CRITICAL(Core, "Failed to allocate executable memory: {}", strerror(errno)); + return nullptr; + } + return ptr; +} + +ExecutionEngine::ExecutionEngine() + : code_buffer(nullptr), code_buffer_size(DEFAULT_CODE_BUFFER_SIZE), code_buffer_used(0) { + block_manager = std::make_unique(); + register_mapper = std::make_unique(); +} + +ExecutionEngine::~ExecutionEngine() { + Shutdown(); +} + +void ExecutionEngine::Initialize() { + code_buffer = AllocateExecutableMemory(code_buffer_size); + if (!code_buffer) { + throw std::bad_alloc(); + } + + code_generator = std::make_unique(code_buffer_size, code_buffer); + translator = std::make_unique(*code_generator, *register_mapper); + + LOG_INFO(Core, "JIT Execution Engine initialized"); +} + +void ExecutionEngine::Shutdown() { + if (code_buffer) { + munmap(code_buffer, code_buffer_size); + code_buffer = nullptr; + } + code_generator.reset(); + translator.reset(); + block_manager.reset(); + register_mapper.reset(); +} + +void* ExecutionEngine::AllocateCodeBuffer(size_t size) { + size = (size + 15) & ~15; + if (code_buffer_used + size > code_buffer_size) { + LOG_WARNING(Core, "Code buffer exhausted, need to allocate more"); + return nullptr; + } + void* result = static_cast(code_buffer) + code_buffer_used; + code_buffer_used += size; + return result; +} + +CodeBlock* ExecutionEngine::TranslateBasicBlock(VAddr start_address, size_t max_instructions) { + auto* memory = Core::Memory::Instance(); + auto& address_space = memory->GetAddressSpace(); + void* ps4_code_ptr = address_space.TranslateAddress(start_address); + if (!ps4_code_ptr) { + LOG_ERROR(Core, "Invalid PS4 address for translation: {:#x}", start_address); + return nullptr; + } + + code_generator->reset(); + void* block_start = code_generator->getCurr(); + + VAddr current_address = start_address; + size_t instruction_count = 0; + bool block_end = false; + + while (instruction_count < max_instructions && !block_end) { + ZydisDecodedInstruction instruction; + ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT]; + + void* code_ptr = address_space.TranslateAddress(current_address); + if (!code_ptr) { + break; + } + + ZyanStatus status = + Common::Decoder::Instance()->decodeInstruction(instruction, operands, code_ptr, 15); + if (!ZYAN_SUCCESS(status)) { + LOG_WARNING(Core, "Failed to decode instruction at {:#x}", current_address); + break; + } + + bool translated = translator->TranslateInstruction(instruction, operands, current_address); + if (!translated) { + LOG_WARNING(Core, "Failed to translate instruction at {:#x}", current_address); + break; + } + + instruction_count++; + current_address += instruction.length; + + switch (instruction.mnemonic) { + case ZYDIS_MNEMONIC_RET: + case ZYDIS_MNEMONIC_JMP: + case ZYDIS_MNEMONIC_CALL: + block_end = true; + break; + default: + break; + } + } + + if (instruction_count == 0) { + return nullptr; + } + + size_t code_size = code_generator->getSize(); + CodeBlock* block = + block_manager->CreateBlock(start_address, block_start, code_size, instruction_count); + + LOG_DEBUG(Core, "Translated basic block at {:#x}, {} instructions, {} bytes", start_address, + instruction_count, code_size); + + return block; +} + +CodeBlock* ExecutionEngine::TranslateBlock(VAddr 
ps4_address) { + CodeBlock* existing = block_manager->GetBlock(ps4_address); + if (existing) { + return existing; + } + + return TranslateBasicBlock(ps4_address); +} + +void ExecutionEngine::LinkBlock(CodeBlock* block, VAddr target_address) { + CodeBlock* target_block = block_manager->GetBlock(target_address); + if (target_block && !block->is_linked) { + void* link_location = static_cast(block->arm64_code) + block->code_size - 4; + code_generator->setSize(reinterpret_cast(link_location) - + static_cast(code_generator->getCode())); + code_generator->b(target_block->arm64_code); + block->is_linked = true; + } +} + +bool ExecutionEngine::ExecuteBlock(VAddr ps4_address) { + CodeBlock* block = TranslateBlock(ps4_address); + if (!block) { + LOG_ERROR(Core, "Failed to translate or find block at {:#x}", ps4_address); + return false; + } + + typedef void (*BlockFunc)(); + BlockFunc func = reinterpret_cast(block->arm64_code); + func(); + + return true; +} + +void ExecutionEngine::InvalidateBlock(VAddr ps4_address) { + block_manager->InvalidateBlock(ps4_address); +} + +void ExecutionEngine::InvalidateRange(VAddr start, VAddr end) { + block_manager->InvalidateRange(start, end); +} + +bool ExecutionEngine::IsJitCode(void* code_ptr) const { + if (!code_buffer) { + return false; + } + u8* ptr = static_cast(code_ptr); + u8* start = static_cast(code_buffer); + u8* end = start + code_buffer_size; + return ptr >= start && ptr < end; +} + +VAddr ExecutionEngine::GetPs4AddressForJitCode(void* code_ptr) const { + if (!IsJitCode(code_ptr)) { + return 0; + } + std::lock_guard lock(block_manager->mutex); + for (const auto& [ps4_addr, block] : block_manager->blocks) { + u8* block_start = static_cast(block->arm64_code); + u8* block_end = block_start + block->code_size; + u8* ptr = static_cast(code_ptr); + if (ptr >= block_start && ptr < block_end) { + return ps4_addr; + } + } + return 0; +} + +} // namespace Core::Jit diff --git a/src/core/jit/execution_engine.h b/src/core/jit/execution_engine.h new file mode 100644 index 000000000..ec8195397 --- /dev/null +++ b/src/core/jit/execution_engine.h @@ -0,0 +1,53 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include "arm64_codegen.h" +#include "block_manager.h" +#include "common/singleton.h" +#include "common/types.h" +#include "register_mapping.h" +#include "x86_64_translator.h" + +namespace Core::Jit { + +class ExecutionEngine { +public: + ExecutionEngine(); + ~ExecutionEngine(); + + bool ExecuteBlock(VAddr ps4_address); + CodeBlock* TranslateBlock(VAddr ps4_address); + void InvalidateBlock(VAddr ps4_address); + void InvalidateRange(VAddr start, VAddr end); + + bool IsJitCode(void* code_ptr) const; + VAddr GetPs4AddressForJitCode(void* code_ptr) const; + + void Initialize(); + void Shutdown(); + +private: + CodeBlock* TranslateBasicBlock(VAddr start_address, size_t max_instructions = 100); + void* AllocateCodeBuffer(size_t size); + void LinkBlock(CodeBlock* block, VAddr target_address); + + std::unique_ptr block_manager; + std::unique_ptr register_mapper; + std::unique_ptr code_generator; + std::unique_ptr translator; + + void* code_buffer; + size_t code_buffer_size; + size_t code_buffer_used; + + static constexpr size_t DEFAULT_CODE_BUFFER_SIZE = 64_MB; + + friend class BlockManager; +}; + +using JitEngine = Common::Singleton; + +} // namespace Core::Jit diff --git a/src/core/jit/register_mapping.cpp b/src/core/jit/register_mapping.cpp new file mode 100644 index 
--- /dev/null
+++ b/src/core/jit/register_mapping.cpp
@@ -0,0 +1,123 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <array>
+#include "common/assert.h"
+#include "register_mapping.h"
+
+namespace Core::Jit {
+
+RegisterMapper::RegisterMapper() : register_save_area(nullptr) {
+    x86_to_arm64_map.fill(INVALID_MAPPING);
+    spilled_registers.fill(false);
+
+    // NB: RAX and RDI both map to X0 (SysV return vs. first-argument
+    // register); translated code must not rely on them holding distinct
+    // values at the same time.
+    x86_to_arm64_map[static_cast<size_t>(X86_64Register::RAX)] =
+        GetArm64RegisterNumber(Arm64Register::X0);
+    x86_to_arm64_map[static_cast<size_t>(X86_64Register::RCX)] =
+        GetArm64RegisterNumber(Arm64Register::X1);
+    x86_to_arm64_map[static_cast<size_t>(X86_64Register::RDX)] =
+        GetArm64RegisterNumber(Arm64Register::X2);
+    x86_to_arm64_map[static_cast<size_t>(X86_64Register::RBX)] =
+        GetArm64RegisterNumber(Arm64Register::X19);
+    x86_to_arm64_map[static_cast<size_t>(X86_64Register::RSP)] =
+        GetArm64RegisterNumber(Arm64Register::SP);
+    x86_to_arm64_map[static_cast<size_t>(X86_64Register::RBP)] =
+        GetArm64RegisterNumber(Arm64Register::X29);
+    x86_to_arm64_map[static_cast<size_t>(X86_64Register::RSI)] =
+        GetArm64RegisterNumber(Arm64Register::X3);
+    x86_to_arm64_map[static_cast<size_t>(X86_64Register::RDI)] =
+        GetArm64RegisterNumber(Arm64Register::X0);
+    x86_to_arm64_map[static_cast<size_t>(X86_64Register::R8)] =
+        GetArm64RegisterNumber(Arm64Register::X4);
+    x86_to_arm64_map[static_cast<size_t>(X86_64Register::R9)] =
+        GetArm64RegisterNumber(Arm64Register::X5);
+    x86_to_arm64_map[static_cast<size_t>(X86_64Register::R10)] =
+        GetArm64RegisterNumber(Arm64Register::X6);
+    x86_to_arm64_map[static_cast<size_t>(X86_64Register::R11)] =
+        GetArm64RegisterNumber(Arm64Register::X7);
+    x86_to_arm64_map[static_cast<size_t>(X86_64Register::R12)] =
+        GetArm64RegisterNumber(Arm64Register::X20);
+    x86_to_arm64_map[static_cast<size_t>(X86_64Register::R13)] =
+        GetArm64RegisterNumber(Arm64Register::X21);
+    x86_to_arm64_map[static_cast<size_t>(X86_64Register::R14)] =
+        GetArm64RegisterNumber(Arm64Register::X22);
+    x86_to_arm64_map[static_cast<size_t>(X86_64Register::R15)] =
+        GetArm64RegisterNumber(Arm64Register::X23);
+
+    x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM0)] =
+        GetArm64RegisterNumber(Arm64Register::V0);
+    x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM1)] =
+        GetArm64RegisterNumber(Arm64Register::V1);
+    x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM2)] =
+        GetArm64RegisterNumber(Arm64Register::V2);
+    x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM3)] =
+        GetArm64RegisterNumber(Arm64Register::V3);
+    x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM4)] =
+        GetArm64RegisterNumber(Arm64Register::V4);
+    x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM5)] =
+        GetArm64RegisterNumber(Arm64Register::V5);
+    x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM6)] =
+        GetArm64RegisterNumber(Arm64Register::V6);
+    x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM7)] =
+        GetArm64RegisterNumber(Arm64Register::V7);
+    x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM8)] =
+        GetArm64RegisterNumber(Arm64Register::V8);
+    x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM9)] =
+        GetArm64RegisterNumber(Arm64Register::V9);
+    x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM10)] =
+        GetArm64RegisterNumber(Arm64Register::V10);
+    x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM11)] =
+        GetArm64RegisterNumber(Arm64Register::V11);
+    x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM12)] =
+        GetArm64RegisterNumber(Arm64Register::V12);
+    x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM13)] =
+        GetArm64RegisterNumber(Arm64Register::V13);
+    x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM14)] =
+        GetArm64RegisterNumber(Arm64Register::V14);
+    x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM15)] =
+        GetArm64RegisterNumber(Arm64Register::V15);
+
+    x86_to_arm64_map[static_cast<size_t>(X86_64Register::FLAGS)] =
+        GetArm64RegisterNumber(Arm64Register::X11);
+}
+
+int RegisterMapper::MapX86_64ToArm64(X86_64Register x86_reg) {
+    size_t index = static_cast<size_t>(x86_reg);
+    ASSERT_MSG(index < static_cast<size_t>(X86_64Register::COUNT), "Invalid x86_64 register");
+    return x86_to_arm64_map[index];
+}
+
+int RegisterMapper::MapX86_64XmmToArm64Neon(X86_64Register xmm_reg) {
+    if (!IsXmmRegister(xmm_reg)) {
+        return INVALID_MAPPING;
+    }
+    return MapX86_64ToArm64(xmm_reg);
+}
+
+bool RegisterMapper::IsXmmRegister(X86_64Register reg) {
+    return reg >= X86_64Register::XMM0 && reg <= X86_64Register::XMM15;
+}
+
+void RegisterMapper::SpillRegister(X86_64Register x86_reg) {
+    size_t index = static_cast<size_t>(x86_reg);
+    ASSERT_MSG(index < static_cast<size_t>(X86_64Register::COUNT), "Invalid x86_64 register");
+    spilled_registers[index] = true;
+}
+
+void RegisterMapper::ReloadRegister(X86_64Register x86_reg) {
+    size_t index = static_cast<size_t>(x86_reg);
+    ASSERT_MSG(index < static_cast<size_t>(X86_64Register::COUNT), "Invalid x86_64 register");
+    spilled_registers[index] = false;
+}
+
+bool RegisterMapper::IsRegisterSpilled(X86_64Register x86_reg) const {
+    size_t index = static_cast<size_t>(x86_reg);
+    ASSERT_MSG(index < static_cast<size_t>(X86_64Register::COUNT), "Invalid x86_64 register");
+    return spilled_registers[index];
+}
+
+// Bookkeeping only for now; actual save/restore emission is not implemented.
+void RegisterMapper::SaveAllRegisters() {}
+
+void RegisterMapper::RestoreAllRegisters() {}
+
+} // namespace Core::Jit
diff --git a/src/core/jit/register_mapping.h b/src/core/jit/register_mapping.h
new file mode 100644
index 000000000..80e1caab7
--- /dev/null
+++ b/src/core/jit/register_mapping.h
@@ -0,0 +1,136 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <array>
+#include "common/types.h"
+
+namespace Core::Jit {
+
+enum class X86_64Register : u8 {
+    RAX = 0,
+    RCX = 1,
+    RDX = 2,
+    RBX = 3,
+    RSP = 4,
+    RBP = 5,
+    RSI = 6,
+    RDI = 7,
+    R8 = 8,
+    R9 = 9,
+    R10 = 10,
+    R11 = 11,
+    R12 = 12,
+    R13 = 13,
+    R14 = 14,
+    R15 = 15,
+    XMM0 = 16,
+    XMM1 = 17,
+    XMM2 = 18,
+    XMM3 = 19,
+    XMM4 = 20,
+    XMM5 = 21,
+    XMM6 = 22,
+    XMM7 = 23,
+    XMM8 = 24,
+    XMM9 = 25,
+    XMM10 = 26,
+    XMM11 = 27,
+    XMM12 = 28,
+    XMM13 = 29,
+    XMM14 = 30,
+    XMM15 = 31,
+    FLAGS = 32,
+    COUNT = 33
+};
+
+enum class Arm64Register : u8 {
+    X0 = 0,
+    X1 = 1,
+    X2 = 2,
+    X3 = 3,
+    X4 = 4,
+    X5 = 5,
+    X6 = 6,
+    X7 = 7,
+    X8 = 8,
+    X9 = 9,
+    X10 = 10,
+    X11 = 11,
+    X12 = 12,
+    X13 = 13,
+    X14 = 14,
+    X15 = 15,
+    X16 = 16,
+    X17 = 17,
+    X18 = 18,
+    X19 = 19,
+    X20 = 20,
+    X21 = 21,
+    X22 = 22,
+    X23 = 23,
+    X24 = 24,
+    X25 = 25,
+    X26 = 26,
+    X27 = 27,
+    X28 = 28,
+    X29 = 29,
+    X30 = 30,
+    SP = 31,
+    V0 = 32,
+    V1 = 33,
+    V2 = 34,
+    V3 = 35,
+    V4 = 36,
+    V5 = 37,
+    V6 = 38,
+    V7 = 39,
+    V8 = 40,
+    V9 = 41,
+    V10 = 42,
+    V11 = 43,
+    V12 = 44,
+    V13 = 45,
+    V14 = 46,
+    V15 = 47,
+    COUNT = 48
+};
+
+class RegisterMapper {
+public:
+    RegisterMapper();
+
+    int MapX86_64ToArm64(X86_64Register x86_reg);
+    int MapX86_64XmmToArm64Neon(X86_64Register xmm_reg);
+    bool IsXmmRegister(X86_64Register reg);
+
+    void SpillRegister(X86_64Register x86_reg);
+    void ReloadRegister(X86_64Register x86_reg);
+    bool IsRegisterSpilled(X86_64Register x86_reg) const;
+
+    void SaveAllRegisters();
+    void RestoreAllRegisters();
+
+    static constexpr int SCRATCH_REG = 9;
+    static constexpr int SCRATCH_REG2 = 10;
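+    // X9/X10 are never handed out by the mapping table and act as JIT
+    // scratch; X11 is set aside to hold a materialized copy of x86_64 RFLAGS.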
+    static constexpr int FLAGS_REG = 11;
+    static constexpr int STACK_POINTER = 31;
+
+private:
+    static constexpr int INVALID_MAPPING = -1;
+
+    std::array<int, static_cast<size_t>(X86_64Register::COUNT)> x86_to_arm64_map;
+    std::array<bool, static_cast<size_t>(X86_64Register::COUNT)> spilled_registers;
+    void* register_save_area;
+};
+
+inline int GetArm64RegisterNumber(Arm64Register reg) {
+    return static_cast<int>(reg);
+}
+
+inline int GetX86_64RegisterNumber(X86_64Register reg) {
+    return static_cast<int>(reg);
+}
+
+} // namespace Core::Jit
diff --git a/src/core/jit/simd_translator.cpp b/src/core/jit/simd_translator.cpp
new file mode 100644
index 000000000..aa9eb5780
--- /dev/null
+++ b/src/core/jit/simd_translator.cpp
@@ -0,0 +1,206 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "register_mapping.h"
+#include "simd_translator.h"
+
+namespace Core::Jit {
+
+SimdTranslator::SimdTranslator(Arm64CodeGenerator& codegen, RegisterMapper& reg_mapper)
+    : codegen(codegen), reg_mapper(reg_mapper) {}
+
+int SimdTranslator::GetArm64NeonRegister(const ZydisDecodedOperand& operand) {
+    if (operand.type != ZYDIS_OPERAND_TYPE_REGISTER) {
+        return -1;
+    }
+    if (operand.reg.value < ZYDIS_REGISTER_XMM0 || operand.reg.value > ZYDIS_REGISTER_XMM15) {
+        return -1;
+    }
+    X86_64Register xmm_reg =
+        static_cast<X86_64Register>(static_cast<u8>(X86_64Register::XMM0) +
+                                    static_cast<u8>(operand.reg.value - ZYDIS_REGISTER_XMM0));
+    return reg_mapper.MapX86_64XmmToArm64Neon(xmm_reg);
+}
+
+void SimdTranslator::LoadMemoryOperandV(int vreg, const ZydisDecodedOperand& mem_op) {
+    ASSERT_MSG(mem_op.type == ZYDIS_OPERAND_TYPE_MEMORY, "Expected memory operand");
+
+    int addr_reg = RegisterMapper::SCRATCH_REG;
+    codegen.mov_imm(addr_reg, 0);
+
+    if (mem_op.mem.base != ZYDIS_REGISTER_NONE && mem_op.mem.base != ZYDIS_REGISTER_RIP) {
+        if (mem_op.mem.base >= ZYDIS_REGISTER_RAX && mem_op.mem.base <= ZYDIS_REGISTER_R15) {
+            X86_64Register x86_base =
+                static_cast<X86_64Register>(mem_op.mem.base - ZYDIS_REGISTER_RAX);
+            if (x86_base < X86_64Register::COUNT) {
+                int base_reg = reg_mapper.MapX86_64ToArm64(x86_base);
+                codegen.mov(addr_reg, base_reg);
+            }
+        }
+    }
+
+    if (mem_op.mem.disp.value != 0) {
+        codegen.add_imm(addr_reg, addr_reg, static_cast<s32>(mem_op.mem.disp.value));
+    }
+
+    codegen.ldr_v(vreg, addr_reg, 0);
+}
+
+void SimdTranslator::StoreMemoryOperandV(int vreg, const ZydisDecodedOperand& mem_op) {
+    ASSERT_MSG(mem_op.type == ZYDIS_OPERAND_TYPE_MEMORY, "Expected memory operand");
+
+    int addr_reg = RegisterMapper::SCRATCH_REG;
+    codegen.mov_imm(addr_reg, 0);
+
+    if (mem_op.mem.base != ZYDIS_REGISTER_NONE) {
+        if (mem_op.mem.base >= ZYDIS_REGISTER_RAX && mem_op.mem.base <= ZYDIS_REGISTER_R15) {
+            X86_64Register x86_base =
+                static_cast<X86_64Register>(mem_op.mem.base - ZYDIS_REGISTER_RAX);
+            if (x86_base < X86_64Register::COUNT) {
+                int base_reg = reg_mapper.MapX86_64ToArm64(x86_base);
+                codegen.mov(addr_reg, base_reg);
+            }
+        }
+    }
+
+    if (mem_op.mem.disp.value != 0) {
+        codegen.add_imm(addr_reg, addr_reg, static_cast<s32>(mem_op.mem.disp.value));
+    }
+
+    codegen.str_v(vreg, addr_reg, 0);
+}
+
+bool SimdTranslator::TranslateSseInstruction(const ZydisDecodedInstruction& instruction,
+                                             const ZydisDecodedOperand* operands) {
+    switch (instruction.mnemonic) {
+    case ZYDIS_MNEMONIC_MOVAPS:
+        return TranslateMovaps(instruction, operands);
+    case ZYDIS_MNEMONIC_MOVUPS:
+        return TranslateMovups(instruction, operands);
+    case ZYDIS_MNEMONIC_ADDPS:
+        return TranslateAddps(instruction, operands);
+    case ZYDIS_MNEMONIC_SUBPS:
+        return TranslateSubps(instruction, operands);
+    case ZYDIS_MNEMONIC_MULPS:
+        return TranslateMulps(instruction, operands);
+    default:
+        LOG_WARNING(Core, "Unsupported SSE instruction: {}",
+                    ZydisMnemonicGetString(instruction.mnemonic));
+        return false;
+    }
+}
+
+bool SimdTranslator::TranslateMovaps(const ZydisDecodedInstruction& instruction,
+                                     const ZydisDecodedOperand* operands) {
+    const auto& dst = operands[0];
+    const auto& src = operands[1];
+
+    // Store form: MOVAPS m128, xmm.
+    if (dst.type == ZYDIS_OPERAND_TYPE_MEMORY && src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
+        int src_vreg = GetArm64NeonRegister(src);
+        if (src_vreg == -1) {
+            return false;
+        }
+        StoreMemoryOperandV(src_vreg, dst);
+        return true;
+    }
+
+    int dst_vreg = GetArm64NeonRegister(dst);
+    if (dst_vreg == -1) {
+        return false;
+    }
+
+    if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
+        int src_vreg = GetArm64NeonRegister(src);
+        if (src_vreg == -1) {
+            return false;
+        }
+        codegen.mov_v(dst_vreg, src_vreg);
+    } else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
+        LoadMemoryOperandV(dst_vreg, src);
+    } else {
+        return false;
+    }
+
+    return true;
+}
+
+bool SimdTranslator::TranslateMovups(const ZydisDecodedInstruction& instruction,
+                                     const ZydisDecodedOperand* operands) {
+    // Plain LDR/STR Q is alignment-agnostic, so the MOVAPS path covers this too.
+    return TranslateMovaps(instruction, operands);
+}
+
+bool SimdTranslator::TranslateAddps(const ZydisDecodedInstruction& instruction,
+                                    const ZydisDecodedOperand* operands) {
+    const auto& dst = operands[0];
+    const auto& src = operands[1];
+
+    int dst_vreg = GetArm64NeonRegister(dst);
+    if (dst_vreg == -1) {
+        return false;
+    }
+
+    if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
+        int src_vreg = GetArm64NeonRegister(src);
+        if (src_vreg == -1) {
+            return false;
+        }
+        codegen.add_v(dst_vreg, dst_vreg, src_vreg);
+    } else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
+        // V16 lies outside the XMM0-15 -> V0-15 mapping, so it is safe scratch.
+        int scratch_vreg = 16;
+        LoadMemoryOperandV(scratch_vreg, src);
+        codegen.add_v(dst_vreg, dst_vreg, scratch_vreg);
+    } else {
+        return false;
+    }
+
+    return true;
+}
+
+bool SimdTranslator::TranslateSubps(const ZydisDecodedInstruction& instruction,
+                                    const ZydisDecodedOperand* operands) {
+    const auto& dst = operands[0];
+    const auto& src = operands[1];
+
+    int dst_vreg = GetArm64NeonRegister(dst);
+    if (dst_vreg == -1) {
+        return false;
+    }
+
+    if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
+        int src_vreg = GetArm64NeonRegister(src);
+        if (src_vreg == -1) {
+            return false;
+        }
+        codegen.sub_v(dst_vreg, dst_vreg, src_vreg);
+    } else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
+        int scratch_vreg = 16;
+        LoadMemoryOperandV(scratch_vreg, src);
+        codegen.sub_v(dst_vreg, dst_vreg, scratch_vreg);
+    } else {
+        return false;
+    }
+
+    return true;
+}
+
+bool SimdTranslator::TranslateMulps(const ZydisDecodedInstruction& instruction,
+                                    const ZydisDecodedOperand* operands) {
+    const auto& dst = operands[0];
+    const auto& src = operands[1];
+
+    int dst_vreg = GetArm64NeonRegister(dst);
+    if (dst_vreg == -1) {
+        return false;
+    }
+
+    if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
+        int src_vreg = GetArm64NeonRegister(src);
+        if (src_vreg == -1) {
+            return false;
+        }
+        codegen.mul_v(dst_vreg, dst_vreg, src_vreg);
+    } else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
+        int scratch_vreg = 16;
+        LoadMemoryOperandV(scratch_vreg, src);
+        codegen.mul_v(dst_vreg, dst_vreg, scratch_vreg);
+    } else {
+        return false;
+    }
+
+    return true;
+}
+
+} // namespace Core::Jit
diff --git a/src/core/jit/simd_translator.h b/src/core/jit/simd_translator.h
new file mode 100644
index 000000000..916d662b7
--- /dev/null
+++ b/src/core/jit/simd_translator.h
@@ -0,0 +1,39 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <Zydis/Zydis.h>
+#include "arm64_codegen.h"
+#include "register_mapping.h"
+
+namespace Core::Jit {
+
+class SimdTranslator {
+public:
+    explicit SimdTranslator(Arm64CodeGenerator& codegen, RegisterMapper& reg_mapper);
+
+    bool TranslateSseInstruction(const ZydisDecodedInstruction& instruction,
+                                 const ZydisDecodedOperand* operands);
+
+    bool TranslateMovaps(const ZydisDecodedInstruction& instruction,
+                         const ZydisDecodedOperand* operands);
+    bool TranslateMovups(const ZydisDecodedInstruction& instruction,
+                         const ZydisDecodedOperand* operands);
+    bool TranslateAddps(const ZydisDecodedInstruction& instruction,
+                        const ZydisDecodedOperand* operands);
+    bool TranslateSubps(const ZydisDecodedInstruction& instruction,
+                        const ZydisDecodedOperand* operands);
+    bool TranslateMulps(const ZydisDecodedInstruction& instruction,
+                        const ZydisDecodedOperand* operands);
+
+private:
+    int GetArm64NeonRegister(const ZydisDecodedOperand& operand);
+    void LoadMemoryOperandV(int vreg, const ZydisDecodedOperand& mem_op);
+    void StoreMemoryOperandV(int vreg, const ZydisDecodedOperand& mem_op);
+
+    Arm64CodeGenerator& codegen;
+    RegisterMapper& reg_mapper;
+};
+
+} // namespace Core::Jit
diff --git a/src/core/jit/x86_64_translator.cpp b/src/core/jit/x86_64_translator.cpp
new file mode 100644
index 000000000..0352955ed
--- /dev/null
+++ b/src/core/jit/x86_64_translator.cpp
@@ -0,0 +1,701 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <Zydis/Zydis.h>
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "register_mapping.h"
+#include "x86_64_translator.h"
+
+namespace Core::Jit {
+
+X86_64Translator::X86_64Translator(Arm64CodeGenerator& codegen, RegisterMapper& reg_mapper)
+    : codegen(codegen), reg_mapper(reg_mapper) {}
+
+bool X86_64Translator::TranslateInstruction(const ZydisDecodedInstruction& instruction,
+                                            const ZydisDecodedOperand* operands, VAddr address) {
+    switch (instruction.mnemonic) {
+    case ZYDIS_MNEMONIC_MOV:
+        return TranslateMov(instruction, operands);
+    case ZYDIS_MNEMONIC_ADD:
+        return TranslateAdd(instruction, operands);
+    case ZYDIS_MNEMONIC_SUB:
+        return TranslateSub(instruction, operands);
+    case ZYDIS_MNEMONIC_MUL:
+        return TranslateMul(instruction, operands);
+    case ZYDIS_MNEMONIC_DIV:
+    case ZYDIS_MNEMONIC_IDIV:
+        return TranslateDiv(instruction, operands);
+    case ZYDIS_MNEMONIC_AND:
+        return TranslateAnd(instruction, operands);
+    case ZYDIS_MNEMONIC_OR:
+        return TranslateOr(instruction, operands);
+    case ZYDIS_MNEMONIC_XOR:
+        return TranslateXor(instruction, operands);
+    case ZYDIS_MNEMONIC_NOT:
+        return TranslateNot(instruction, operands);
+    case ZYDIS_MNEMONIC_SHL:
+        return TranslateShl(instruction, operands);
+    case ZYDIS_MNEMONIC_SHR:
+        return TranslateShr(instruction, operands);
+    case ZYDIS_MNEMONIC_SAR:
+        return TranslateSar(instruction, operands);
+    case ZYDIS_MNEMONIC_PUSH:
+        return TranslatePush(instruction, operands);
+    case ZYDIS_MNEMONIC_POP:
+        return TranslatePop(instruction, operands);
+    case ZYDIS_MNEMONIC_CALL:
+        return TranslateCall(instruction, operands, address);
+    case ZYDIS_MNEMONIC_RET:
+        return TranslateRet(instruction, operands);
+    case ZYDIS_MNEMONIC_JMP:
+        return TranslateJmp(instruction, operands, address);
+    case ZYDIS_MNEMONIC_CMP:
+        return TranslateCmp(instruction, operands);
+    case ZYDIS_MNEMONIC_TEST:
+        return TranslateTest(instruction, operands);
+    case ZYDIS_MNEMONIC_LEA:
+        return TranslateLea(instruction, operands);
+    default:
+        LOG_ERROR(Core, "Unsupported instruction: {}",
+                  ZydisMnemonicGetString(instruction.mnemonic));
+        return false;
+    }
+}
+
+X86_64Register X86_64Translator::ZydisToX86_64Register(ZydisRegister reg) {
+    if (reg >= ZYDIS_REGISTER_RAX && reg <= ZYDIS_REGISTER_R15) {
+        return static_cast<X86_64Register>(static_cast<u8>(reg - ZYDIS_REGISTER_RAX));
+    } else if (reg >= ZYDIS_REGISTER_XMM0 && reg <= ZYDIS_REGISTER_XMM15) {
+        return static_cast<X86_64Register>(static_cast<u8>(X86_64Register::XMM0) +
+                                           static_cast<u8>(reg - ZYDIS_REGISTER_XMM0));
+    }
+    return X86_64Register::COUNT;
+}
+
+int X86_64Translator::GetArm64Register(const ZydisDecodedOperand& operand) {
+    if (operand.type != ZYDIS_OPERAND_TYPE_REGISTER) {
+        return -1;
+    }
+    X86_64Register x86_reg = ZydisToX86_64Register(operand.reg.value);
+    if (x86_reg == X86_64Register::COUNT) {
+        return -1;
+    }
+    return reg_mapper.MapX86_64ToArm64(x86_reg);
+}
+
+int X86_64Translator::GetArm64XmmRegister(const ZydisDecodedOperand& operand) {
+    if (operand.type != ZYDIS_OPERAND_TYPE_REGISTER) {
+        return -1;
+    }
+    X86_64Register x86_reg = ZydisToX86_64Register(operand.reg.value);
+    if (!reg_mapper.IsXmmRegister(x86_reg)) {
+        return -1;
+    }
+    return reg_mapper.MapX86_64XmmToArm64Neon(x86_reg);
+}
+
+void X86_64Translator::CalculateMemoryAddress(int dst_reg, const ZydisDecodedOperand& mem_op) {
+    ASSERT_MSG(mem_op.type == ZYDIS_OPERAND_TYPE_MEMORY, "Expected memory operand");
+
+    const auto& mem = mem_op.mem;
+    int base_reg = -1;
+    int index_reg = -1;
+
+    if (mem.base != ZYDIS_REGISTER_NONE && mem.base != ZYDIS_REGISTER_RIP) {
+        X86_64Register x86_base = ZydisToX86_64Register(mem.base);
+        if (x86_base != X86_64Register::COUNT) {
+            base_reg = reg_mapper.MapX86_64ToArm64(x86_base);
+        }
+    }
+
+    if (mem.index != ZYDIS_REGISTER_NONE) {
+        X86_64Register x86_index = ZydisToX86_64Register(mem.index);
+        if (x86_index != X86_64Register::COUNT) {
+            index_reg = reg_mapper.MapX86_64ToArm64(x86_index);
+        }
+    }
+
+    if (base_reg == -1 && index_reg == -1 && mem.disp.value == 0) {
+        codegen.mov_imm(dst_reg, 0);
+        return;
+    }
+
+    if (base_reg != -1) {
+        codegen.mov(dst_reg, base_reg);
+    } else {
+        codegen.mov_imm(dst_reg, 0);
+    }
+
+    if (index_reg != -1) {
+        if (mem.scale > 1 && mem.scale <= 8) {
+            // dst_reg is frequently SCRATCH_REG itself, so the scale factor
+            // has to live in the second scratch register.
+            codegen.mov_imm(RegisterMapper::SCRATCH_REG2, static_cast<s64>(mem.scale));
+            codegen.mul(RegisterMapper::SCRATCH_REG2, index_reg, RegisterMapper::SCRATCH_REG2);
+            codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG2);
+        } else {
+            codegen.add(dst_reg, dst_reg, index_reg);
+        }
+    }
+
+    if (mem.disp.value != 0) {
+        codegen.add_imm(dst_reg, dst_reg, static_cast<s32>(mem.disp.value));
+    }
+}
+
+void X86_64Translator::LoadMemoryOperand(int dst_reg, const ZydisDecodedOperand& mem_op,
+                                         size_t size) {
+    CalculateMemoryAddress(RegisterMapper::SCRATCH_REG, mem_op);
+
+    if (mem_op.mem.base == ZYDIS_REGISTER_RIP) {
+        LOG_WARNING(Core, "RIP-relative addressing not fully supported in JIT");
+    }
+
+    switch (size) {
+    case 1:
+        codegen.ldrb(dst_reg, RegisterMapper::SCRATCH_REG, 0);
+        break;
+    case 2:
+        codegen.ldrh(dst_reg, RegisterMapper::SCRATCH_REG, 0);
+        break;
+    case 4:
+    case 8:
+        // FIXME: 32-bit operands are widened to 64-bit loads for now.
+        codegen.ldr(dst_reg, RegisterMapper::SCRATCH_REG, 0);
+        break;
+    default:
+        ASSERT_MSG(false, "Unsupported memory load size: {}", size);
+    }
+}
+
+void X86_64Translator::StoreMemoryOperand(int src_reg, const ZydisDecodedOperand& mem_op,
+                                          size_t size) {
+    CalculateMemoryAddress(RegisterMapper::SCRATCH_REG, mem_op);
+
+    if (mem_op.mem.base == ZYDIS_REGISTER_RIP) {
+        LOG_WARNING(Core, "RIP-relative addressing not fully supported in JIT");
+    }
+
+    switch (size) {
+    case 1:
+        codegen.strb(src_reg, RegisterMapper::SCRATCH_REG, 0);
+        break;
+    case 2:
+        codegen.strh(src_reg, RegisterMapper::SCRATCH_REG, 0);
+        break;
+    case 4:
+    case 8:
+        // FIXME: 32-bit operands are widened to 64-bit stores for now.
+        codegen.str(src_reg, RegisterMapper::SCRATCH_REG, 0);
+        break;
+    default:
+        ASSERT_MSG(false, "Unsupported memory store size: {}", size);
+    }
+}
+
+void X86_64Translator::LoadImmediate(int dst_reg, const ZydisDecodedOperand& imm_op) {
+    ASSERT_MSG(imm_op.type == ZYDIS_OPERAND_TYPE_IMMEDIATE, "Expected immediate operand");
+    s64 value = static_cast<s64>(imm_op.imm.value.s);
+    codegen.mov_imm(dst_reg, value);
+}
+
+bool X86_64Translator::TranslateMov(const ZydisDecodedInstruction& instruction,
+                                    const ZydisDecodedOperand* operands) {
+    const auto& dst = operands[0];
+    const auto& src = operands[1];
+
+    if (dst.type == ZYDIS_OPERAND_TYPE_REGISTER) {
+        int dst_reg = GetArm64Register(dst);
+        if (dst_reg == -1) {
+            return false;
+        }
+
+        if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
+            int src_reg = GetArm64Register(src);
+            if (src_reg == -1) {
+                return false;
+            }
+            codegen.mov(dst_reg, src_reg);
+        } else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
+            LoadImmediate(dst_reg, src);
+        } else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
+            LoadMemoryOperand(dst_reg, src, instruction.operand_width / 8);
+        } else {
+            return false;
+        }
+    } else if (dst.type == ZYDIS_OPERAND_TYPE_MEMORY) {
+        int src_reg = -1;
+        if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
+            src_reg = GetArm64Register(src);
+            if (src_reg == -1) {
+                return false;
+            }
+        } else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
+            // Stage the value in SCRATCH_REG2: StoreMemoryOperand builds the
+            // address in SCRATCH_REG. FIXME: a scaled-index destination still
+            // reuses SCRATCH_REG2 internally and would collide.
+            LoadImmediate(RegisterMapper::SCRATCH_REG2, src);
+            src_reg = RegisterMapper::SCRATCH_REG2;
+        } else {
+            return false;
+        }
+        StoreMemoryOperand(src_reg, dst, instruction.operand_width / 8);
+    } else {
+        return false;
+    }
+
+    return true;
+}
+
+bool X86_64Translator::TranslateAdd(const ZydisDecodedInstruction& instruction,
+                                    const ZydisDecodedOperand* operands) {
+    const auto& dst = operands[0];
+    const auto& src = operands[1];
+
+    int dst_reg = GetArm64Register(dst);
+    if (dst_reg == -1) {
+        return false;
+    }
+
+    if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
+        int src_reg = GetArm64Register(src);
+        if (src_reg == -1) {
+            return false;
+        }
+        codegen.add(dst_reg, dst_reg, src_reg);
+    } else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
+        s32 imm = static_cast<s32>(src.imm.value.s);
+        codegen.add_imm(dst_reg, dst_reg, imm);
+    } else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
+        LoadMemoryOperand(RegisterMapper::SCRATCH_REG, src, instruction.operand_width / 8);
+        codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
+    } else {
+        return false;
+    }
+
+    return true;
+}
+
+bool X86_64Translator::TranslateSub(const ZydisDecodedInstruction& instruction,
+                                    const ZydisDecodedOperand* operands) {
+    const auto& dst = operands[0];
+    const auto& src = operands[1];
+
+    int dst_reg = GetArm64Register(dst);
+    if (dst_reg == -1) {
+        return false;
+    }
+
+    if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
+        int src_reg = GetArm64Register(src);
+        if (src_reg == -1) {
+            return false;
+        }
+        codegen.sub(dst_reg, dst_reg, src_reg);
+    } else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
+        s32 imm = static_cast<s32>(src.imm.value.s);
+        codegen.sub_imm(dst_reg, dst_reg, imm);
+    } else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
+        LoadMemoryOperand(RegisterMapper::SCRATCH_REG, src, instruction.operand_width / 8);
+        codegen.sub(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
+    } else {
+        return false;
+    }
+
+    return true;
+}
+
+bool X86_64Translator::TranslateMul(const ZydisDecodedInstruction& instruction,
+                                    const ZydisDecodedOperand* operands) {
+    const auto& dst = operands[0];
+
+    int dst_reg = GetArm64Register(dst);
+    if (dst_reg == -1) {
+        return false;
+    }
+
+    // Note: treated like the two-operand IMUL form; the implicit RDX:RAX
+    // widening result of x86 MUL is not modeled.
+    if (operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER) {
+        int src_reg = GetArm64Register(operands[1]);
+        if (src_reg == -1) {
+            return false;
+        }
+        codegen.mul(dst_reg, dst_reg, src_reg);
+    } else if (operands[1].type == ZYDIS_OPERAND_TYPE_MEMORY) {
+        LoadMemoryOperand(RegisterMapper::SCRATCH_REG, operands[1], instruction.operand_width / 8);
+        codegen.mul(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
+    } else {
+        return false;
+    }
+
+    return true;
+}
+
+bool X86_64Translator::TranslateDiv(const ZydisDecodedInstruction& instruction,
+                                    const ZydisDecodedOperand* operands) {
+    LOG_WARNING(Core, "DIV instruction translation not fully implemented");
+    return false;
+}
+
+bool X86_64Translator::TranslateAnd(const ZydisDecodedInstruction& instruction,
+                                    const ZydisDecodedOperand* operands) {
+    const auto& dst = operands[0];
+    const auto& src = operands[1];
+
+    int dst_reg = GetArm64Register(dst);
+    if (dst_reg == -1) {
+        return false;
+    }
+
+    if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
+        int src_reg = GetArm64Register(src);
+        if (src_reg == -1) {
+            return false;
+        }
+        codegen.and_(dst_reg, dst_reg, src_reg);
+    } else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
+        u64 imm = static_cast<u64>(src.imm.value.u);
+        codegen.and_(dst_reg, dst_reg, imm);
+    } else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
+        LoadMemoryOperand(RegisterMapper::SCRATCH_REG, src, instruction.operand_width / 8);
+        codegen.and_(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
+    } else {
+        return false;
+    }
+
+    return true;
+}
+
+bool X86_64Translator::TranslateOr(const ZydisDecodedInstruction& instruction,
+                                   const ZydisDecodedOperand* operands) {
+    const auto& dst = operands[0];
+    const auto& src = operands[1];
+
+    int dst_reg = GetArm64Register(dst);
+    if (dst_reg == -1) {
+        return false;
+    }
+
+    if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
+        int src_reg = GetArm64Register(src);
+        if (src_reg == -1) {
+            return false;
+        }
+        codegen.orr(dst_reg, dst_reg, src_reg);
+    } else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
+        u64 imm = static_cast<u64>(src.imm.value.u);
+        codegen.orr(dst_reg, dst_reg, imm);
+    } else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
+        LoadMemoryOperand(RegisterMapper::SCRATCH_REG, src, instruction.operand_width / 8);
+        codegen.orr(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
+    } else {
+        return false;
+    }
+
+    return true;
+}
+
+bool X86_64Translator::TranslateXor(const ZydisDecodedInstruction& instruction,
+                                    const ZydisDecodedOperand* operands) {
+    const auto& dst = operands[0];
+    const auto& src = operands[1];
+
+    int dst_reg = GetArm64Register(dst);
+    if (dst_reg == -1) {
+        return false;
+    }
+
+    if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
+        int src_reg = GetArm64Register(src);
+        if (src_reg == -1) {
+            return false;
+        }
+        codegen.eor(dst_reg, dst_reg, src_reg);
+    } else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
+        u64 imm = static_cast<u64>(src.imm.value.u);
+        codegen.eor(dst_reg, dst_reg, imm);
+    } else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
+        LoadMemoryOperand(RegisterMapper::SCRATCH_REG, src, instruction.operand_width / 8);
+        codegen.eor(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
+    } else {
+        return false;
+    }
+
+    return true;
+}
+
+bool X86_64Translator::TranslateNot(const ZydisDecodedInstruction& instruction,
+                                    const ZydisDecodedOperand* operands) {
+    const auto& dst = operands[0];
+
+    int dst_reg = GetArm64Register(dst);
+    if (dst_reg == -1) {
+        return false;
+    }
+
+    codegen.mvn(dst_reg, dst_reg);
+
+    return true;
+}
+
+bool X86_64Translator::TranslateShl(const ZydisDecodedInstruction& instruction,
+                                    const ZydisDecodedOperand* operands) {
+    const auto& dst = operands[0];
+    const auto& src = operands[1];
+
+    int dst_reg = GetArm64Register(dst);
+    if (dst_reg == -1) {
+        return false;
+    }
+
+    if (src.type == ZYDIS_OPERAND_TYPE_REGISTER &&
+        (src.reg.value == ZYDIS_REGISTER_CL || src.reg.value == ZYDIS_REGISTER_RCX)) {
+        int cl_reg = reg_mapper.MapX86_64ToArm64(X86_64Register::RCX);
+        codegen.lsl(dst_reg, dst_reg, cl_reg);
+    } else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
+        u64 shift_val = src.imm.value.u;
+        if (shift_val < 64) {
+            codegen.lsl(dst_reg, dst_reg, static_cast<u8>(shift_val));
+        } else {
+            codegen.mov_imm(dst_reg, 0);
+        }
+    } else {
+        return false;
+    }
+
+    return true;
+}
+
+bool X86_64Translator::TranslateShr(const ZydisDecodedInstruction& instruction,
+                                    const ZydisDecodedOperand* operands) {
+    const auto& dst = operands[0];
+    const auto& src = operands[1];
+
+    int dst_reg = GetArm64Register(dst);
+    if (dst_reg == -1) {
+        return false;
+    }
+
+    if (src.type == ZYDIS_OPERAND_TYPE_REGISTER &&
+        (src.reg.value == ZYDIS_REGISTER_CL || src.reg.value == ZYDIS_REGISTER_RCX)) {
+        int cl_reg = reg_mapper.MapX86_64ToArm64(X86_64Register::RCX);
+        codegen.lsr(dst_reg, dst_reg, cl_reg);
+    } else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
+        u64 shift_val = src.imm.value.u;
+        if (shift_val < 64) {
+            codegen.lsr(dst_reg, dst_reg, static_cast<u8>(shift_val));
+        } else {
+            codegen.mov_imm(dst_reg, 0);
+        }
+    } else {
+        return false;
+    }
+
+    return true;
+}
+
+bool X86_64Translator::TranslateSar(const ZydisDecodedInstruction& instruction,
+                                    const ZydisDecodedOperand* operands) {
+    const auto& dst = operands[0];
+    const auto& src = operands[1];
+
+    int dst_reg = GetArm64Register(dst);
+    if (dst_reg == -1) {
+        return false;
+    }
+
+    if (src.type == ZYDIS_OPERAND_TYPE_REGISTER &&
+        (src.reg.value == ZYDIS_REGISTER_CL || src.reg.value == ZYDIS_REGISTER_RCX)) {
+        int cl_reg = reg_mapper.MapX86_64ToArm64(X86_64Register::RCX);
+        codegen.asr(dst_reg, dst_reg, cl_reg);
+    } else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
+        u64 shift_val = src.imm.value.u;
+        if (shift_val < 64) {
+            codegen.asr(dst_reg, dst_reg, static_cast<u8>(shift_val));
+        } else {
+            codegen.mov_imm(dst_reg, 0);
+        }
+    } else {
+        return false;
+    }
+
+    return true;
+}
+
+bool X86_64Translator::TranslatePush(const ZydisDecodedInstruction& instruction,
+                                     const ZydisDecodedOperand* operands) {
+    const auto& src = operands[0];
+
+    int sp_reg = reg_mapper.MapX86_64ToArm64(X86_64Register::RSP);
+    // Immediate form so register 31 is treated as SP rather than XZR.
+    codegen.sub_imm(sp_reg, sp_reg, 8);
+
+    if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
+        int src_reg = GetArm64Register(src);
+        if (src_reg == -1) {
+            return false;
+        }
+        codegen.str(src_reg, sp_reg, 0);
+    } else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
+        LoadImmediate(RegisterMapper::SCRATCH_REG, src);
+        codegen.str(RegisterMapper::SCRATCH_REG, sp_reg, 0);
+    } else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
+        LoadMemoryOperand(RegisterMapper::SCRATCH_REG, src, instruction.operand_width / 8);
+        codegen.str(RegisterMapper::SCRATCH_REG, sp_reg, 0);
+    } else {
+        return false;
+    }
+
+    return true;
+}
+
+bool X86_64Translator::TranslatePop(const ZydisDecodedInstruction& instruction,
+                                    const ZydisDecodedOperand* operands) {
+    const auto& dst = operands[0];
+
+    int dst_reg = GetArm64Register(dst);
+    if (dst_reg == -1) {
+        return false;
+    }
+
+    int sp_reg = reg_mapper.MapX86_64ToArm64(X86_64Register::RSP);
+    codegen.ldr(dst_reg, sp_reg, 0);
+    codegen.add_imm(sp_reg, sp_reg, 8);
+
+    return true;
+}
+
+bool X86_64Translator::TranslateCall(const ZydisDecodedInstruction& instruction,
+                                     const ZydisDecodedOperand* operands, VAddr address) {
+    LOG_WARNING(Core, "CALL instruction translation needs execution engine integration");
+    return false;
+}
+
+bool X86_64Translator::TranslateRet(const ZydisDecodedInstruction& instruction,
+                                    const ZydisDecodedOperand* operands) {
+    codegen.ret();
+    return true;
+}
+
+bool X86_64Translator::TranslateJmp(const ZydisDecodedInstruction& instruction,
+                                    const ZydisDecodedOperand* operands, VAddr address) {
+    LOG_WARNING(Core, "JMP instruction translation needs execution engine integration");
+    return false;
+}
+
+bool X86_64Translator::TranslateCmp(const ZydisDecodedInstruction& instruction,
+                                    const ZydisDecodedOperand* operands) {
+    const auto& dst = operands[0];
+    const auto& src = operands[1];
+
+    int dst_reg = GetArm64Register(dst);
+    if (dst_reg == -1) {
+        return false;
+    }
+
+    if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
+        int src_reg = GetArm64Register(src);
+        if (src_reg == -1) {
+            return false;
+        }
+        codegen.cmp(dst_reg, src_reg);
+    } else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
+        s32 imm = static_cast<s32>(src.imm.value.s);
+        codegen.cmp_imm(dst_reg, imm);
+    } else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
+        LoadMemoryOperand(RegisterMapper::SCRATCH_REG, src, instruction.operand_width / 8);
+        codegen.cmp(dst_reg, RegisterMapper::SCRATCH_REG);
+    } else {
+        return false;
+    }
+
+    return true;
+}
+
+bool X86_64Translator::TranslateTest(const ZydisDecodedInstruction& instruction,
+                                     const ZydisDecodedOperand* operands) {
+    const auto& dst = operands[0];
+    const auto& src = operands[1];
+
+    int dst_reg = GetArm64Register(dst);
+    if (dst_reg == -1) {
+        return false;
+    }
+
+    if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
+        int src_reg = GetArm64Register(src);
+        if (src_reg == -1) {
+            return false;
+        }
+        codegen.tst(dst_reg, src_reg);
+    } else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
+        u64 imm = static_cast<u64>(src.imm.value.u);
+        codegen.tst(dst_reg, imm);
+    } else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
+        LoadMemoryOperand(RegisterMapper::SCRATCH_REG, src, instruction.operand_width / 8);
+        codegen.tst(dst_reg, RegisterMapper::SCRATCH_REG);
+    } else {
+        return false;
+    }
+
+    return true;
+}
+
+bool X86_64Translator::TranslateLea(const ZydisDecodedInstruction& instruction,
+                                    const ZydisDecodedOperand* operands) {
+    const auto& dst = operands[0];
+    const auto& src = operands[1];
+
+    ASSERT_MSG(src.type == ZYDIS_OPERAND_TYPE_MEMORY, "LEA source must be memory");
+
+    int dst_reg = GetArm64Register(dst);
+    if (dst_reg == -1) {
+        return false;
+    }
+
+    CalculateMemoryAddress(dst_reg, src);
+
+    return true;
+}
+
+void X86_64Translator::UpdateFlagsForArithmetic(int result_reg, int src1_reg, int src2_reg,
+                                                bool is_subtract) {
+    // Placeholder: full x86 RFLAGS materialization into FLAGS_REG is not
+    // implemented yet; refresh NZCV from the result so a following conditional
+    // branch at least observes the sign/zero state.
+    codegen.cmp_imm(result_reg, 0);
+}
+
+void X86_64Translator::UpdateFlagsForLogical(int result_reg) {
+    codegen.cmp_imm(result_reg, 0);
+}
+
+void X86_64Translator::UpdateFlagsForShift(int result_reg, int shift_amount) {
+    codegen.cmp_imm(result_reg, 0);
+}
+
+int X86_64Translator::GetConditionCode(ZydisMnemonic mnemonic) {
+    switch (mnemonic) {
+    case ZYDIS_MNEMONIC_JZ:
+        return 0;
+    case ZYDIS_MNEMONIC_JNZ:
+        return 1;
+    case ZYDIS_MNEMONIC_JL:
+        return 11;
+    case ZYDIS_MNEMONIC_JLE:
+        return 13;
+    case ZYDIS_MNEMONIC_JNLE:
+        return 12;
+    case ZYDIS_MNEMONIC_JNL:
+        return 10;
+    case ZYDIS_MNEMONIC_JB:
+        return 3;
+    case ZYDIS_MNEMONIC_JBE:
+        return 9;
+    case ZYDIS_MNEMONIC_JNBE:
+        return 8;
+    case ZYDIS_MNEMONIC_JNB:
+        return 2;
+    default:
+        return -1;
+    }
+}
+
+} // namespace Core::Jit
diff --git a/src/core/jit/x86_64_translator.h b/src/core/jit/x86_64_translator.h
new file mode 100644
index 000000000..6f492e042
--- /dev/null
+++ b/src/core/jit/x86_64_translator.h
@@ -0,0 +1,80 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <Zydis/Zydis.h>
+#include "arm64_codegen.h"
+#include "common/types.h"
+#include "register_mapping.h"
+
+namespace Core::Jit {
+
+class X86_64Translator {
+public:
+    explicit X86_64Translator(Arm64CodeGenerator& codegen, RegisterMapper& reg_mapper);
+    ~X86_64Translator() = default;
+
+    bool TranslateInstruction(const ZydisDecodedInstruction& instruction,
+                              const ZydisDecodedOperand* operands, VAddr address);
+
+    bool TranslateMov(const ZydisDecodedInstruction& instruction,
+                      const ZydisDecodedOperand* operands);
+    bool TranslateAdd(const ZydisDecodedInstruction& instruction,
+                      const ZydisDecodedOperand* operands);
+    bool TranslateSub(const ZydisDecodedInstruction& instruction,
+                      const ZydisDecodedOperand* operands);
+    bool TranslateMul(const ZydisDecodedInstruction& instruction,
+                      const ZydisDecodedOperand* operands);
+    bool TranslateDiv(const ZydisDecodedInstruction& instruction,
+                      const ZydisDecodedOperand* operands);
+    bool TranslateAnd(const ZydisDecodedInstruction& instruction,
+                      const ZydisDecodedOperand* operands);
+    bool TranslateOr(const ZydisDecodedInstruction& instruction,
+                     const ZydisDecodedOperand* operands);
+    bool TranslateXor(const ZydisDecodedInstruction& instruction,
+                      const ZydisDecodedOperand* operands);
+    bool TranslateNot(const ZydisDecodedInstruction& instruction,
+                      const ZydisDecodedOperand* operands);
+    bool TranslateShl(const ZydisDecodedInstruction& instruction,
+                      const ZydisDecodedOperand* operands);
+    bool TranslateShr(const ZydisDecodedInstruction& instruction,
+                      const ZydisDecodedOperand* operands);
+    bool TranslateSar(const ZydisDecodedInstruction& instruction,
+                      const ZydisDecodedOperand* operands);
+    bool TranslatePush(const ZydisDecodedInstruction& instruction,
+                       const ZydisDecodedOperand* operands);
+    bool TranslatePop(const ZydisDecodedInstruction& instruction,
+                      const ZydisDecodedOperand* operands);
+    bool TranslateCall(const ZydisDecodedInstruction& instruction,
+                       const ZydisDecodedOperand* operands, VAddr address);
+    bool TranslateRet(const ZydisDecodedInstruction& instruction,
+                      const ZydisDecodedOperand* operands);
+    bool TranslateJmp(const ZydisDecodedInstruction& instruction,
+                      const ZydisDecodedOperand* operands, VAddr address);
+    bool TranslateCmp(const ZydisDecodedInstruction& instruction,
+                      const ZydisDecodedOperand* operands);
+    bool TranslateTest(const ZydisDecodedInstruction& instruction,
+                       const ZydisDecodedOperand* operands);
+    bool TranslateLea(const ZydisDecodedInstruction& instruction,
+                      const ZydisDecodedOperand* operands);
+
+    void UpdateFlagsForArithmetic(int result_reg, int src1_reg, int src2_reg, bool is_subtract);
+    void UpdateFlagsForLogical(int result_reg);
+    void UpdateFlagsForShift(int result_reg, int shift_amount);
+    int GetConditionCode(ZydisMnemonic mnemonic);
+
+private:
+    int GetArm64Register(const ZydisDecodedOperand& operand);
+    int GetArm64XmmRegister(const ZydisDecodedOperand& operand);
+    void LoadMemoryOperand(int dst_reg, const ZydisDecodedOperand& mem_op, size_t size);
+    void StoreMemoryOperand(int src_reg, const ZydisDecodedOperand& mem_op, size_t size);
+    void LoadImmediate(int dst_reg, const ZydisDecodedOperand& imm_op);
+    void CalculateMemoryAddress(int dst_reg, const ZydisDecodedOperand& mem_op);
+    X86_64Register ZydisToX86_64Register(ZydisRegister reg);
+
+    Arm64CodeGenerator& codegen;
+    RegisterMapper& reg_mapper;
+};
+
+} // namespace Core::Jit
diff --git a/src/core/linker.cpp b/src/core/linker.cpp
index 7ac8791ae..fae0d608b 100644
--- a/src/core/linker.cpp
+++ b/src/core/linker.cpp
@@ -20,6 +20,9 @@
 #include "core/memory.h"
 #include "core/tls.h"
 #include "ipc/ipc.h"
+#ifdef ARCH_ARM64
+#include "core/jit/execution_engine.h"
+#endif
 
 namespace Core {
 
@@ -51,22 +54,13 @@ static PS4_SYSV_ABI void* RunMainEntry [[noreturn]] (EntryParams* params) {
 }
 #elif defined(ARCH_ARM64)
 static PS4_SYSV_ABI void* RunMainEntry [[noreturn]] (EntryParams* params) {
-    void* entry = reinterpret_cast<void*>(params->entry_addr);
-    asm volatile("mov x2, sp\n"
-                 "and x2, x2, #0xFFFFFFFFFFFFFFF0\n"
-                 "sub x2, x2, #8\n"
-                 "mov sp, x2\n"
-                 "ldr x0, [%1, #8]\n"
-                 "sub sp, sp, #16\n"
-                 "str x0, [sp]\n"
-                 "ldr x0, [%1]\n"
-                 "str x0, [sp, #8]\n"
-                 "mov x0, %1\n"
-                 "mov x1, %2\n"
-                 "br %0\n"
-                 :
-                 : "r"(entry), "r"(params), "r"(ProgramExitFunc)
-                 : "x0", "x1", "x2", "memory");
+    auto* jit = Core::Jit::JitEngine::Instance();
+    if (jit) {
+        jit->Initialize();
+        jit->ExecuteBlock(params->entry_addr);
+    } else {
+        LOG_CRITICAL(Core_Linker, "JIT engine not available");
+    }
     UNREACHABLE();
 }
 #endif
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index b9fd7fd7d..c04d2e0f5 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -6,6 +6,7 @@
 #include "common/config.h"
 #include "common/debug.h"
 #include "core/file_sys/fs.h"
+#include "core/jit/execution_engine.h"
 #include "core/libraries/kernel/memory.h"
 #include "core/libraries/kernel/orbis_error.h"
 #include "core/libraries/kernel/process.h"
@@ -849,6 +850,15 @@ s64 MemoryManager::ProtectBytes(VAddr addr, VirtualMemoryArea& vma_base, u64 siz
 
     impl.Protect(addr, size, perms);
 
+#ifdef ARCH_ARM64
+    if (True(prot & MemoryProt::CpuWrite) && vma_base.type == VMAType::Code) {
+        auto* jit = Core::Jit::JitEngine::Instance();
+        if (jit) {
+            jit->InvalidateRange(addr, addr + adjusted_size);
+        }
+    }
+#endif
+
     return adjusted_size;
 }
diff --git a/src/core/signals.cpp b/src/core/signals.cpp
index 4099ac237..6f1761a94 100644
--- a/src/core/signals.cpp
+++ b/src/core/signals.cpp
@@ -6,6 +6,9 @@
 #include "common/decoder.h"
 #include "common/signal_context.h"
 #include "core/signals.h"
+#ifdef ARCH_ARM64
+#include "core/jit/execution_engine.h"
+#endif
 
 #ifdef _WIN32
 #include <windows.h>
@@ -79,6 +82,15 @@ static void SignalHandler(int sig, siginfo_t* info, void* raw_context) {
     case SIGSEGV:
     case SIGBUS: {
         const bool is_write = Common::IsWriteError(raw_context);
+#ifdef ARCH_ARM64
+        auto* jit = Core::Jit::JitEngine::Instance();
+        if (jit && jit->IsJitCode(code_address)) {
+            VAddr ps4_addr = jit->GetPs4AddressForJitCode(code_address);
+            if (ps4_addr != 0) {
+                jit->InvalidateBlock(ps4_addr);
+            }
+        }
+#endif
         if (!signals->DispatchAccessViolation(raw_context, info->si_addr)) {
             UNREACHABLE_MSG(
                 "Unhandled access violation in thread '{}' at code address {}: {} address {}",
@@ -87,13 +99,20 @@ static void SignalHandler(int sig, siginfo_t* info, void* raw_context) {
         }
         break;
     }
-    case SIGILL:
+    case SIGILL: {
+#ifdef ARCH_ARM64
+        auto* jit = Core::Jit::JitEngine::Instance();
+        if (jit && jit->IsJitCode(code_address)) {
+            LOG_ERROR(Core, "Illegal instruction in JIT code at {}", fmt::ptr(code_address));
+        }
+#endif
         if (!signals->DispatchIllegalInstruction(raw_context)) {
             UNREACHABLE_MSG("Unhandled illegal instruction in thread '{}' at code address {}: {}",
                             GetThreadName(), fmt::ptr(code_address),
                             DisassembleInstruction(code_address));
         }
         break;
+    }
     case SIGUSR1: { // Sleep thread until signal is received
         sigset_t sigset;
         sigemptyset(&sigset);
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
new file mode 100644
index 000000000..9dcf67f74
--- /dev/null
+++ b/tests/CMakeLists.txt
@@ -0,0 +1,58 @@
+# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+add_executable(jit_tests
+    test_arm64_codegen.cpp
+    test_register_mapping.cpp
+    test_block_manager.cpp
+    test_execution_engine.cpp
+    main.cpp
+)
+
+if (ARCHITECTURE STREQUAL "arm64")
+    target_sources(jit_tests PRIVATE
+        ${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/arm64_codegen.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/arm64_codegen.h
+        ${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/register_mapping.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/register_mapping.h
+        ${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/block_manager.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/block_manager.h
+        ${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/x86_64_translator.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/x86_64_translator.h
+        ${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/simd_translator.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/simd_translator.h
+        ${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/calling_convention.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/calling_convention.h
+    )
+endif()
+
+target_sources(jit_tests PRIVATE
+    ${CMAKE_CURRENT_SOURCE_DIR}/../src/common/assert.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/../src/common/decoder.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/test_logging_stub.cpp
+)
+
+target_link_libraries(jit_tests PRIVATE
+    GTest::gtest
+    GTest::gtest_main
+    GTest::gmock
+    Zydis::Zydis
+    fmt::fmt
+)
+
+target_include_directories(jit_tests PRIVATE
+    ${CMAKE_CURRENT_SOURCE_DIR}/../src
+    ${CMAKE_CURRENT_SOURCE_DIR}/../externals/zydis/include
+)
+
+target_compile_definitions(jit_tests PRIVATE
+    ARCH_ARM64
+)
+
+# to make ctest work
+add_test(NAME JitTests COMMAND jit_tests)
+
+set_tests_properties(JitTests PROPERTIES
+    TIMEOUT 60
+    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+)
diff --git a/tests/main.cpp b/tests/main.cpp
new file mode 100644
index 000000000..216dc15b5
--- /dev/null
+++ b/tests/main.cpp
@@ -0,0 +1,9 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <gtest/gtest.h>
+
+int main(int argc, char **argv) {
+    ::testing::InitGoogleTest(&argc, argv);
+    return RUN_ALL_TESTS();
+}
diff --git a/tests/test_arm64_codegen.cpp b/tests/test_arm64_codegen.cpp
new file mode 100644
index 000000000..cec51731f
--- /dev/null
+++ b/tests/test_arm64_codegen.cpp
@@ -0,0 +1,111 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "core/jit/arm64_codegen.h"
+#include <gtest/gtest.h>
+#include <cstring>
+#include <memory>
+
+using namespace Core::Jit;
+
+class Arm64CodeGenTest : public ::testing::Test {
+protected:
+    void SetUp() override { test_gen = std::make_unique<Arm64CodeGenerator>(); }
+
+    void TearDown() override { test_gen.reset(); }
+
+    std::unique_ptr<Arm64CodeGenerator> test_gen;
+};
+
+TEST_F(Arm64CodeGenTest, Constructor) {
+    EXPECT_NE(test_gen->getCode(), nullptr);
+
EXPECT_EQ(test_gen->getSize(), 0); +} + +TEST_F(Arm64CodeGenTest, Reset) { + test_gen->add(0, 1, 2); + size_t size_after_add = test_gen->getSize(); + EXPECT_GT(size_after_add, 0); + + test_gen->reset(); + EXPECT_EQ(test_gen->getSize(), 0); +} + +TEST_F(Arm64CodeGenTest, AddInstruction) { + test_gen->add(0, 1, 2); // X0 = X1 + X2 + EXPECT_GT(test_gen->getSize(), 0); + EXPECT_LE(test_gen->getSize(), 4); // Should be 4 bytes (one instruction) +} + +TEST_F(Arm64CodeGenTest, AddImmediate) { + test_gen->add_imm(0, 1, 42); // X0 = X1 + 42 + EXPECT_GT(test_gen->getSize(), 0); +} + +TEST_F(Arm64CodeGenTest, MovRegister) { + test_gen->mov(0, 1); // X0 = X1 + EXPECT_GT(test_gen->getSize(), 0); +} + +TEST_F(Arm64CodeGenTest, MovImmediate) { + test_gen->mov(0, 0x1234LL); // X0 = 0x1234 + EXPECT_GT(test_gen->getSize(), 0); + // Large immediate may require multiple instructions + EXPECT_LE(test_gen->getSize(), + 16); // Up to 4 instructions for 64-bit immediate +} + +TEST_F(Arm64CodeGenTest, LoadStore) { + test_gen->ldr(0, 1, 0); // X0 = [X1] + test_gen->str(0, 1, 0); // [X1] = X0 + EXPECT_GE(test_gen->getSize(), 8); // At least 2 instructions +} + +TEST_F(Arm64CodeGenTest, Branch) { + void *target = test_gen->getCode(); // Branch to start of code + test_gen->b(target); + EXPECT_GT(test_gen->getSize(), 0); +} + +TEST_F(Arm64CodeGenTest, ConditionalBranch) { + void *target = test_gen->getCode(); // Branch to start of code + test_gen->b(0, target); // Branch if equal + EXPECT_GT(test_gen->getSize(), 0); +} + +TEST_F(Arm64CodeGenTest, Compare) { + test_gen->cmp(0, 1); // Compare X0 and X1 + EXPECT_GT(test_gen->getSize(), 0); +} + +TEST_F(Arm64CodeGenTest, ArithmeticOperations) { + test_gen->add(0, 1, 2); + test_gen->sub(0, 1, 2); + test_gen->mul(0, 1, 2); + test_gen->and_(0, 1, 2); + test_gen->orr(0, 1, 2); + test_gen->eor(0, 1, 2); + EXPECT_GE(test_gen->getSize(), 24); // At least 6 instructions +} + +TEST_F(Arm64CodeGenTest, SIMDOperations) { + test_gen->mov_v(0, 1); // V0 = V1 + test_gen->add_v(0, 1, 2); // V0 = V1 + V2 + test_gen->sub_v(0, 1, 2); // V0 = V1 - V2 + test_gen->mul_v(0, 1, 2); // V0 = V1 * V2 + EXPECT_GE(test_gen->getSize(), 16); // At least 4 instructions +} + +TEST_F(Arm64CodeGenTest, SetSize) { + test_gen->add(0, 1, 2); + size_t original_size = test_gen->getSize(); + EXPECT_GT(original_size, 0); + + // Test setting size to 0 + test_gen->setSize(0); + EXPECT_EQ(test_gen->getSize(), 0); + + // Test setting size back (this should work without throwing) + test_gen->setSize(original_size); + EXPECT_EQ(test_gen->getSize(), original_size); +} diff --git a/tests/test_block_manager.cpp b/tests/test_block_manager.cpp new file mode 100644 index 000000000..ab5dc3f53 --- /dev/null +++ b/tests/test_block_manager.cpp @@ -0,0 +1,180 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "core/jit/block_manager.h" +#include +#include +#if defined(__APPLE__) && defined(ARCH_ARM64) +#include +#endif + +using namespace Core::Jit; + +class BlockManagerTest : public ::testing::Test { +protected: + void SetUp() override { + // Allocate executable memory for test code blocks +#if defined(__APPLE__) && defined(ARCH_ARM64) + // On macOS ARM64, use the JIT API approach + test_code = mmap(nullptr, 64 * 1024, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(test_code, MAP_FAILED) + << "Failed to allocate executable memory for test"; + pthread_jit_write_protect_np(0); // Disable write protection for writing + // Will 
make executable later if needed +#else + test_code = mmap(nullptr, 64 * 1024, PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(test_code, MAP_FAILED) + << "Failed to allocate executable memory for test"; +#endif + } + + void TearDown() override { + if (test_code != MAP_FAILED) { + munmap(test_code, 64 * 1024); + } + } + + void *test_code = MAP_FAILED; +}; + +TEST_F(BlockManagerTest, Constructor) { + BlockManager manager; + EXPECT_EQ(manager.GetBlockCount(), 0); + EXPECT_EQ(manager.GetTotalCodeSize(), 0); +} + +TEST_F(BlockManagerTest, CreateBlock) { + BlockManager manager; + VAddr ps4_addr = 0x400000; + void *arm64_code = test_code; + size_t code_size = 1024; + size_t instruction_count = 10; + + CodeBlock *block = + manager.CreateBlock(ps4_addr, arm64_code, code_size, instruction_count); + ASSERT_NE(block, nullptr); + EXPECT_EQ(block->ps4_address, ps4_addr); + EXPECT_EQ(block->arm64_code, arm64_code); + EXPECT_EQ(block->code_size, code_size); + EXPECT_EQ(block->instruction_count, instruction_count); + EXPECT_FALSE(block->is_linked); + EXPECT_EQ(manager.GetBlockCount(), 1); + EXPECT_EQ(manager.GetTotalCodeSize(), code_size); +} + +TEST_F(BlockManagerTest, GetBlock) { + BlockManager manager; + VAddr ps4_addr = 0x400000; + void *arm64_code = test_code; + + // Block doesn't exist yet + CodeBlock *block = manager.GetBlock(ps4_addr); + EXPECT_EQ(block, nullptr); + + manager.CreateBlock(ps4_addr, arm64_code, 1024, 10); + + // Now it should exist + block = manager.GetBlock(ps4_addr); + ASSERT_NE(block, nullptr); + EXPECT_EQ(block->ps4_address, ps4_addr); +} + +TEST_F(BlockManagerTest, MultipleBlocks) { + BlockManager manager; + + // Create multiple blocks + for (int i = 0; i < 10; ++i) { + VAddr ps4_addr = 0x400000 + (i * 0x1000); + void *arm64_code = static_cast(test_code) + (i * 1024); + manager.CreateBlock(ps4_addr, arm64_code, 1024, 10); + } + + EXPECT_EQ(manager.GetBlockCount(), 10); + EXPECT_EQ(manager.GetTotalCodeSize(), 10 * 1024); +} + +TEST_F(BlockManagerTest, InvalidateBlock) { + BlockManager manager; + VAddr ps4_addr = 0x400000; + + // Create and verify block exists + manager.CreateBlock(ps4_addr, test_code, 1024, 10); + EXPECT_NE(manager.GetBlock(ps4_addr), nullptr); + + // Invalidate block + manager.InvalidateBlock(ps4_addr); + + // Block should no longer exist + EXPECT_EQ(manager.GetBlock(ps4_addr), nullptr); + EXPECT_EQ(manager.GetBlockCount(), 0); + EXPECT_EQ(manager.GetTotalCodeSize(), 0); +} + +TEST_F(BlockManagerTest, InvalidateRange) { + BlockManager manager; + + // Create blocks at different addresses + manager.CreateBlock(0x400000, test_code, 1024, 10); + manager.CreateBlock(0x401000, static_cast(test_code) + 1024, 1024, + 10); + manager.CreateBlock(0x402000, static_cast(test_code) + 2048, 1024, + 10); + manager.CreateBlock(0x500000, static_cast(test_code) + 3072, 1024, + 10); + + EXPECT_EQ(manager.GetBlockCount(), 4); + + // Invalidate range that covers first 3 blocks + manager.InvalidateRange(0x400000, 0x403000); + + // First 3 blocks should be gone, last one should remain + EXPECT_EQ(manager.GetBlock(0x400000), nullptr); + EXPECT_EQ(manager.GetBlock(0x401000), nullptr); + EXPECT_EQ(manager.GetBlock(0x402000), nullptr); + EXPECT_NE(manager.GetBlock(0x500000), nullptr); + EXPECT_EQ(manager.GetBlockCount(), 1); +} + +TEST_F(BlockManagerTest, AddDependency) { + BlockManager manager; + VAddr block_addr = 0x400000; + VAddr dep_addr = 0x500000; + + CodeBlock *block = manager.CreateBlock(block_addr, test_code, 1024, 10); + 
manager.AddDependency(block_addr, dep_addr); + + EXPECT_EQ(block->dependencies.size(), 1); + EXPECT_NE(block->dependencies.find(dep_addr), block->dependencies.end()); +} + +TEST_F(BlockManagerTest, MultipleDependencies) { + BlockManager manager; + VAddr block_addr = 0x400000; + + CodeBlock *block = manager.CreateBlock(block_addr, test_code, 1024, 10); + manager.AddDependency(block_addr, 0x500000); + manager.AddDependency(block_addr, 0x600000); + manager.AddDependency(block_addr, 0x700000); + + EXPECT_EQ(block->dependencies.size(), 3); +} + +TEST_F(BlockManagerTest, Clear) { + BlockManager manager; + + // Create multiple blocks + for (int i = 0; i < 5; ++i) { + VAddr ps4_addr = 0x400000 + (i * 0x1000); + void *arm64_code = static_cast(test_code) + (i * 1024); + manager.CreateBlock(ps4_addr, arm64_code, 1024, 10); + } + + EXPECT_EQ(manager.GetBlockCount(), 5); + + manager.Clear(); + + EXPECT_EQ(manager.GetBlockCount(), 0); + EXPECT_EQ(manager.GetTotalCodeSize(), 0); +} diff --git a/tests/test_execution_engine.cpp b/tests/test_execution_engine.cpp new file mode 100644 index 000000000..e69e02905 --- /dev/null +++ b/tests/test_execution_engine.cpp @@ -0,0 +1,49 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "core/jit/arm64_codegen.h" +#include "core/jit/block_manager.h" +#include "core/jit/register_mapping.h" +#include +#include + +using namespace Core::Jit; + +// NOTE: ExecutionEngine requires MemoryManager and AddressSpace which have +// heavy dependencies. These tests focus on the components that can be tested in +// isolation. Full integration tests would require the complete emulator system +// to be initialized. Let's just skip them for now. + +class ExecutionEngineComponentTest : public ::testing::Test { +protected: + void SetUp() override {} + + void TearDown() override {} +}; + +// Test that the components used by ExecutionEngine can be constructed +TEST_F(ExecutionEngineComponentTest, ComponentConstruction) { + BlockManager block_manager; + RegisterMapper register_mapper; + Arm64CodeGenerator code_generator; + + // All components should construct successfully + EXPECT_EQ(block_manager.GetBlockCount(), 0); + EXPECT_NE(code_generator.getCode(), nullptr); +} + +// Test block invalidation through BlockManager (used by ExecutionEngine) +TEST_F(ExecutionEngineComponentTest, BlockInvalidation) { + BlockManager block_manager; + VAddr test_addr = 0x400000; + + // Invalidate should not crash even if block doesn't exist + EXPECT_NO_THROW(block_manager.InvalidateBlock(test_addr)); +} + +TEST_F(ExecutionEngineComponentTest, BlockInvalidateRange) { + BlockManager block_manager; + + // Invalidate range should not crash + EXPECT_NO_THROW(block_manager.InvalidateRange(0x400000, 0x500000)); +} diff --git a/tests/test_logging_stub.cpp b/tests/test_logging_stub.cpp new file mode 100644 index 000000000..cd16d66bc --- /dev/null +++ b/tests/test_logging_stub.cpp @@ -0,0 +1,25 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/logging/types.h" +#include + +namespace Common::Log { + +void FmtLogMessageImpl(Class log_class, Level log_level, const char *filename, + unsigned int line_num, const char *function, + const char *format, const fmt::format_args &args) { + // Stub implementation - just ignore logs in tests + (void)log_class; + (void)log_level; + (void)filename; + (void)line_num; + (void)function; + (void)format; + (void)args; +} + +void 
Start() {}
+void Stop() {}
+
+} // namespace Common::Log
diff --git a/tests/test_register_mapping.cpp b/tests/test_register_mapping.cpp
new file mode 100644
index 000000000..d1f15583c
--- /dev/null
+++ b/tests/test_register_mapping.cpp
@@ -0,0 +1,86 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "core/jit/register_mapping.h"
+#include <gtest/gtest.h>
+
+using namespace Core::Jit;
+
+class RegisterMappingTest : public ::testing::Test {
+protected:
+    RegisterMapper mapper;
+};
+
+TEST_F(RegisterMappingTest, MapGeneralPurposeRegisters) {
+    // Test mapping of common x86_64 registers
+    EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::RAX), 0); // X0
+    EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::RCX), 1); // X1
+    EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::RDX), 2); // X2
+    EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::RSI), 3); // X3
+    EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::RDI), 0); // X0 (same as RAX)
+    EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::R8), 4);  // X4
+    EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::R9), 5);  // X5
+}
+
+TEST_F(RegisterMappingTest, MapStackPointer) {
+    EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::RSP), 31); // SP
+}
+
+TEST_F(RegisterMappingTest, MapFramePointer) {
+    EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::RBP), 29); // FP
+}
+
+TEST_F(RegisterMappingTest, MapCalleeSavedRegisters) {
+    EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::RBX), 19); // X19
+    EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::R12), 20); // X20
+    EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::R13), 21); // X21
+    EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::R14), 22); // X22
+    EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::R15), 23); // X23
+}
+
+TEST_F(RegisterMappingTest, MapFlagsRegister) {
+    EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::FLAGS), 11); // X11
+}
+
+TEST_F(RegisterMappingTest, MapXMMRegisters) {
+    // Test mapping of XMM registers to NEON registers (V registers start at 32)
+    EXPECT_EQ(mapper.MapX86_64XmmToArm64Neon(X86_64Register::XMM0), 32); // V0
+    EXPECT_EQ(mapper.MapX86_64XmmToArm64Neon(X86_64Register::XMM1), 33); // V1
+    EXPECT_EQ(mapper.MapX86_64XmmToArm64Neon(X86_64Register::XMM2), 34); // V2
+    EXPECT_EQ(mapper.MapX86_64XmmToArm64Neon(X86_64Register::XMM3), 35); // V3
+    EXPECT_EQ(mapper.MapX86_64XmmToArm64Neon(X86_64Register::XMM4), 36); // V4
+    EXPECT_EQ(mapper.MapX86_64XmmToArm64Neon(X86_64Register::XMM5), 37); // V5
+    EXPECT_EQ(mapper.MapX86_64XmmToArm64Neon(X86_64Register::XMM6), 38); // V6
+    EXPECT_EQ(mapper.MapX86_64XmmToArm64Neon(X86_64Register::XMM7), 39); // V7
+}
+
+TEST_F(RegisterMappingTest, MapAllXMMRegisters) {
+    // Test all 16 XMM registers (V registers start at 32)
+    for (int i = 0; i < 16; ++i) {
+        X86_64Register xmm_reg =
+            static_cast<X86_64Register>(static_cast<int>(X86_64Register::XMM0) + i);
+        int neon_reg = mapper.MapX86_64XmmToArm64Neon(xmm_reg);
+        EXPECT_EQ(neon_reg, 32 + i)
+            << "XMM" << i << " should map to V" << i << " (register number " << (32 + i) << ")";
+    }
+}
+
+TEST_F(RegisterMappingTest, InvalidRegister) {
+    // COUNT is not a valid register, but the implementation uses ASSERT_MSG,
+    // which aborts on invalid input, so out-of-range values cannot be probed
+    // here; that would require an implementation that returns error codes
+    // instead. For now, just verify that the last valid register maps
+    // correctly.
+    int result = mapper.MapX86_64ToArm64(X86_64Register::XMM15);
+    EXPECT_GE(result, 
0) << "Last valid register should map correctly"; +} + +TEST_F(RegisterMappingTest, RegisterMappingConsistency) { + // Test that register mappings are consistent + // RAX should always map to the same ARM64 register + int reg1 = mapper.MapX86_64ToArm64(X86_64Register::RAX); + int reg2 = mapper.MapX86_64ToArm64(X86_64Register::RAX); + EXPECT_EQ(reg1, reg2); +} From 66a160a995a1fa0c96aed9befaf2691deed9c497 Mon Sep 17 00:00:00 2001 From: AlpinDale Date: Tue, 9 Dec 2025 02:29:19 +0430 Subject: [PATCH 07/19] clang-format --- src/common/va_ctx.h | 10 +++++----- src/core/libraries/fiber/fiber.cpp | 3 +-- src/core/libraries/libc_internal/libc_internal_io.cpp | 3 ++- .../ir/passes/flatten_extended_userdata_pass.cpp | 3 +-- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/common/va_ctx.h b/src/common/va_ctx.h index f39ab70b4..f0788c6c5 100644 --- a/src/common/va_ctx.h +++ b/src/common/va_ctx.h @@ -39,11 +39,11 @@ (ctx).va_list.overflow_arg_area = &overflow_arg_area; #elif defined(ARCH_ARM64) #define VA_ARGS ... -#define VA_CTX(ctx) \ - alignas(16)::Common::VaCtx ctx{}; \ - (ctx).va_list.reg_save_area = nullptr; \ - (ctx).va_list.gp_offset = 0; \ - (ctx).va_list.fp_offset = 0; \ +#define VA_CTX(ctx) \ + alignas(16)::Common::VaCtx ctx{}; \ + (ctx).va_list.reg_save_area = nullptr; \ + (ctx).va_list.gp_offset = 0; \ + (ctx).va_list.fp_offset = 0; \ (ctx).va_list.overflow_arg_area = nullptr; #else #error "Unsupported architecture" diff --git a/src/core/libraries/fiber/fiber.cpp b/src/core/libraries/fiber/fiber.cpp index 8e95ca769..a490548c8 100644 --- a/src/core/libraries/fiber/fiber.cpp +++ b/src/core/libraries/fiber/fiber.cpp @@ -32,8 +32,7 @@ extern "C" void PS4_SYSV_ABI _sceFiberSwitchEntry(OrbisFiberData* data, #elif defined(ARCH_ARM64) extern "C" s32 PS4_SYSV_ABI _sceFiberSetJmp(OrbisFiberContext* ctx); extern "C" s32 PS4_SYSV_ABI _sceFiberLongJmp(OrbisFiberContext* ctx); -extern "C" void PS4_SYSV_ABI _sceFiberSwitchEntry(OrbisFiberData* data, - bool set_fpu); +extern "C" void PS4_SYSV_ABI _sceFiberSwitchEntry(OrbisFiberData* data, bool set_fpu); #endif extern "C" void PS4_SYSV_ABI _sceFiberForceQuit(u64 ret) asm("_sceFiberForceQuit"); diff --git a/src/core/libraries/libc_internal/libc_internal_io.cpp b/src/core/libraries/libc_internal/libc_internal_io.cpp index 1677d41a4..cd15a8f35 100644 --- a/src/core/libraries/libc_internal/libc_internal_io.cpp +++ b/src/core/libraries/libc_internal/libc_internal_io.cpp @@ -18,6 +18,7 @@ int PS4_SYSV_ABI internal_snprintf(char* s, size_t n, VA_ARGS) { return snprintf_ctx(s, n, &ctx); } void RegisterlibSceLibcInternalIo(Core::Loader::SymbolsResolver* sym) { - LIB_FUNCTION_VARIADIC("eLdDw6l0-bU", "libSceLibcInternal", 1, "libSceLibcInternal", internal_snprintf); + LIB_FUNCTION_VARIADIC("eLdDw6l0-bU", "libSceLibcInternal", 1, "libSceLibcInternal", + internal_snprintf); } } // namespace Libraries::LibcInternal \ No newline at end of file diff --git a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp index 07e004ae6..90a938639 100644 --- a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp +++ b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp @@ -253,8 +253,7 @@ static void GenerateSrtProgram(Info& info, PassInfo& pass_info) { info.srt_info.flattened_bufsize_dw = pass_info.dst_off_dw; } #else -static void GenerateSrtProgram(Info& info, PassInfo& pass_info) { -} +static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {} 
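+// Without Xbyak there is no host assembler to build SRT programs with, hence
+// the no-op stub above.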
#endif }; // namespace From 8d0182fd9953fc9c13af4ae2ac6fa76fd3304911 Mon Sep 17 00:00:00 2001 From: AlpinDale Date: Tue, 9 Dec 2025 02:39:48 +0430 Subject: [PATCH 08/19] try to fix the ci --- .github/workflows/build.yml | 53 ++++++++++++++++++++++++++++++++++++- src/core/address_space.cpp | 8 +++--- 2 files changed, 57 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d1fab6354..05cf1bf53 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -146,6 +146,57 @@ jobs: name: shadps4-macos-sdl-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }} path: upload/ + macos-sdl-arm64: + runs-on: macos-15 + needs: get-info + steps: + - uses: actions/checkout@v5 + with: + submodules: recursive + + - name: Setup latest Xcode + uses: maxim-lobanov/setup-xcode@v1 + with: + xcode-version: latest + + - name: Cache CMake Configuration + uses: actions/cache@v4 + env: + cache-name: ${{ runner.os }}-sdl-arm64-cache-cmake-configuration + with: + path: | + ${{github.workspace}}/build-arm64 + key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} + restore-keys: | + ${{ env.cache-name }}- + + - name: Cache CMake Build + uses: hendrikmuhs/ccache-action@v1.2.19 + env: + cache-name: ${{runner.os}}-sdl-arm64-cache-cmake-build + with: + append-timestamp: false + create-symlink: true + key: ${{env.cache-name}}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} + variant: sccache + + - name: Configure CMake + run: cmake --fresh -B ${{github.workspace}}/build-arm64 -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache + + - name: Build + run: cmake --build ${{github.workspace}}/build-arm64 --config ${{env.BUILD_TYPE}} --parallel $(sysctl -n hw.ncpu) + + - name: Package and Upload macOS ARM64 SDL artifact + run: | + mkdir upload-arm64 + mv ${{github.workspace}}/build-arm64/shadps4 upload-arm64 + mv ${{github.workspace}}/build-arm64/MoltenVK_icd.json upload-arm64 + mv ${{github.workspace}}/build-arm64/libMoltenVK.dylib upload-arm64 + - uses: actions/upload-artifact@v4 + with: + name: shadps4-macos-arm64-sdl-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }} + path: upload-arm64/ + linux-sdl: runs-on: ubuntu-24.04 needs: get-info @@ -245,7 +296,7 @@ jobs: pre-release: if: github.ref == 'refs/heads/main' && github.repository == 'shadps4-emu/shadPS4' && github.event_name == 'push' - needs: [get-info, windows-sdl, macos-sdl, linux-sdl] + needs: [get-info, windows-sdl, macos-sdl, macos-sdl-arm64, linux-sdl] runs-on: ubuntu-latest steps: - name: Download all artifacts diff --git a/src/core/address_space.cpp b/src/core/address_space.cpp index a82a224a3..d186d8a04 100644 --- a/src/core/address_space.cpp +++ b/src/core/address_space.cpp @@ -574,6 +574,7 @@ struct AddressSpace::Impl { #else const auto virtual_size = system_managed_size + system_reserved_size + user_size; #if defined(ARCH_X86_64) + constexpr int map_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | MAP_FIXED; const auto virtual_base = reinterpret_cast(mmap(reinterpret_cast(SYSTEM_MANAGED_MIN), virtual_size, protection_flags, map_flags, -1, 0)); @@ -581,6 +582,7 @@ struct AddressSpace::Impl { system_reserved_base = reinterpret_cast(SYSTEM_RESERVED_MIN); user_base = reinterpret_cast(USER_MIN); #else + constexpr int map_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; // Map memory 
wherever possible and instruction translation can handle offsetting to the // base. const auto virtual_base = @@ -732,7 +734,7 @@ struct AddressSpace::Impl { if (write) { flags |= PROT_WRITE; } -#ifdef ARCH_X86_64 +#if defined(ARCH_X86_64) if (execute) { flags |= PROT_EXEC; } @@ -786,7 +788,7 @@ AddressSpace::~AddressSpace() = default; void* AddressSpace::Map(VAddr virtual_addr, size_t size, u64 alignment, PAddr phys_addr, bool is_exec) { -#if ARCH_X86_64 +#if defined(ARCH_X86_64) const auto prot = is_exec ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE; #else // On non-native architectures, we can simplify things by ignoring the execute flag for the @@ -857,7 +859,7 @@ boost::icl::interval_set AddressSpace::GetUsableRegions() { } void* AddressSpace::TranslateAddress(VAddr ps4_addr) const { -#ifdef ARCH_X86_64 +#if defined(ARCH_X86_64) // On x86_64, PS4 addresses are directly mapped, so we can cast them return reinterpret_cast(ps4_addr); #elif defined(ARCH_ARM64) && defined(__APPLE__) From 185e6cd4295d0ce4a9d74d2a65c0de5166515ba4 Mon Sep 17 00:00:00 2001 From: AlpinDale Date: Tue, 9 Dec 2025 02:52:44 +0430 Subject: [PATCH 09/19] use `pthread_jit_write_protect_np` in execution engine --- src/core/jit/execution_engine.cpp | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/src/core/jit/execution_engine.cpp b/src/core/jit/execution_engine.cpp index 768f63392..cc257d9f7 100644 --- a/src/core/jit/execution_engine.cpp +++ b/src/core/jit/execution_engine.cpp @@ -7,11 +7,33 @@ #include "common/logging/log.h" #include "core/memory.h" #include "execution_engine.h" +#if defined(__APPLE__) && defined(ARCH_ARM64) +#include +#endif namespace Core::Jit { +static size_t alignUp(size_t value, size_t alignment) { + return (value + alignment - 1) & ~(alignment - 1); +} + static void* AllocateExecutableMemory(size_t size) { - size = (size + 4095) & ~4095; + size = alignUp(size, 4096); +#if defined(__APPLE__) && defined(ARCH_ARM64) + // On macOS ARM64: + // 1. Allocate with PROT_READ | PROT_WRITE (no PROT_EXEC initially) + // 2. Use pthread_jit_write_protect_np to allow writing + // 3. 
After writing, use mprotect to add PROT_EXEC + void* ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (ptr == MAP_FAILED) { + LOG_CRITICAL(Core, "Failed to allocate executable memory: {} (errno={})", strerror(errno), + errno); + return nullptr; + } + // Initially disable write protection so we can write code + pthread_jit_write_protect_np(0); + return ptr; +#else void* ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (ptr == MAP_FAILED) { @@ -19,6 +41,7 @@ static void* AllocateExecutableMemory(size_t size) { return nullptr; } return ptr; +#endif } ExecutionEngine::ExecutionEngine() @@ -45,6 +68,10 @@ void ExecutionEngine::Initialize() { void ExecutionEngine::Shutdown() { if (code_buffer) { +#if defined(__APPLE__) && defined(ARCH_ARM64) + // On macOS ARM64, ensure write protection is enabled before unmapping + pthread_jit_write_protect_np(1); +#endif munmap(code_buffer, code_buffer_size); code_buffer = nullptr; } @@ -122,6 +149,7 @@ CodeBlock* ExecutionEngine::TranslateBasicBlock(VAddr start_address, size_t max_ } size_t code_size = code_generator->getSize(); + code_generator->makeExecutable(); CodeBlock* block = block_manager->CreateBlock(start_address, block_start, code_size, instruction_count); From 7326c11a3e11a8745c5038c4b419085c0e4967d9 Mon Sep 17 00:00:00 2001 From: AlpinDale Date: Tue, 9 Dec 2025 02:59:51 +0430 Subject: [PATCH 10/19] early jit init --- src/core/jit/arm64_codegen.h | 3 ++- src/core/jit/execution_engine.cpp | 5 +++++ src/core/jit/execution_engine.h | 3 +++ src/core/linker.cpp | 5 ++++- src/emulator.cpp | 14 ++++++++++++++ 5 files changed, 28 insertions(+), 2 deletions(-) diff --git a/src/core/jit/arm64_codegen.h b/src/core/jit/arm64_codegen.h index 13107a82b..519802c44 100644 --- a/src/core/jit/arm64_codegen.h +++ b/src/core/jit/arm64_codegen.h @@ -114,11 +114,12 @@ public: void sub_v(int vdst, int vsrc1, int vsrc2); void mul_v(int vdst, int vsrc1, int vsrc2); + void makeExecutable(); + private: void emit32(u32 instruction); void emit64(u64 instruction); void* allocateCode(size_t size); - void makeExecutable(); void* code_buffer; void* code_ptr; diff --git a/src/core/jit/execution_engine.cpp b/src/core/jit/execution_engine.cpp index cc257d9f7..e7b2f70bc 100644 --- a/src/core/jit/execution_engine.cpp +++ b/src/core/jit/execution_engine.cpp @@ -55,6 +55,11 @@ ExecutionEngine::~ExecutionEngine() { } void ExecutionEngine::Initialize() { + if (IsInitialized()) { + LOG_DEBUG(Core, "JIT Execution Engine already initialized"); + return; + } + code_buffer = AllocateExecutableMemory(code_buffer_size); if (!code_buffer) { throw std::bad_alloc(); diff --git a/src/core/jit/execution_engine.h b/src/core/jit/execution_engine.h index ec8195397..eb0c69499 100644 --- a/src/core/jit/execution_engine.h +++ b/src/core/jit/execution_engine.h @@ -28,6 +28,9 @@ public: void Initialize(); void Shutdown(); + bool IsInitialized() const { + return code_buffer != nullptr; + } private: CodeBlock* TranslateBasicBlock(VAddr start_address, size_t max_instructions = 100); diff --git a/src/core/linker.cpp b/src/core/linker.cpp index fae0d608b..ce6861100 100644 --- a/src/core/linker.cpp +++ b/src/core/linker.cpp @@ -56,7 +56,10 @@ static PS4_SYSV_ABI void* RunMainEntry [[noreturn]] (EntryParams* params) { static PS4_SYSV_ABI void* RunMainEntry [[noreturn]] (EntryParams* params) { auto* jit = Core::Jit::JitEngine::Instance(); if (jit) { - jit->Initialize(); + // JIT should already be initialized in 
Emulator::Run(), but check just in case + if (!jit->IsInitialized()) { + jit->Initialize(); + } jit->ExecuteBlock(params->entry_addr); } else { LOG_CRITICAL(Core_Linker, "JIT engine not available"); diff --git a/src/emulator.cpp b/src/emulator.cpp index f0026068c..bddf6a80e 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -30,6 +30,7 @@ #include "core/file_format/psf.h" #include "core/file_format/trp.h" #include "core/file_sys/fs.h" +#include "core/jit/execution_engine.h" #include "core/libraries/disc_map/disc_map.h" #include "core/libraries/font/font.h" #include "core/libraries/font/fontft.h" @@ -261,6 +262,19 @@ void Emulator::Run(std::filesystem::path file, std::vector args, controller = Common::Singleton::Instance(); linker = Common::Singleton::Instance(); +#ifdef ARCH_ARM64 + // Initialize JIT engine early for ARM64 builds + auto* jit = Core::Jit::JitEngine::Instance(); + if (jit) { + try { + jit->Initialize(); + LOG_INFO(Loader, "JIT Execution Engine initialized"); + } catch (const std::bad_alloc& e) { + LOG_CRITICAL(Loader, "Failed to initialize JIT engine: {}", e.what()); + } + } +#endif + // Load renderdoc module VideoCore::LoadRenderDoc(); From 32f08343b2928dfa2cc3f9f7033f8083b72c9bbc Mon Sep 17 00:00:00 2001 From: AlpinDale Date: Tue, 9 Dec 2025 03:10:56 +0430 Subject: [PATCH 11/19] register state preservation infra --- src/core/jit/register_mapping.cpp | 149 +++++++++++++++++++++++++++++- src/core/jit/register_mapping.h | 15 ++- 2 files changed, 160 insertions(+), 4 deletions(-) diff --git a/src/core/jit/register_mapping.cpp b/src/core/jit/register_mapping.cpp index 7a5634cb7..946f47182 100644 --- a/src/core/jit/register_mapping.cpp +++ b/src/core/jit/register_mapping.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include "arm64_codegen.h" #include "common/assert.h" #include "register_mapping.h" @@ -116,8 +117,152 @@ bool RegisterMapper::IsRegisterSpilled(X86_64Register x86_reg) const { return spilled_registers[index]; } -void RegisterMapper::SaveAllRegisters() {} +void RegisterMapper::SaveRegister(Arm64CodeGenerator& codegen, X86_64Register x86_reg, + RegisterContext* ctx) { + if (!ctx) { + return; + } -void RegisterMapper::RestoreAllRegisters() {} + int arm64_reg = MapX86_64ToArm64(x86_reg); + if (arm64_reg == INVALID_MAPPING) { + return; + } + + size_t index = static_cast(x86_reg); + if (IsXmmRegister(x86_reg)) { + int vreg = MapX86_64XmmToArm64Neon(x86_reg); + if (vreg != INVALID_MAPPING) { + codegen.movz(SCRATCH_REG, + reinterpret_cast(&ctx->xmm_regs[index - 16][0]) & 0xFFFF); + codegen.movk(SCRATCH_REG, + (reinterpret_cast(&ctx->xmm_regs[index - 16][0]) >> 16) & 0xFFFF, 16); + codegen.movk(SCRATCH_REG, + (reinterpret_cast(&ctx->xmm_regs[index - 16][0]) >> 32) & 0xFFFF, 32); + codegen.movk(SCRATCH_REG, + (reinterpret_cast(&ctx->xmm_regs[index - 16][0]) >> 48) & 0xFFFF, 48); + codegen.str_v(vreg, SCRATCH_REG, 0); + } + } else if (x86_reg == X86_64Register::FLAGS) { + codegen.movz(SCRATCH_REG, reinterpret_cast(&ctx->flags) & 0xFFFF); + codegen.movk(SCRATCH_REG, (reinterpret_cast(&ctx->flags) >> 16) & 0xFFFF, 16); + codegen.movk(SCRATCH_REG, (reinterpret_cast(&ctx->flags) >> 32) & 0xFFFF, 32); + codegen.movk(SCRATCH_REG, (reinterpret_cast(&ctx->flags) >> 48) & 0xFFFF, 48); + codegen.str(arm64_reg, SCRATCH_REG, 0); + } else if (x86_reg == X86_64Register::RSP || x86_reg == X86_64Register::RBP) { + if (arm64_reg == STACK_POINTER) { + codegen.mov(SCRATCH_REG, STACK_POINTER); + codegen.movz(SCRATCH_REG2, reinterpret_cast(&ctx->rsp) & 0xFFFF); + 
codegen.movk(SCRATCH_REG2, (reinterpret_cast(&ctx->rsp) >> 16) & 0xFFFF, 16); + codegen.movk(SCRATCH_REG2, (reinterpret_cast(&ctx->rsp) >> 32) & 0xFFFF, 32); + codegen.movk(SCRATCH_REG2, (reinterpret_cast(&ctx->rsp) >> 48) & 0xFFFF, 48); + codegen.str(SCRATCH_REG, SCRATCH_REG2, 0); + } else { + codegen.movz(SCRATCH_REG, reinterpret_cast(&ctx->rbp) & 0xFFFF); + codegen.movk(SCRATCH_REG, (reinterpret_cast(&ctx->rbp) >> 16) & 0xFFFF, 16); + codegen.movk(SCRATCH_REG, (reinterpret_cast(&ctx->rbp) >> 32) & 0xFFFF, 32); + codegen.movk(SCRATCH_REG, (reinterpret_cast(&ctx->rbp) >> 48) & 0xFFFF, 48); + codegen.str(arm64_reg, SCRATCH_REG, 0); + } + } else { + if (index < 16) { + codegen.movz(SCRATCH_REG, reinterpret_cast(&ctx->gp_regs[index]) & 0xFFFF); + codegen.movk(SCRATCH_REG, (reinterpret_cast(&ctx->gp_regs[index]) >> 16) & 0xFFFF, + 16); + codegen.movk(SCRATCH_REG, (reinterpret_cast(&ctx->gp_regs[index]) >> 32) & 0xFFFF, + 32); + codegen.movk(SCRATCH_REG, (reinterpret_cast(&ctx->gp_regs[index]) >> 48) & 0xFFFF, + 48); + codegen.str(arm64_reg, SCRATCH_REG, 0); + } + } +} + +void RegisterMapper::RestoreRegister(Arm64CodeGenerator& codegen, X86_64Register x86_reg, + RegisterContext* ctx) { + if (!ctx) { + return; + } + + int arm64_reg = MapX86_64ToArm64(x86_reg); + if (arm64_reg == INVALID_MAPPING) { + return; + } + + size_t index = static_cast(x86_reg); + if (IsXmmRegister(x86_reg)) { + int vreg = MapX86_64XmmToArm64Neon(x86_reg); + if (vreg != INVALID_MAPPING) { + codegen.movz(SCRATCH_REG, + reinterpret_cast(&ctx->xmm_regs[index - 16][0]) & 0xFFFF); + codegen.movk(SCRATCH_REG, + (reinterpret_cast(&ctx->xmm_regs[index - 16][0]) >> 16) & 0xFFFF, 16); + codegen.movk(SCRATCH_REG, + (reinterpret_cast(&ctx->xmm_regs[index - 16][0]) >> 32) & 0xFFFF, 32); + codegen.movk(SCRATCH_REG, + (reinterpret_cast(&ctx->xmm_regs[index - 16][0]) >> 48) & 0xFFFF, 48); + codegen.ldr_v(vreg, SCRATCH_REG, 0); + } + } else if (x86_reg == X86_64Register::FLAGS) { + codegen.movz(SCRATCH_REG, reinterpret_cast(&ctx->flags) & 0xFFFF); + codegen.movk(SCRATCH_REG, (reinterpret_cast(&ctx->flags) >> 16) & 0xFFFF, 16); + codegen.movk(SCRATCH_REG, (reinterpret_cast(&ctx->flags) >> 32) & 0xFFFF, 32); + codegen.movk(SCRATCH_REG, (reinterpret_cast(&ctx->flags) >> 48) & 0xFFFF, 48); + codegen.ldr(arm64_reg, SCRATCH_REG, 0); + } else if (x86_reg == X86_64Register::RSP || x86_reg == X86_64Register::RBP) { + if (arm64_reg == STACK_POINTER) { + codegen.movz(SCRATCH_REG, reinterpret_cast(&ctx->rsp) & 0xFFFF); + codegen.movk(SCRATCH_REG, (reinterpret_cast(&ctx->rsp) >> 16) & 0xFFFF, 16); + codegen.movk(SCRATCH_REG, (reinterpret_cast(&ctx->rsp) >> 32) & 0xFFFF, 32); + codegen.movk(SCRATCH_REG, (reinterpret_cast(&ctx->rsp) >> 48) & 0xFFFF, 48); + codegen.ldr(SCRATCH_REG2, SCRATCH_REG, 0); + codegen.mov(STACK_POINTER, SCRATCH_REG2); + } else { + codegen.movz(SCRATCH_REG, reinterpret_cast(&ctx->rbp) & 0xFFFF); + codegen.movk(SCRATCH_REG, (reinterpret_cast(&ctx->rbp) >> 16) & 0xFFFF, 16); + codegen.movk(SCRATCH_REG, (reinterpret_cast(&ctx->rbp) >> 32) & 0xFFFF, 32); + codegen.movk(SCRATCH_REG, (reinterpret_cast(&ctx->rbp) >> 48) & 0xFFFF, 48); + codegen.ldr(arm64_reg, SCRATCH_REG, 0); + } + } else { + if (index < 16) { + codegen.movz(SCRATCH_REG, reinterpret_cast(&ctx->gp_regs[index]) & 0xFFFF); + codegen.movk(SCRATCH_REG, (reinterpret_cast(&ctx->gp_regs[index]) >> 16) & 0xFFFF, + 16); + codegen.movk(SCRATCH_REG, (reinterpret_cast(&ctx->gp_regs[index]) >> 32) & 0xFFFF, + 32); + codegen.movk(SCRATCH_REG, (reinterpret_cast(&ctx->gp_regs[index]) >> 
48) & 0xFFFF, + 48); + codegen.ldr(arm64_reg, SCRATCH_REG, 0); + } + } +} + +void RegisterMapper::SaveAllRegisters(Arm64CodeGenerator& codegen, RegisterContext* ctx) { + if (!ctx) { + return; + } + + for (int i = 0; i < 16; i++) { + SaveRegister(codegen, static_cast(i), ctx); + } + for (int i = 16; i < 32; i++) { + SaveRegister(codegen, static_cast(i), ctx); + } + SaveRegister(codegen, X86_64Register::FLAGS, ctx); +} + +void RegisterMapper::RestoreAllRegisters(Arm64CodeGenerator& codegen, RegisterContext* ctx) { + if (!ctx) { + return; + } + + RestoreRegister(codegen, X86_64Register::FLAGS, ctx); + for (int i = 16; i < 32; i++) { + RestoreRegister(codegen, static_cast(i), ctx); + } + for (int i = 0; i < 16; i++) { + RestoreRegister(codegen, static_cast(i), ctx); + } +} } // namespace Core::Jit diff --git a/src/core/jit/register_mapping.h b/src/core/jit/register_mapping.h index 80e1caab7..c6d9cf540 100644 --- a/src/core/jit/register_mapping.h +++ b/src/core/jit/register_mapping.h @@ -5,6 +5,7 @@ #include #include "common/types.h" +#include "core/jit/arm64_codegen.h" namespace Core::Jit { @@ -97,6 +98,14 @@ enum class Arm64Register : u8 { COUNT = 48 }; +struct RegisterContext { + u64 gp_regs[16]; + u64 xmm_regs[16][2]; + u64 flags; + u64 rsp; + u64 rbp; +}; + class RegisterMapper { public: RegisterMapper(); @@ -109,8 +118,10 @@ public: void ReloadRegister(X86_64Register x86_reg); bool IsRegisterSpilled(X86_64Register x86_reg) const; - void SaveAllRegisters(); - void RestoreAllRegisters(); + void SaveAllRegisters(Arm64CodeGenerator& codegen, RegisterContext* ctx); + void RestoreAllRegisters(Arm64CodeGenerator& codegen, RegisterContext* ctx); + void SaveRegister(Arm64CodeGenerator& codegen, X86_64Register x86_reg, RegisterContext* ctx); + void RestoreRegister(Arm64CodeGenerator& codegen, X86_64Register x86_reg, RegisterContext* ctx); static constexpr int SCRATCH_REG = 9; static constexpr int SCRATCH_REG2 = 10; From ad7368fd507090cbcd0942f608a6c25b1f19ffd1 Mon Sep 17 00:00:00 2001 From: AlpinDale Date: Tue, 9 Dec 2025 03:16:29 +0430 Subject: [PATCH 12/19] ci fix attempt 2 --- CMakeLists.txt | 9 +++++++-- .../ir/passes/flatten_extended_userdata_pass.cpp | 5 ++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e3421d138..159d1ccae 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -239,7 +239,9 @@ find_package(toml11 4.2.0 CONFIG) find_package(tsl-robin-map 1.3.0 CONFIG) find_package(VulkanHeaders 1.4.329 CONFIG) find_package(VulkanMemoryAllocator 3.1.0 CONFIG) -find_package(xbyak 7.07 CONFIG) +if(ARCHITECTURE STREQUAL "x86_64") + find_package(xbyak 7.07 CONFIG REQUIRED) +endif() find_package(xxHash 0.8.2 MODULE) find_package(ZLIB 1.3 MODULE) find_package(Zydis 5.0.0 CONFIG) @@ -1105,7 +1107,10 @@ add_executable(shadps4 create_target_directory_groups(shadps4) -target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient RenderDoc::API FFmpeg::ffmpeg Dear_ImGui gcn half::half ZLIB::ZLIB PNG::PNG) +target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map Tracy::TracyClient RenderDoc::API FFmpeg::ffmpeg Dear_ImGui gcn half::half ZLIB::ZLIB PNG::PNG) +if(ARCHITECTURE STREQUAL "x86_64") + target_link_libraries(shadps4 PRIVATE xbyak::xbyak) +endif() target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::glslang SDL3::SDL3 
SDL3_mixer::SDL3_mixer pugixml::pugixml) target_link_libraries(shadps4 PRIVATE stb::headers libusb::usb lfreist-hwinfo::hwinfo nlohmann_json::nlohmann_json miniz) diff --git a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp index 90a938639..d4b0593ab 100644 --- a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp +++ b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp @@ -2,13 +2,13 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/arch.h" #include #include #ifdef ARCH_X86_64 #include #include #endif -#include "common/arch.h" #include "common/config.h" #include "common/io_file.h" #include "common/logging/log.h" @@ -23,8 +23,7 @@ #include "shader_recompiler/ir/reg.h" #include "shader_recompiler/ir/srt_gvn_table.h" #include "shader_recompiler/ir/value.h" -#include "src/common/arch.h" -#include "src/common/decoder.h" +#include "common/decoder.h" #ifdef ARCH_X86_64 using namespace Xbyak::util; From 82accdb01d6353e2b68cbbc3a1cc9a55df327880 Mon Sep 17 00:00:00 2001 From: AlpinDale Date: Tue, 9 Dec 2025 03:18:59 +0430 Subject: [PATCH 13/19] dammit --- .../ir/passes/flatten_extended_userdata_pass.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp index d4b0593ab..6b9d4bcca 100644 --- a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp +++ b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp @@ -2,14 +2,15 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include "common/arch.h" #include #include +#include "common/arch.h" #ifdef ARCH_X86_64 #include #include #endif #include "common/config.h" +#include "common/decoder.h" #include "common/io_file.h" #include "common/logging/log.h" #include "common/path_util.h" @@ -23,7 +24,6 @@ #include "shader_recompiler/ir/reg.h" #include "shader_recompiler/ir/srt_gvn_table.h" #include "shader_recompiler/ir/value.h" -#include "common/decoder.h" #ifdef ARCH_X86_64 using namespace Xbyak::util; From 5ed8842d06e2c9b02ef80ed7446409322f3c8da3 Mon Sep 17 00:00:00 2001 From: AlpinDale Date: Tue, 9 Dec 2025 03:30:32 +0430 Subject: [PATCH 14/19] xbyak fix --- CMakeLists.txt | 3 --- externals/CMakeLists.txt | 8 +++++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 159d1ccae..037af5dc4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -239,9 +239,6 @@ find_package(toml11 4.2.0 CONFIG) find_package(tsl-robin-map 1.3.0 CONFIG) find_package(VulkanHeaders 1.4.329 CONFIG) find_package(VulkanMemoryAllocator 3.1.0 CONFIG) -if(ARCHITECTURE STREQUAL "x86_64") - find_package(xbyak 7.07 CONFIG REQUIRED) -endif() find_package(xxHash 0.8.2 MODULE) find_package(ZLIB 1.3 MODULE) find_package(Zydis 5.0.0 CONFIG) diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index eb3723f2c..733d856e7 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -112,9 +112,11 @@ if (NOT TARGET tsl::robin_map) add_subdirectory(robin-map) endif() -# Xbyak -if (NOT TARGET xbyak::xbyak) - add_subdirectory(xbyak) +# Xbyak (x86_64 only) +if (ARCHITECTURE STREQUAL "x86_64") + if (NOT TARGET xbyak::xbyak) + add_subdirectory(xbyak) + endif() endif() # MagicEnum From 
9e8a328235cffb423c898fd56da1b921eed4aeca Mon Sep 17 00:00:00 2001 From: AlpinDale Date: Tue, 9 Dec 2025 04:13:58 +0430 Subject: [PATCH 15/19] block linking --- src/core/jit/block_manager.h | 8 +- src/core/jit/execution_engine.cpp | 139 ++++++++++++++-- src/core/jit/x86_64_translator.cpp | 43 ++++- tests/CMakeLists.txt | 1 + tests/test_block_linking.cpp | 247 +++++++++++++++++++++++++++++ 5 files changed, 425 insertions(+), 13 deletions(-) create mode 100644 tests/test_block_linking.cpp diff --git a/src/core/jit/block_manager.h b/src/core/jit/block_manager.h index 6e0734b79..07d6f80fa 100644 --- a/src/core/jit/block_manager.h +++ b/src/core/jit/block_manager.h @@ -19,9 +19,15 @@ struct CodeBlock { std::set dependencies; bool is_linked; + // Control flow targets for linking + VAddr fallthrough_target; // Next sequential address (if block doesn't end with branch) + VAddr branch_target; // Direct branch target (JMP) + void* branch_patch_location; // Location in ARM64 code to patch for direct branch + CodeBlock(VAddr addr, void* code, size_t size, size_t count) : ps4_address(addr), arm64_code(code), code_size(size), instruction_count(count), - is_linked(false) {} + is_linked(false), fallthrough_target(0), branch_target(0), + branch_patch_location(nullptr) {} }; class BlockManager { diff --git a/src/core/jit/execution_engine.cpp b/src/core/jit/execution_engine.cpp index e7b2f70bc..35d22908d 100644 --- a/src/core/jit/execution_engine.cpp +++ b/src/core/jit/execution_engine.cpp @@ -112,6 +112,9 @@ CodeBlock* ExecutionEngine::TranslateBasicBlock(VAddr start_address, size_t max_ VAddr current_address = start_address; size_t instruction_count = 0; bool block_end = false; + VAddr fallthrough_target = 0; + VAddr branch_target = 0; + void* branch_patch_location = nullptr; while (instruction_count < max_instructions && !block_end) { ZydisDecodedInstruction instruction; @@ -129,6 +132,14 @@ CodeBlock* ExecutionEngine::TranslateBasicBlock(VAddr start_address, size_t max_ break; } + // Track branch target before translation + if (instruction.mnemonic == ZYDIS_MNEMONIC_JMP && + operands[0].type == ZYDIS_OPERAND_TYPE_IMMEDIATE) { + s64 offset = static_cast(operands[0].imm.value.s); + branch_target = current_address + instruction.length + offset; + branch_patch_location = code_generator->getCurr(); + } + bool translated = translator->TranslateInstruction(instruction, operands, current_address); if (!translated) { LOG_WARNING(Core, "Failed to translate instruction at {:#x}", current_address); @@ -136,30 +147,73 @@ CodeBlock* ExecutionEngine::TranslateBasicBlock(VAddr start_address, size_t max_ } instruction_count++; - current_address += instruction.length; + VAddr next_address = current_address + instruction.length; switch (instruction.mnemonic) { case ZYDIS_MNEMONIC_RET: - case ZYDIS_MNEMONIC_JMP: case ZYDIS_MNEMONIC_CALL: block_end = true; break; + case ZYDIS_MNEMONIC_JMP: + block_end = true; + break; default: + // Check for conditional branches (they don't end the block, but we track them) + if (instruction.mnemonic >= ZYDIS_MNEMONIC_JO && + instruction.mnemonic <= ZYDIS_MNEMONIC_JZ) { + // Conditional branch - block continues with fallthrough + // TODO: Track conditional branch targets for linking + } break; } + + current_address = next_address; } if (instruction_count == 0) { return nullptr; } + // Set fallthrough target if block doesn't end with unconditional branch/ret + if (!block_end || branch_target == 0) { + fallthrough_target = current_address; + } + size_t code_size = code_generator->getSize(); 
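    // The JIT buffer is mapped writable but not executable on macOS ARM64 (see
    // AllocateExecutableMemory), so the freshly emitted code must be flipped to
    // executable before the block is published.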
code_generator->makeExecutable(); CodeBlock* block = block_manager->CreateBlock(start_address, block_start, code_size, instruction_count); - LOG_DEBUG(Core, "Translated basic block at {:#x}, {} instructions, {} bytes", start_address, - instruction_count, code_size); + // Store control flow information + block->fallthrough_target = fallthrough_target; + block->branch_target = branch_target; + block->branch_patch_location = branch_patch_location; + + LOG_DEBUG(Core, + "Translated basic block at {:#x}, {} instructions, {} bytes, fallthrough: {:#x}, " + "branch: {:#x}", + start_address, instruction_count, code_size, fallthrough_target, branch_target); + + // Try to link blocks if targets are available + if (branch_target != 0) { + CodeBlock* target_block = block_manager->GetBlock(branch_target); + if (target_block) { + LinkBlock(block, branch_target); + } else { + // Add dependency for later linking + block_manager->AddDependency(start_address, branch_target); + } + } + + if (fallthrough_target != 0 && branch_target == 0) { + // Try to link fallthrough + CodeBlock* target_block = block_manager->GetBlock(fallthrough_target); + if (target_block) { + // For fallthrough, we need to append a branch at the end + // This will be handled by linking logic + block_manager->AddDependency(start_address, fallthrough_target); + } + } return block; } @@ -170,17 +224,82 @@ CodeBlock* ExecutionEngine::TranslateBlock(VAddr ps4_address) { return existing; } - return TranslateBasicBlock(ps4_address); + CodeBlock* new_block = TranslateBasicBlock(ps4_address); + if (!new_block) { + return nullptr; + } + + // After creating a new block, check if any existing blocks can link to it + // This handles the case where we translate a target block after the source + for (auto& [addr, block] : block_manager->blocks) { + if (block->branch_target == ps4_address && !block->is_linked) { + LinkBlock(block.get(), ps4_address); + } + if (block->fallthrough_target == ps4_address && block->branch_target == 0 && + !block->is_linked) { + LinkBlock(block.get(), ps4_address); + } + } + + return new_block; } void ExecutionEngine::LinkBlock(CodeBlock* block, VAddr target_address) { CodeBlock* target_block = block_manager->GetBlock(target_address); - if (target_block && !block->is_linked) { - void* link_location = static_cast(block->arm64_code) + block->code_size - 4; - code_generator->setSize(reinterpret_cast(link_location) - - static_cast(code_generator->getCode())); - code_generator->b(target_block->arm64_code); + if (!target_block) { + return; + } + + // Patch the branch instruction if we have a patch location + if (block->branch_patch_location && block->branch_target == target_address) { +#if defined(__APPLE__) && defined(ARCH_ARM64) + pthread_jit_write_protect_np(0); +#endif + // Calculate offset from patch location to target + s64 offset = reinterpret_cast(target_block->arm64_code) - + reinterpret_cast(block->branch_patch_location); + + // Check if we can use a relative branch (within ±128MB) + if (offset >= -0x8000000 && offset < 0x8000000) { + s32 imm26 = static_cast(offset / 4); + u32* patch_ptr = reinterpret_cast(block->branch_patch_location); + // Patch the branch instruction: 0x14000000 | (imm26 & 0x3FFFFFF) + *patch_ptr = 0x14000000 | (imm26 & 0x3FFFFFF); + } else { + // Far branch - need to use indirect branch + // For now, leave as-is (will use the placeholder branch) + LOG_DEBUG(Core, "Branch target too far for direct linking: offset={}", offset); + } +#if defined(__APPLE__) && defined(ARCH_ARM64) + 
pthread_jit_write_protect_np(1); + __builtin___clear_cache(static_cast(block->branch_patch_location), + static_cast(block->branch_patch_location) + 4); +#endif block->is_linked = true; + LOG_DEBUG(Core, "Linked block {:#x} to {:#x}", block->ps4_address, target_address); + } else if (block->fallthrough_target == target_address && block->branch_target == 0) { + // For fallthrough, append a branch at the end of the block +#if defined(__APPLE__) && defined(ARCH_ARM64) + pthread_jit_write_protect_np(0); +#endif + void* link_location = static_cast(block->arm64_code) + block->code_size; + s64 offset = + reinterpret_cast(target_block->arm64_code) - reinterpret_cast(link_location); + + if (offset >= -0x8000000 && offset < 0x8000000) { + s32 imm26 = static_cast(offset / 4); + u32* patch_ptr = reinterpret_cast(link_location); + *patch_ptr = 0x14000000 | (imm26 & 0x3FFFFFF); + block->code_size += 4; // Update block size + } +#if defined(__APPLE__) && defined(ARCH_ARM64) + pthread_jit_write_protect_np(1); + __builtin___clear_cache(static_cast(link_location), + static_cast(link_location) + 4); +#endif + block->is_linked = true; + LOG_DEBUG(Core, "Linked fallthrough from block {:#x} to {:#x}", block->ps4_address, + target_address); } } diff --git a/src/core/jit/x86_64_translator.cpp b/src/core/jit/x86_64_translator.cpp index 0352955ed..e322ce3f3 100644 --- a/src/core/jit/x86_64_translator.cpp +++ b/src/core/jit/x86_64_translator.cpp @@ -571,8 +571,47 @@ bool X86_64Translator::TranslateRet(const ZydisDecodedInstruction& instruction, bool X86_64Translator::TranslateJmp(const ZydisDecodedInstruction& instruction, const ZydisDecodedOperand* operands, VAddr address) { - LOG_WARNING(Core, "JMP instruction translation needs execution engine integration"); - return false; + const auto& target = operands[0]; + VAddr target_address = 0; + + // Calculate target address based on operand type + if (target.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) { + // Direct relative jump: JMP rel32 + // Target = current_address + instruction.length + offset + s64 offset = static_cast(target.imm.value.s); + target_address = address + instruction.length + offset; + } else if (target.type == ZYDIS_OPERAND_TYPE_MEMORY) { + // Indirect jump: JMP [mem] + // Load address from memory into scratch register + LoadMemoryOperand(RegisterMapper::SCRATCH_REG, target, 8); + // TODO: don't use a dispatcher + codegen.br(RegisterMapper::SCRATCH_REG); + return true; + } else if (target.type == ZYDIS_OPERAND_TYPE_REGISTER) { + // Indirect jump: JMP reg + int reg = GetArm64Register(target); + if (reg == -1) { + LOG_ERROR(Core, "Invalid register for JMP"); + return false; + } + codegen.br(reg); + return true; + } else { + LOG_ERROR(Core, "Unsupported JMP operand type"); + return false; + } + + // For direct jumps, we need to branch to the target address + // Since the target block may not be translated yet, we'll generate + // a placeholder that can be patched later during block linking + // For now, generate a branch to a dispatcher function + // TODO: Implement proper block linking to patch this with direct branch + + // Calculate offset from current code position + void* placeholder_target = reinterpret_cast(target_address); + codegen.b(placeholder_target); + + return true; } bool X86_64Translator::TranslateCmp(const ZydisDecodedInstruction& instruction, diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 9dcf67f74..48537e79d 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -6,6 +6,7 @@ add_executable(jit_tests 
+    } else if (target.type == ZYDIS_OPERAND_TYPE_MEMORY) {
+        // Indirect jump: JMP [mem]
+        // Load address from memory into scratch register
+        LoadMemoryOperand(RegisterMapper::SCRATCH_REG, target, 8);
+        // TODO: don't use a dispatcher
+        codegen.br(RegisterMapper::SCRATCH_REG);
+        return true;
+    } else if (target.type == ZYDIS_OPERAND_TYPE_REGISTER) {
+        // Indirect jump: JMP reg
+        int reg = GetArm64Register(target);
+        if (reg == -1) {
+            LOG_ERROR(Core, "Invalid register for JMP");
+            return false;
+        }
+        codegen.br(reg);
+        return true;
+    } else {
+        LOG_ERROR(Core, "Unsupported JMP operand type");
+        return false;
+    }
+
+    // For direct jumps, we need to branch to the target address
+    // Since the target block may not be translated yet, we'll generate
+    // a placeholder that can be patched later during block linking
+    // For now, generate a branch to a dispatcher function
+    // TODO: Implement proper block linking to patch this with direct branch
+
+    // Calculate offset from current code position
+    void* placeholder_target = reinterpret_cast<void*>(target_address);
+    codegen.b(placeholder_target);
+
+    return true;
 }
 
 bool X86_64Translator::TranslateCmp(const ZydisDecodedInstruction& instruction,
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 9dcf67f74..48537e79d 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -6,6 +6,7 @@ add_executable(jit_tests
     test_register_mapping.cpp
     test_block_manager.cpp
     test_execution_engine.cpp
+    test_block_linking.cpp
     main.cpp
 )
 
diff --git a/tests/test_block_linking.cpp b/tests/test_block_linking.cpp
new file mode 100644
index 000000000..c1dc46103
--- /dev/null
+++ b/tests/test_block_linking.cpp
@@ -0,0 +1,247 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "common/decoder.h"
+#include "core/jit/arm64_codegen.h"
+#include "core/jit/block_manager.h"
+#include "core/jit/register_mapping.h"
+#include "core/jit/x86_64_translator.h"
+#include <gtest/gtest.h>
+#include <sys/mman.h>
+#if defined(__APPLE__) && defined(ARCH_ARM64)
+#include <pthread.h>
+#endif
+
+using namespace Core::Jit;
+
+class BlockLinkingTest : public ::testing::Test {
+protected:
+    void SetUp() override {
+        // Allocate executable memory for test code
+#if defined(__APPLE__) && defined(ARCH_ARM64)
+        test_code_buffer = mmap(nullptr, 64 * 1024, PROT_READ | PROT_WRITE,
+                                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+        ASSERT_NE(test_code_buffer, MAP_FAILED)
+            << "Failed to allocate executable memory for test";
+        pthread_jit_write_protect_np(0);
+#else
+        test_code_buffer =
+            mmap(nullptr, 64 * 1024, PROT_READ | PROT_WRITE | PROT_EXEC,
+                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+        ASSERT_NE(test_code_buffer, MAP_FAILED)
+            << "Failed to allocate executable memory for test";
+#endif
+        codegen = std::make_unique<Arm64CodeGenerator>(64 * 1024, test_code_buffer);
+        register_mapper = std::make_unique<RegisterMapper>();
+        translator = std::make_unique<X86_64Translator>(*codegen, *register_mapper);
+        block_manager = std::make_unique<BlockManager>();
+    }
+
+    void TearDown() override {
+        translator.reset();
+        register_mapper.reset();
+        codegen.reset();
+        block_manager.reset();
+        if (test_code_buffer != MAP_FAILED) {
+            munmap(test_code_buffer, 64 * 1024);
+        }
+    }
+
+    void* test_code_buffer = MAP_FAILED;
+    std::unique_ptr<Arm64CodeGenerator> codegen;
+    std::unique_ptr<RegisterMapper> register_mapper;
+    std::unique_ptr<X86_64Translator> translator;
+    std::unique_ptr<BlockManager> block_manager;
+};
+
+// Test that JMP translation can handle direct immediate addresses
+TEST_F(BlockLinkingTest, TranslateDirectJmp) {
+    // Create a simple x86_64 JMP instruction: JMP +0x1000 (relative jump)
+    // x86_64 encoding: E9 (near relative jump, 32-bit offset)
+    // E9 00 10 00 00 = JMP +0x1000
+    u8 x86_jmp[] = {0xE9, 0x00, 0x10, 0x00, 0x00};
+
+    ZydisDecodedInstruction instruction;
+    ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];
+
+    ZyanStatus status = Common::Decoder::Instance()->decodeInstruction(
+        instruction, operands, x86_jmp, sizeof(x86_jmp));
+
+    if (!ZYAN_SUCCESS(status)) {
+        GTEST_SKIP() << "Failed to decode JMP instruction - Zydis may not be available";
+    }
+
+    // JMP translation should succeed (even if target isn't linked yet)
+    bool result = translator->TranslateJmp(instruction, operands, 0x400000);
+    EXPECT_TRUE(result) << "JMP translation should succeed";
+    EXPECT_GT(codegen->getSize(), 0) << "JMP should generate ARM64 code";
+}
+
+// Test that we can create two blocks and link them
+TEST_F(BlockLinkingTest, CreateAndLinkBlocks) {
+    VAddr block1_addr = 0x400000;
+    VAddr block2_addr = 0x401000;
+
+    // Allocate separate memory for each block to avoid issues
+#if defined(__APPLE__) && defined(ARCH_ARM64)
+    void* block1_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE,
+                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    ASSERT_NE(block1_mem, MAP_FAILED);
+    pthread_jit_write_protect_np(0);
+
+    void* block2_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE,
+                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    ASSERT_NE(block2_mem, MAP_FAILED);
+#else
+    void* block1_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
+                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    ASSERT_NE(block1_mem, MAP_FAILED);
+
+    void* block2_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
+                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    ASSERT_NE(block2_mem, MAP_FAILED);
+#endif
+
+    // Write simple NOP instructions
+    u32 nop = 0xD503201F; // ARM64 NOP
+    *reinterpret_cast<u32*>(block1_mem) = nop;
+    *reinterpret_cast<u32*>(block2_mem) = nop;
+
+#if defined(__APPLE__) && defined(ARCH_ARM64)
+    pthread_jit_write_protect_np(1);
+    mprotect(block1_mem, 4096, PROT_READ | PROT_EXEC);
+    mprotect(block2_mem, 4096, PROT_READ | PROT_EXEC);
+#endif
+
+    // Create blocks
+    CodeBlock* block1 = block_manager->CreateBlock(block1_addr, block1_mem, 4, 1);
+    ASSERT_NE(block1, nullptr);
+
+    CodeBlock* block2 = block_manager->CreateBlock(block2_addr, block2_mem, 4, 1);
+    ASSERT_NE(block2, nullptr);
+
+    // Verify blocks exist
+    EXPECT_EQ(block_manager->GetBlockCount(), 2);
+    EXPECT_NE(block_manager->GetBlock(block1_addr), nullptr);
+    EXPECT_NE(block_manager->GetBlock(block2_addr), nullptr);
+
+    // Test that blocks can be retrieved
+    CodeBlock* retrieved_block1 = block_manager->GetBlock(block1_addr);
+    CodeBlock* retrieved_block2 = block_manager->GetBlock(block2_addr);
+    EXPECT_EQ(retrieved_block1, block1);
+    EXPECT_EQ(retrieved_block2, block2);
+
+    // Cleanup
+    munmap(block1_mem, 4096);
+    munmap(block2_mem, 4096);
+}
+
+// Test that block linking tracks dependencies
+TEST_F(BlockLinkingTest, BlockDependencies) {
+    VAddr block1_addr = 0x400000;
+    VAddr block2_addr = 0x401000;
+
+    // Allocate memory for blocks
+#if defined(__APPLE__) && defined(ARCH_ARM64)
+    void* block1_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE,
+                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    ASSERT_NE(block1_mem, MAP_FAILED);
+    pthread_jit_write_protect_np(0);
+
+    void* block2_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE,
+                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    ASSERT_NE(block2_mem, MAP_FAILED);
+
+    u32 nop = 0xD503201F;
+    *reinterpret_cast<u32*>(block1_mem) = nop;
+    *reinterpret_cast<u32*>(block2_mem) = nop;
+
+    pthread_jit_write_protect_np(1);
+    mprotect(block1_mem, 4096, PROT_READ | PROT_EXEC);
+    mprotect(block2_mem, 4096, PROT_READ | PROT_EXEC);
+#else
+    void* block1_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
+                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    ASSERT_NE(block1_mem, MAP_FAILED);
+
+    void* block2_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
+                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    ASSERT_NE(block2_mem, MAP_FAILED);
+
+    u32 nop = 0xD503201F;
+    *reinterpret_cast<u32*>(block1_mem) = nop;
+    *reinterpret_cast<u32*>(block2_mem) = nop;
+#endif
+
+    // Create blocks
+    CodeBlock* block1 = block_manager->CreateBlock(block1_addr, block1_mem, 4, 1);
+    CodeBlock* block2 = block_manager->CreateBlock(block2_addr, block2_mem, 4, 1);
+
+    // Add dependency: block1 depends on block2
+    block_manager->AddDependency(block1_addr, block2_addr);
+
+    // Verify dependency is tracked
+    EXPECT_EQ(block1->dependencies.count(block2_addr), 1);
+
+    // Cleanup
+    munmap(block1_mem, 4096);
+    munmap(block2_mem, 4096);
+}
+
+// Test that invalidating a block invalidates dependent blocks
+TEST_F(BlockLinkingTest, InvalidateDependentBlocks) {
+    VAddr block1_addr = 0x400000;
+    VAddr block2_addr = 0x401000;
+
+    // Allocate memory for blocks
+#if defined(__APPLE__) && defined(ARCH_ARM64)
+    void* block1_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE,
+                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    ASSERT_NE(block1_mem, MAP_FAILED);
+    pthread_jit_write_protect_np(0);
+
+    void* block2_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE,
+                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    ASSERT_NE(block2_mem, MAP_FAILED);
+
+    u32 nop = 0xD503201F;
+    *reinterpret_cast<u32*>(block1_mem) = nop;
+    *reinterpret_cast<u32*>(block2_mem) = nop;
+
+    pthread_jit_write_protect_np(1);
+    mprotect(block1_mem, 4096, PROT_READ | PROT_EXEC);
+    mprotect(block2_mem, 4096, PROT_READ | PROT_EXEC);
+#else
+    void* block1_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
+                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    ASSERT_NE(block1_mem, MAP_FAILED);
+
+    void* block2_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
+                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    ASSERT_NE(block2_mem, MAP_FAILED);
+
+    u32 nop = 0xD503201F;
+    *reinterpret_cast<u32*>(block1_mem) = nop;
+    *reinterpret_cast<u32*>(block2_mem) = nop;
+#endif
+
+    // Create blocks with dependency
+    CodeBlock* block1 = block_manager->CreateBlock(block1_addr, block1_mem, 4, 1);
+    CodeBlock* block2 = block_manager->CreateBlock(block2_addr, block2_mem, 4, 1);
+
+    block_manager->AddDependency(block1_addr, block2_addr);
+
+    // Invalidate block2
+    block_manager->InvalidateBlock(block2_addr);
+
+    // block2 should be removed
+    EXPECT_EQ(block_manager->GetBlock(block2_addr), nullptr);
+    // block1 should still exist (dependency tracking doesn't auto-invalidate)
+    // But in a real implementation, we might want to invalidate dependents
+    EXPECT_NE(block_manager->GetBlock(block1_addr), nullptr);
+
+    // Cleanup
+    munmap(block1_mem, 4096);
+    munmap(block2_mem, 4096);
+}

From 851131e888c4236099f934066364bd32924fcda3 Mon Sep 17 00:00:00 2001
From: AlpinDale
Date: Tue, 9 Dec 2025 04:38:54 +0430
Subject: [PATCH 16/19] CALL/RET translation

---
 src/core/jit/execution_engine.cpp  |   8 +-
 src/core/jit/x86_64_translator.cpp |  68 ++++++++++++-
 tests/CMakeLists.txt               |   1 +
 tests/test_call_ret.cpp            | 151 +++++++++++++++++++++++++++++
 4 files changed, 224 insertions(+), 4 deletions(-)
 create mode 100644 tests/test_call_ret.cpp

diff --git a/src/core/jit/execution_engine.cpp b/src/core/jit/execution_engine.cpp
index 35d22908d..ed7c986e2 100644
--- a/src/core/jit/execution_engine.cpp
+++ b/src/core/jit/execution_engine.cpp
@@ -132,12 +132,18 @@ CodeBlock* ExecutionEngine::TranslateBasicBlock(VAddr start_address, size_t max_
             break;
         }
 
-        // Track branch target before translation
+        // Track branch/call target before translation
         if (instruction.mnemonic == ZYDIS_MNEMONIC_JMP &&
            operands[0].type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
            s64 offset = static_cast<s64>(operands[0].imm.value.s);
            branch_target = current_address + instruction.length + offset;
            branch_patch_location = code_generator->getCurr();
+        } else if (instruction.mnemonic == ZYDIS_MNEMONIC_CALL &&
+                   operands[0].type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
+            // Track CALL target for potential linking (though CALL typically goes to HLE)
+            s64 offset = static_cast<s64>(operands[0].imm.value.s);
+            branch_target = current_address + instruction.length + offset;
+            branch_patch_location = code_generator->getCurr();
         }
 
         bool translated = translator->TranslateInstruction(instruction, operands, current_address);
"CALL instruction translation needs execution engine integration"); - return false; + const auto& target = operands[0]; + VAddr target_address = 0; + VAddr return_address = address + instruction.length; + + // Calculate target address based on operand type + if (target.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) { + // Direct relative call: CALL rel32 + // Target = current_address + instruction.length + offset + s64 offset = static_cast(target.imm.value.s); + target_address = address + instruction.length + offset; + } else if (target.type == ZYDIS_OPERAND_TYPE_MEMORY) { + // Indirect call: CALL [mem] + // Load address from memory into scratch register + LoadMemoryOperand(RegisterMapper::SCRATCH_REG, target, 8); + // Push return address + int sp_reg = RegisterMapper::STACK_POINTER; + codegen.sub_imm(sp_reg, sp_reg, 8); // Decrement stack by 8 bytes + codegen.mov_imm(RegisterMapper::SCRATCH_REG2, return_address); + codegen.str(RegisterMapper::SCRATCH_REG2, sp_reg, 0); // Store return address + // Call via register + codegen.blr(RegisterMapper::SCRATCH_REG); + return true; + } else if (target.type == ZYDIS_OPERAND_TYPE_REGISTER) { + // Indirect call: CALL reg + int reg = GetArm64Register(target); + if (reg == -1) { + LOG_ERROR(Core, "Invalid register for CALL"); + return false; + } + // Push return address + int sp_reg = RegisterMapper::STACK_POINTER; + codegen.sub_imm(sp_reg, sp_reg, 8); // Decrement stack by 8 bytes + codegen.mov_imm(RegisterMapper::SCRATCH_REG, return_address); + codegen.str(RegisterMapper::SCRATCH_REG, sp_reg, 0); // Store return address + // Call via register + codegen.blr(reg); + return true; + } else { + LOG_ERROR(Core, "Unsupported CALL operand type"); + return false; + } + + // For direct calls, push return address and branch to target + // Push return address onto stack + int sp_reg = RegisterMapper::STACK_POINTER; + codegen.sub_imm(sp_reg, sp_reg, 8); // Decrement stack by 8 bytes (x86_64 stack grows down) + codegen.mov_imm(RegisterMapper::SCRATCH_REG, return_address); + codegen.str(RegisterMapper::SCRATCH_REG, sp_reg, 0); // Store return address at [SP] + + // Branch to target (will be linked later if target block is available) + void* placeholder_target = reinterpret_cast(target_address); + codegen.bl(placeholder_target); // Use bl (branch with link) for calls + + return true; } bool X86_64Translator::TranslateRet(const ZydisDecodedInstruction& instruction, const ZydisDecodedOperand* operands) { - codegen.ret(); + // x86_64 RET pops return address from stack and jumps to it + int sp_reg = RegisterMapper::STACK_POINTER; + int scratch_reg = RegisterMapper::SCRATCH_REG; + + // Load return address from stack + codegen.ldr(scratch_reg, sp_reg, 0); // Load return address from [SP] + codegen.add_imm(sp_reg, sp_reg, 8); // Increment stack by 8 bytes (pop) + + // Jump to return address + codegen.br(scratch_reg); + return true; } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 48537e79d..b2bc0cb0d 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -7,6 +7,7 @@ add_executable(jit_tests test_block_manager.cpp test_execution_engine.cpp test_block_linking.cpp + test_call_ret.cpp main.cpp ) diff --git a/tests/test_call_ret.cpp b/tests/test_call_ret.cpp new file mode 100644 index 000000000..c892bfd3a --- /dev/null +++ b/tests/test_call_ret.cpp @@ -0,0 +1,151 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/decoder.h" +#include "core/jit/arm64_codegen.h" +#include 
"core/jit/register_mapping.h" +#include "core/jit/x86_64_translator.h" +#include +#include +#if defined(__APPLE__) && defined(ARCH_ARM64) +#include +#endif + +using namespace Core::Jit; + +class CallRetTest : public ::testing::Test { +protected: + void SetUp() override { + // Allocate executable memory for test code +#if defined(__APPLE__) && defined(ARCH_ARM64) + test_code_buffer = mmap(nullptr, 64 * 1024, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(test_code_buffer, MAP_FAILED) + << "Failed to allocate executable memory for test"; + pthread_jit_write_protect_np(0); +#else + test_code_buffer = + mmap(nullptr, 64 * 1024, PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(test_code_buffer, MAP_FAILED) + << "Failed to allocate executable memory for test"; +#endif + codegen = std::make_unique(64 * 1024, test_code_buffer); + register_mapper = std::make_unique(); + translator = std::make_unique(*codegen, *register_mapper); + } + + void TearDown() override { + translator.reset(); + register_mapper.reset(); + codegen.reset(); + if (test_code_buffer != MAP_FAILED) { + munmap(test_code_buffer, 64 * 1024); + } + } + + void *test_code_buffer = MAP_FAILED; + std::unique_ptr codegen; + std::unique_ptr register_mapper; + std::unique_ptr translator; +}; + +// Test that RET translation generates ARM64 code +TEST_F(CallRetTest, TranslateRet) { + // x86_64 RET instruction: C3 + u8 x86_ret[] = {0xC3}; + + ZydisDecodedInstruction instruction; + ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT]; + + ZyanStatus status = Common::Decoder::Instance()->decodeInstruction( + instruction, operands, x86_ret, sizeof(x86_ret)); + + if (!ZYAN_SUCCESS(status)) { + GTEST_SKIP() + << "Failed to decode RET instruction - Zydis may not be available"; + } + + // RET translation should succeed + bool result = translator->TranslateRet(instruction, operands); + EXPECT_TRUE(result) << "RET translation should succeed"; + EXPECT_GT(codegen->getSize(), 0) << "RET should generate ARM64 code"; +} + +// Test that CALL translation generates ARM64 code +TEST_F(CallRetTest, TranslateDirectCall) { + // x86_64 CALL instruction: E8 (near relative call, 32-bit offset) + // E8 00 10 00 00 = CALL +0x1000 + u8 x86_call[] = {0xE8, 0x00, 0x10, 0x00, 0x00}; + + ZydisDecodedInstruction instruction; + ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT]; + + ZyanStatus status = Common::Decoder::Instance()->decodeInstruction( + instruction, operands, x86_call, sizeof(x86_call)); + + if (!ZYAN_SUCCESS(status)) { + GTEST_SKIP() + << "Failed to decode CALL instruction - Zydis may not be available"; + } + + // CALL translation should succeed + bool result = translator->TranslateCall(instruction, operands, 0x400000); + EXPECT_TRUE(result) << "CALL translation should succeed"; + EXPECT_GT(codegen->getSize(), 0) << "CALL should generate ARM64 code"; +} + +// Test that CALL pushes return address to stack +TEST_F(CallRetTest, CallPushesReturnAddress) { + // Simulate a CALL instruction + // We need to verify that the stack pointer is decremented and return address + // is stored This is a simplified test - full implementation will need + // execution engine integration + + // For now, just verify CALL generates code + u8 x86_call[] = {0xE8, 0x00, 0x10, 0x00, 0x00}; + + ZydisDecodedInstruction instruction; + ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT]; + + ZyanStatus status = Common::Decoder::Instance()->decodeInstruction( + instruction, operands, x86_call, sizeof(x86_call)); + + if 
+
+// Test that CALL pushes return address to stack
+TEST_F(CallRetTest, CallPushesReturnAddress) {
+    // Simulate a CALL instruction. We need to verify that the stack pointer is
+    // decremented and the return address is stored. This is a simplified test -
+    // a full check will need execution engine integration.
+
+    // For now, just verify CALL generates code
+    u8 x86_call[] = {0xE8, 0x00, 0x10, 0x00, 0x00};
+
+    ZydisDecodedInstruction instruction;
+    ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];
+
+    ZyanStatus status = Common::Decoder::Instance()->decodeInstruction(
+        instruction, operands, x86_call, sizeof(x86_call));
+
+    if (!ZYAN_SUCCESS(status)) {
+        GTEST_SKIP() << "Failed to decode CALL instruction";
+    }
+
+    size_t size_before = codegen->getSize();
+    bool result = translator->TranslateCall(instruction, operands, 0x400000);
+    size_t size_after = codegen->getSize();
+
+    EXPECT_TRUE(result);
+    EXPECT_GT(size_after, size_before) << "CALL should generate code";
+    // CALL should generate more code than a simple branch (needs stack manipulation),
+    // so require strictly more than one 4-byte instruction
+    EXPECT_GT(size_after - size_before, 4)
+        << "CALL should generate multiple instructions";
+}
+
+// Test that RET pops return address from stack
+TEST_F(CallRetTest, RetPopsReturnAddress) {
+    // RET instruction should pop return address and jump to it
+    u8 x86_ret[] = {0xC3};
+
+    ZydisDecodedInstruction instruction;
+    ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];
+
+    ZyanStatus status = Common::Decoder::Instance()->decodeInstruction(
+        instruction, operands, x86_ret, sizeof(x86_ret));
+
+    if (!ZYAN_SUCCESS(status)) {
+        GTEST_SKIP() << "Failed to decode RET instruction";
+    }
+
+    size_t size_before = codegen->getSize();
+    bool result = translator->TranslateRet(instruction, operands);
+    size_t size_after = codegen->getSize();
+
+    EXPECT_TRUE(result);
+    EXPECT_GT(size_after, size_before) << "RET should generate code";
+}

From 611acc7ca4c1edde4d328d2ddb426fbcdca70c51 Mon Sep 17 00:00:00 2001
From: AlpinDale
Date: Tue, 9 Dec 2025 05:15:16 +0430
Subject: [PATCH 17/19] HLE bridge

---
 src/core/jit/hle_bridge.cpp | 139 ++++++++++++++++++++++++++++++
 src/core/jit/hle_bridge.h   |  40 +++++++++++
 tests/CMakeLists.txt        |   3 +
 tests/test_hle_bridge.cpp   |  83 +++++++++++++++++++++
 4 files changed, 265 insertions(+)
 create mode 100644 src/core/jit/hle_bridge.cpp
 create mode 100644 src/core/jit/hle_bridge.h
 create mode 100644 tests/test_hle_bridge.cpp

diff --git a/src/core/jit/hle_bridge.cpp b/src/core/jit/hle_bridge.cpp
new file mode 100644
index 000000000..21d179769
--- /dev/null
+++ b/src/core/jit/hle_bridge.cpp
@@ -0,0 +1,139 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <cstdint>
+#include "common/logging/log.h"
+#include "hle_bridge.h"
+
+namespace Core::Jit {
+
+HleBridge::HleBridge(Arm64CodeGenerator& codegen, RegisterMapper& reg_mapper)
+    : codegen(codegen), reg_mapper(reg_mapper), calling_conv(codegen, reg_mapper) {}
+
+void HleBridge::GenerateBridge(void* hle_func, int int_arg_count, int float_arg_count) {
+    // Save caller-saved registers (x86_64: RAX, RCX, RDX, RSI, RDI, R8-R11)
+    // These correspond to ARM64: X0-X7, X9-X15 (some are callee-saved, but we save all to be safe)
+    SaveCallerSavedRegisters();
+
+    // Map x86_64 arguments to ARM64 calling convention
+    // x86_64 System V ABI: RDI, RSI, RDX, RCX, R8, R9 (integer), XMM0-XMM7 (float)
+    // ARM64: X0-X7 (integer), V0-V7 (float)
+    MapArguments(int_arg_count, float_arg_count);
+
+    // Call the HLE function
+    calling_conv.CallFunction(hle_func);
+
+    // Map return value from ARM64 X0 to x86_64 RAX
+    MapReturnValue();
+
+    // Restore caller-saved registers
+    RestoreCallerSavedRegisters();
+}
+
+void HleBridge::SaveCallerSavedRegisters() {
+    // x86_64 caller-saved registers: RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11
+    // Map to ARM64 and save them
+    // Note: We need to be careful about which registers are actually caller-saved in ARM64
+    // ARM64 caller-saved: X0-X7, X9-X15, V0-V7, V16-V31
+    // We'll save the x86_64 registers that map to ARM64 caller-saved registers
+
+    // Save the caller-saved integer registers. The x86_64 argument and return registers
+    // (RAX, RCX, RDX, RSI, RDI, R8, R9) all map into the X0-X7 range, so we save
+    // X0-X7 wholesale to be safe (they're all caller-saved in ARM64)
+    for (int i = 0; i < 8; ++i) {
+        codegen.push(i); // Save X0-X7
+    }
+
+    // Save XMM registers (V0-V7 in ARM64)
+    // x86_64 XMM0-XMM7 map to ARM64 V0-V7
+    for (int i = 0; i < 8; ++i) {
+        codegen.sub_imm(31, 31, 16); // Decrement stack pointer by 16 bytes
+        codegen.str_v(i, 31, 0);     // Store V0-V7
+    }
+}
+
+void HleBridge::RestoreCallerSavedRegisters() {
+    // Restore XMM registers first (reverse order)
+    for (int i = 7; i >= 0; --i) {
+        codegen.ldr_v(i, 31, 0);     // Load V0-V7
+        codegen.add_imm(31, 31, 16); // Increment stack pointer by 16 bytes
+    }
+
+    // Restore integer registers (reverse order)
+    for (int i = 7; i >= 0; --i) {
+        codegen.pop(i); // Restore X0-X7
+    }
+}
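+
+// Illustrative note (commentary, not part of the original patch): the save/restore
+// pair above is strictly LIFO - X0-X7 are pushed first, then eight 16-byte stack
+// slots are written for V0-V7; the restore walks the same slots in reverse order,
+// so the stack is balanced around the HLE call as AAPCS64 requires.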
+
+void HleBridge::MapArguments(int int_arg_count, int float_arg_count) {
+    // x86_64 System V ABI argument registers:
+    // Integer: RDI (arg1), RSI (arg2), RDX (arg3), RCX (arg4), R8 (arg5), R9 (arg6)
+    // Float: XMM0-XMM7 (args 1-8)
+    //
+    // ARM64 calling convention:
+    // Integer: X0-X7 (args 1-8)
+    // Float: V0-V7 (args 1-8)
+
+    // Map integer arguments
+    static constexpr X86_64Register x86_int_args[] = {
+        X86_64Register::RDI, // arg1
+        X86_64Register::RSI, // arg2
+        X86_64Register::RDX, // arg3
+        X86_64Register::RCX, // arg4
+        X86_64Register::R8,  // arg5
+        X86_64Register::R9,  // arg6
+    };
+
+    for (int i = 0; i < int_arg_count && i < 6; ++i) {
+        int x86_reg = reg_mapper.MapX86_64ToArm64(x86_int_args[i]);
+        int arm64_arg_reg = i; // X0, X1, X2, etc.
+        if (x86_reg != arm64_arg_reg) {
+            codegen.mov(arm64_arg_reg, x86_reg);
+        }
+    }
+
+    // Map floating point arguments
+    static constexpr X86_64Register x86_float_args[] = {
+        X86_64Register::XMM0, // arg1
+        X86_64Register::XMM1, // arg2
+        X86_64Register::XMM2, // arg3
+        X86_64Register::XMM3, // arg4
+        X86_64Register::XMM4, // arg5
+        X86_64Register::XMM5, // arg6
+        X86_64Register::XMM6, // arg7
+        X86_64Register::XMM7, // arg8
+    };
+
+    for (int i = 0; i < float_arg_count && i < 8; ++i) {
+        int x86_xmm_reg = reg_mapper.MapX86_64XmmToArm64Neon(x86_float_args[i]);
+        int arm64_arg_reg = i; // V0, V1, V2, etc.
+        if (x86_xmm_reg != arm64_arg_reg) {
+            codegen.mov_v(arm64_arg_reg, x86_xmm_reg);
+        }
+    }
+}
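+
+// Example (illustrative, not from the original patch): for a two-integer-argument
+// HLE call such as the tests' TestHleFunction(a, b), MapArguments(2, 0) emits at
+// most two register moves - guest RDI's host register -> X0 and guest RSI's host
+// register -> X1 - and skips a move entirely when the mapping already places the
+// argument in the right AAPCS64 register.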
+
+void HleBridge::MapReturnValue() {
+    // Return value: ARM64 X0 -> x86_64 RAX
+    int arm64_return = 0; // X0
+    int x86_return = reg_mapper.MapX86_64ToArm64(X86_64Register::RAX);
+    if (x86_return != arm64_return) {
+        codegen.mov(x86_return, arm64_return);
+    }
+}
+
+bool HleBridge::IsHleAddress(VAddr address) {
+    // TODO: Implement HLE address lookup
+    (void)address;
+    return false;
+}
+
+void* HleBridge::GetHleFunction(VAddr address) {
+    // TODO: Implement HLE function lookup
+    (void)address;
+    return nullptr;
+}
+
+} // namespace Core::Jit
diff --git a/src/core/jit/hle_bridge.h b/src/core/jit/hle_bridge.h
new file mode 100644
index 000000000..3866cad8c
--- /dev/null
+++ b/src/core/jit/hle_bridge.h
@@ -0,0 +1,40 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include "arm64_codegen.h"
+#include "calling_convention.h"
+#include "register_mapping.h"
+
+namespace Core::Jit {
+
+class HleBridge {
+public:
+    explicit HleBridge(Arm64CodeGenerator& codegen, RegisterMapper& reg_mapper);
+    ~HleBridge() = default;
+
+    // Generate bridge code to call an HLE function
+    // hle_func: Pointer to the HLE function
+    // int_arg_count: Number of integer arguments (0-6 for x86_64 System V ABI)
+    // float_arg_count: Number of floating point arguments (0-8 for x86_64 System V ABI)
+    void GenerateBridge(void* hle_func, int int_arg_count = 0, int float_arg_count = 0);
+
+    // Check if an address is an HLE function
+    static bool IsHleAddress(VAddr address);
+
+    // Get HLE function pointer for an address
+    static void* GetHleFunction(VAddr address);
+
+private:
+    void SaveCallerSavedRegisters();
+    void RestoreCallerSavedRegisters();
+    void MapArguments(int int_arg_count, int float_arg_count);
+    void MapReturnValue();
+
+    Arm64CodeGenerator& codegen;
+    RegisterMapper& reg_mapper;
+    CallingConvention calling_conv;
+};
+
+} // namespace Core::Jit
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index b2bc0cb0d..3ca86fe8a 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -8,6 +8,7 @@ add_executable(jit_tests
     test_execution_engine.cpp
     test_block_linking.cpp
     test_call_ret.cpp
+    test_hle_bridge.cpp
     main.cpp
 )
 
@@ -25,6 +26,8 @@ if (ARCHITECTURE STREQUAL "arm64")
         ${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/simd_translator.h
         ${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/calling_convention.cpp
         ${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/calling_convention.h
+        ${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/hle_bridge.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/hle_bridge.h
     )
 endif()
 
diff --git a/tests/test_hle_bridge.cpp b/tests/test_hle_bridge.cpp
new file mode 100644
index 000000000..e3134af47
--- /dev/null
+++ b/tests/test_hle_bridge.cpp
@@ -0,0 +1,83 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "core/jit/arm64_codegen.h"
+#include "core/jit/hle_bridge.h"
+#include "core/jit/register_mapping.h"
+#include <gtest/gtest.h>
+#include <sys/mman.h>
+#if defined(__APPLE__) && defined(ARCH_ARM64)
+#include <pthread.h>
+#endif
+
+using namespace Core::Jit;
+
+// Simple test HLE function
+extern "C" PS4_SYSV_ABI u64 TestHleFunction(u64 arg1, u64 arg2) {
+    return arg1 + arg2;
+}
+
+class HleBridgeTest : public ::testing::Test {
+protected:
+    void SetUp() override {
+        // Allocate executable memory for test code
+#if defined(__APPLE__) && defined(ARCH_ARM64)
+        test_code_buffer = mmap(nullptr, 64 * 1024, PROT_READ | PROT_WRITE,
+                                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+        ASSERT_NE(test_code_buffer, MAP_FAILED)
+            << "Failed to allocate executable memory for test";
+        pthread_jit_write_protect_np(0);
+#else
+        test_code_buffer =
+            mmap(nullptr, 64 * 1024, PROT_READ | PROT_WRITE | PROT_EXEC,
+                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+        ASSERT_NE(test_code_buffer, MAP_FAILED)
+            << "Failed to allocate executable memory for test";
+#endif
+        codegen = std::make_unique<Arm64CodeGenerator>(64 * 1024, test_code_buffer);
+        register_mapper = std::make_unique<RegisterMapper>();
+        hle_bridge = std::make_unique<HleBridge>(*codegen, *register_mapper);
+    }
+
+    void TearDown() override {
+        hle_bridge.reset();
+        register_mapper.reset();
+        codegen.reset();
+        if (test_code_buffer != MAP_FAILED) {
+            munmap(test_code_buffer, 64 * 1024);
+        }
+    }
+
+    void* test_code_buffer = MAP_FAILED;
+    std::unique_ptr<Arm64CodeGenerator> codegen;
+    std::unique_ptr<RegisterMapper> register_mapper;
+    std::unique_ptr<HleBridge> hle_bridge;
+};
+
+// Test that HLE bridge can be constructed
+TEST_F(HleBridgeTest, Construction) {
+    EXPECT_NE(hle_bridge, nullptr);
+}
+
+// Test that we can generate a bridge to an HLE function
+TEST_F(HleBridgeTest, GenerateBridge) {
+    void* hle_func = reinterpret_cast<void*>(TestHleFunction);
+
+    // Generate bridge code
+    hle_bridge->GenerateBridge(hle_func, 2); // 2 integer arguments
+
+    // Should generate some code
+    EXPECT_GT(codegen->getSize(), 0) << "HLE bridge should generate code";
+}
+
+// Test that bridge preserves caller-saved registers
+TEST_F(HleBridgeTest, BridgePreservesRegisters) {
+    // This is a placeholder test - full register preservation testing
+    // would require execution, which is complex
+    void* hle_func = reinterpret_cast<void*>(TestHleFunction);
+
+    size_t size_before = codegen->getSize();
+    hle_bridge->GenerateBridge(hle_func, 2);
+    size_t size_after = codegen->getSize();
+
+    // Bridge should generate substantial code for register preservation
+    EXPECT_GT(size_after - size_before, 8) << "Bridge should preserve registers";
+}

From c2c82646a6e65b69e04237a900eb9208c463064e Mon Sep 17 00:00:00 2001
From: AlpinDale
Date: Tue, 9 Dec 2025 06:17:37 +0430
Subject: [PATCH 18/19] FEX submodule

---
 .gitmodules    | 3 +++
 CMakeLists.txt | 5 ++++-
 externals/FEX  | 1 +
 3 files changed, 8 insertions(+), 1 deletion(-)
 create mode 160000 externals/FEX

diff --git a/.gitmodules b/.gitmodules
index c5d05edd3..30ab2ff98 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -120,3 +120,6 @@
 [submodule "externals/miniz"]
 	path = externals/miniz
 	url = https://github.com/richgel999/miniz
+[submodule "externals/FEX"]
+	path = externals/FEX
+	url = https://github.com/FEX-Emu/FEX
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 037af5dc4..876ef9a35 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -862,7 +862,10 @@ if (ARCHITECTURE STREQUAL "arm64")
         src/core/jit/calling_convention.cpp
         src/core/jit/calling_convention.h
         src/core/jit/simd_translator.cpp
-        src/core/jit/simd_translator.h)
+        src/core/jit/simd_translator.h
+        src/core/jit/hle_bridge.cpp
+        src/core/jit/hle_bridge.h
+    )
 endif()
 
 set(SHADER_RECOMPILER src/shader_recompiler/profile.h
diff --git a/externals/FEX b/externals/FEX
new file mode 160000
index 000000000..e8591090f
--- /dev/null
+++ b/externals/FEX
@@ -0,0 +1 @@
+Subproject commit e8591090f246c49631c14ef70f32c7df14b5646e

From b76e05d5ddaa93039a6e9565fb3bf237e77e95a8 Mon Sep 17 00:00:00 2001
From: AlpinDale
Date: Tue, 9 Dec 2025 07:16:22 +0430
Subject: [PATCH 19/19] better block linking with delinkers; memory operand
 optimizations

---
 src/core/jit/arm64_codegen.cpp     |  5 ++
 src/core/jit/arm64_codegen.h       |  1 +
 src/core/jit/block_manager.cpp     | 32 ++++++++++++
 src/core/jit/block_manager.h       | 30 ++++++++++++
 src/core/jit/execution_engine.cpp  | 55 ++++++++++++++-----
 src/core/jit/x86_64_translator.cpp | 78 ++++++++++++++++++------
 6 files changed, 174 insertions(+), 27 deletions(-)

diff --git a/src/core/jit/arm64_codegen.cpp b/src/core/jit/arm64_codegen.cpp
index 516240295..bbcab2809 100644
--- a/src/core/jit/arm64_codegen.cpp
+++ b/src/core/jit/arm64_codegen.cpp
@@ -228,6 +228,11 @@ void Arm64CodeGenerator::add(int dst, int src1, int src2) {
     emit32(0x8B000000 | (dst << 0) | (src1 << 5) | (src2 << 16));
 }
 
+void Arm64CodeGenerator::add(int dst, int src1, int src2, int shift) {
+    ASSERT_MSG(shift >= 0 && shift <= 3, "Invalid shift amount");
+    emit32(0x8B000000 | (dst << 0) | (src1 << 5) | (src2 << 16) | (shift << 10));
+}
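+
+// Encoding reference (illustrative, not part of the original patch): in the A64
+// "ADD (shifted register)" encoding the LSL amount is the imm6 field at bits [15:10],
+// with Rm at [20:16], Rn at [9:5] and Rd at [4:0]. For example:
+//   add x0, x1, x2, LSL #2  ->  0x8B000000 | (1 << 5) | (2 << 16) | (2 << 10) = 0x8B020820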
+
 void Arm64CodeGenerator::add_imm(int dst, int src1, s32 imm) {
     if (imm >= 0 && imm < 4096) {
         emit32(0x91000000 | (dst << 0) | (src1 << 5) | (imm << 10));
diff --git a/src/core/jit/arm64_codegen.h b/src/core/jit/arm64_codegen.h
index 519802c44..82151ac45 100644
--- a/src/core/jit/arm64_codegen.h
+++ b/src/core/jit/arm64_codegen.h
@@ -43,6 +43,7 @@ public:
 
     // Arithmetic operations
     void add(int dst, int src1, int src2);
+    void add(int dst, int src1, int src2, int shift);
     void add_imm(int dst, int src1, s32 imm);
     void sub(int dst, int src1, int src2);
     void sub_imm(int dst, int src1, s32 imm);
diff --git a/src/core/jit/block_manager.cpp b/src/core/jit/block_manager.cpp
index 172a817ca..dd6b6e6cb 100644
--- a/src/core/jit/block_manager.cpp
+++ b/src/core/jit/block_manager.cpp
@@ -37,6 +37,16 @@ CodeBlock* BlockManager::CreateBlock(VAddr ps4_address, void* arm64_code, size_t
 
 void BlockManager::InvalidateBlock(VAddr ps4_address) {
     std::lock_guard lock(mutex);
+
+    // Delink all links pointing to this block
+    auto lower = block_links.lower_bound({ps4_address, nullptr});
+    auto upper = block_links.upper_bound(
+        {ps4_address, reinterpret_cast<ExitFunctionLinkData*>(UINTPTR_MAX)});
+    for (auto it = lower; it != upper;) {
+        it->second(it->first.host_link);
+        it = block_links.erase(it);
+    }
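+    // Illustrative note (not part of the original patch): block_links is ordered by
+    // {guest_destination, host_link}, so every link targeting ps4_address sits in the
+    // contiguous key range from {ps4_address, nullptr} to {ps4_address, UINTPTR_MAX};
+    // the loop above visits exactly that range, runs each delinker, erases the entries.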
+
     blocks.erase(ps4_address);
     LOG_DEBUG(Core, "Invalidated code block at PS4 address {:#x}", ps4_address);
 }
@@ -44,6 +54,17 @@ void BlockManager::InvalidateBlock(VAddr ps4_address) {
 void BlockManager::InvalidateRange(VAddr start, VAddr end) {
     std::lock_guard lock(mutex);
 
+    // Delink all links pointing to blocks in this range
+    auto link_it = block_links.begin();
+    while (link_it != block_links.end()) {
+        if (link_it->first.guest_destination >= start && link_it->first.guest_destination < end) {
+            link_it->second(link_it->first.host_link);
+            link_it = block_links.erase(link_it);
+        } else {
+            ++link_it;
+        }
+    }
+
     auto it = blocks.begin();
     while (it != blocks.end()) {
         VAddr block_addr = it->first;
@@ -77,8 +98,19 @@ void BlockManager::AddDependency(VAddr block_address, VAddr dependency) {
     }
 }
 
+void BlockManager::AddBlockLink(VAddr guest_dest, ExitFunctionLinkData* link_data,
+                                BlockDelinkerFunc delinker) {
+    std::lock_guard lock(mutex);
+    block_links[{guest_dest, link_data}] = delinker;
+}
+
 void BlockManager::Clear() {
     std::lock_guard lock(mutex);
+    // Delink all links before clearing
+    for (auto& [tag, delinker] : block_links) {
+        delinker(tag.host_link);
+    }
+    block_links.clear();
     blocks.clear();
 }
diff --git a/src/core/jit/block_manager.h b/src/core/jit/block_manager.h
index 07d6f80fa..90a8436dc 100644
--- a/src/core/jit/block_manager.h
+++ b/src/core/jit/block_manager.h
@@ -3,6 +3,9 @@
 #pragma once
 
+#include <cstdint>
+#include <functional>
+#include <map>
 #include <memory>
 #include <mutex>
 #include <set>
 #include <unordered_map>
 #include "common/types.h"
 
 namespace Core::Jit {
 
+struct ExitFunctionLinkData {
+    void* host_code;
+    VAddr guest_rip;
+    void* caller_address;
+    u32 original_instruction;
+};
+
+using BlockDelinkerFunc = std::function<void(ExitFunctionLinkData*)>;
+
+struct BlockLinkTag {
+    VAddr guest_destination;
+    ExitFunctionLinkData* host_link;
+
+    bool operator<(const BlockLinkTag& other) const {
+        if (guest_destination < other.guest_destination) {
+            return true;
+        } else if (guest_destination == other.guest_destination) {
+            return host_link < other.host_link;
+        } else {
+            return false;
+        }
+    }
+};
+
 struct CodeBlock {
     VAddr ps4_address;
     void* arm64_code;
@@ -41,6 +68,8 @@ public:
     void InvalidateBlock(VAddr ps4_address);
     void InvalidateRange(VAddr start, VAddr end);
     void AddDependency(VAddr block_address, VAddr dependency);
+    void AddBlockLink(VAddr guest_dest, ExitFunctionLinkData* link_data,
+                      BlockDelinkerFunc delinker);
     void Clear();
 
     size_t GetBlockCount() const {
@@ -49,6 +78,7 @@ public:
     size_t GetTotalCodeSize() const;
 
     std::unordered_map<VAddr, std::unique_ptr<CodeBlock>> blocks;
+    std::map<BlockLinkTag, BlockDelinkerFunc> block_links;
     mutable std::mutex mutex;
 };
diff --git a/src/core/jit/execution_engine.cpp b/src/core/jit/execution_engine.cpp
index ed7c986e2..65f2ffb06 100644
--- a/src/core/jit/execution_engine.cpp
+++ b/src/core/jit/execution_engine.cpp
@@ -250,6 +250,18 @@ CodeBlock* ExecutionEngine::TranslateBlock(VAddr ps4_address) {
     return new_block;
 }
 
+static void DirectBlockDelinker(ExitFunctionLinkData* record, bool is_call) {
+    void* caller_addr = record->caller_address;
+    u32 original_inst = record->original_instruction;
+
+    std::atomic_ref<u32>(*reinterpret_cast<u32*>(caller_addr))
+        .store(original_inst, std::memory_order::relaxed);
+#if defined(__APPLE__) && defined(ARCH_ARM64)
+    __builtin___clear_cache(static_cast<char*>(caller_addr), static_cast<char*>(caller_addr) + 4);
+#endif
+    delete record;
+}
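+
+// Illustrative note on the link/delink lifecycle (not part of the original patch):
+// LinkBlock below overwrites a patch site with a direct B to the target and registers
+// an ExitFunctionLinkData record; if the target block is later invalidated,
+// DirectBlockDelinker atomically writes original_instruction back over the patch
+// site, returning the caller to its unlinked (dispatcher) path.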
+
 void ExecutionEngine::LinkBlock(CodeBlock* block, VAddr target_address) {
     CodeBlock* target_block = block_manager->GetBlock(target_address);
     if (!target_block) {
         return;
     }
@@ -261,25 +273,33 @@ void ExecutionEngine::LinkBlock(CodeBlock* block, VAddr target_address) {
 #if defined(__APPLE__) && defined(ARCH_ARM64)
         pthread_jit_write_protect_np(0);
 #endif
-        // Calculate offset from patch location to target
-        s64 offset = reinterpret_cast<s64>(target_block->arm64_code) -
-                     reinterpret_cast<s64>(block->branch_patch_location);
+        void* caller_address = block->branch_patch_location;
+        s64 offset =
+            reinterpret_cast<s64>(target_block->arm64_code) - reinterpret_cast<s64>(caller_address);
 
         // Check if we can use a relative branch (within ±128MB)
         if (offset >= -0x8000000 && offset < 0x8000000) {
             s32 imm26 = static_cast<s32>(offset / 4);
-            u32* patch_ptr = reinterpret_cast<u32*>(block->branch_patch_location);
-            // Patch the branch instruction: 0x14000000 | (imm26 & 0x3FFFFFF)
-            *patch_ptr = 0x14000000 | (imm26 & 0x3FFFFFF);
+            u32* patch_ptr = reinterpret_cast<u32*>(caller_address);
+            u32 branch_inst = 0x14000000 | (imm26 & 0x3FFFFFF);
+
+            u32 original_inst = *patch_ptr;
+            std::atomic_ref<u32>(*patch_ptr).store(branch_inst, std::memory_order::relaxed);
+
+            // Register delinker
+            ExitFunctionLinkData* link_data = new ExitFunctionLinkData{
+                target_block->arm64_code, target_address, caller_address, original_inst};
+            block_manager->AddBlockLink(target_address, link_data, [](ExitFunctionLinkData* r) {
+                DirectBlockDelinker(r, false);
+            });
         } else {
-            // Far branch - need to use indirect branch
-            // For now, leave as-is (will use the placeholder branch)
+            // Far branch - need to use indirect branch via thunk
             LOG_DEBUG(Core, "Branch target too far for direct linking: offset={}", offset);
         }
 #if defined(__APPLE__) && defined(ARCH_ARM64)
         pthread_jit_write_protect_np(1);
-        __builtin___clear_cache(static_cast<char*>(block->branch_patch_location),
-                                static_cast<char*>(block->branch_patch_location) + 4);
+        __builtin___clear_cache(static_cast<char*>(caller_address),
+                                static_cast<char*>(caller_address) + 4);
 #endif
         block->is_linked = true;
         LOG_DEBUG(Core, "Linked block {:#x} to {:#x}", block->ps4_address, target_address);
@@ -295,8 +315,19 @@ void ExecutionEngine::LinkBlock(CodeBlock* block, VAddr target_address) {
         if (offset >= -0x8000000 && offset < 0x8000000) {
             s32 imm26 = static_cast<s32>(offset / 4);
             u32* patch_ptr = reinterpret_cast<u32*>(link_location);
-            *patch_ptr = 0x14000000 | (imm26 & 0x3FFFFFF);
-            block->code_size += 4; // Update block size
+            u32 branch_inst = 0x14000000 | (imm26 & 0x3FFFFFF);
+            u32 original_inst = 0x14000002;
+
+            std::atomic_ref<u32>(*patch_ptr).store(branch_inst, std::memory_order::relaxed);
+
+            // Register delinker
+            ExitFunctionLinkData* link_data = new ExitFunctionLinkData{
+                target_block->arm64_code, target_address, link_location, original_inst};
+            block_manager->AddBlockLink(target_address, link_data, [](ExitFunctionLinkData* r) {
+                DirectBlockDelinker(r, false);
+            });
+
+            block->code_size += 4;
         }
 #if defined(__APPLE__) && defined(ARCH_ARM64)
         pthread_jit_write_protect_np(1);
diff --git a/src/core/jit/x86_64_translator.cpp b/src/core/jit/x86_64_translator.cpp
index 84fe8f7ec..d6391266c 100644
--- a/src/core/jit/x86_64_translator.cpp
+++ b/src/core/jit/x86_64_translator.cpp
@@ -116,29 +116,77 @@ void X86_64Translator::CalculateMemoryAddress(int dst_reg, const ZydisDecodedOpe
         }
     }
 
-    if (base_reg == -1 && index_reg == -1 && mem.disp.value == 0) {
+    s64 displacement = mem.disp.value;
+
+    if (base_reg == -1 && index_reg == -1 && displacement == 0) {
         codegen.mov(dst_reg, 0);
         return;
     }
 
-    if (base_reg != -1) {
-        codegen.mov(dst_reg, base_reg);
-    } else {
-        codegen.mov(dst_reg, 0);
-    }
-
-    if (index_reg != -1) {
-        if (mem.scale > 0 && mem.scale <= 8) {
-            codegen.mov(RegisterMapper::SCRATCH_REG, static_cast<int>(mem.scale));
-            codegen.mul(RegisterMapper::SCRATCH_REG, index_reg, RegisterMapper::SCRATCH_REG);
-            codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
+    if (index_reg == -1) {
+        if (base_reg != -1) {
+            if (displacement == 0) {
+                codegen.mov(dst_reg, base_reg);
+            } else if (displacement >= -256 && displacement < 256) {
+                codegen.mov(dst_reg, base_reg);
+                codegen.add_imm(dst_reg, dst_reg, static_cast<s32>(displacement));
+            } else {
+                codegen.mov(dst_reg, base_reg);
+                codegen.mov_imm(RegisterMapper::SCRATCH_REG, displacement);
+                codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
+            }
         } else {
-            codegen.add(dst_reg, dst_reg, index_reg);
+            codegen.mov_imm(dst_reg, displacement);
         }
+        return;
     }
 
-    if (mem.disp.value != 0) {
-        codegen.add(dst_reg, dst_reg, static_cast<int>(mem.disp.value));
+    if (base_reg == -1) {
+        base_reg = 0;
+    }
+
+    int scale = mem.scale;
+    if (scale == 0) {
+        scale = 1;
+    }
+
+    if (scale == 1) {
+        if (displacement == 0) {
+            codegen.add(dst_reg, base_reg, index_reg);
+        } else if (displacement >= -256 && displacement < 256) {
+            codegen.add(dst_reg, base_reg, index_reg);
+            codegen.add_imm(dst_reg, dst_reg, static_cast<s32>(displacement));
+        } else {
+            codegen.add(dst_reg, base_reg, index_reg);
+            codegen.mov_imm(RegisterMapper::SCRATCH_REG, displacement);
+            codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
+        }
+    } else if (scale == 2 || scale == 4 || scale == 8) {
+        int shift = (scale == 2) ? 1 : (scale == 4) ? 2 : 3;
+        if (displacement == 0) {
+            codegen.add(dst_reg, base_reg, index_reg, shift);
+        } else {
+            codegen.add(dst_reg, base_reg, index_reg, shift);
+            if (displacement >= -256 && displacement < 256) {
+                codegen.add_imm(dst_reg, dst_reg, static_cast<s32>(displacement));
+            } else {
+                codegen.mov_imm(RegisterMapper::SCRATCH_REG, displacement);
+                codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
+            }
+        }
+    } else {
+        codegen.mov(dst_reg, base_reg);
+        codegen.mov_imm(RegisterMapper::SCRATCH_REG, scale);
+        codegen.mul(RegisterMapper::SCRATCH_REG, index_reg, RegisterMapper::SCRATCH_REG);
+        codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
+        if (displacement != 0) {
+            if (displacement >= -256 && displacement < 256) {
+                codegen.add_imm(dst_reg, dst_reg, static_cast<s32>(displacement));
+            } else {
+                codegen.mov_imm(RegisterMapper::SCRATCH_REG, displacement);
+                codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
+            }
+        }
+    }
 }
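+
+// Worked example (illustrative, not part of the original patch): a guest operand like
+// [rbx + rsi*4 + 0x10] now lowers through the scale == 4 fast path to
+//     add  dst, base, index, LSL #2   // base + index*4
+//     add  dst, dst, #0x10            // + displacement
+// instead of the earlier mov/mov/mul/add/add sequence, saving three instructions
+// per address computation.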