diff --git a/CMakeDarwinPresets.json b/CMakeDarwinPresets.json
index 10824a66c..8e09600ec 100644
--- a/CMakeDarwinPresets.json
+++ b/CMakeDarwinPresets.json
@@ -17,6 +17,19 @@
         "CMAKE_INSTALL_PREFIX": "${sourceDir}/Build/${presetName}",
         "CMAKE_OSX_ARCHITECTURES": "x86_64"
       }
+    },
+    {
+      "name": "arm64-Clang-Base",
+      "hidden": true,
+      "generator": "Ninja",
+      "binaryDir": "${sourceDir}/Build/${presetName}",
+      "cacheVariables": {
+        "CMAKE_C_COMPILER": "/usr/bin/clang",
+        "CMAKE_CXX_COMPILER": "/usr/bin/clang++",
+        "CMAKE_INSTALL_PREFIX": "${sourceDir}/Build/${presetName}",
+        "CMAKE_OSX_ARCHITECTURES": "arm64",
+        "CMAKE_PREFIX_PATH": "/opt/homebrew"
+      }
     }
   ]
 }
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 04534ec26..df2905b70 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -550,13 +550,15 @@ set(USBD_LIB src/core/libraries/usbd/usbd.cpp
              src/core/libraries/usbd/emulated/skylander.h
 )
 
-set(FIBER_LIB src/core/libraries/fiber/fiber_context.s
-              src/core/libraries/fiber/fiber.cpp
+set(FIBER_LIB src/core/libraries/fiber/fiber.cpp
               src/core/libraries/fiber/fiber.h
               src/core/libraries/fiber/fiber_error.h
 )
 
-set_source_files_properties(src/core/libraries/fiber/fiber_context.s PROPERTIES COMPILE_OPTIONS -Wno-unused-command-line-argument)
+if(ARCHITECTURE STREQUAL "x86_64")
+    list(APPEND FIBER_LIB src/core/libraries/fiber/fiber_context.s)
+    set_source_files_properties(src/core/libraries/fiber/fiber_context.s PROPERTIES COMPILE_OPTIONS -Wno-unused-command-line-argument)
+endif()
 
 set(VDEC_LIB src/core/libraries/videodec/videodec2_impl.cpp
              src/core/libraries/videodec/videodec2_impl.h
diff --git a/CMakePresets.json b/CMakePresets.json
index c34007a34..1914f9895 100644
--- a/CMakePresets.json
+++ b/CMakePresets.json
@@ -30,6 +30,30 @@
       "cacheVariables": {
         "CMAKE_BUILD_TYPE": "RelWithDebInfo"
       }
+    },
+    {
+      "name": "arm64-Clang-Debug",
+      "displayName": "Clang ARM64 Debug",
+      "inherits": ["arm64-Clang-Base"],
+      "cacheVariables": {
+        "CMAKE_BUILD_TYPE": "Debug"
+      }
+    },
+    {
+      "name": "arm64-Clang-Release",
+      "displayName": "Clang ARM64 Release",
+      "inherits": ["arm64-Clang-Base"],
+      "cacheVariables": {
+        "CMAKE_BUILD_TYPE": "Release"
+      }
+    },
+    {
+      "name": "arm64-Clang-RelWithDebInfo",
+      "displayName": "Clang ARM64 RelWithDebInfo",
+      "inherits": ["arm64-Clang-Base"],
+      "cacheVariables": {
+        "CMAKE_BUILD_TYPE": "RelWithDebInfo"
+      }
     }
   ]
 }
\ No newline at end of file
diff --git a/src/common/signal_context.cpp b/src/common/signal_context.cpp
index 112160bc8..929b250ec 100644
--- a/src/common/signal_context.cpp
+++ b/src/common/signal_context.cpp
@@ -19,14 +19,22 @@ void* GetXmmPointer(void* ctx, u8 index) {
     case index: \
         return (void*)(&((EXCEPTION_POINTERS*)ctx)->ContextRecord->Xmm##index.Low)
 #elif defined(__APPLE__)
+#if defined(ARCH_X86_64)
 #define CASE(index) \
     case index: \
         return (void*)(&((ucontext_t*)ctx)->uc_mcontext->__fs.__fpu_xmm##index);
+#elif defined(ARCH_ARM64)
+    UNREACHABLE_MSG("XMM registers not available on ARM64");
+    return nullptr;
+#else
+#error "Unsupported architecture"
+#endif
 #else
 #define CASE(index) \
     case index: \
         return (void*)(&((ucontext_t*)ctx)->uc_mcontext.fpregs->_xmm[index].element[0])
 #endif
+#if !defined(ARCH_ARM64) || !defined(__APPLE__)
     switch (index) {
         CASE(0);
         CASE(1);
@@ -50,13 +58,20 @@ void* GetXmmPointer(void* ctx, u8 index) {
     }
     }
 #undef CASE
+#endif
 }
 
 void* GetRip(void* ctx) {
 #if defined(_WIN32)
     return (void*)((EXCEPTION_POINTERS*)ctx)->ContextRecord->Rip;
 #elif defined(__APPLE__)
+#if defined(ARCH_X86_64)
     return (void*)((ucontext_t*)ctx)->uc_mcontext->__ss.__rip;
+#elif defined(ARCH_ARM64)
+    return (void*)((ucontext_t*)ctx)->uc_mcontext->__ss.__pc;
+#else
+#error "Unsupported architecture"
+#endif
 #else
     return (void*)((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RIP];
 #endif
@@ -66,7 +81,13 @@ void IncrementRip(void* ctx, u64 length) {
 #if defined(_WIN32)
     ((EXCEPTION_POINTERS*)ctx)->ContextRecord->Rip += length;
 #elif defined(__APPLE__)
+#if defined(ARCH_X86_64)
     ((ucontext_t*)ctx)->uc_mcontext->__ss.__rip += length;
+#elif defined(ARCH_ARM64)
+    ((ucontext_t*)ctx)->uc_mcontext->__ss.__pc += length;
+#else
+#error "Unsupported architecture"
+#endif
 #else
     ((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RIP] += length;
 #endif
diff --git a/src/common/va_ctx.h b/src/common/va_ctx.h
index cffe468ff..f39ab70b4 100644
--- a/src/common/va_ctx.h
+++ b/src/common/va_ctx.h
@@ -2,9 +2,16 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #pragma once
-#include
+#include "common/arch.h"
 #include "common/types.h"
 
+#ifdef ARCH_X86_64
+#include
+#elif defined(ARCH_ARM64)
+#include
+#endif
+
+#ifdef ARCH_X86_64
 #define VA_ARGS \
     uint64_t rdi, uint64_t rsi, uint64_t rdx, uint64_t rcx, uint64_t r8, uint64_t r9, \
     uint64_t overflow_arg_area, __m128 xmm0, __m128 xmm1, __m128 xmm2, __m128 xmm3, \
@@ -30,6 +37,17 @@
     (ctx).va_list.gp_offset = offsetof(::Common::VaRegSave, gp); \
     (ctx).va_list.fp_offset = offsetof(::Common::VaRegSave, fp); \
     (ctx).va_list.overflow_arg_area = &overflow_arg_area;
+#elif defined(ARCH_ARM64)
+#define VA_ARGS ...
+#define VA_CTX(ctx) \
+    alignas(16)::Common::VaCtx ctx{}; \
+    (ctx).va_list.reg_save_area = nullptr; \
+    (ctx).va_list.gp_offset = 0; \
+    (ctx).va_list.fp_offset = 0; \
+    (ctx).va_list.overflow_arg_area = nullptr;
+#else
+#error "Unsupported architecture"
+#endif
 
 namespace Common {
 
@@ -44,7 +62,9 @@ struct VaList {
 
 struct VaRegSave {
     u64 gp[6];
+#ifdef ARCH_X86_64
     __m128 fp[8];
+#endif
 };
 
 struct VaCtx {
diff --git a/src/core/libraries/fiber/fiber.cpp b/src/core/libraries/fiber/fiber.cpp
index 2ebfbd244..8e95ca769 100644
--- a/src/core/libraries/fiber/fiber.cpp
+++ b/src/core/libraries/fiber/fiber.cpp
@@ -3,6 +3,7 @@
 
 #include "fiber.h"
 
+#include "common/arch.h"
 #include "common/elf_info.h"
 #include "common/logging/log.h"
 #include "core/libraries/fiber/fiber_error.h"
@@ -23,12 +24,35 @@ OrbisFiberContext* GetFiberContext() {
     return Core::GetTcbBase()->tcb_fiber;
 }
 
+#ifdef ARCH_X86_64
 extern "C" s32 PS4_SYSV_ABI _sceFiberSetJmp(OrbisFiberContext* ctx) asm("_sceFiberSetJmp");
 extern "C" s32 PS4_SYSV_ABI _sceFiberLongJmp(OrbisFiberContext* ctx) asm("_sceFiberLongJmp");
 extern "C" void PS4_SYSV_ABI _sceFiberSwitchEntry(OrbisFiberData* data,
                                                   bool set_fpu) asm("_sceFiberSwitchEntry");
+#elif defined(ARCH_ARM64)
+extern "C" s32 PS4_SYSV_ABI _sceFiberSetJmp(OrbisFiberContext* ctx);
+extern "C" s32 PS4_SYSV_ABI _sceFiberLongJmp(OrbisFiberContext* ctx);
+extern "C" void PS4_SYSV_ABI _sceFiberSwitchEntry(OrbisFiberData* data,
+                                                  bool set_fpu);
+#endif
 extern "C" void PS4_SYSV_ABI _sceFiberForceQuit(u64 ret) asm("_sceFiberForceQuit");
 
+#ifdef ARCH_ARM64
+extern "C" s32 PS4_SYSV_ABI _sceFiberSetJmp(OrbisFiberContext* ctx) {
+    UNREACHABLE_MSG("ARM64 fiber implementation not yet complete");
+    return 0;
+}
+
+extern "C" s32 PS4_SYSV_ABI _sceFiberLongJmp(OrbisFiberContext* ctx) {
+    UNREACHABLE_MSG("ARM64 fiber implementation not yet complete");
+    return 0;
+}
+
+extern "C" void PS4_SYSV_ABI _sceFiberSwitchEntry(OrbisFiberData* data, bool set_fpu) {
+    UNREACHABLE_MSG("ARM64 fiber implementation not yet complete");
+}
+#endif
+
 extern "C" void PS4_SYSV_ABI _sceFiberForceQuit(u64 ret) {
     OrbisFiberContext* g_ctx = GetFiberContext();
     g_ctx->return_val = ret;
diff --git a/src/core/libraries/kernel/kernel.cpp b/src/core/libraries/kernel/kernel.cpp
index 6594bfab2..434526982 100644
--- a/src/core/libraries/kernel/kernel.cpp
+++ b/src/core/libraries/kernel/kernel.cpp
@@ -318,8 +318,8 @@ void RegisterLib(Core::Loader::SymbolsResolver* sym) {
     LIB_FUNCTION("Mv1zUObHvXI", "libkernel", 1, "libkernel", sceKernelGetSystemSwVersion);
     LIB_FUNCTION("igMefp4SAv0", "libkernel", 1, "libkernel", get_authinfo);
     LIB_FUNCTION("G-MYv5erXaU", "libkernel", 1, "libkernel", sceKernelGetAppInfo);
-    LIB_FUNCTION("PfccT7qURYE", "libkernel", 1, "libkernel", kernel_ioctl);
-    LIB_FUNCTION("wW+k21cmbwQ", "libkernel", 1, "libkernel", kernel_ioctl);
+    LIB_FUNCTION_VARIADIC("PfccT7qURYE", "libkernel", 1, "libkernel", kernel_ioctl);
+    LIB_FUNCTION_VARIADIC("wW+k21cmbwQ", "libkernel", 1, "libkernel", kernel_ioctl);
     LIB_FUNCTION("JGfTMBOdUJo", "libkernel", 1, "libkernel", sceKernelGetFsSandboxRandomWord);
     LIB_FUNCTION("6xVpy0Fdq+I", "libkernel", 1, "libkernel", _sigprocmask);
     LIB_FUNCTION("Xjoosiw+XPI", "libkernel", 1, "libkernel", sceKernelUuidCreate);
diff --git a/src/core/libraries/kernel/threads/exception.cpp b/src/core/libraries/kernel/threads/exception.cpp
index 95ced79c0..094cfb61f 100644
--- a/src/core/libraries/kernel/threads/exception.cpp
+++ b/src/core/libraries/kernel/threads/exception.cpp
@@ -1,6 +1,7 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
+#include "common/arch.h"
 #include "common/assert.h"
 #include "core/libraries/kernel/orbis_error.h"
 #include "core/libraries/kernel/threads/exception.h"
@@ -23,6 +24,7 @@ void SigactionHandler(int signum, siginfo_t* inf, ucontext_t* raw_context) {
     if (handler) {
         auto ctx = Ucontext{};
 #ifdef __APPLE__
+#ifdef ARCH_X86_64
         const auto& regs = raw_context->uc_mcontext->__ss;
         ctx.uc_mcontext.mc_r8 = regs.__r8;
         ctx.uc_mcontext.mc_r9 = regs.__r9;
@@ -42,7 +44,13 @@ void SigactionHandler(int signum, siginfo_t* inf, ucontext_t* raw_context) {
         ctx.uc_mcontext.mc_rsp = regs.__rsp;
         ctx.uc_mcontext.mc_fs = regs.__fs;
         ctx.uc_mcontext.mc_gs = regs.__gs;
+#elif defined(ARCH_ARM64)
+        UNREACHABLE_MSG("ARM64 exception handling not yet implemented");
 #else
+#error "Unsupported architecture"
+#endif
+#else
+#ifdef ARCH_X86_64
         const auto& regs = raw_context->uc_mcontext.gregs;
         ctx.uc_mcontext.mc_r8 = regs[REG_R8];
         ctx.uc_mcontext.mc_r9 = regs[REG_R9];
@@ -62,6 +70,11 @@ void SigactionHandler(int signum, siginfo_t* inf, ucontext_t* raw_context) {
         ctx.uc_mcontext.mc_rsp = regs[REG_RSP];
         ctx.uc_mcontext.mc_fs = (regs[REG_CSGSFS] >> 32) & 0xFFFF;
         ctx.uc_mcontext.mc_gs = (regs[REG_CSGSFS] >> 16) & 0xFFFF;
+#elif defined(ARCH_ARM64)
+        UNREACHABLE_MSG("ARM64 exception handling not yet implemented");
+#else
+#error "Unsupported architecture"
+#endif
 #endif
         handler(POSIX_SIGUSR1, &ctx);
     }
diff --git a/src/core/libraries/kernel/threads/mutex.cpp b/src/core/libraries/kernel/threads/mutex.cpp
index 5d97c5dc1..006f86084 100644
--- a/src/core/libraries/kernel/threads/mutex.cpp
+++ b/src/core/libraries/kernel/threads/mutex.cpp
@@ -18,7 +18,13 @@ static std::mutex MutxStaticLock;
 #define THR_ADAPTIVE_MUTEX_INITIALIZER ((PthreadMutex*)1)
 #define THR_MUTEX_DESTROYED ((PthreadMutex*)2)
 
+#ifdef ARCH_X86_64
 #define CPU_SPINWAIT __asm__ volatile("pause")
+#elif defined(ARCH_ARM64)
+#define CPU_SPINWAIT __asm__ volatile("yield")
+#else
+#define CPU_SPINWAIT
+#endif
 
 #define CHECK_AND_INIT_MUTEX \
     if (PthreadMutex* m = *mutex; m <= THR_MUTEX_DESTROYED) [[unlikely]] { \
diff --git a/src/core/libraries/libc_internal/libc_internal_io.cpp b/src/core/libraries/libc_internal/libc_internal_io.cpp
index 8105b66cc..1677d41a4 100644
--- a/src/core/libraries/libc_internal/libc_internal_io.cpp
+++ b/src/core/libraries/libc_internal/libc_internal_io.cpp
@@ -18,6 +18,6 @@ int PS4_SYSV_ABI internal_snprintf(char* s, size_t n, VA_ARGS) {
     return snprintf_ctx(s, n, &ctx);
 }
 void RegisterlibSceLibcInternalIo(Core::Loader::SymbolsResolver* sym) {
-    LIB_FUNCTION("eLdDw6l0-bU", "libSceLibcInternal", 1, "libSceLibcInternal", internal_snprintf);
+    LIB_FUNCTION_VARIADIC("eLdDw6l0-bU", "libSceLibcInternal", 1, "libSceLibcInternal", internal_snprintf);
 }
 } // namespace Libraries::LibcInternal
\ No newline at end of file
diff --git a/src/core/libraries/libs.h b/src/core/libraries/libs.h
index 1229e6411..b170ad554 100644
--- a/src/core/libraries/libs.h
+++ b/src/core/libraries/libs.h
@@ -19,6 +19,18 @@
         sym->AddSymbol(sr, func); \
     }
 
+#define LIB_FUNCTION_VARIADIC(nid, lib, libversion, mod, function) \
+    { \
+        Core::Loader::SymbolResolver sr{}; \
+        sr.name = nid; \
+        sr.library = lib; \
+        sr.library_version = libversion; \
+        sr.module = mod; \
+        sr.type = Core::Loader::SymbolType::Function; \
+        auto func = reinterpret_cast(function); \
+        sym->AddSymbol(sr, func); \
+    }
+
 #define LIB_OBJ(nid, lib, libversion, mod, obj) \
     { \
         Core::Loader::SymbolResolver sr{}; \
diff --git a/src/core/linker.cpp b/src/core/linker.cpp
index ac6b37769..7ac8791ae 100644
--- a/src/core/linker.cpp
+++ b/src/core/linker.cpp
@@ -49,6 +49,26 @@ static PS4_SYSV_ABI void* RunMainEntry [[noreturn]] (EntryParams* params) {
                  : "rax", "rsi", "rdi");
     UNREACHABLE();
 }
+#elif defined(ARCH_ARM64)
+static PS4_SYSV_ABI void* RunMainEntry [[noreturn]] (EntryParams* params) {
+    void* entry = reinterpret_cast(params->entry_addr);
+    // SP must stay 16-byte aligned on AArch64: SP-relative loads/stores fault
+    // otherwise, so only align down and allocate in 16-byte units.
+    asm volatile("mov x2, sp\n"
+                 "and x2, x2, #0xFFFFFFFFFFFFFFF0\n" // Align to 16 bytes
+                 "mov sp, x2\n"
+                 // Copy the first two 64-bit words of EntryParams to the top of the
+                 // stack in memory order: word 0 at [sp], word 1 at [sp, #8].
+                 "ldp x3, x4, [%1]\n"
+                 "stp x3, x4, [sp, #-16]!\n"
+                 "mov x0, %1\n" // First argument: params
+                 "mov x1, %2\n" // Second argument: exit function
+                 "br %0\n"      // Branch, not call, so the prepared stack is untouched
+                 :
+                 : "r"(entry), "r"(params), "r"(ProgramExitFunc)
+                 : "x0", "x1", "x2", "x3", "x4", "memory");
+    UNREACHABLE();
+}
 #endif
 
 Linker::Linker() : memory{Memory::Instance()} {}
diff --git a/src/core/thread.cpp b/src/core/thread.cpp
index 0015f40b9..82fa81867 100644
--- a/src/core/thread.cpp
+++ b/src/core/thread.cpp
@@ -11,8 +11,10 @@
 #include
 #include
 #include
+#ifdef ARCH_X86_64
 #include
 #endif
+#endif
 
 namespace Core {
 
@@ -126,8 +128,10 @@ void NativeThread::Exit() {
 
 void NativeThread::Initialize() {
     // Set MXCSR and FPUCW registers to the values used by Orbis.
+#ifdef ARCH_X86_64
     _mm_setcsr(ORBIS_MXCSR);
     asm volatile("fldcw %0" : : "m"(ORBIS_FPUCW));
+#endif
 #if _WIN64
     tid = GetCurrentThreadId();
 #else
diff --git a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp
index e1f9f2c5a..e45ecf66c 100644
--- a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp
+++ b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp
@@ -4,8 +4,11 @@
 #include
 #include
 
+#include "common/arch.h"
+#ifdef ARCH_X86_64
 #include
 #include
+#endif
 #include "common/config.h"
 #include "common/io_file.h"
 #include "common/logging/log.h"
@@ -23,19 +26,27 @@
 #include "src/common/arch.h"
 #include "src/common/decoder.h"
 
+#ifdef ARCH_X86_64
 using namespace Xbyak::util;
 
 static Xbyak::CodeGenerator g_srt_codegen(32_MB);
 static const u8* g_srt_codegen_start = nullptr;
+#endif
 
 namespace Shader {
 
+#ifdef ARCH_X86_64
 PFN_SrtWalker RegisterWalkerCode(const u8* ptr, size_t size) {
     const auto func_addr = (PFN_SrtWalker)g_srt_codegen.getCurr();
     g_srt_codegen.db(ptr, size);
     g_srt_codegen.ready();
     return func_addr;
 }
+#else
+PFN_SrtWalker RegisterWalkerCode(const u8* ptr, size_t size) {
+    return nullptr;
+}
+#endif
 
 } // namespace Shader
 
@@ -69,6 +80,7 @@ static void DumpSrtProgram(const Shader::Info& info, const u8* code, size_t code
 }
 
 static bool SrtWalkerSignalHandler(void* context, void* fault_address) {
+#ifdef ARCH_X86_64
     // Only handle if the fault address is within the SRT code range
     const u8* code_start = g_srt_codegen_start;
     const u8* code_end = code_start + g_srt_codegen.getSize();
@@ -117,6 +129,9 @@ static bool SrtWalkerSignalHandler(void* context, void* fault_address) {
     LOG_DEBUG(Render_Recompiler, "Patched SRT walker at {}", code);
 
     return true;
+#else
+    return false;
+#endif
 }
 
 using namespace Shader;
@@ -159,6 +174,7 @@ namespace Shader::Optimization {
 
 namespace {
 
+#ifdef ARCH_X86_64
 static inline void PushPtr(Xbyak::CodeGenerator& c, u32 off_dw) {
     c.push(rdi);
     c.mov(rdi, ptr[rdi + (off_dw << 2)]);
@@ -236,6 +252,10 @@ static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {
     info.srt_info.flattened_bufsize_dw = pass_info.dst_off_dw;
 }
 
+#else
+static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {
+}
+#endif
 }; // namespace
 
 