core: add macOS arm64 support

This commit is contained in:
AlpinDale 2025-12-08 23:45:03 +04:30
parent 65f0b07c34
commit 382910c520
14 changed files with 189 additions and 16 deletions

View File

@ -17,6 +17,19 @@
"CMAKE_INSTALL_PREFIX": "${sourceDir}/Build/${presetName}",
"CMAKE_OSX_ARCHITECTURES": "x86_64"
}
},
{
"name": "arm64-Clang-Base",
"hidden": true,
"generator": "Ninja",
"binaryDir": "${sourceDir}/Build/${presetName}",
"cacheVariables": {
"CMAKE_C_COMPILER": "/usr/bin/clang",
"CMAKE_CXX_COMPILER": "/usr/bin/clang++",
"CMAKE_INSTALL_PREFIX": "${sourceDir}/Build/${presetName}",
"CMAKE_OSX_ARCHITECTURES": "arm64",
"CMAKE_PREFIX_PATH": "/opt/homebrew"
}
}
]
}

View File

@ -550,13 +550,15 @@ set(USBD_LIB src/core/libraries/usbd/usbd.cpp
src/core/libraries/usbd/emulated/skylander.h
)
set(FIBER_LIB src/core/libraries/fiber/fiber_context.s
src/core/libraries/fiber/fiber.cpp
set(FIBER_LIB src/core/libraries/fiber/fiber.cpp
src/core/libraries/fiber/fiber.h
src/core/libraries/fiber/fiber_error.h
)
set_source_files_properties(src/core/libraries/fiber/fiber_context.s PROPERTIES COMPILE_OPTIONS -Wno-unused-command-line-argument)
# The hand-written fiber context-switch assembly is only added to FIBER_LIB when
# ARCHITECTURE is "x86_64"; every other architecture builds FIBER_LIB without it.
if(ARCHITECTURE STREQUAL "x86_64")
list(APPEND FIBER_LIB src/core/libraries/fiber/fiber_context.s)
# Compile the assembly source with -Wno-unused-command-line-argument.
set_source_files_properties(src/core/libraries/fiber/fiber_context.s PROPERTIES COMPILE_OPTIONS -Wno-unused-command-line-argument)
endif()
set(VDEC_LIB src/core/libraries/videodec/videodec2_impl.cpp
src/core/libraries/videodec/videodec2_impl.h

View File

@ -30,6 +30,30 @@
"cacheVariables": {
"CMAKE_BUILD_TYPE": "RelWithDebInfo"
}
},
{
"name": "arm64-Clang-Debug",
"displayName": "Clang ARM64 Debug",
"inherits": ["arm64-Clang-Base"],
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Debug"
}
},
{
"name": "arm64-Clang-Release",
"displayName": "Clang ARM64 Release",
"inherits": ["arm64-Clang-Base"],
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Release"
}
},
{
"name": "arm64-Clang-RelWithDebInfo",
"displayName": "Clang ARM64 RelWithDebInfo",
"inherits": ["arm64-Clang-Base"],
"cacheVariables": {
"CMAKE_BUILD_TYPE": "RelWithDebInfo"
}
}
]
}

View File

@ -19,14 +19,22 @@ void* GetXmmPointer(void* ctx, u8 index) {
case index: \
return (void*)(&((EXCEPTION_POINTERS*)ctx)->ContextRecord->Xmm##index.Low)
#elif defined(__APPLE__)
#if defined(ARCH_X86_64)
#define CASE(index) \
case index: \
return (void*)(&((ucontext_t*)ctx)->uc_mcontext->__fs.__fpu_xmm##index);
#elif defined(ARCH_ARM64)
UNREACHABLE_MSG("XMM registers not available on ARM64");
return nullptr;
#else
#error "Unsupported architecture"
#endif
#else
#define CASE(index) \
case index: \
return (void*)(&((ucontext_t*)ctx)->uc_mcontext.fpregs->_xmm[index].element[0])
#endif
#if !defined(ARCH_ARM64) || !defined(__APPLE__)
switch (index) {
CASE(0);
CASE(1);
@ -50,13 +58,20 @@ void* GetXmmPointer(void* ctx, u8 index) {
}
}
#undef CASE
#endif
}
void* GetRip(void* ctx) {
#if defined(_WIN32)
return (void*)((EXCEPTION_POINTERS*)ctx)->ContextRecord->Rip;
#elif defined(__APPLE__)
#if defined(ARCH_X86_64)
return (void*)((ucontext_t*)ctx)->uc_mcontext->__ss.__rip;
#elif defined(ARCH_ARM64)
return (void*)((ucontext_t*)ctx)->uc_mcontext->__ss.__pc;
#else
#error "Unsupported architecture"
#endif
#else
return (void*)((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RIP];
#endif
@ -66,7 +81,13 @@ void IncrementRip(void* ctx, u64 length) {
#if defined(_WIN32)
((EXCEPTION_POINTERS*)ctx)->ContextRecord->Rip += length;
#elif defined(__APPLE__)
#if defined(ARCH_X86_64)
((ucontext_t*)ctx)->uc_mcontext->__ss.__rip += length;
#elif defined(ARCH_ARM64)
((ucontext_t*)ctx)->uc_mcontext->__ss.__pc += length;
#else
#error "Unsupported architecture"
#endif
#else
((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RIP] += length;
#endif

View File

@ -2,9 +2,16 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <xmmintrin.h>
#include "common/arch.h"
#include "common/types.h"
#ifdef ARCH_X86_64
#include <xmmintrin.h>
#elif defined(ARCH_ARM64)
#include <cstdarg>
#endif
#ifdef ARCH_X86_64
#define VA_ARGS \
uint64_t rdi, uint64_t rsi, uint64_t rdx, uint64_t rcx, uint64_t r8, uint64_t r9, \
uint64_t overflow_arg_area, __m128 xmm0, __m128 xmm1, __m128 xmm2, __m128 xmm3, \
@ -30,6 +37,17 @@
(ctx).va_list.gp_offset = offsetof(::Common::VaRegSave, gp); \
(ctx).va_list.fp_offset = offsetof(::Common::VaRegSave, fp); \
(ctx).va_list.overflow_arg_area = &overflow_arg_area;
#elif defined(ARCH_ARM64)
// ARM64 variant: VA_ARGS expands to a plain C-style variadic parameter list ("...").
#define VA_ARGS ...
// ARM64 variant of VA_CTX: declares a 16-byte-aligned, value-initialised ::Common::VaCtx
// named `ctx` and then sets reg_save_area / overflow_arg_area to nullptr and both offsets to 0.
// NOTE(review): nothing in this expansion captures the caller's variadic arguments (there is
// no va_start or register spill), so anything that reads arguments through ctx.va_list will
// see null areas — confirm this is an intentional placeholder.
#define VA_CTX(ctx) \
alignas(16)::Common::VaCtx ctx{}; \
(ctx).va_list.reg_save_area = nullptr; \
(ctx).va_list.gp_offset = 0; \
(ctx).va_list.fp_offset = 0; \
(ctx).va_list.overflow_arg_area = nullptr;
#else
#error "Unsupported architecture"
#endif
namespace Common {
@ -44,7 +62,9 @@ struct VaList {
// Register save area for variadic calls. The x86-64 VA_CTX setup takes
// offsetof(VaRegSave, gp) and offsetof(VaRegSave, fp), so member order and presence matter.
struct VaRegSave {
// Six u64 slots for general-purpose argument registers.
u64 gp[6];
#ifdef ARCH_X86_64
// Eight __m128 slots; only present when ARCH_X86_64 is defined.
__m128 fp[8];
#endif
};
struct VaCtx {

View File

@ -3,6 +3,7 @@
#include "fiber.h"
#include "common/arch.h"
#include "common/elf_info.h"
#include "common/logging/log.h"
#include "core/libraries/fiber/fiber_error.h"
@ -23,12 +24,35 @@ OrbisFiberContext* GetFiberContext() {
return Core::GetTcbBase()->tcb_fiber;
}
// Declarations of the low-level fiber context-switch entry points.
#ifdef ARCH_X86_64
// x86-64: every declaration carries an explicit asm label that fixes its symbol name.
// NOTE(review): presumably these are implemented in fiber_context.s — confirm.
extern "C" s32 PS4_SYSV_ABI _sceFiberSetJmp(OrbisFiberContext* ctx) asm("_sceFiberSetJmp");
extern "C" s32 PS4_SYSV_ABI _sceFiberLongJmp(OrbisFiberContext* ctx) asm("_sceFiberLongJmp");
extern "C" void PS4_SYSV_ABI _sceFiberSwitchEntry(OrbisFiberData* data,
bool set_fpu) asm("_sceFiberSwitchEntry");
#elif defined(ARCH_ARM64)
// ARM64: plain extern "C" declarations without asm labels.
extern "C" s32 PS4_SYSV_ABI _sceFiberSetJmp(OrbisFiberContext* ctx);
extern "C" s32 PS4_SYSV_ABI _sceFiberLongJmp(OrbisFiberContext* ctx);
extern "C" void PS4_SYSV_ABI _sceFiberSwitchEntry(OrbisFiberData* data,
bool set_fpu);
#endif
// Declared for every architecture, again with an explicit asm label.
extern "C" void PS4_SYSV_ABI _sceFiberForceQuit(u64 ret) asm("_sceFiberForceQuit");
#ifdef ARCH_ARM64
// ARM64 placeholders for the fiber context-switch primitives. Each one reports
// "ARM64 fiber implementation not yet complete" through UNREACHABLE_MSG; none of them
// reads or writes its arguments.

// Placeholder for saving a fiber context; `ctx` is unused.
extern "C" s32 PS4_SYSV_ABI _sceFiberSetJmp(OrbisFiberContext* ctx) {
UNREACHABLE_MSG("ARM64 fiber implementation not yet complete");
// Only present to satisfy the s32 return type after the UNREACHABLE_MSG call.
return 0;
}
// Placeholder for resuming a fiber context; `ctx` is unused.
extern "C" s32 PS4_SYSV_ABI _sceFiberLongJmp(OrbisFiberContext* ctx) {
UNREACHABLE_MSG("ARM64 fiber implementation not yet complete");
// Only present to satisfy the s32 return type after the UNREACHABLE_MSG call.
return 0;
}
// Placeholder for the fiber switch entry; `data` and `set_fpu` are unused.
extern "C" void PS4_SYSV_ABI _sceFiberSwitchEntry(OrbisFiberData* data, bool set_fpu) {
UNREACHABLE_MSG("ARM64 fiber implementation not yet complete");
}
#endif
extern "C" void PS4_SYSV_ABI _sceFiberForceQuit(u64 ret) {
OrbisFiberContext* g_ctx = GetFiberContext();
g_ctx->return_val = ret;

View File

@ -318,8 +318,8 @@ void RegisterLib(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("Mv1zUObHvXI", "libkernel", 1, "libkernel", sceKernelGetSystemSwVersion);
LIB_FUNCTION("igMefp4SAv0", "libkernel", 1, "libkernel", get_authinfo);
LIB_FUNCTION("G-MYv5erXaU", "libkernel", 1, "libkernel", sceKernelGetAppInfo);
LIB_FUNCTION("PfccT7qURYE", "libkernel", 1, "libkernel", kernel_ioctl);
LIB_FUNCTION("wW+k21cmbwQ", "libkernel", 1, "libkernel", kernel_ioctl);
LIB_FUNCTION_VARIADIC("PfccT7qURYE", "libkernel", 1, "libkernel", kernel_ioctl);
LIB_FUNCTION_VARIADIC("wW+k21cmbwQ", "libkernel", 1, "libkernel", kernel_ioctl);
LIB_FUNCTION("JGfTMBOdUJo", "libkernel", 1, "libkernel", sceKernelGetFsSandboxRandomWord);
LIB_FUNCTION("6xVpy0Fdq+I", "libkernel", 1, "libkernel", _sigprocmask);
LIB_FUNCTION("Xjoosiw+XPI", "libkernel", 1, "libkernel", sceKernelUuidCreate);

View File

@ -1,6 +1,7 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/arch.h"
#include "common/assert.h"
#include "core/libraries/kernel/orbis_error.h"
#include "core/libraries/kernel/threads/exception.h"
@ -23,6 +24,7 @@ void SigactionHandler(int signum, siginfo_t* inf, ucontext_t* raw_context) {
if (handler) {
auto ctx = Ucontext{};
#ifdef __APPLE__
#ifdef ARCH_X86_64
const auto& regs = raw_context->uc_mcontext->__ss;
ctx.uc_mcontext.mc_r8 = regs.__r8;
ctx.uc_mcontext.mc_r9 = regs.__r9;
@ -42,7 +44,13 @@ void SigactionHandler(int signum, siginfo_t* inf, ucontext_t* raw_context) {
ctx.uc_mcontext.mc_rsp = regs.__rsp;
ctx.uc_mcontext.mc_fs = regs.__fs;
ctx.uc_mcontext.mc_gs = regs.__gs;
#elif defined(ARCH_ARM64)
UNREACHABLE_MSG("ARM64 exception handling not yet implemented");
#else
#error "Unsupported architecture"
#endif
#else
#ifdef ARCH_X86_64
const auto& regs = raw_context->uc_mcontext.gregs;
ctx.uc_mcontext.mc_r8 = regs[REG_R8];
ctx.uc_mcontext.mc_r9 = regs[REG_R9];
@ -62,6 +70,11 @@ void SigactionHandler(int signum, siginfo_t* inf, ucontext_t* raw_context) {
ctx.uc_mcontext.mc_rsp = regs[REG_RSP];
ctx.uc_mcontext.mc_fs = (regs[REG_CSGSFS] >> 32) & 0xFFFF;
ctx.uc_mcontext.mc_gs = (regs[REG_CSGSFS] >> 16) & 0xFFFF;
#elif defined(ARCH_ARM64)
UNREACHABLE_MSG("ARM64 exception handling not yet implemented");
#else
#error "Unsupported architecture"
#endif
#endif
handler(POSIX_SIGUSR1, &ctx);
}

View File

@ -18,7 +18,13 @@ static std::mutex MutxStaticLock;
#define THR_ADAPTIVE_MUTEX_INITIALIZER ((PthreadMutex*)1)
#define THR_MUTEX_DESTROYED ((PthreadMutex*)2)
// CPU_SPINWAIT: spin-loop hint issued between retries of a busy-wait.
//   - ARCH_X86_64 defined       -> "pause"
//   - otherwise ARCH_ARM64      -> "yield"
//   - neither macro defined     -> expands to nothing
#if !defined(ARCH_X86_64) && !defined(ARCH_ARM64)
#define CPU_SPINWAIT
#elif defined(ARCH_X86_64)
#define CPU_SPINWAIT __asm__ volatile("pause")
#else
#define CPU_SPINWAIT __asm__ volatile("yield")
#endif
#define CHECK_AND_INIT_MUTEX \
if (PthreadMutex* m = *mutex; m <= THR_MUTEX_DESTROYED) [[unlikely]] { \

View File

@ -18,6 +18,6 @@ int PS4_SYSV_ABI internal_snprintf(char* s, size_t n, VA_ARGS) {
return snprintf_ctx(s, n, &ctx);
}
void RegisterlibSceLibcInternalIo(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("eLdDw6l0-bU", "libSceLibcInternal", 1, "libSceLibcInternal", internal_snprintf);
LIB_FUNCTION_VARIADIC("eLdDw6l0-bU", "libSceLibcInternal", 1, "libSceLibcInternal", internal_snprintf);
}
} // namespace Libraries::LibcInternal

View File

@ -19,6 +19,18 @@
sym->AddSymbol(sr, func); \
}
// Registers `function` as a Function-type symbol identified by (nid, lib, libversion, mod).
// The function's address is converted to u64 with reinterpret_cast and handed to
// sym->AddSymbol, so a variable named `sym` must be in scope at the expansion site.
// Expands to a braced block; the locals `sr` and `func` are scoped to that block.
// NOTE(review): presumably meant for handlers whose parameter list is VA_ARGS — confirm how
// this is intended to differ from LIB_FUNCTION.
#define LIB_FUNCTION_VARIADIC(nid, lib, libversion, mod, function) \
{ \
Core::Loader::SymbolResolver sr{}; \
sr.name = nid; \
sr.library = lib; \
sr.library_version = libversion; \
sr.module = mod; \
sr.type = Core::Loader::SymbolType::Function; \
auto func = reinterpret_cast<u64>(function); \
sym->AddSymbol(sr, func); \
}
#define LIB_OBJ(nid, lib, libversion, mod, obj) \
{ \
Core::Loader::SymbolResolver sr{}; \

View File

@ -49,6 +49,26 @@ static PS4_SYSV_ABI void* RunMainEntry [[noreturn]] (EntryParams* params) {
: "rax", "rsi", "rdi");
UNREACHABLE();
}
#elif defined(ARCH_ARM64)
/// ARM64 trampoline into the guest entry point. Never returns.
///
/// Sequence performed by the inline assembly:
///   1. round sp down to a 16-byte boundary;
///   2. reserve 16 bytes and copy the first two 8-byte words of `*params` onto the stack;
///   3. pass `params` in x0 and ProgramExitFunc in x1;
///   4. branch (not call) to `params->entry_addr`, so no return address is set up.
///
/// @param params Entry parameters; `entry_addr` is the branch target and the first 16 bytes
///               are mirrored onto the stack.
static PS4_SYSV_ABI void* RunMainEntry [[noreturn]] (EntryParams* params) {
    void* entry = reinterpret_cast<void*>(params->entry_addr);
    // AArch64 requires sp to be 16-byte aligned whenever it is the base register of a load or
    // store. Biasing sp by 8 before the stores below (as an x86-64 style "misaligned at entry"
    // stack would) raises an SP alignment fault on the first `str`, so sp is kept 16-byte
    // aligned throughout.
    // NOTE(review): word 1 of *params lands at [sp] and word 0 at [sp, #8]; confirm this is
    // the layout the entry point expects.
    asm volatile("mov x2, sp\n"
                 "and x2, x2, #0xFFFFFFFFFFFFFFF0\n"
                 "mov sp, x2\n"
                 "ldr x0, [%1, #8]\n"
                 "sub sp, sp, #16\n"
                 "str x0, [sp]\n"
                 "ldr x0, [%1]\n"
                 "str x0, [sp, #8]\n"
                 "mov x0, %1\n"
                 "mov x1, %2\n"
                 "br %0\n"
                 :
                 : "r"(entry), "r"(params), "r"(ProgramExitFunc)
                 : "x0", "x1", "x2", "memory");
    UNREACHABLE();
}
#endif
Linker::Linker() : memory{Memory::Instance()} {}

View File

@ -11,8 +11,10 @@
#include <csignal>
#include <pthread.h>
#include <unistd.h>
#ifdef ARCH_X86_64
#include <xmmintrin.h>
#endif
#endif
namespace Core {
@ -126,8 +128,10 @@ void NativeThread::Exit() {
void NativeThread::Initialize() {
// Set MXCSR and FPUCW registers to the values used by Orbis.
#ifdef ARCH_X86_64
_mm_setcsr(ORBIS_MXCSR);
asm volatile("fldcw %0" : : "m"(ORBIS_FPUCW));
#endif
#if _WIN64
tid = GetCurrentThreadId();
#else

View File

@ -4,8 +4,11 @@
#include <unordered_map>
#include <boost/container/flat_map.hpp>
// common/arch.h has to come before the first ARCH_* test: the guard below must be evaluated
// with the architecture macros already visible, otherwise the xbyak headers are skipped
// while the ARCH_X86_64 code later in this file still uses Xbyak.
#include "common/arch.h"
#ifdef ARCH_X86_64
#include <xbyak/xbyak.h>
#include <xbyak/xbyak_util.h>
#endif
#include "common/config.h"
#include "common/io_file.h"
#include "common/logging/log.h"
@ -23,19 +26,27 @@
#include "src/common/arch.h"
#include "src/common/decoder.h"
// Xbyak-backed SRT walker state; only compiled when ARCH_X86_64 is defined.
#ifdef ARCH_X86_64
using namespace Xbyak::util;
// Code generator that RegisterWalkerCode appends walker code to; constructed with 32_MB.
static Xbyak::CodeGenerator g_srt_codegen(32_MB);
// Lower bound of the code range that SrtWalkerSignalHandler compares the faulting
// instruction address against; initialised to nullptr here.
static const u8* g_srt_codegen_start = nullptr;
#endif
namespace Shader {

/// Copies `size` bytes starting at `ptr` into the global SRT code generator and returns the
/// address at which the copy begins, typed as a PFN_SrtWalker.
/// Builds without ARCH_X86_64 have no code generator and always return nullptr.
PFN_SrtWalker RegisterWalkerCode(const u8* ptr, size_t size) {
#ifdef ARCH_X86_64
    // Take the write cursor before emitting: that is where the new walker starts.
    const auto walker_entry = (PFN_SrtWalker)g_srt_codegen.getCurr();
    g_srt_codegen.db(ptr, size);
    g_srt_codegen.ready();
    return walker_entry;
#else
    return nullptr;
#endif
}

} // namespace Shader
@ -69,12 +80,12 @@ static void DumpSrtProgram(const Shader::Info& info, const u8* code, size_t code
}
static bool SrtWalkerSignalHandler(void* context, void* fault_address) {
// Only handle if the fault address is within the SRT code range
#ifdef ARCH_X86_64
const u8* code_start = g_srt_codegen_start;
const u8* code_end = code_start + g_srt_codegen.getSize();
const void* code = Common::GetRip(context);
if (code < code_start || code >= code_end) {
return false; // Not in SRT code range
return false;
}
// Patch instruction to zero register
@ -117,6 +128,9 @@ static bool SrtWalkerSignalHandler(void* context, void* fault_address) {
LOG_DEBUG(Render_Recompiler, "Patched SRT walker at {}", code);
return true;
#else
return false;
#endif
}
using namespace Shader;
@ -159,6 +173,7 @@ namespace Shader::Optimization {
namespace {
#ifdef ARCH_X86_64
static inline void PushPtr(Xbyak::CodeGenerator& c, u32 off_dw) {
c.push(rdi);
c.mov(rdi, ptr[rdi + (off_dw << 2)]);
@ -169,18 +184,12 @@ static inline void PushPtr(Xbyak::CodeGenerator& c, u32 off_dw) {
// Emits a `pop rdi` into code generator `c`.
// NOTE(review): presumably restores the rdi saved by PushPtr's `push rdi` — confirm that
// every call site pairs the two.
static inline void PopPtr(Xbyak::CodeGenerator& c) {
c.pop(rdi);
};
static void VisitPointer(u32 off_dw, IR::Inst* subtree, PassInfo& pass_info,
Xbyak::CodeGenerator& c) {
PushPtr(c, off_dw);
PassInfo::PtrUserList* use_list = pass_info.GetUsesAsPointer(subtree);
ASSERT(use_list);
// First copy all the src data from this tree level
// That way, all data that was contiguous in the guest SRT is also contiguous in the
// flattened buffer.
// TODO src and dst are contiguous. Optimize with wider loads/stores
// TODO if this subtree is dynamically indexed, don't compact it (keep it sparse)
for (auto [src_off_dw, use] : *use_list) {
c.mov(r10d, ptr[rdi + (src_off_dw << 2)]);
c.mov(ptr[rsi + (pass_info.dst_off_dw << 2)], r10d);
@ -189,7 +198,6 @@ static void VisitPointer(u32 off_dw, IR::Inst* subtree, PassInfo& pass_info,
pass_info.dst_off_dw++;
}
// Then visit any children used as pointers
for (const auto [src_off_dw, use] : *use_list) {
if (pass_info.GetUsesAsPointer(use)) {
VisitPointer(src_off_dw, use, pass_info, c);
@ -236,6 +244,10 @@ static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {
info.srt_info.flattened_bufsize_dw = pass_info.dst_off_dw;
}
#else
// Non-x86-64 placeholder: the body is intentionally empty, so no SRT walker is generated and
// neither `info` nor `pass_info` is modified.
// NOTE(review): the x86-64 implementation assigns info.srt_info.flattened_bufsize_dw from
// pass_info.dst_off_dw; confirm consumers tolerate it being left untouched on this path.
static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {
}
#endif
}; // namespace
@ -293,7 +305,9 @@ void FlattenExtendedUserdataPass(IR::Program& program) {
}
}
#ifdef ARCH_X86_64
GenerateSrtProgram(info, pass_info);
#endif
// Assign offsets to duplicate readconsts
for (IR::Inst* readconst : all_readconsts) {