AlpinDale 2025-12-16 08:51:29 +08:00 committed by GitHub
commit 3dfa87415a
49 changed files with 4708 additions and 25 deletions

View File

@ -146,6 +146,57 @@ jobs:
name: shadps4-macos-sdl-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}
path: upload/
macos-sdl-arm64:
runs-on: macos-15
needs: get-info
steps:
- uses: actions/checkout@v5
with:
submodules: recursive
- name: Setup latest Xcode
uses: maxim-lobanov/setup-xcode@v1
with:
xcode-version: latest
- name: Cache CMake Configuration
uses: actions/cache@v4
env:
cache-name: ${{ runner.os }}-sdl-arm64-cache-cmake-configuration
with:
path: |
${{github.workspace}}/build-arm64
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
restore-keys: |
${{ env.cache-name }}-
- name: Cache CMake Build
uses: hendrikmuhs/ccache-action@v1.2.19
env:
cache-name: ${{runner.os}}-sdl-arm64-cache-cmake-build
with:
append-timestamp: false
create-symlink: true
key: ${{env.cache-name}}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
variant: sccache
- name: Configure CMake
run: cmake --fresh -B ${{github.workspace}}/build-arm64 -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache
- name: Build
run: cmake --build ${{github.workspace}}/build-arm64 --config ${{env.BUILD_TYPE}} --parallel $(sysctl -n hw.ncpu)
- name: Package and Upload macOS ARM64 SDL artifact
run: |
mkdir upload-arm64
mv ${{github.workspace}}/build-arm64/shadps4 upload-arm64
mv ${{github.workspace}}/build-arm64/MoltenVK_icd.json upload-arm64
mv ${{github.workspace}}/build-arm64/libMoltenVK.dylib upload-arm64
- uses: actions/upload-artifact@v4
with:
name: shadps4-macos-arm64-sdl-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}
path: upload-arm64/
linux-sdl:
runs-on: ubuntu-24.04
needs: get-info
@ -245,7 +296,7 @@ jobs:
pre-release:
if: github.ref == 'refs/heads/main' && github.repository == 'shadps4-emu/shadPS4' && github.event_name == 'push'
needs: [get-info, windows-sdl, macos-sdl, linux-sdl]
needs: [get-info, windows-sdl, macos-sdl, macos-sdl-arm64, linux-sdl]
runs-on: ubuntu-latest
steps:
- name: Download all artifacts

.gitmodules vendored (3 changes)
View File

@ -120,3 +120,6 @@
[submodule "externals/miniz"]
path = externals/miniz
url = https://github.com/richgel999/miniz
[submodule "externals/FEX"]
path = externals/FEX
url = https://github.com/FEX-Emu/FEX

View File

@ -17,6 +17,19 @@
"CMAKE_INSTALL_PREFIX": "${sourceDir}/Build/${presetName}",
"CMAKE_OSX_ARCHITECTURES": "x86_64"
}
},
{
"name": "arm64-Clang-Base",
"hidden": true,
"generator": "Ninja",
"binaryDir": "${sourceDir}/Build/${presetName}",
"cacheVariables": {
"CMAKE_C_COMPILER": "/usr/bin/clang",
"CMAKE_CXX_COMPILER": "/usr/bin/clang++",
"CMAKE_INSTALL_PREFIX": "${sourceDir}/Build/${presetName}",
"CMAKE_OSX_ARCHITECTURES": "arm64",
"CMAKE_PREFIX_PATH": "/opt/homebrew"
}
}
]
}

View File

@ -19,6 +19,8 @@ endif()
project(shadPS4 CXX C ASM ${ADDITIONAL_LANGUAGES})
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
# Forcing PIE makes sure that the base address is high enough so that it doesn't clash with the PS4 memory.
if(UNIX AND NOT APPLE)
set(CMAKE_POSITION_INDEPENDENT_CODE TRUE)
@ -237,7 +239,6 @@ find_package(toml11 4.2.0 CONFIG)
find_package(tsl-robin-map 1.3.0 CONFIG)
find_package(VulkanHeaders 1.4.329 CONFIG)
find_package(VulkanMemoryAllocator 3.1.0 CONFIG)
find_package(xbyak 7.07 CONFIG)
find_package(xxHash 0.8.2 MODULE)
find_package(ZLIB 1.3 MODULE)
find_package(Zydis 5.0.0 CONFIG)
@ -550,13 +551,15 @@ set(USBD_LIB src/core/libraries/usbd/usbd.cpp
src/core/libraries/usbd/emulated/skylander.h
)
set(FIBER_LIB src/core/libraries/fiber/fiber_context.s
src/core/libraries/fiber/fiber.cpp
set(FIBER_LIB src/core/libraries/fiber/fiber.cpp
src/core/libraries/fiber/fiber.h
src/core/libraries/fiber/fiber_error.h
)
set_source_files_properties(src/core/libraries/fiber/fiber_context.s PROPERTIES COMPILE_OPTIONS -Wno-unused-command-line-argument)
if(ARCHITECTURE STREQUAL "x86_64")
list(APPEND FIBER_LIB src/core/libraries/fiber/fiber_context.s)
set_source_files_properties(src/core/libraries/fiber/fiber_context.s PROPERTIES COMPILE_OPTIONS -Wno-unused-command-line-argument)
endif()
set(VDEC_LIB src/core/libraries/videodec/videodec2_impl.cpp
src/core/libraries/videodec/videodec2_impl.h
@ -844,6 +847,27 @@ if (ARCHITECTURE STREQUAL "x86_64")
src/core/cpu_patches.h)
endif()
if (ARCHITECTURE STREQUAL "arm64")
set(CORE ${CORE}
src/core/jit/arm64_codegen.cpp
src/core/jit/arm64_codegen.h
src/core/jit/register_mapping.cpp
src/core/jit/register_mapping.h
src/core/jit/x86_64_translator.cpp
src/core/jit/x86_64_translator.h
src/core/jit/block_manager.cpp
src/core/jit/block_manager.h
src/core/jit/execution_engine.cpp
src/core/jit/execution_engine.h
src/core/jit/calling_convention.cpp
src/core/jit/calling_convention.h
src/core/jit/simd_translator.cpp
src/core/jit/simd_translator.h
src/core/jit/hle_bridge.cpp
src/core/jit/hle_bridge.h
)
endif()
set(SHADER_RECOMPILER src/shader_recompiler/profile.h
src/shader_recompiler/recompiler.cpp
src/shader_recompiler/recompiler.h
@ -1083,7 +1107,10 @@ add_executable(shadps4
create_target_directory_groups(shadps4)
target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient RenderDoc::API FFmpeg::ffmpeg Dear_ImGui gcn half::half ZLIB::ZLIB PNG::PNG)
target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map Tracy::TracyClient RenderDoc::API FFmpeg::ffmpeg Dear_ImGui gcn half::half ZLIB::ZLIB PNG::PNG)
if(ARCHITECTURE STREQUAL "x86_64")
target_link_libraries(shadps4 PRIVATE xbyak::xbyak)
endif()
target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::glslang SDL3::SDL3 SDL3_mixer::SDL3_mixer pugixml::pugixml)
target_link_libraries(shadps4 PRIVATE stb::headers libusb::usb lfreist-hwinfo::hwinfo nlohmann_json::nlohmann_json miniz)
@ -1218,3 +1245,22 @@ endif()
# Install rules
install(TARGETS shadps4 BUNDLE DESTINATION .)
# Testing
option(BUILD_TESTS "Build test suite" OFF)
if(BUILD_TESTS)
enable_testing()
include(FetchContent)
FetchContent_Declare(
googletest
GIT_REPOSITORY https://github.com/google/googletest.git
GIT_TAG v1.17.0
)
# For Windows: Prevent overriding the parent project's compiler/linker settings
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(googletest)
add_subdirectory(tests)
endif()
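
The tests/ directory pulled in by add_subdirectory(tests) is not part of this hunk. As a rough, hypothetical illustration of the kind of case the GoogleTest wiring above enables (the file name, test name, and helper are assumptions, not content from this commit):

// tests/align_test.cpp (hypothetical)
#include <cstddef>
#include <gtest/gtest.h>

// Mirrors the alignUp helper used by the ARM64 code generator added later in this commit.
static std::size_t AlignUp(std::size_t value, std::size_t alignment) {
    return (value + alignment - 1) & ~(alignment - 1);
}

TEST(AlignUp, RoundsUpToAlignment) {
    EXPECT_EQ(AlignUp(1, 16), std::size_t{16});
    EXPECT_EQ(AlignUp(16, 16), std::size_t{16});
    EXPECT_EQ(AlignUp(4097, 4096), std::size_t{8192});
}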

View File

@ -30,6 +30,30 @@
"cacheVariables": {
"CMAKE_BUILD_TYPE": "RelWithDebInfo"
}
},
{
"name": "arm64-Clang-Debug",
"displayName": "Clang ARM64 Debug",
"inherits": ["arm64-Clang-Base"],
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Debug"
}
},
{
"name": "arm64-Clang-Release",
"displayName": "Clang ARM64 Release",
"inherits": ["arm64-Clang-Base"],
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Release"
}
},
{
"name": "arm64-Clang-RelWithDebInfo",
"displayName": "Clang ARM64 RelWithDebInfo",
"inherits": ["arm64-Clang-Base"],
"cacheVariables": {
"CMAKE_BUILD_TYPE": "RelWithDebInfo"
}
}
]
}

View File

@ -112,9 +112,11 @@ if (NOT TARGET tsl::robin_map)
add_subdirectory(robin-map)
endif()
# Xbyak
if (NOT TARGET xbyak::xbyak)
add_subdirectory(xbyak)
# Xbyak (x86_64 only)
if (ARCHITECTURE STREQUAL "x86_64")
if (NOT TARGET xbyak::xbyak)
add_subdirectory(xbyak)
endif()
endif()
# MagicEnum

externals/FEX vendored Submodule (1 change)

@ -0,0 +1 @@
Subproject commit e8591090f246c49631c14ef70f32c7df14b5646e

View File

@ -19,14 +19,22 @@ void* GetXmmPointer(void* ctx, u8 index) {
case index: \
return (void*)(&((EXCEPTION_POINTERS*)ctx)->ContextRecord->Xmm##index.Low)
#elif defined(__APPLE__)
#if defined(ARCH_X86_64)
#define CASE(index) \
case index: \
return (void*)(&((ucontext_t*)ctx)->uc_mcontext->__fs.__fpu_xmm##index);
#elif defined(ARCH_ARM64)
UNREACHABLE_MSG("XMM registers not available on ARM64");
return nullptr;
#else
#error "Unsupported architecture"
#endif
#else
#define CASE(index) \
case index: \
return (void*)(&((ucontext_t*)ctx)->uc_mcontext.fpregs->_xmm[index].element[0])
#endif
#if !defined(ARCH_ARM64) || !defined(__APPLE__)
switch (index) {
CASE(0);
CASE(1);
@ -50,13 +58,20 @@ void* GetXmmPointer(void* ctx, u8 index) {
}
}
#undef CASE
#endif
}
void* GetRip(void* ctx) {
#if defined(_WIN32)
return (void*)((EXCEPTION_POINTERS*)ctx)->ContextRecord->Rip;
#elif defined(__APPLE__)
#if defined(ARCH_X86_64)
return (void*)((ucontext_t*)ctx)->uc_mcontext->__ss.__rip;
#elif defined(ARCH_ARM64)
return (void*)((ucontext_t*)ctx)->uc_mcontext->__ss.__pc;
#else
#error "Unsupported architecture"
#endif
#else
return (void*)((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RIP];
#endif
@ -66,7 +81,13 @@ void IncrementRip(void* ctx, u64 length) {
#if defined(_WIN32)
((EXCEPTION_POINTERS*)ctx)->ContextRecord->Rip += length;
#elif defined(__APPLE__)
#if defined(ARCH_X86_64)
((ucontext_t*)ctx)->uc_mcontext->__ss.__rip += length;
#elif defined(ARCH_ARM64)
((ucontext_t*)ctx)->uc_mcontext->__ss.__pc += length;
#else
#error "Unsupported architecture"
#endif
#else
((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RIP] += length;
#endif
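
For orientation, a minimal sketch of how helpers like GetRip and IncrementRip are typically consumed from a POSIX fault handler; the handler and its registration below are illustrative only (namespaces and the real call sites are omitted) and are not part of this diff:

#include <csignal>
#include <cstdint>
#include <cstdio>

// Declarations mirroring the helpers above; the emulator keeps them in its own namespaces.
void* GetRip(void* ctx);
void IncrementRip(void* ctx, std::uint64_t length);

static void FaultHandler(int /*sig*/, siginfo_t* info, void* ucontext) {
    std::fprintf(stderr, "fault at ip=%p, addr=%p\n", GetRip(ucontext), info->si_addr);
    // A patching handler would decode the faulting instruction here, handle it,
    // and then skip it: IncrementRip(ucontext, decoded_length);
}

void InstallFaultHandler() {
    struct sigaction sa{};
    sa.sa_sigaction = FaultHandler;
    sa.sa_flags = SA_SIGINFO;
    sigaction(SIGSEGV, &sa, nullptr);
}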

View File

@ -2,9 +2,16 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <xmmintrin.h>
#include "common/arch.h"
#include "common/types.h"
#ifdef ARCH_X86_64
#include <xmmintrin.h>
#elif defined(ARCH_ARM64)
#include <cstdarg>
#endif
#ifdef ARCH_X86_64
#define VA_ARGS \
uint64_t rdi, uint64_t rsi, uint64_t rdx, uint64_t rcx, uint64_t r8, uint64_t r9, \
uint64_t overflow_arg_area, __m128 xmm0, __m128 xmm1, __m128 xmm2, __m128 xmm3, \
@ -30,6 +37,17 @@
(ctx).va_list.gp_offset = offsetof(::Common::VaRegSave, gp); \
(ctx).va_list.fp_offset = offsetof(::Common::VaRegSave, fp); \
(ctx).va_list.overflow_arg_area = &overflow_arg_area;
#elif defined(ARCH_ARM64)
#define VA_ARGS ...
#define VA_CTX(ctx) \
alignas(16)::Common::VaCtx ctx{}; \
(ctx).va_list.reg_save_area = nullptr; \
(ctx).va_list.gp_offset = 0; \
(ctx).va_list.fp_offset = 0; \
(ctx).va_list.overflow_arg_area = nullptr;
#else
#error "Unsupported architecture"
#endif
namespace Common {
@ -44,7 +62,9 @@ struct VaList {
struct VaRegSave {
u64 gp[6];
#ifdef ARCH_X86_64
__m128 fp[8];
#endif
};
struct VaCtx {

View File

@ -20,12 +20,16 @@
#include <sys/mman.h>
#endif
#if defined(__APPLE__) && defined(ARCH_X86_64)
#if defined(__APPLE__) && (defined(ARCH_X86_64) || defined(ARCH_ARM64))
// Reserve space for the system address space using a zerofill section.
// Note: These assembly directives are x86_64-specific, but the memory layout constants
// below apply to both x86_64 and ARM64 on macOS.
#if defined(ARCH_X86_64)
asm(".zerofill SYSTEM_MANAGED,SYSTEM_MANAGED,__SYSTEM_MANAGED,0x7FFBFC000");
asm(".zerofill SYSTEM_RESERVED,SYSTEM_RESERVED,__SYSTEM_RESERVED,0x7C0004000");
asm(".zerofill USER_AREA,USER_AREA,__USER_AREA,0x5F9000000000");
#endif
#endif
namespace Core {
@ -33,7 +37,7 @@ namespace Core {
constexpr VAddr SYSTEM_MANAGED_MIN = 0x400000ULL;
constexpr VAddr SYSTEM_MANAGED_MAX = 0x7FFFFBFFFULL;
constexpr VAddr SYSTEM_RESERVED_MIN = 0x7FFFFC000ULL;
#if defined(__APPLE__) && defined(ARCH_X86_64)
#if defined(__APPLE__) && (defined(ARCH_X86_64) || defined(ARCH_ARM64))
// Commpage ranges from 0xFC0000000 - 0xFFFFFFFFF, so decrease the system reserved maximum.
constexpr VAddr SYSTEM_RESERVED_MAX = 0xFBFFFFFFFULL;
// GPU-reserved memory ranges from 0x1000000000 - 0x6FFFFFFFFF, so increase the user minimum.
@ -512,11 +516,13 @@ struct AddressSpace::Impl {
user_size = UserSize;
constexpr int protection_flags = PROT_READ | PROT_WRITE;
#if defined(__APPLE__) && (defined(ARCH_X86_64) || defined(ARCH_ARM64))
// On macOS (both x86_64 and ARM64), we run into limitations due to the commpage from
// 0xFC0000000 - 0xFFFFFFFFF and the GPU carveout region from 0x1000000000 - 0x6FFFFFFFFF.
// Because this creates gaps in the available virtual memory region, we map memory space
// using three distinct parts.
#if defined(ARCH_X86_64)
constexpr int map_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | MAP_FIXED;
#if defined(__APPLE__) && defined(ARCH_X86_64)
// On ARM64 Macs, we run into limitations due to the commpage from 0xFC0000000 - 0xFFFFFFFFF
// and the GPU carveout region from 0x1000000000 - 0x6FFFFFFFFF. Because this creates gaps
// in the available virtual memory region, we map memory space using three distinct parts.
system_managed_base =
reinterpret_cast<u8*>(mmap(reinterpret_cast<void*>(SYSTEM_MANAGED_MIN),
system_managed_size, protection_flags, map_flags, -1, 0));
@ -525,9 +531,50 @@ struct AddressSpace::Impl {
system_reserved_size, protection_flags, map_flags, -1, 0));
user_base = reinterpret_cast<u8*>(
mmap(reinterpret_cast<void*>(USER_MIN), user_size, protection_flags, map_flags, -1, 0));
#elif defined(ARCH_ARM64)
// On ARM64 macOS, MAP_FIXED doesn't work at low addresses (0x400000) due to system
// restrictions. Map memory wherever possible and use offset calculations. This is a
// temporary solution until proper address translation is implemented for ARM64. Note: This
// means the PS4 virtual addresses won't match host addresses, so instruction
// translation/JIT will need to handle the offset.
constexpr int map_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
// Map the three regions separately, but let the system choose addresses
system_managed_base = reinterpret_cast<u8*>(
mmap(nullptr, system_managed_size, protection_flags, map_flags, -1, 0));
if (system_managed_base == MAP_FAILED) {
LOG_CRITICAL(Kernel_Vmm, "mmap failed for system_managed_base: {}", strerror(errno));
throw std::bad_alloc{};
}
system_reserved_base = reinterpret_cast<u8*>(
mmap(nullptr, system_reserved_size, protection_flags, map_flags, -1, 0));
if (system_reserved_base == MAP_FAILED) {
LOG_CRITICAL(Kernel_Vmm, "mmap failed for system_reserved_base: {}", strerror(errno));
throw std::bad_alloc{};
}
user_base =
reinterpret_cast<u8*>(mmap(nullptr, user_size, protection_flags, map_flags, -1, 0));
if (user_base == MAP_FAILED) {
LOG_CRITICAL(Kernel_Vmm, "mmap failed for user_base: {}", strerror(errno));
throw std::bad_alloc{};
}
LOG_WARNING(
Kernel_Vmm,
"ARM64 macOS: Using flexible memory layout. "
"PS4 addresses will be offset from host addresses. "
"system_managed: {} (expected {}), system_reserved: {} (expected {}), user: {} "
"(expected {})",
fmt::ptr(system_managed_base), fmt::ptr(reinterpret_cast<void*>(SYSTEM_MANAGED_MIN)),
fmt::ptr(system_reserved_base), fmt::ptr(reinterpret_cast<void*>(SYSTEM_RESERVED_MIN)),
fmt::ptr(user_base), fmt::ptr(reinterpret_cast<void*>(USER_MIN)));
#endif
#else
const auto virtual_size = system_managed_size + system_reserved_size + user_size;
#if defined(ARCH_X86_64)
constexpr int map_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | MAP_FIXED;
const auto virtual_base =
reinterpret_cast<u8*>(mmap(reinterpret_cast<void*>(SYSTEM_MANAGED_MIN), virtual_size,
protection_flags, map_flags, -1, 0));
@ -535,6 +582,7 @@ struct AddressSpace::Impl {
system_reserved_base = reinterpret_cast<u8*>(SYSTEM_RESERVED_MIN);
user_base = reinterpret_cast<u8*>(USER_MIN);
#else
constexpr int map_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
// Map memory wherever possible and instruction translation can handle offsetting to the
// base.
const auto virtual_base =
@ -560,7 +608,7 @@ struct AddressSpace::Impl {
fmt::ptr(user_base + user_size - 1));
const VAddr system_managed_addr = reinterpret_cast<VAddr>(system_managed_base);
const VAddr system_reserved_addr = reinterpret_cast<VAddr>(system_managed_base);
const VAddr system_reserved_addr = reinterpret_cast<VAddr>(system_reserved_base);
const VAddr user_addr = reinterpret_cast<VAddr>(user_base);
m_free_regions.insert({system_managed_addr, system_managed_addr + system_managed_size});
m_free_regions.insert({system_reserved_addr, system_reserved_addr + system_reserved_size});
@ -607,8 +655,32 @@ struct AddressSpace::Impl {
const int handle = phys_addr != -1 ? (fd == -1 ? backing_fd : fd) : -1;
const off_t host_offset = phys_addr != -1 ? phys_addr : 0;
const int flag = phys_addr != -1 ? MAP_SHARED : (MAP_ANONYMOUS | MAP_PRIVATE);
#if defined(__APPLE__) && defined(ARCH_ARM64)
// On ARM64 macOS, translate PS4 virtual addresses to host addresses
void* host_addr = nullptr;
if (virtual_addr >= SYSTEM_MANAGED_MIN && virtual_addr <= SYSTEM_MANAGED_MAX) {
// System managed region
u64 offset = virtual_addr - SYSTEM_MANAGED_MIN;
host_addr = system_managed_base + offset;
} else if (virtual_addr >= SYSTEM_RESERVED_MIN && virtual_addr <= SYSTEM_RESERVED_MAX) {
// System reserved region
u64 offset = virtual_addr - SYSTEM_RESERVED_MIN;
host_addr = system_reserved_base + offset;
} else if (virtual_addr >= USER_MIN && virtual_addr <= USER_MAX) {
// User region
u64 offset = virtual_addr - USER_MIN;
host_addr = user_base + offset;
} else {
LOG_CRITICAL(Kernel_Vmm, "Invalid virtual address for mapping: {:#x}", virtual_addr);
return MAP_FAILED;
}
void* ret = mmap(host_addr, size, prot, MAP_FIXED | flag, handle, host_offset);
#else
void* ret = mmap(reinterpret_cast<void*>(virtual_addr), size, prot, MAP_FIXED | flag,
handle, host_offset);
#endif
ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno));
return ret;
}
@ -628,9 +700,29 @@ struct AddressSpace::Impl {
// Free the relevant region.
m_free_regions.insert({start_address, end_address});
#if defined(__APPLE__) && defined(ARCH_ARM64)
// On ARM64 macOS, translate PS4 virtual addresses to host addresses
void* host_addr = nullptr;
if (start_address >= SYSTEM_MANAGED_MIN && start_address <= SYSTEM_MANAGED_MAX) {
u64 offset = start_address - SYSTEM_MANAGED_MIN;
host_addr = system_managed_base + offset;
} else if (start_address >= SYSTEM_RESERVED_MIN && start_address <= SYSTEM_RESERVED_MAX) {
u64 offset = start_address - SYSTEM_RESERVED_MIN;
host_addr = system_reserved_base + offset;
} else if (start_address >= USER_MIN && start_address <= USER_MAX) {
u64 offset = start_address - USER_MIN;
host_addr = user_base + offset;
} else {
LOG_CRITICAL(Kernel_Vmm, "Invalid virtual address for unmapping: {:#x}", start_address);
return;
}
void* ret = mmap(host_addr, end_address - start_address, PROT_NONE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
#else
// Return the adjusted pointers.
void* ret = mmap(reinterpret_cast<void*>(start_address), end_address - start_address,
PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
#endif
ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno));
}
@ -642,12 +734,31 @@ struct AddressSpace::Impl {
if (write) {
flags |= PROT_WRITE;
}
#ifdef ARCH_X86_64
#if defined(ARCH_X86_64)
if (execute) {
flags |= PROT_EXEC;
}
#endif
#if defined(__APPLE__) && defined(ARCH_ARM64)
// On ARM64 macOS, translate PS4 virtual addresses to host addresses
void* host_addr = nullptr;
if (virtual_addr >= SYSTEM_MANAGED_MIN && virtual_addr <= SYSTEM_MANAGED_MAX) {
u64 offset = virtual_addr - SYSTEM_MANAGED_MIN;
host_addr = system_managed_base + offset;
} else if (virtual_addr >= SYSTEM_RESERVED_MIN && virtual_addr <= SYSTEM_RESERVED_MAX) {
u64 offset = virtual_addr - SYSTEM_RESERVED_MIN;
host_addr = system_reserved_base + offset;
} else if (virtual_addr >= USER_MIN && virtual_addr <= USER_MAX) {
u64 offset = virtual_addr - USER_MIN;
host_addr = user_base + offset;
} else {
LOG_CRITICAL(Kernel_Vmm, "Invalid virtual address for protection: {:#x}", virtual_addr);
return;
}
int ret = mprotect(host_addr, size, flags);
#else
int ret = mprotect(reinterpret_cast<void*>(virtual_addr), size, flags);
#endif
ASSERT_MSG(ret == 0, "mprotect failed: {}", strerror(errno));
}
@ -677,7 +788,7 @@ AddressSpace::~AddressSpace() = default;
void* AddressSpace::Map(VAddr virtual_addr, size_t size, u64 alignment, PAddr phys_addr,
bool is_exec) {
#if ARCH_X86_64
#if defined(ARCH_X86_64)
const auto prot = is_exec ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE;
#else
// On non-native architectures, we can simplify things by ignoring the execute flag for the
@ -747,4 +858,27 @@ boost::icl::interval_set<VAddr> AddressSpace::GetUsableRegions() {
#endif
}
void* AddressSpace::TranslateAddress(VAddr ps4_addr) const {
#if defined(ARCH_X86_64)
// On x86_64, PS4 addresses are directly mapped, so we can cast them
return reinterpret_cast<void*>(ps4_addr);
#elif defined(ARCH_ARM64) && defined(__APPLE__)
// On ARM64 macOS, translate PS4 virtual addresses to host addresses
if (ps4_addr >= SYSTEM_MANAGED_MIN && ps4_addr <= SYSTEM_MANAGED_MAX) {
u64 offset = ps4_addr - SYSTEM_MANAGED_MIN;
return system_managed_base + offset;
} else if (ps4_addr >= SYSTEM_RESERVED_MIN && ps4_addr <= SYSTEM_RESERVED_MAX) {
u64 offset = ps4_addr - SYSTEM_RESERVED_MIN;
return system_reserved_base + offset;
} else if (ps4_addr >= USER_MIN && ps4_addr <= USER_MAX) {
u64 offset = ps4_addr - USER_MIN;
return user_base + offset;
}
return nullptr;
#else
// Generic ARM64 or other platforms
return reinterpret_cast<void*>(ps4_addr);
#endif
}
} // namespace Core
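
To make the range-plus-offset scheme concrete, here is a standalone sketch of the arithmetic TranslateAddress performs for the system-managed range; the backing buffer stands in for the real mmap result, while SYSTEM_MANAGED_MIN matches the constant above:

#include <cstdint>
#include <cstdio>

int main() {
    const std::uint64_t SYSTEM_MANAGED_MIN = 0x400000ULL; // same constant as above
    std::uint8_t backing[4096];                           // stand-in for the mmap'd system_managed_base
    std::uint8_t* system_managed_base = backing;

    std::uint64_t ps4_addr = SYSTEM_MANAGED_MIN + 0x100;  // a guest address inside the managed range
    void* host = system_managed_base + (ps4_addr - SYSTEM_MANAGED_MIN); // TranslateAddress arithmetic

    std::printf("guest %#llx -> host %p (offset %#llx)\n",
                static_cast<unsigned long long>(ps4_addr), host,
                static_cast<unsigned long long>(ps4_addr - SYSTEM_MANAGED_MIN));
    return 0;
}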

View File

@ -88,6 +88,9 @@ public:
// Returns an interval set containing all usable regions.
boost::icl::interval_set<VAddr> GetUsableRegions();
// Translate PS4 virtual address to host address (for ARM64)
void* TranslateAddress(VAddr ps4_addr) const;
private:
struct Impl;
std::unique_ptr<Impl> impl;

View File

@ -0,0 +1,567 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <cstring>
#include <sys/mman.h>
#include "arm64_codegen.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/types.h"
#if defined(__APPLE__) && defined(ARCH_ARM64)
#include <pthread.h>
#endif
namespace Core::Jit {
static constexpr size_t PAGE_SIZE = 4096;
static constexpr size_t ALIGNMENT = 16;
static size_t alignUp(size_t value, size_t alignment) {
return (value + alignment - 1) & ~(alignment - 1);
}
static void* allocateExecutableMemory(size_t size) {
size = alignUp(size, PAGE_SIZE);
#if defined(__APPLE__) && defined(ARCH_ARM64)
// On macOS ARM64:
// 1. Allocate with PROT_READ | PROT_WRITE (no PROT_EXEC initially)
// 2. Use pthread_jit_write_protect_np to allow writing
// 3. After writing, use mprotect to add PROT_EXEC
void* ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (ptr == MAP_FAILED) {
LOG_CRITICAL(Core, "Failed to allocate executable memory: {} (errno={})", strerror(errno),
errno);
return nullptr;
}
// Initially disable write protection so we can write code
pthread_jit_write_protect_np(0);
return ptr;
#else
void* ptr =
mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (ptr == MAP_FAILED) {
LOG_CRITICAL(Core, "Failed to allocate executable memory: {}", strerror(errno));
return nullptr;
}
return ptr;
#endif
}
Arm64CodeGenerator::Arm64CodeGenerator(size_t buffer_size, void* code_ptr)
: buffer_size(alignUp(buffer_size, PAGE_SIZE)), owns_buffer(code_ptr == nullptr) {
if (code_ptr) {
code_buffer = code_ptr;
this->code_ptr = code_ptr;
} else {
code_buffer = allocateExecutableMemory(buffer_size);
this->code_ptr = code_buffer;
}
if (!code_buffer) {
throw std::bad_alloc();
}
}
Arm64CodeGenerator::~Arm64CodeGenerator() {
if (owns_buffer && code_buffer) {
munmap(code_buffer, buffer_size);
}
}
void Arm64CodeGenerator::reset() {
code_ptr = code_buffer;
fixups.clear();
}
void Arm64CodeGenerator::setSize(size_t offset) {
code_ptr = static_cast<u8*>(code_buffer) + offset;
}
void Arm64CodeGenerator::emit32(u32 instruction) {
#if defined(__APPLE__) && defined(ARCH_ARM64)
// On macOS ARM64, disable write protection before writing
pthread_jit_write_protect_np(0);
#endif
u8* curr = static_cast<u8*>(code_ptr);
u8* end = static_cast<u8*>(code_buffer) + buffer_size;
ASSERT_MSG(curr + 4 <= end, "Code buffer overflow");
*reinterpret_cast<u32*>(curr) = instruction;
code_ptr = curr + 4;
#if defined(__APPLE__) && defined(ARCH_ARM64)
// Re-enable write protection after writing
pthread_jit_write_protect_np(1);
#endif
}
void Arm64CodeGenerator::emit64(u64 instruction) {
emit32(static_cast<u32>(instruction));
emit32(static_cast<u32>(instruction >> 32));
}
void* Arm64CodeGenerator::allocateCode(size_t size) {
size = alignUp(size, ALIGNMENT);
void* result = code_ptr;
u8* curr = static_cast<u8*>(code_ptr);
u8* end = static_cast<u8*>(code_buffer) + buffer_size;
code_ptr = curr + size;
ASSERT_MSG(static_cast<u8*>(code_ptr) <= end, "Code buffer overflow");
return result;
}
void Arm64CodeGenerator::makeExecutable() {
size_t size = getSize();
size = alignUp(size, PAGE_SIZE);
#if defined(__APPLE__) && defined(ARCH_ARM64)
// On macOS ARM64, re-enable write protection before making executable
pthread_jit_write_protect_np(1);
// Flush instruction cache
__builtin___clear_cache(static_cast<char*>(code_buffer),
static_cast<char*>(code_buffer) + size);
#endif
if (mprotect(code_buffer, size, PROT_READ | PROT_EXEC) != 0) {
LOG_CRITICAL(Core, "Failed to make code executable: {}", strerror(errno));
}
}
// Memory operations
void Arm64CodeGenerator::ldr(int reg, void* addr) {
movz(9, reinterpret_cast<u64>(addr) & 0xFFFF);
movk(9, (reinterpret_cast<u64>(addr) >> 16) & 0xFFFF, 16);
movk(9, (reinterpret_cast<u64>(addr) >> 32) & 0xFFFF, 32);
movk(9, (reinterpret_cast<u64>(addr) >> 48) & 0xFFFF, 48);
ldr(reg, 9, 0);
}
void Arm64CodeGenerator::ldr(int reg, int base_reg, s32 offset) {
if (offset >= 0 && offset < 32768 && (offset % 8 == 0)) {
emit32(0xF9400000 | (reg << 0) | (base_reg << 5) | ((offset / 8) << 10));
} else {
mov_imm(9, offset);
add(9, base_reg, 9);
ldr(reg, 9, 0);
}
}
void Arm64CodeGenerator::ldrh(int reg, int base_reg, s32 offset) {
if (offset >= 0 && offset < 8192 && (offset % 2 == 0)) {
emit32(0x79400000 | (reg << 0) | (base_reg << 5) | ((offset / 2) << 12));
} else {
mov_imm(9, offset);
add(9, base_reg, 9);
ldrh(reg, 9, 0);
}
}
void Arm64CodeGenerator::ldrb(int reg, int base_reg, s32 offset) {
if (offset >= 0 && offset < 4096) {
emit32(0x39400000 | (reg << 0) | (base_reg << 5) | (offset << 12));
} else {
mov_imm(9, offset);
add(9, base_reg, 9);
ldrb(reg, 9, 0);
}
}
void Arm64CodeGenerator::ldp(int reg1, int reg2, int base_reg, s32 offset) {
if (offset >= -256 && offset < 256 && (offset % 8 == 0)) {
s32 scaled_offset = offset / 8;
u32 imm7 = (scaled_offset >= 0) ? scaled_offset : (64 + scaled_offset);
emit32(0xA9400000 | (reg1 << 0) | (reg2 << 10) | (base_reg << 5) | (imm7 << 15));
} else {
mov_imm(9, offset);
add(9, base_reg, 9);
ldp(reg1, reg2, 9, 0);
}
}
void Arm64CodeGenerator::str(int reg, void* addr) {
movz(9, reinterpret_cast<u64>(addr) & 0xFFFF);
movk(9, (reinterpret_cast<u64>(addr) >> 16) & 0xFFFF, 16);
movk(9, (reinterpret_cast<u64>(addr) >> 32) & 0xFFFF, 32);
movk(9, (reinterpret_cast<u64>(addr) >> 48) & 0xFFFF, 48);
str(reg, 9, 0);
}
void Arm64CodeGenerator::str(int reg, int base_reg, s32 offset) {
if (offset >= 0 && offset < 32768 && (offset % 8 == 0)) {
emit32(0xF9000000 | (reg << 0) | (base_reg << 5) | ((offset / 8) << 10));
} else {
mov_imm(9, offset);
add(9, base_reg, 9);
str(reg, 9, 0);
}
}
void Arm64CodeGenerator::strh(int reg, int base_reg, s32 offset) {
if (offset >= 0 && offset < 8192 && (offset % 2 == 0)) {
emit32(0x79000000 | (reg << 0) | (base_reg << 5) | ((offset / 2) << 12));
} else {
mov_imm(9, offset);
add(9, base_reg, 9);
strh(reg, 9, 0);
}
}
void Arm64CodeGenerator::strb(int reg, int base_reg, s32 offset) {
if (offset >= 0 && offset < 4096) {
emit32(0x39000000 | (reg << 0) | (base_reg << 5) | (offset << 12));
} else {
mov_imm(9, offset);
add(9, base_reg, 9);
strb(reg, 9, 0);
}
}
void Arm64CodeGenerator::stp(int reg1, int reg2, int base_reg, s32 offset) {
if (offset >= -256 && offset < 256 && (offset % 8 == 0)) {
s32 scaled_offset = offset / 8;
u32 imm7 = (scaled_offset >= 0) ? scaled_offset : (64 + scaled_offset);
emit32(0xA9000000 | (reg1 << 0) | (reg2 << 10) | (base_reg << 5) | (imm7 << 15));
} else {
mov_imm(9, offset);
add(9, base_reg, 9);
stp(reg1, reg2, 9, 0);
}
}
// Arithmetic operations
void Arm64CodeGenerator::add(int dst, int src1, int src2) {
emit32(0x8B000000 | (dst << 0) | (src1 << 5) | (src2 << 16));
}
void Arm64CodeGenerator::add(int dst, int src1, int src2, int shift) {
ASSERT_MSG(shift >= 0 && shift <= 3, "Invalid shift amount");
emit32(0x8B000000 | (dst << 0) | (src1 << 5) | (src2 << 16) | (shift << 12));
}
void Arm64CodeGenerator::add_imm(int dst, int src1, s32 imm) {
if (imm >= 0 && imm < 4096) {
emit32(0x91000000 | (dst << 0) | (src1 << 5) | (imm << 10));
} else if (imm < 0 && imm > -4096) {
sub_imm(dst, src1, -imm);
} else {
mov_imm(9, imm);
add(dst, src1, 9);
}
}
void Arm64CodeGenerator::sub(int dst, int src1, int src2) {
emit32(0xCB000000 | (dst << 0) | (src1 << 5) | (src2 << 16));
}
void Arm64CodeGenerator::sub_imm(int dst, int src1, s32 imm) {
if (imm >= 0 && imm < 4096) {
emit32(0xD1000000 | (dst << 0) | (src1 << 5) | (imm << 10));
} else if (imm < 0 && imm > -4096) {
add_imm(dst, src1, -imm);
} else {
mov_imm(9, imm);
sub(dst, src1, 9);
}
}
void Arm64CodeGenerator::mul(int dst, int src1, int src2) {
emit32(0x9B007C00 | (dst << 0) | (src1 << 5) | (src2 << 16));
}
void Arm64CodeGenerator::sdiv(int dst, int src1, int src2) {
emit32(0x9AC00C00 | (dst << 0) | (src1 << 5) | (src2 << 16));
}
void Arm64CodeGenerator::udiv(int dst, int src1, int src2) {
emit32(0x9AC00800 | (dst << 0) | (src1 << 5) | (src2 << 16));
}
void Arm64CodeGenerator::and_(int dst, int src1, int src2) {
emit32(0x8A000000 | (dst << 0) | (src1 << 5) | (src2 << 16));
}
void Arm64CodeGenerator::and_(int dst, int src1, u64 imm) {
if (imm <= 0xFFF) {
emit32(0x92000000 | (dst << 0) | (src1 << 5) | (static_cast<u32>(imm) << 10));
} else {
mov_imm(9, imm);
and_(dst, src1, 9);
}
}
void Arm64CodeGenerator::orr(int dst, int src1, int src2) {
emit32(0xAA000000 | (dst << 0) | (src1 << 5) | (src2 << 16));
}
void Arm64CodeGenerator::orr(int dst, int src1, u64 imm) {
if (imm <= 0xFFF) {
emit32(0xB2000000 | (dst << 0) | (src1 << 5) | (static_cast<u32>(imm) << 10));
} else {
mov_imm(9, imm);
orr(dst, src1, 9);
}
}
void Arm64CodeGenerator::eor(int dst, int src1, int src2) {
emit32(0xCA000000 | (dst << 0) | (src1 << 5) | (src2 << 16));
}
void Arm64CodeGenerator::eor(int dst, int src1, u64 imm) {
if (imm <= 0xFFF) {
emit32(0xD2000000 | (dst << 0) | (src1 << 5) | (static_cast<u32>(imm) << 10));
} else {
mov_imm(9, imm);
eor(dst, src1, 9);
}
}
void Arm64CodeGenerator::mvn(int dst, int src) {
emit32(0xAA200000 | (dst << 0) | (src << 16));
}
void Arm64CodeGenerator::lsl(int dst, int src1, int src2) {
emit32(0x9AC02000 | (dst << 0) | (src1 << 5) | (src2 << 16));
}
void Arm64CodeGenerator::lsl(int dst, int src1, u8 shift) {
ASSERT_MSG(shift < 64, "Shift amount must be < 64");
emit32(0xD3400000 | (dst << 0) | (src1 << 5) | (shift << 10));
}
void Arm64CodeGenerator::lsr(int dst, int src1, int src2) {
emit32(0x9AC02400 | (dst << 0) | (src1 << 5) | (src2 << 16));
}
void Arm64CodeGenerator::lsr(int dst, int src1, u8 shift) {
ASSERT_MSG(shift < 64, "Shift amount must be < 64");
emit32(0xD3500000 | (dst << 0) | (src1 << 5) | (shift << 10));
}
void Arm64CodeGenerator::asr(int dst, int src1, int src2) {
emit32(0x9AC02800 | (dst << 0) | (src1 << 5) | (src2 << 16));
}
void Arm64CodeGenerator::asr(int dst, int src1, u8 shift) {
ASSERT_MSG(shift < 64, "Shift amount must be < 64");
emit32(0xD3600000 | (dst << 0) | (src1 << 5) | (shift << 10));
}
// Move operations
void Arm64CodeGenerator::mov(int dst, int src) {
if (dst != src) {
emit32(0xAA0003E0 | (dst << 0) | (src << 16));
}
}
void Arm64CodeGenerator::mov_imm(int dst, s64 imm) {
if (imm >= 0 && imm <= 0xFFFF) {
movz(dst, static_cast<u16>(imm));
} else if (imm >= -0x10000 && imm < 0) {
movn(dst, static_cast<u16>(-imm - 1));
} else {
movz(dst, imm & 0xFFFF);
if ((imm >> 16) & 0xFFFF) {
movk(dst, (imm >> 16) & 0xFFFF, 16);
}
if ((imm >> 32) & 0xFFFF) {
movk(dst, (imm >> 32) & 0xFFFF, 32);
}
if ((imm >> 48) & 0xFFFF) {
movk(dst, (imm >> 48) & 0xFFFF, 48);
}
}
}
void Arm64CodeGenerator::movz(int dst, u16 imm, u8 shift) {
ASSERT_MSG(shift % 16 == 0 && shift < 64, "Shift must be multiple of 16 and < 64");
emit32(0xD2800000 | (dst << 0) | (imm << 5) | ((shift / 16) << 21));
}
void Arm64CodeGenerator::movk(int dst, u16 imm, u8 shift) {
ASSERT_MSG(shift % 16 == 0 && shift < 64, "Shift must be multiple of 16 and < 64");
emit32(0xF2800000 | (dst << 0) | (imm << 5) | ((shift / 16) << 21));
}
void Arm64CodeGenerator::movn(int dst, u16 imm, u8 shift) {
ASSERT_MSG(shift % 16 == 0 && shift < 64, "Shift must be multiple of 16 and < 64");
emit32(0x92800000 | (dst << 0) | (imm << 5) | ((shift / 16) << 21));
}
// Compare operations
void Arm64CodeGenerator::cmp(int reg1, int reg2) {
emit32(0xEB000000 | (31 << 0) | (reg1 << 5) | (reg2 << 16));
}
void Arm64CodeGenerator::cmp_imm(int reg, s32 imm) {
if (imm >= 0 && imm < 4096) {
emit32(0xF1000000 | (31 << 0) | (reg << 5) | (imm << 10));
} else {
mov_imm(9, imm);
cmp(reg, 9);
}
}
void Arm64CodeGenerator::tst(int reg1, int reg2) {
emit32(0xEA000000 | (31 << 0) | (reg1 << 5) | (reg2 << 16));
}
void Arm64CodeGenerator::tst(int reg, u64 imm) {
if (imm <= 0xFFF) {
emit32(0xF2000000 | (31 << 0) | (reg << 5) | (static_cast<u32>(imm) << 10));
} else {
mov_imm(9, static_cast<s64>(imm));
tst(reg, 9);
}
}
// Branch operations
void Arm64CodeGenerator::b(void* target) {
s64 offset = reinterpret_cast<s64>(target) - reinterpret_cast<s64>(code_ptr);
if (offset >= -0x8000000 && offset < 0x8000000) {
s32 imm26 = static_cast<s32>(offset / 4);
emit32(0x14000000 | (imm26 & 0x3FFFFFF));
} else {
movz(9, reinterpret_cast<u64>(target) & 0xFFFF);
movk(9, (reinterpret_cast<u64>(target) >> 16) & 0xFFFF, 16);
movk(9, (reinterpret_cast<u64>(target) >> 32) & 0xFFFF, 32);
movk(9, (reinterpret_cast<u64>(target) >> 48) & 0xFFFF, 48);
br(9);
}
}
void Arm64CodeGenerator::b(int condition, void* target) {
s64 offset = reinterpret_cast<s64>(target) - reinterpret_cast<s64>(code_ptr);
if (offset >= -0x8000000 && offset < 0x8000000) {
s32 imm19 = static_cast<s32>(offset / 4);
emit32(0x54000000 | (condition << 0) | (imm19 << 5));
} else {
movz(9, reinterpret_cast<u64>(target) & 0xFFFF);
movk(9, (reinterpret_cast<u64>(target) >> 16) & 0xFFFF, 16);
movk(9, (reinterpret_cast<u64>(target) >> 32) & 0xFFFF, 32);
movk(9, (reinterpret_cast<u64>(target) >> 48) & 0xFFFF, 48);
emit32(0x54000000 | (condition << 0) | (0 << 5));
br(9);
}
}
void Arm64CodeGenerator::bl(void* target) {
s64 offset = reinterpret_cast<s64>(target) - reinterpret_cast<s64>(code_ptr);
if (offset >= -0x8000000 && offset < 0x8000000) {
s32 imm26 = static_cast<s32>(offset / 4);
emit32(0x94000000 | (imm26 & 0x3FFFFFF));
} else {
movz(9, reinterpret_cast<u64>(target) & 0xFFFF);
movk(9, (reinterpret_cast<u64>(target) >> 16) & 0xFFFF, 16);
movk(9, (reinterpret_cast<u64>(target) >> 32) & 0xFFFF, 32);
movk(9, (reinterpret_cast<u64>(target) >> 48) & 0xFFFF, 48);
blr(9);
}
}
void Arm64CodeGenerator::br(int reg) {
emit32(0xD61F0000 | (reg << 5));
}
void Arm64CodeGenerator::blr(int reg) {
emit32(0xD63F0000 | (reg << 5));
}
void Arm64CodeGenerator::ret(int reg) {
emit32(0xD65F0000 | (reg << 5));
}
// Conditional branches
void Arm64CodeGenerator::b_eq(void* target) {
b(0, target);
}
void Arm64CodeGenerator::b_ne(void* target) {
b(1, target);
}
void Arm64CodeGenerator::b_lt(void* target) {
b(11, target);
}
void Arm64CodeGenerator::b_le(void* target) {
b(13, target);
}
void Arm64CodeGenerator::b_gt(void* target) {
b(12, target);
}
void Arm64CodeGenerator::b_ge(void* target) {
b(10, target);
}
void Arm64CodeGenerator::b_lo(void* target) {
b(3, target);
}
void Arm64CodeGenerator::b_ls(void* target) {
b(9, target);
}
void Arm64CodeGenerator::b_hi(void* target) {
b(8, target);
}
void Arm64CodeGenerator::b_hs(void* target) {
b(2, target);
}
// Stack operations
void Arm64CodeGenerator::push(int reg) {
sub(31, 31, 16);
str(reg, 31, 0);
}
void Arm64CodeGenerator::push(int reg1, int reg2) {
sub(31, 31, 16);
stp(reg1, reg2, 31, 0);
}
void Arm64CodeGenerator::pop(int reg) {
ldr(reg, 31, 0);
add(31, 31, 16);
}
void Arm64CodeGenerator::pop(int reg1, int reg2) {
ldp(reg1, reg2, 31, 0);
add(31, 31, 16);
}
// System operations
void Arm64CodeGenerator::nop() {
emit32(0xD503201F);
}
void Arm64CodeGenerator::brk(u16 imm) {
emit32(0xD4200000 | (imm << 5));
}
// NEON/SIMD operations
void Arm64CodeGenerator::ldr_v(int vreg, int base_reg, s32 offset) {
if (offset >= 0 && offset < 4096 && (offset % 16 == 0)) {
emit32(0x3DC00000 | (vreg << 0) | (base_reg << 5) | ((offset / 16) << 12));
} else {
mov_imm(9, offset);
add(9, base_reg, 9);
ldr_v(vreg, 9, 0);
}
}
void Arm64CodeGenerator::str_v(int vreg, int base_reg, s32 offset) {
if (offset >= 0 && offset < 4096 && (offset % 16 == 0)) {
emit32(0x3D800000 | (vreg << 0) | (base_reg << 5) | ((offset / 16) << 12));
} else {
mov_imm(9, offset);
add(9, base_reg, 9);
str_v(vreg, 9, 0);
}
}
void Arm64CodeGenerator::mov_v(int vdst, int vsrc) {
emit32(0x4EA01C00 | (vdst << 0) | (vsrc << 5));
}
void Arm64CodeGenerator::add_v(int vdst, int vsrc1, int vsrc2) {
emit32(0x4E208400 | (vdst << 0) | (vsrc1 << 5) | (vsrc2 << 16));
}
void Arm64CodeGenerator::sub_v(int vdst, int vsrc1, int vsrc2) {
emit32(0x4EA08400 | (vdst << 0) | (vsrc1 << 5) | (vsrc2 << 16));
}
void Arm64CodeGenerator::mul_v(int vdst, int vsrc1, int vsrc2) {
emit32(0x4E209C00 | (vdst << 0) | (vsrc1 << 5) | (vsrc2 << 16));
}
} // namespace Core::Jit
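
A minimal sketch of driving the generator end to end, assuming the core/jit include path added to CMakeLists above and an ARM64 host (on other hosts the emitted bytes can be inspected but not called):

#include <cstdint>
#include "core/jit/arm64_codegen.h"

int main() {
    Core::Jit::Arm64CodeGenerator gen(4096);
    gen.mov_imm(0, 42);   // movz x0, #42; x0 carries the return value in the AArch64 ABI
    gen.ret();            // ret x30
    gen.makeExecutable(); // flip the buffer to read+execute and sync the instruction cache

    auto fn = reinterpret_cast<std::uint64_t (*)()>(gen.getCode());
    return static_cast<int>(fn()); // 42 when actually executed on an ARM64 host
}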

View File

@ -0,0 +1,132 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <vector>
#include "common/types.h"
namespace Core::Jit {
class Arm64CodeGenerator {
public:
explicit Arm64CodeGenerator(size_t buffer_size = 64_KB, void* code_ptr = nullptr);
~Arm64CodeGenerator();
Arm64CodeGenerator(const Arm64CodeGenerator&) = delete;
Arm64CodeGenerator& operator=(const Arm64CodeGenerator&) = delete;
void* getCode() const {
return code_buffer;
}
void* getCurr() const {
return code_ptr;
}
size_t getSize() const {
return static_cast<u8*>(code_ptr) - static_cast<u8*>(code_buffer);
}
void reset();
void setSize(size_t offset);
// Memory operations
void ldr(int reg, void* addr);
void ldr(int reg, int base_reg, s32 offset = 0);
void ldrh(int reg, int base_reg, s32 offset = 0);
void ldrb(int reg, int base_reg, s32 offset = 0);
void ldp(int reg1, int reg2, int base_reg, s32 offset = 0);
void str(int reg, void* addr);
void str(int reg, int base_reg, s32 offset = 0);
void strh(int reg, int base_reg, s32 offset = 0);
void strb(int reg, int base_reg, s32 offset = 0);
void stp(int reg1, int reg2, int base_reg, s32 offset = 0);
// Arithmetic operations
void add(int dst, int src1, int src2);
void add(int dst, int src1, int src2, int shift);
void add_imm(int dst, int src1, s32 imm);
void sub(int dst, int src1, int src2);
void sub_imm(int dst, int src1, s32 imm);
void mul(int dst, int src1, int src2);
void sdiv(int dst, int src1, int src2);
void udiv(int dst, int src1, int src2);
void and_(int dst, int src1, int src2);
void and_(int dst, int src1, u64 imm);
void orr(int dst, int src1, int src2);
void orr(int dst, int src1, u64 imm);
void eor(int dst, int src1, int src2);
void eor(int dst, int src1, u64 imm);
void mvn(int dst, int src);
void lsl(int dst, int src1, int src2);
void lsl(int dst, int src1, u8 shift);
void lsr(int dst, int src1, int src2);
void lsr(int dst, int src1, u8 shift);
void asr(int dst, int src1, int src2);
void asr(int dst, int src1, u8 shift);
// Move operations
void mov(int dst, int src);
void mov_imm(int dst, s64 imm);
void movz(int dst, u16 imm, u8 shift = 0);
void movk(int dst, u16 imm, u8 shift = 0);
void movn(int dst, u16 imm, u8 shift = 0);
// Compare operations
void cmp(int reg1, int reg2);
void cmp_imm(int reg, s32 imm);
void tst(int reg1, int reg2);
void tst(int reg, u64 imm);
// Branch operations
void b(void* target);
void b(int condition, void* target);
void bl(void* target);
void br(int reg);
void blr(int reg);
void ret(int reg = 30); // X30 is LR by default
// Conditional branches
void b_eq(void* target);
void b_ne(void* target);
void b_lt(void* target);
void b_le(void* target);
void b_gt(void* target);
void b_ge(void* target);
void b_lo(void* target); // unsigned lower
void b_ls(void* target); // unsigned lower or same
void b_hi(void* target); // unsigned higher
void b_hs(void* target); // unsigned higher or same
// Stack operations
void push(int reg);
void push(int reg1, int reg2);
void pop(int reg);
void pop(int reg1, int reg2);
// System operations
void nop();
void brk(u16 imm = 0);
// NEON/SIMD operations (for XMM registers)
void ldr_v(int vreg, int base_reg, s32 offset = 0);
void str_v(int vreg, int base_reg, s32 offset = 0);
void mov_v(int vdst, int vsrc);
void add_v(int vdst, int vsrc1, int vsrc2);
void sub_v(int vdst, int vsrc1, int vsrc2);
void mul_v(int vdst, int vsrc1, int vsrc2);
void makeExecutable();
private:
void emit32(u32 instruction);
void emit64(u64 instruction);
void* allocateCode(size_t size);
void* code_buffer;
void* code_ptr;
size_t buffer_size;
bool owns_buffer;
std::vector<std::pair<void*, void*>> fixups; // (fixup_location, target_address)
};
} // namespace Core::Jit

View File

@ -0,0 +1,126 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "block_manager.h"
#include "common/logging/log.h"
namespace Core::Jit {
BlockManager::BlockManager() = default;
BlockManager::~BlockManager() {
Clear();
}
CodeBlock* BlockManager::GetBlock(VAddr ps4_address) {
std::lock_guard<std::mutex> lock(mutex);
auto it = blocks.find(ps4_address);
if (it != blocks.end()) {
return it->second.get();
}
return nullptr;
}
CodeBlock* BlockManager::CreateBlock(VAddr ps4_address, void* arm64_code, size_t code_size,
size_t instruction_count) {
std::lock_guard<std::mutex> lock(mutex);
auto block = std::make_unique<CodeBlock>(ps4_address, arm64_code, code_size, instruction_count);
CodeBlock* result = block.get();
blocks[ps4_address] = std::move(block);
LOG_DEBUG(Core, "Created code block at PS4 address {:#x}, ARM64 code: {}, size: {}",
ps4_address, arm64_code, code_size);
return result;
}
void BlockManager::InvalidateBlock(VAddr ps4_address) {
std::lock_guard<std::mutex> lock(mutex);
// Delink all links pointing to this block
auto lower = block_links.lower_bound({ps4_address, nullptr});
auto upper = block_links.upper_bound(
{ps4_address, reinterpret_cast<ExitFunctionLinkData*>(UINTPTR_MAX)});
for (auto it = lower; it != upper;) {
it->second(it->first.host_link);
it = block_links.erase(it);
}
blocks.erase(ps4_address);
LOG_DEBUG(Core, "Invalidated code block at PS4 address {:#x}", ps4_address);
}
void BlockManager::InvalidateRange(VAddr start, VAddr end) {
std::lock_guard<std::mutex> lock(mutex);
// Delink all links pointing to blocks in this range
auto link_it = block_links.begin();
while (link_it != block_links.end()) {
if (link_it->first.guest_destination >= start && link_it->first.guest_destination < end) {
link_it->second(link_it->first.host_link);
link_it = block_links.erase(link_it);
} else {
++link_it;
}
}
auto it = blocks.begin();
while (it != blocks.end()) {
VAddr block_addr = it->first;
if (block_addr >= start && block_addr < end) {
it = blocks.erase(it);
} else {
auto& deps = it->second->dependencies;
bool has_dependency_in_range = false;
for (VAddr dep : deps) {
if (dep >= start && dep < end) {
has_dependency_in_range = true;
break;
}
}
if (has_dependency_in_range) {
it = blocks.erase(it);
} else {
++it;
}
}
}
LOG_DEBUG(Core, "Invalidated code blocks in range {:#x} - {:#x}", start, end);
}
void BlockManager::AddDependency(VAddr block_address, VAddr dependency) {
std::lock_guard<std::mutex> lock(mutex);
auto it = blocks.find(block_address);
if (it != blocks.end()) {
it->second->dependencies.insert(dependency);
}
}
void BlockManager::AddBlockLink(VAddr guest_dest, ExitFunctionLinkData* link_data,
BlockDelinkerFunc delinker) {
std::lock_guard<std::mutex> lock(mutex);
block_links[{guest_dest, link_data}] = delinker;
}
void BlockManager::Clear() {
std::lock_guard<std::mutex> lock(mutex);
// Delink all links before clearing
for (auto& [tag, delinker] : block_links) {
delinker(tag.host_link);
}
block_links.clear();
blocks.clear();
}
size_t BlockManager::GetTotalCodeSize() const {
std::lock_guard<std::mutex> lock(mutex);
size_t total = 0;
for (const auto& [addr, block] : blocks) {
total += block->code_size;
}
return total;
}
} // namespace Core::Jit
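
A rough usage sketch for this cache; the guest addresses and the fake code buffer are placeholders, since in the emulator the code pointer comes from the ARM64 code generator:

#include <cstdio>
#include "core/jit/block_manager.h"

int main() {
    Core::Jit::BlockManager mgr;

    static unsigned char fake_code[12] = {}; // stand-in for 3 translated instructions
    mgr.CreateBlock(0x400000, fake_code, sizeof(fake_code), 3);

    if (auto* block = mgr.GetBlock(0x400000)) {
        std::printf("cached block: %zu bytes, %zu instructions\n",
                    block->code_size, block->instruction_count);
    }

    // Invalidate everything overlapping one guest page, delinking blocks that branch into it.
    mgr.InvalidateRange(0x400000, 0x401000);
    std::printf("blocks remaining: %zu\n", mgr.GetBlockCount());
    return 0;
}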

View File

@ -0,0 +1,85 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <atomic>
#include <functional>
#include <map>
#include <memory>
#include <mutex>
#include <set>
#include <unordered_map>
#include "common/types.h"
namespace Core::Jit {
struct ExitFunctionLinkData {
void* host_code;
VAddr guest_rip;
void* caller_address;
u32 original_instruction;
};
using BlockDelinkerFunc = std::function<void(ExitFunctionLinkData*)>;
struct BlockLinkTag {
VAddr guest_destination;
ExitFunctionLinkData* host_link;
bool operator<(const BlockLinkTag& other) const {
if (guest_destination < other.guest_destination) {
return true;
} else if (guest_destination == other.guest_destination) {
return host_link < other.host_link;
} else {
return false;
}
}
};
struct CodeBlock {
VAddr ps4_address;
void* arm64_code;
size_t code_size;
size_t instruction_count;
std::set<VAddr> dependencies;
bool is_linked;
// Control flow targets for linking
VAddr fallthrough_target; // Next sequential address (if block doesn't end with branch)
VAddr branch_target; // Direct branch target (JMP)
void* branch_patch_location; // Location in ARM64 code to patch for direct branch
CodeBlock(VAddr addr, void* code, size_t size, size_t count)
: ps4_address(addr), arm64_code(code), code_size(size), instruction_count(count),
is_linked(false), fallthrough_target(0), branch_target(0),
branch_patch_location(nullptr) {}
};
class BlockManager {
public:
BlockManager();
~BlockManager();
CodeBlock* GetBlock(VAddr ps4_address);
CodeBlock* CreateBlock(VAddr ps4_address, void* arm64_code, size_t code_size,
size_t instruction_count);
void InvalidateBlock(VAddr ps4_address);
void InvalidateRange(VAddr start, VAddr end);
void AddDependency(VAddr block_address, VAddr dependency);
void AddBlockLink(VAddr guest_dest, ExitFunctionLinkData* link_data,
BlockDelinkerFunc delinker);
void Clear();
size_t GetBlockCount() const {
return blocks.size();
}
size_t GetTotalCodeSize() const;
std::unordered_map<VAddr, std::unique_ptr<CodeBlock>> blocks;
std::map<BlockLinkTag, BlockDelinkerFunc> block_links;
mutable std::mutex mutex;
};
} // namespace Core::Jit

View File

@ -0,0 +1,63 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "calling_convention.h"
#include "common/assert.h"
namespace Core::Jit {
CallingConvention::CallingConvention(Arm64CodeGenerator& codegen, RegisterMapper& reg_mapper)
: codegen(codegen), reg_mapper(reg_mapper) {}
void CallingConvention::PrepareCall(int arg_count, const std::vector<int>& arg_regs) {
ASSERT_MSG(arg_count <= MAX_INT_ARGS, "Too many arguments");
ASSERT_MSG(arg_regs.size() >= static_cast<size_t>(arg_count), "Not enough argument registers");
for (int i = 0; i < arg_count && i < MAX_INT_ARGS; i++) {
int arm64_arg_reg = i;
int x86_arg_reg = arg_regs[i];
int mapped_reg = reg_mapper.MapX86_64ToArm64(static_cast<X86_64Register>(x86_arg_reg));
if (mapped_reg != arm64_arg_reg) {
codegen.mov(arm64_arg_reg, mapped_reg);
}
}
}
void CallingConvention::CallFunction(void* function_ptr) {
codegen.movz(16, reinterpret_cast<u64>(function_ptr) & 0xFFFF);
codegen.movk(16, (reinterpret_cast<u64>(function_ptr) >> 16) & 0xFFFF, 16);
codegen.movk(16, (reinterpret_cast<u64>(function_ptr) >> 32) & 0xFFFF, 32);
codegen.movk(16, (reinterpret_cast<u64>(function_ptr) >> 48) & 0xFFFF, 48);
codegen.blr(16);
}
void CallingConvention::CallFunction(int reg) {
codegen.blr(reg);
}
void CallingConvention::Return(int return_reg) {
if (return_reg >= 0) {
int arm64_return = reg_mapper.MapX86_64ToArm64(X86_64Register::RAX);
if (return_reg != arm64_return) {
codegen.mov(arm64_return, return_reg);
}
}
codegen.ret();
}
void CallingConvention::SaveCallerSavedRegisters() {
saved_registers.clear();
for (int i = 0; i < 8; i++) {
codegen.push(i);
saved_registers.push_back(i);
}
}
void CallingConvention::RestoreCallerSavedRegisters() {
for (auto it = saved_registers.rbegin(); it != saved_registers.rend(); ++it) {
codegen.pop(*it);
}
saved_registers.clear();
}
} // namespace Core::Jit
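
A sketch of how a translator might lean on this helper when bridging a guest call out to a host (HLE) function. The X86_64Register enumerators passed to PrepareCall are guesses, since register_mapping.h is not shown in this section:

#include "core/jit/arm64_codegen.h"
#include "core/jit/calling_convention.h"
#include "core/jit/register_mapping.h"

namespace Core::Jit {

// Emit a call to host_fn whose two integer arguments live in the guest's RDI/RSI.
void EmitHleCall(Arm64CodeGenerator& gen, RegisterMapper& mapper, void* host_fn) {
    CallingConvention cc(gen, mapper);
    cc.SaveCallerSavedRegisters();  // push x0..x7 around the host call
    cc.PrepareCall(2, {static_cast<int>(X86_64Register::RDI),  // enumerator names are assumptions
                       static_cast<int>(X86_64Register::RSI)});
    cc.CallFunction(host_fn);       // materialize host_fn in x16, then blr x16
    cc.RestoreCallerSavedRegisters();
}

} // namespace Core::Jit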

View File

@ -0,0 +1,33 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <vector>
#include "arm64_codegen.h"
#include "register_mapping.h"
namespace Core::Jit {
class CallingConvention {
public:
explicit CallingConvention(Arm64CodeGenerator& codegen, RegisterMapper& reg_mapper);
void PrepareCall(int arg_count, const std::vector<int>& arg_regs);
void CallFunction(void* function_ptr);
void CallFunction(int reg);
void Return(int return_reg = -1);
void SaveCallerSavedRegisters();
void RestoreCallerSavedRegisters();
static constexpr int MAX_INT_ARGS = 8;
static constexpr int MAX_FLOAT_ARGS = 8;
private:
Arm64CodeGenerator& codegen;
RegisterMapper& reg_mapper;
std::vector<int> saved_registers;
};
} // namespace Core::Jit

View File

@ -0,0 +1,391 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <cstring>
#include <sys/mman.h>
#include "common/decoder.h"
#include "common/logging/log.h"
#include "core/memory.h"
#include "execution_engine.h"
#if defined(__APPLE__) && defined(ARCH_ARM64)
#include <pthread.h>
#endif
namespace Core::Jit {
static size_t alignUp(size_t value, size_t alignment) {
return (value + alignment - 1) & ~(alignment - 1);
}
static void* AllocateExecutableMemory(size_t size) {
size = alignUp(size, 4096);
#if defined(__APPLE__) && defined(ARCH_ARM64)
// On macOS ARM64:
// 1. Allocate with PROT_READ | PROT_WRITE (no PROT_EXEC initially)
// 2. Use pthread_jit_write_protect_np to allow writing
// 3. After writing, use mprotect to add PROT_EXEC
void* ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (ptr == MAP_FAILED) {
LOG_CRITICAL(Core, "Failed to allocate executable memory: {} (errno={})", strerror(errno),
errno);
return nullptr;
}
// Initially disable write protection so we can write code
pthread_jit_write_protect_np(0);
return ptr;
#else
void* ptr =
mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (ptr == MAP_FAILED) {
LOG_CRITICAL(Core, "Failed to allocate executable memory: {}", strerror(errno));
return nullptr;
}
return ptr;
#endif
}
ExecutionEngine::ExecutionEngine()
: code_buffer(nullptr), code_buffer_size(DEFAULT_CODE_BUFFER_SIZE), code_buffer_used(0) {
block_manager = std::make_unique<BlockManager>();
register_mapper = std::make_unique<RegisterMapper>();
}
ExecutionEngine::~ExecutionEngine() {
Shutdown();
}
void ExecutionEngine::Initialize() {
if (IsInitialized()) {
LOG_DEBUG(Core, "JIT Execution Engine already initialized");
return;
}
code_buffer = AllocateExecutableMemory(code_buffer_size);
if (!code_buffer) {
throw std::bad_alloc();
}
code_generator = std::make_unique<Arm64CodeGenerator>(code_buffer_size, code_buffer);
translator = std::make_unique<X86_64Translator>(*code_generator, *register_mapper);
LOG_INFO(Core, "JIT Execution Engine initialized");
}
void ExecutionEngine::Shutdown() {
if (code_buffer) {
#if defined(__APPLE__) && defined(ARCH_ARM64)
// On macOS ARM64, ensure write protection is enabled before unmapping
pthread_jit_write_protect_np(1);
#endif
munmap(code_buffer, code_buffer_size);
code_buffer = nullptr;
}
code_generator.reset();
translator.reset();
block_manager.reset();
register_mapper.reset();
}
void* ExecutionEngine::AllocateCodeBuffer(size_t size) {
size = (size + 15) & ~15;
if (code_buffer_used + size > code_buffer_size) {
LOG_WARNING(Core, "Code buffer exhausted, need to allocate more");
return nullptr;
}
void* result = static_cast<u8*>(code_buffer) + code_buffer_used;
code_buffer_used += size;
return result;
}
CodeBlock* ExecutionEngine::TranslateBasicBlock(VAddr start_address, size_t max_instructions) {
auto* memory = Core::Memory::Instance();
auto& address_space = memory->GetAddressSpace();
void* ps4_code_ptr = address_space.TranslateAddress(start_address);
if (!ps4_code_ptr) {
LOG_ERROR(Core, "Invalid PS4 address for translation: {:#x}", start_address);
return nullptr;
}
code_generator->reset();
void* block_start = code_generator->getCurr();
VAddr current_address = start_address;
size_t instruction_count = 0;
bool block_end = false;
VAddr fallthrough_target = 0;
VAddr branch_target = 0;
void* branch_patch_location = nullptr;
while (instruction_count < max_instructions && !block_end) {
ZydisDecodedInstruction instruction;
ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];
void* code_ptr = address_space.TranslateAddress(current_address);
if (!code_ptr) {
break;
}
ZyanStatus status =
Common::Decoder::Instance()->decodeInstruction(instruction, operands, code_ptr, 15);
if (!ZYAN_SUCCESS(status)) {
LOG_WARNING(Core, "Failed to decode instruction at {:#x}", current_address);
break;
}
// Track branch/call target before translation
if (instruction.mnemonic == ZYDIS_MNEMONIC_JMP &&
operands[0].type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
s64 offset = static_cast<s64>(operands[0].imm.value.s);
branch_target = current_address + instruction.length + offset;
branch_patch_location = code_generator->getCurr();
} else if (instruction.mnemonic == ZYDIS_MNEMONIC_CALL &&
operands[0].type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
// Track CALL target for potential linking (though CALL typically goes to HLE)
s64 offset = static_cast<s64>(operands[0].imm.value.s);
branch_target = current_address + instruction.length + offset;
branch_patch_location = code_generator->getCurr();
}
bool translated = translator->TranslateInstruction(instruction, operands, current_address);
if (!translated) {
LOG_WARNING(Core, "Failed to translate instruction at {:#x}", current_address);
break;
}
instruction_count++;
VAddr next_address = current_address + instruction.length;
switch (instruction.mnemonic) {
case ZYDIS_MNEMONIC_RET:
case ZYDIS_MNEMONIC_CALL:
block_end = true;
break;
case ZYDIS_MNEMONIC_JMP:
block_end = true;
break;
default:
// Check for conditional branches (they don't end the block, but we track them)
if (instruction.mnemonic >= ZYDIS_MNEMONIC_JO &&
instruction.mnemonic <= ZYDIS_MNEMONIC_JZ) {
// Conditional branch - block continues with fallthrough
// TODO: Track conditional branch targets for linking
}
break;
}
current_address = next_address;
}
if (instruction_count == 0) {
return nullptr;
}
// Set fallthrough target if block doesn't end with unconditional branch/ret
if (!block_end || branch_target == 0) {
fallthrough_target = current_address;
}
size_t code_size = code_generator->getSize();
code_generator->makeExecutable();
CodeBlock* block =
block_manager->CreateBlock(start_address, block_start, code_size, instruction_count);
// Store control flow information
block->fallthrough_target = fallthrough_target;
block->branch_target = branch_target;
block->branch_patch_location = branch_patch_location;
LOG_DEBUG(Core,
"Translated basic block at {:#x}, {} instructions, {} bytes, fallthrough: {:#x}, "
"branch: {:#x}",
start_address, instruction_count, code_size, fallthrough_target, branch_target);
// Try to link blocks if targets are available
if (branch_target != 0) {
CodeBlock* target_block = block_manager->GetBlock(branch_target);
if (target_block) {
LinkBlock(block, branch_target);
} else {
// Add dependency for later linking
block_manager->AddDependency(start_address, branch_target);
}
}
if (fallthrough_target != 0 && branch_target == 0) {
// Try to link fallthrough
CodeBlock* target_block = block_manager->GetBlock(fallthrough_target);
if (target_block) {
// For fallthrough, we need to append a branch at the end
// This will be handled by linking logic
block_manager->AddDependency(start_address, fallthrough_target);
}
}
return block;
}
CodeBlock* ExecutionEngine::TranslateBlock(VAddr ps4_address) {
CodeBlock* existing = block_manager->GetBlock(ps4_address);
if (existing) {
return existing;
}
CodeBlock* new_block = TranslateBasicBlock(ps4_address);
if (!new_block) {
return nullptr;
}
// After creating a new block, check if any existing blocks can link to it
// This handles the case where we translate a target block after the source
for (auto& [addr, block] : block_manager->blocks) {
if (block->branch_target == ps4_address && !block->is_linked) {
LinkBlock(block.get(), ps4_address);
}
if (block->fallthrough_target == ps4_address && block->branch_target == 0 &&
!block->is_linked) {
LinkBlock(block.get(), ps4_address);
}
}
return new_block;
}
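// Delinker sketch: restores the original (unlinked) instruction at the patched site so the
// caller drops back to the dispatcher once the linked target block goes away.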
static void DirectBlockDelinker(ExitFunctionLinkData* record, bool is_call) {
void* caller_addr = record->caller_address;
u32 original_inst = record->original_instruction;
std::atomic_ref<u32>(*reinterpret_cast<u32*>(caller_addr))
.store(original_inst, std::memory_order::relaxed);
#if defined(__APPLE__) && defined(ARCH_ARM64)
__builtin___clear_cache(static_cast<char*>(caller_addr), static_cast<char*>(caller_addr) + 4);
#endif
delete record;
}
void ExecutionEngine::LinkBlock(CodeBlock* block, VAddr target_address) {
CodeBlock* target_block = block_manager->GetBlock(target_address);
if (!target_block) {
return;
}
// Patch the branch instruction if we have a patch location
if (block->branch_patch_location && block->branch_target == target_address) {
#if defined(__APPLE__) && defined(ARCH_ARM64)
pthread_jit_write_protect_np(0);
#endif
void* caller_address = block->branch_patch_location;
s64 offset =
reinterpret_cast<s64>(target_block->arm64_code) - reinterpret_cast<s64>(caller_address);
// Check if we can use a relative branch (within ±128MB)
if (offset >= -0x8000000 && offset < 0x8000000) {
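// ARM64 unconditional B: opcode bits 0x14000000 plus a signed 26-bit word offset (imm26).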
s32 imm26 = static_cast<s32>(offset / 4);
u32* patch_ptr = reinterpret_cast<u32*>(caller_address);
u32 branch_inst = 0x14000000 | (imm26 & 0x3FFFFFF);
u32 original_inst = *patch_ptr;
std::atomic_ref<u32>(*patch_ptr).store(branch_inst, std::memory_order::relaxed);
// Register delinker
ExitFunctionLinkData* link_data = new ExitFunctionLinkData{
target_block->arm64_code, target_address, caller_address, original_inst};
block_manager->AddBlockLink(target_address, link_data, [](ExitFunctionLinkData* r) {
DirectBlockDelinker(r, false);
});
} else {
// Far branch - need to use indirect branch via thunk
LOG_DEBUG(Core, "Branch target too far for direct linking: offset={}", offset);
}
#if defined(__APPLE__) && defined(ARCH_ARM64)
pthread_jit_write_protect_np(1);
__builtin___clear_cache(static_cast<char*>(caller_address),
static_cast<char*>(caller_address) + 4);
#endif
block->is_linked = true;
LOG_DEBUG(Core, "Linked block {:#x} to {:#x}", block->ps4_address, target_address);
} else if (block->fallthrough_target == target_address && block->branch_target == 0) {
// For fallthrough, append a branch at the end of the block
#if defined(__APPLE__) && defined(ARCH_ARM64)
pthread_jit_write_protect_np(0);
#endif
void* link_location = static_cast<u8*>(block->arm64_code) + block->code_size;
s64 offset =
reinterpret_cast<s64>(target_block->arm64_code) - reinterpret_cast<s64>(link_location);
if (offset >= -0x8000000 && offset < 0x8000000) {
s32 imm26 = static_cast<s32>(offset / 4);
u32* patch_ptr = reinterpret_cast<u32*>(link_location);
u32 branch_inst = 0x14000000 | (imm26 & 0x3FFFFFF);
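// 0x14000002 encodes `b #+8`; it is recorded as the instruction to restore when this
// link is later undone (assumed placeholder for the unlinked fallthrough path).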
u32 original_inst = 0x14000002;
std::atomic_ref<u32>(*patch_ptr).store(branch_inst, std::memory_order::relaxed);
// Register delinker
ExitFunctionLinkData* link_data = new ExitFunctionLinkData{
target_block->arm64_code, target_address, link_location, original_inst};
block_manager->AddBlockLink(target_address, link_data, [](ExitFunctionLinkData* r) {
DirectBlockDelinker(r, false);
});
block->code_size += 4;
}
#if defined(__APPLE__) && defined(ARCH_ARM64)
pthread_jit_write_protect_np(1);
__builtin___clear_cache(static_cast<char*>(link_location),
static_cast<char*>(link_location) + 4);
#endif
block->is_linked = true;
LOG_DEBUG(Core, "Linked fallthrough from block {:#x} to {:#x}", block->ps4_address,
target_address);
}
}
bool ExecutionEngine::ExecuteBlock(VAddr ps4_address) {
CodeBlock* block = TranslateBlock(ps4_address);
if (!block) {
LOG_ERROR(Core, "Failed to translate or find block at {:#x}", ps4_address);
return false;
}
typedef void (*BlockFunc)();
BlockFunc func = reinterpret_cast<BlockFunc>(block->arm64_code);
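// Enter the generated code directly; guest register state is assumed to already be
// resident in the mapped ARM64 registers when the block runs.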
func();
return true;
}
void ExecutionEngine::InvalidateBlock(VAddr ps4_address) {
block_manager->InvalidateBlock(ps4_address);
}
void ExecutionEngine::InvalidateRange(VAddr start, VAddr end) {
block_manager->InvalidateRange(start, end);
}
bool ExecutionEngine::IsJitCode(void* code_ptr) const {
if (!code_buffer) {
return false;
}
u8* ptr = static_cast<u8*>(code_ptr);
u8* start = static_cast<u8*>(code_buffer);
u8* end = start + code_buffer_size;
return ptr >= start && ptr < end;
}
VAddr ExecutionEngine::GetPs4AddressForJitCode(void* code_ptr) const {
if (!IsJitCode(code_ptr)) {
return 0;
}
std::lock_guard<std::mutex> lock(block_manager->mutex);
for (const auto& [ps4_addr, block] : block_manager->blocks) {
u8* block_start = static_cast<u8*>(block->arm64_code);
u8* block_end = block_start + block->code_size;
u8* ptr = static_cast<u8*>(code_ptr);
if (ptr >= block_start && ptr < block_end) {
return ps4_addr;
}
}
return 0;
}
} // namespace Core::Jit

View File

@ -0,0 +1,56 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <memory>
#include "arm64_codegen.h"
#include "block_manager.h"
#include "common/singleton.h"
#include "common/types.h"
#include "register_mapping.h"
#include "x86_64_translator.h"
namespace Core::Jit {
class ExecutionEngine {
public:
ExecutionEngine();
~ExecutionEngine();
bool ExecuteBlock(VAddr ps4_address);
CodeBlock* TranslateBlock(VAddr ps4_address);
void InvalidateBlock(VAddr ps4_address);
void InvalidateRange(VAddr start, VAddr end);
bool IsJitCode(void* code_ptr) const;
VAddr GetPs4AddressForJitCode(void* code_ptr) const;
void Initialize();
void Shutdown();
bool IsInitialized() const {
return code_buffer != nullptr;
}
private:
CodeBlock* TranslateBasicBlock(VAddr start_address, size_t max_instructions = 100);
void* AllocateCodeBuffer(size_t size);
void LinkBlock(CodeBlock* block, VAddr target_address);
std::unique_ptr<BlockManager> block_manager;
std::unique_ptr<RegisterMapper> register_mapper;
std::unique_ptr<Arm64CodeGenerator> code_generator;
std::unique_ptr<X86_64Translator> translator;
void* code_buffer;
size_t code_buffer_size;
size_t code_buffer_used;
static constexpr size_t DEFAULT_CODE_BUFFER_SIZE = 64_MB;
friend class BlockManager;
};
using JitEngine = Common::Singleton<ExecutionEngine>;
} // namespace Core::Jit

139
src/core/jit/hle_bridge.cpp Normal file
View File

@ -0,0 +1,139 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <cstring>
#include "common/logging/log.h"
#include "hle_bridge.h"
namespace Core::Jit {
HleBridge::HleBridge(Arm64CodeGenerator& codegen, RegisterMapper& reg_mapper)
: codegen(codegen), reg_mapper(reg_mapper), calling_conv(codegen, reg_mapper) {}
void HleBridge::GenerateBridge(void* hle_func, int int_arg_count, int float_arg_count) {
// Save the x86_64 caller-saved registers (RAX, RCX, RDX, RSI, RDI, R8-R11)
// Under the register mapping these live in ARM64 X0-X7, which are all caller-saved in AAPCS64
SaveCallerSavedRegisters();
// Map x86_64 arguments to ARM64 calling convention
// x86_64 System V ABI: RDI, RSI, RDX, RCX, R8, R9 (integer), XMM0-XMM7 (float)
// ARM64: X0-X7 (integer), V0-V7 (float)
MapArguments(int_arg_count, float_arg_count);
// Call the HLE function
calling_conv.CallFunction(hle_func);
// Map return value from ARM64 X0 to x86_64 RAX
MapReturnValue();
// Restore caller-saved registers
RestoreCallerSavedRegisters();
}
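// Illustrative use (hypothetical handler): to call an HLE function taking two integer
// arguments and no float arguments, the emitter would run
//     bridge.GenerateBridge(reinterpret_cast<void*>(&SomeHleHandler), 2, 0);
// where SomeHleHandler is a placeholder name, not an existing symbol.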
void HleBridge::SaveCallerSavedRegisters() {
// x86_64 caller-saved registers: RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11
// Map to ARM64 and save them
// Note: We need to be careful about which registers are actually caller-saved in ARM64
// ARM64 caller-saved: X0-X7, X9-X15, V0-V7, V16-V31
// We'll save the x86_64 registers that map to ARM64 caller-saved registers
// Save integer registers that are caller-saved
// RAX -> X0, RCX -> X1, RDX -> X2, RSI -> X3, RDI -> X0 (reused), R8 -> X4, R9 -> X5
// We'll save X0-X7 to be safe (they're all caller-saved in ARM64)
for (int i = 0; i < 8; ++i) {
codegen.push(i); // Save X0-X7
}
// Save XMM registers (V0-V7 in ARM64)
// x86_64 XMM0-XMM7 map to ARM64 V0-V7
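// Each 128-bit V register gets its own 16-byte stack slot, keeping SP 16-byte aligned.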
for (int i = 0; i < 8; ++i) {
codegen.sub_imm(31, 31, 16); // Decrement stack pointer by 16 bytes
codegen.str_v(i, 31, 0); // Store V0-V7
}
}
void HleBridge::RestoreCallerSavedRegisters() {
// Restore XMM registers first (reverse order)
for (int i = 7; i >= 0; --i) {
codegen.ldr_v(i, 31, 0); // Load V0-V7
codegen.add_imm(31, 31, 16); // Increment stack pointer by 16 bytes
}
// Restore integer registers (reverse order)
for (int i = 7; i >= 0; --i) {
codegen.pop(i); // Restore X0-X7
}
}
void HleBridge::MapArguments(int int_arg_count, int float_arg_count) {
// x86_64 System V ABI argument registers:
// Integer: RDI, RSI, RDX, RCX, R8, R9 (args 1-6)
// Float: XMM0-XMM7 (args 1-8)
// ARM64 AAPCS64 argument registers:
// Integer: X0-X7 (args 1-8)
// Float: V0-V7 (args 1-8)
// Map integer arguments
static constexpr X86_64Register x86_int_args[] = {
X86_64Register::RDI, // arg1
X86_64Register::RSI, // arg2
X86_64Register::RDX, // arg3
X86_64Register::RCX, // arg4
X86_64Register::R8, // arg5
X86_64Register::R9, // arg6
};
for (int i = 0; i < int_arg_count && i < 6; ++i) {
int x86_reg = reg_mapper.MapX86_64ToArm64(x86_int_args[i]);
int arm64_arg_reg = i; // X0, X1, X2, etc.
if (x86_reg != arm64_arg_reg) {
codegen.mov(arm64_arg_reg, x86_reg);
}
}
// Map floating point arguments
static constexpr X86_64Register x86_float_args[] = {
X86_64Register::XMM0, // arg1
X86_64Register::XMM1, // arg2
X86_64Register::XMM2, // arg3
X86_64Register::XMM3, // arg4
X86_64Register::XMM4, // arg5
X86_64Register::XMM5, // arg6
X86_64Register::XMM6, // arg7
X86_64Register::XMM7, // arg8
};
for (int i = 0; i < float_arg_count && i < 8; ++i) {
int x86_xmm_reg = reg_mapper.MapX86_64XmmToArm64Neon(x86_float_args[i]);
int arm64_arg_reg = i; // V0, V1, V2, etc.
if (x86_xmm_reg != arm64_arg_reg) {
codegen.mov_v(arm64_arg_reg, x86_xmm_reg);
}
}
}
void HleBridge::MapReturnValue() {
// Return value: ARM64 X0 -> x86_64 RAX
int arm64_return = 0; // X0
int x86_return = reg_mapper.MapX86_64ToArm64(X86_64Register::RAX);
if (x86_return != arm64_return) {
codegen.mov(x86_return, arm64_return);
}
}
bool HleBridge::IsHleAddress(VAddr address) {
// TODO: Implement HLE address lookup
(void)address;
return false;
}
void* HleBridge::GetHleFunction(VAddr address) {
// TODO: Implement HLE function lookup
(void)address;
return nullptr;
}
} // namespace Core::Jit

40
src/core/jit/hle_bridge.h Normal file
View File

@ -0,0 +1,40 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "arm64_codegen.h"
#include "calling_convention.h"
#include "register_mapping.h"
namespace Core::Jit {
class HleBridge {
public:
explicit HleBridge(Arm64CodeGenerator& codegen, RegisterMapper& reg_mapper);
~HleBridge() = default;
// Generate bridge code to call an HLE function
// hle_func: Pointer to the HLE function
// int_arg_count: Number of integer arguments (0-6 for x86_64 System V ABI)
// float_arg_count: Number of floating point arguments (0-8 for x86_64 System V ABI)
void GenerateBridge(void* hle_func, int int_arg_count = 0, int float_arg_count = 0);
// Check if an address is an HLE function
static bool IsHleAddress(VAddr address);
// Get HLE function pointer for an address
static void* GetHleFunction(VAddr address);
private:
void SaveCallerSavedRegisters();
void RestoreCallerSavedRegisters();
void MapArguments(int int_arg_count, int float_arg_count);
void MapReturnValue();
Arm64CodeGenerator& codegen;
RegisterMapper& reg_mapper;
CallingConvention calling_conv;
};
} // namespace Core::Jit

View File

@ -0,0 +1,268 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <cstring>
#include "arm64_codegen.h"
#include "common/assert.h"
#include "register_mapping.h"
namespace Core::Jit {
RegisterMapper::RegisterMapper() : register_save_area(nullptr) {
x86_to_arm64_map.fill(INVALID_MAPPING);
spilled_registers.fill(false);
x86_to_arm64_map[static_cast<size_t>(X86_64Register::RAX)] =
GetArm64RegisterNumber(Arm64Register::X0);
x86_to_arm64_map[static_cast<size_t>(X86_64Register::RCX)] =
GetArm64RegisterNumber(Arm64Register::X1);
x86_to_arm64_map[static_cast<size_t>(X86_64Register::RDX)] =
GetArm64RegisterNumber(Arm64Register::X2);
x86_to_arm64_map[static_cast<size_t>(X86_64Register::RBX)] =
GetArm64RegisterNumber(Arm64Register::X19);
x86_to_arm64_map[static_cast<size_t>(X86_64Register::RSP)] =
GetArm64RegisterNumber(Arm64Register::SP);
x86_to_arm64_map[static_cast<size_t>(X86_64Register::RBP)] =
GetArm64RegisterNumber(Arm64Register::X29);
x86_to_arm64_map[static_cast<size_t>(X86_64Register::RSI)] =
GetArm64RegisterNumber(Arm64Register::X3);
x86_to_arm64_map[static_cast<size_t>(X86_64Register::RDI)] =
GetArm64RegisterNumber(Arm64Register::X0);
x86_to_arm64_map[static_cast<size_t>(X86_64Register::R8)] =
GetArm64RegisterNumber(Arm64Register::X4);
x86_to_arm64_map[static_cast<size_t>(X86_64Register::R9)] =
GetArm64RegisterNumber(Arm64Register::X5);
x86_to_arm64_map[static_cast<size_t>(X86_64Register::R10)] =
GetArm64RegisterNumber(Arm64Register::X6);
x86_to_arm64_map[static_cast<size_t>(X86_64Register::R11)] =
GetArm64RegisterNumber(Arm64Register::X7);
x86_to_arm64_map[static_cast<size_t>(X86_64Register::R12)] =
GetArm64RegisterNumber(Arm64Register::X20);
x86_to_arm64_map[static_cast<size_t>(X86_64Register::R13)] =
GetArm64RegisterNumber(Arm64Register::X21);
x86_to_arm64_map[static_cast<size_t>(X86_64Register::R14)] =
GetArm64RegisterNumber(Arm64Register::X22);
x86_to_arm64_map[static_cast<size_t>(X86_64Register::R15)] =
GetArm64RegisterNumber(Arm64Register::X23);
x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM0)] =
GetArm64RegisterNumber(Arm64Register::V0);
x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM1)] =
GetArm64RegisterNumber(Arm64Register::V1);
x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM2)] =
GetArm64RegisterNumber(Arm64Register::V2);
x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM3)] =
GetArm64RegisterNumber(Arm64Register::V3);
x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM4)] =
GetArm64RegisterNumber(Arm64Register::V4);
x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM5)] =
GetArm64RegisterNumber(Arm64Register::V5);
x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM6)] =
GetArm64RegisterNumber(Arm64Register::V6);
x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM7)] =
GetArm64RegisterNumber(Arm64Register::V7);
x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM8)] =
GetArm64RegisterNumber(Arm64Register::V8);
x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM9)] =
GetArm64RegisterNumber(Arm64Register::V9);
x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM10)] =
GetArm64RegisterNumber(Arm64Register::V10);
x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM11)] =
GetArm64RegisterNumber(Arm64Register::V11);
x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM12)] =
GetArm64RegisterNumber(Arm64Register::V12);
x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM13)] =
GetArm64RegisterNumber(Arm64Register::V13);
x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM14)] =
GetArm64RegisterNumber(Arm64Register::V14);
x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM15)] =
GetArm64RegisterNumber(Arm64Register::V15);
x86_to_arm64_map[static_cast<size_t>(X86_64Register::FLAGS)] =
GetArm64RegisterNumber(Arm64Register::X11);
}
int RegisterMapper::MapX86_64ToArm64(X86_64Register x86_reg) {
size_t index = static_cast<size_t>(x86_reg);
ASSERT_MSG(index < static_cast<size_t>(X86_64Register::COUNT), "Invalid x86_64 register");
return x86_to_arm64_map[index];
}
int RegisterMapper::MapX86_64XmmToArm64Neon(X86_64Register xmm_reg) {
if (!IsXmmRegister(xmm_reg)) {
return INVALID_MAPPING;
}
return MapX86_64ToArm64(xmm_reg);
}
bool RegisterMapper::IsXmmRegister(X86_64Register reg) {
return reg >= X86_64Register::XMM0 && reg <= X86_64Register::XMM15;
}
void RegisterMapper::SpillRegister(X86_64Register x86_reg) {
size_t index = static_cast<size_t>(x86_reg);
ASSERT_MSG(index < static_cast<size_t>(X86_64Register::COUNT), "Invalid x86_64 register");
spilled_registers[index] = true;
}
void RegisterMapper::ReloadRegister(X86_64Register x86_reg) {
size_t index = static_cast<size_t>(x86_reg);
ASSERT_MSG(index < static_cast<size_t>(X86_64Register::COUNT), "Invalid x86_64 register");
spilled_registers[index] = false;
}
bool RegisterMapper::IsRegisterSpilled(X86_64Register x86_reg) const {
size_t index = static_cast<size_t>(x86_reg);
ASSERT_MSG(index < static_cast<size_t>(X86_64Register::COUNT), "Invalid x86_64 register");
return spilled_registers[index];
}
void RegisterMapper::SaveRegister(Arm64CodeGenerator& codegen, X86_64Register x86_reg,
RegisterContext* ctx) {
if (!ctx) {
return;
}
int arm64_reg = MapX86_64ToArm64(x86_reg);
if (arm64_reg == INVALID_MAPPING) {
return;
}
size_t index = static_cast<size_t>(x86_reg);
if (IsXmmRegister(x86_reg)) {
int vreg = MapX86_64XmmToArm64Neon(x86_reg);
if (vreg != INVALID_MAPPING) {
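// Materialize the 64-bit address of the context field in the scratch register,
// 16 bits at a time (movz followed by three movk).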
codegen.movz(SCRATCH_REG,
reinterpret_cast<u64>(&ctx->xmm_regs[index - 16][0]) & 0xFFFF);
codegen.movk(SCRATCH_REG,
(reinterpret_cast<u64>(&ctx->xmm_regs[index - 16][0]) >> 16) & 0xFFFF, 16);
codegen.movk(SCRATCH_REG,
(reinterpret_cast<u64>(&ctx->xmm_regs[index - 16][0]) >> 32) & 0xFFFF, 32);
codegen.movk(SCRATCH_REG,
(reinterpret_cast<u64>(&ctx->xmm_regs[index - 16][0]) >> 48) & 0xFFFF, 48);
codegen.str_v(vreg, SCRATCH_REG, 0);
}
} else if (x86_reg == X86_64Register::FLAGS) {
codegen.movz(SCRATCH_REG, reinterpret_cast<u64>(&ctx->flags) & 0xFFFF);
codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->flags) >> 16) & 0xFFFF, 16);
codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->flags) >> 32) & 0xFFFF, 32);
codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->flags) >> 48) & 0xFFFF, 48);
codegen.str(arm64_reg, SCRATCH_REG, 0);
} else if (x86_reg == X86_64Register::RSP || x86_reg == X86_64Register::RBP) {
if (arm64_reg == STACK_POINTER) {
codegen.mov(SCRATCH_REG, STACK_POINTER);
codegen.movz(SCRATCH_REG2, reinterpret_cast<u64>(&ctx->rsp) & 0xFFFF);
codegen.movk(SCRATCH_REG2, (reinterpret_cast<u64>(&ctx->rsp) >> 16) & 0xFFFF, 16);
codegen.movk(SCRATCH_REG2, (reinterpret_cast<u64>(&ctx->rsp) >> 32) & 0xFFFF, 32);
codegen.movk(SCRATCH_REG2, (reinterpret_cast<u64>(&ctx->rsp) >> 48) & 0xFFFF, 48);
codegen.str(SCRATCH_REG, SCRATCH_REG2, 0);
} else {
codegen.movz(SCRATCH_REG, reinterpret_cast<u64>(&ctx->rbp) & 0xFFFF);
codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->rbp) >> 16) & 0xFFFF, 16);
codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->rbp) >> 32) & 0xFFFF, 32);
codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->rbp) >> 48) & 0xFFFF, 48);
codegen.str(arm64_reg, SCRATCH_REG, 0);
}
} else {
if (index < 16) {
codegen.movz(SCRATCH_REG, reinterpret_cast<u64>(&ctx->gp_regs[index]) & 0xFFFF);
codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->gp_regs[index]) >> 16) & 0xFFFF,
16);
codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->gp_regs[index]) >> 32) & 0xFFFF,
32);
codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->gp_regs[index]) >> 48) & 0xFFFF,
48);
codegen.str(arm64_reg, SCRATCH_REG, 0);
}
}
}
void RegisterMapper::RestoreRegister(Arm64CodeGenerator& codegen, X86_64Register x86_reg,
RegisterContext* ctx) {
if (!ctx) {
return;
}
int arm64_reg = MapX86_64ToArm64(x86_reg);
if (arm64_reg == INVALID_MAPPING) {
return;
}
size_t index = static_cast<size_t>(x86_reg);
if (IsXmmRegister(x86_reg)) {
int vreg = MapX86_64XmmToArm64Neon(x86_reg);
if (vreg != INVALID_MAPPING) {
codegen.movz(SCRATCH_REG,
reinterpret_cast<u64>(&ctx->xmm_regs[index - 16][0]) & 0xFFFF);
codegen.movk(SCRATCH_REG,
(reinterpret_cast<u64>(&ctx->xmm_regs[index - 16][0]) >> 16) & 0xFFFF, 16);
codegen.movk(SCRATCH_REG,
(reinterpret_cast<u64>(&ctx->xmm_regs[index - 16][0]) >> 32) & 0xFFFF, 32);
codegen.movk(SCRATCH_REG,
(reinterpret_cast<u64>(&ctx->xmm_regs[index - 16][0]) >> 48) & 0xFFFF, 48);
codegen.ldr_v(vreg, SCRATCH_REG, 0);
}
} else if (x86_reg == X86_64Register::FLAGS) {
codegen.movz(SCRATCH_REG, reinterpret_cast<u64>(&ctx->flags) & 0xFFFF);
codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->flags) >> 16) & 0xFFFF, 16);
codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->flags) >> 32) & 0xFFFF, 32);
codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->flags) >> 48) & 0xFFFF, 48);
codegen.ldr(arm64_reg, SCRATCH_REG, 0);
} else if (x86_reg == X86_64Register::RSP || x86_reg == X86_64Register::RBP) {
if (arm64_reg == STACK_POINTER) {
codegen.movz(SCRATCH_REG, reinterpret_cast<u64>(&ctx->rsp) & 0xFFFF);
codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->rsp) >> 16) & 0xFFFF, 16);
codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->rsp) >> 32) & 0xFFFF, 32);
codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->rsp) >> 48) & 0xFFFF, 48);
codegen.ldr(SCRATCH_REG2, SCRATCH_REG, 0);
codegen.mov(STACK_POINTER, SCRATCH_REG2);
} else {
codegen.movz(SCRATCH_REG, reinterpret_cast<u64>(&ctx->rbp) & 0xFFFF);
codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->rbp) >> 16) & 0xFFFF, 16);
codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->rbp) >> 32) & 0xFFFF, 32);
codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->rbp) >> 48) & 0xFFFF, 48);
codegen.ldr(arm64_reg, SCRATCH_REG, 0);
}
} else {
if (index < 16) {
codegen.movz(SCRATCH_REG, reinterpret_cast<u64>(&ctx->gp_regs[index]) & 0xFFFF);
codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->gp_regs[index]) >> 16) & 0xFFFF,
16);
codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->gp_regs[index]) >> 32) & 0xFFFF,
32);
codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->gp_regs[index]) >> 48) & 0xFFFF,
48);
codegen.ldr(arm64_reg, SCRATCH_REG, 0);
}
}
}
void RegisterMapper::SaveAllRegisters(Arm64CodeGenerator& codegen, RegisterContext* ctx) {
if (!ctx) {
return;
}
for (int i = 0; i < 16; i++) {
SaveRegister(codegen, static_cast<X86_64Register>(i), ctx);
}
for (int i = 16; i < 32; i++) {
SaveRegister(codegen, static_cast<X86_64Register>(i), ctx);
}
SaveRegister(codegen, X86_64Register::FLAGS, ctx);
}
void RegisterMapper::RestoreAllRegisters(Arm64CodeGenerator& codegen, RegisterContext* ctx) {
if (!ctx) {
return;
}
RestoreRegister(codegen, X86_64Register::FLAGS, ctx);
for (int i = 16; i < 32; i++) {
RestoreRegister(codegen, static_cast<X86_64Register>(i), ctx);
}
for (int i = 0; i < 16; i++) {
RestoreRegister(codegen, static_cast<X86_64Register>(i), ctx);
}
}
} // namespace Core::Jit

View File

@ -0,0 +1,147 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include "common/types.h"
#include "core/jit/arm64_codegen.h"
namespace Core::Jit {
enum class X86_64Register : u8 {
RAX = 0,
RCX = 1,
RDX = 2,
RBX = 3,
RSP = 4,
RBP = 5,
RSI = 6,
RDI = 7,
R8 = 8,
R9 = 9,
R10 = 10,
R11 = 11,
R12 = 12,
R13 = 13,
R14 = 14,
R15 = 15,
XMM0 = 16,
XMM1 = 17,
XMM2 = 18,
XMM3 = 19,
XMM4 = 20,
XMM5 = 21,
XMM6 = 22,
XMM7 = 23,
XMM8 = 24,
XMM9 = 25,
XMM10 = 26,
XMM11 = 27,
XMM12 = 28,
XMM13 = 29,
XMM14 = 30,
XMM15 = 31,
FLAGS = 32,
COUNT = 33
};
enum class Arm64Register : u8 {
X0 = 0,
X1 = 1,
X2 = 2,
X3 = 3,
X4 = 4,
X5 = 5,
X6 = 6,
X7 = 7,
X8 = 8,
X9 = 9,
X10 = 10,
X11 = 11,
X12 = 12,
X13 = 13,
X14 = 14,
X15 = 15,
X16 = 16,
X17 = 17,
X18 = 18,
X19 = 19,
X20 = 20,
X21 = 21,
X22 = 22,
X23 = 23,
X24 = 24,
X25 = 25,
X26 = 26,
X27 = 27,
X28 = 28,
X29 = 29,
X30 = 30,
SP = 31,
V0 = 32,
V1 = 33,
V2 = 34,
V3 = 35,
V4 = 36,
V5 = 37,
V6 = 38,
V7 = 39,
V8 = 40,
V9 = 41,
V10 = 42,
V11 = 43,
V12 = 44,
V13 = 45,
V14 = 46,
V15 = 47,
COUNT = 48
};
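// Guest x86_64 register snapshot: gp_regs is indexed by X86_64Register value
// (RAX=0 .. R15=15); xmm_regs holds each 128-bit XMM register as two 64-bit halves.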
struct RegisterContext {
u64 gp_regs[16];
u64 xmm_regs[16][2];
u64 flags;
u64 rsp;
u64 rbp;
};
class RegisterMapper {
public:
RegisterMapper();
int MapX86_64ToArm64(X86_64Register x86_reg);
int MapX86_64XmmToArm64Neon(X86_64Register xmm_reg);
bool IsXmmRegister(X86_64Register reg);
void SpillRegister(X86_64Register x86_reg);
void ReloadRegister(X86_64Register x86_reg);
bool IsRegisterSpilled(X86_64Register x86_reg) const;
void SaveAllRegisters(Arm64CodeGenerator& codegen, RegisterContext* ctx);
void RestoreAllRegisters(Arm64CodeGenerator& codegen, RegisterContext* ctx);
void SaveRegister(Arm64CodeGenerator& codegen, X86_64Register x86_reg, RegisterContext* ctx);
void RestoreRegister(Arm64CodeGenerator& codegen, X86_64Register x86_reg, RegisterContext* ctx);
static constexpr int SCRATCH_REG = 9;
static constexpr int SCRATCH_REG2 = 10;
static constexpr int FLAGS_REG = 11;
static constexpr int STACK_POINTER = 31;
private:
static constexpr int INVALID_MAPPING = -1;
std::array<int, static_cast<size_t>(X86_64Register::COUNT)> x86_to_arm64_map;
std::array<bool, static_cast<size_t>(X86_64Register::COUNT)> spilled_registers;
void* register_save_area;
};
inline int GetArm64RegisterNumber(Arm64Register reg) {
return static_cast<int>(reg);
}
inline int GetX86_64RegisterNumber(X86_64Register reg) {
return static_cast<int>(reg);
}
} // namespace Core::Jit

View File

@ -0,0 +1,206 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "common/logging/log.h"
#include "register_mapping.h"
#include "simd_translator.h"
namespace Core::Jit {
SimdTranslator::SimdTranslator(Arm64CodeGenerator& codegen, RegisterMapper& reg_mapper)
: codegen(codegen), reg_mapper(reg_mapper) {}
int SimdTranslator::GetArm64NeonRegister(const ZydisDecodedOperand& operand) {
if (operand.type != ZYDIS_OPERAND_TYPE_REGISTER) {
return -1;
}
if (operand.reg.value < ZYDIS_REGISTER_XMM0 || operand.reg.value > ZYDIS_REGISTER_XMM15) {
return -1;
}
X86_64Register xmm_reg =
static_cast<X86_64Register>(static_cast<int>(X86_64Register::XMM0) +
static_cast<int>(operand.reg.value - ZYDIS_REGISTER_XMM0));
return reg_mapper.MapX86_64XmmToArm64Neon(xmm_reg);
}
void SimdTranslator::LoadMemoryOperandV(int vreg, const ZydisDecodedOperand& mem_op) {
ASSERT_MSG(mem_op.type == ZYDIS_OPERAND_TYPE_MEMORY, "Expected memory operand");
int addr_reg = RegisterMapper::SCRATCH_REG;
codegen.mov(addr_reg, 0);
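// Only base register + displacement addressing is handled here; index/scale are not applied.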
if (mem_op.mem.base != ZYDIS_REGISTER_NONE && mem_op.mem.base != ZYDIS_REGISTER_RIP) {
if (mem_op.mem.base >= ZYDIS_REGISTER_RAX && mem_op.mem.base <= ZYDIS_REGISTER_R15) {
X86_64Register x86_base =
static_cast<X86_64Register>(mem_op.mem.base - ZYDIS_REGISTER_RAX);
if (x86_base < X86_64Register::COUNT) {
int base_reg = reg_mapper.MapX86_64ToArm64(x86_base);
codegen.mov(addr_reg, base_reg);
}
}
}
if (mem_op.mem.disp.value != 0) {
codegen.add(addr_reg, addr_reg, static_cast<s32>(mem_op.mem.disp.value));
}
codegen.ldr_v(vreg, addr_reg, 0);
}
void SimdTranslator::StoreMemoryOperandV(int vreg, const ZydisDecodedOperand& mem_op) {
ASSERT_MSG(mem_op.type == ZYDIS_OPERAND_TYPE_MEMORY, "Expected memory operand");
int addr_reg = RegisterMapper::SCRATCH_REG;
codegen.mov(addr_reg, 0);
if (mem_op.mem.base != ZYDIS_REGISTER_NONE) {
if (mem_op.mem.base >= ZYDIS_REGISTER_RAX && mem_op.mem.base <= ZYDIS_REGISTER_R15) {
X86_64Register x86_base =
static_cast<X86_64Register>(mem_op.mem.base - ZYDIS_REGISTER_RAX);
if (x86_base < X86_64Register::COUNT) {
int base_reg = reg_mapper.MapX86_64ToArm64(x86_base);
codegen.mov(addr_reg, base_reg);
}
}
}
if (mem_op.mem.disp.value != 0) {
codegen.add(addr_reg, addr_reg, static_cast<s32>(mem_op.mem.disp.value));
}
codegen.str_v(vreg, addr_reg, 0);
}
bool SimdTranslator::TranslateSseInstruction(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands) {
switch (instruction.mnemonic) {
case ZYDIS_MNEMONIC_MOVAPS:
return TranslateMovaps(instruction, operands);
case ZYDIS_MNEMONIC_MOVUPS:
return TranslateMovups(instruction, operands);
case ZYDIS_MNEMONIC_ADDPS:
return TranslateAddps(instruction, operands);
case ZYDIS_MNEMONIC_SUBPS:
return TranslateSubps(instruction, operands);
case ZYDIS_MNEMONIC_MULPS:
return TranslateMulps(instruction, operands);
default:
LOG_WARNING(Core, "Unsupported SSE instruction: {}",
ZydisMnemonicGetString(instruction.mnemonic));
return false;
}
}
bool SimdTranslator::TranslateMovaps(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands) {
const auto& dst = operands[0];
const auto& src = operands[1];
int dst_vreg = GetArm64NeonRegister(dst);
if (dst_vreg == -1) {
return false;
}
if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
int src_vreg = GetArm64NeonRegister(src);
if (src_vreg == -1) {
return false;
}
codegen.mov_v(dst_vreg, src_vreg);
} else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
LoadMemoryOperandV(dst_vreg, src);
} else {
return false;
}
return true;
}
bool SimdTranslator::TranslateMovups(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands) {
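// The NEON loads/stores emitted here tolerate unaligned addresses, so the unaligned
// MOVUPS case can reuse the MOVAPS path (the x86 alignment fault is not modeled).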
return TranslateMovaps(instruction, operands);
}
bool SimdTranslator::TranslateAddps(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands) {
const auto& dst = operands[0];
const auto& src = operands[1];
int dst_vreg = GetArm64NeonRegister(dst);
if (dst_vreg == -1) {
return false;
}
if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
int src_vreg = GetArm64NeonRegister(src);
if (src_vreg == -1) {
return false;
}
codegen.add_v(dst_vreg, dst_vreg, src_vreg);
} else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
int scratch_vreg = 8;
LoadMemoryOperandV(scratch_vreg, src);
codegen.add_v(dst_vreg, dst_vreg, scratch_vreg);
} else {
return false;
}
return true;
}
bool SimdTranslator::TranslateSubps(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands) {
const auto& dst = operands[0];
const auto& src = operands[1];
int dst_vreg = GetArm64NeonRegister(dst);
if (dst_vreg == -1) {
return false;
}
if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
int src_vreg = GetArm64NeonRegister(src);
if (src_vreg == -1) {
return false;
}
codegen.sub_v(dst_vreg, dst_vreg, src_vreg);
} else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
int scratch_vreg = 8;
LoadMemoryOperandV(scratch_vreg, src);
codegen.sub_v(dst_vreg, dst_vreg, scratch_vreg);
} else {
return false;
}
return true;
}
bool SimdTranslator::TranslateMulps(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands) {
const auto& dst = operands[0];
const auto& src = operands[1];
int dst_vreg = GetArm64NeonRegister(dst);
if (dst_vreg == -1) {
return false;
}
if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
int src_vreg = GetArm64NeonRegister(src);
if (src_vreg == -1) {
return false;
}
codegen.mul_v(dst_vreg, dst_vreg, src_vreg);
} else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
int scratch_vreg = 8;
LoadMemoryOperandV(scratch_vreg, src);
codegen.mul_v(dst_vreg, dst_vreg, scratch_vreg);
} else {
return false;
}
return true;
}
} // namespace Core::Jit

View File

@ -0,0 +1,39 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <Zydis/Zydis.h>
#include "arm64_codegen.h"
#include "register_mapping.h"
namespace Core::Jit {
class SimdTranslator {
public:
explicit SimdTranslator(Arm64CodeGenerator& codegen, RegisterMapper& reg_mapper);
bool TranslateSseInstruction(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands);
bool TranslateMovaps(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands);
bool TranslateMovups(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands);
bool TranslateAddps(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands);
bool TranslateSubps(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands);
bool TranslateMulps(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands);
private:
int GetArm64NeonRegister(const ZydisDecodedOperand& operand);
void LoadMemoryOperandV(int vreg, const ZydisDecodedOperand& mem_op);
void StoreMemoryOperandV(int vreg, const ZydisDecodedOperand& mem_op);
Arm64CodeGenerator& codegen;
RegisterMapper& reg_mapper;
};
} // namespace Core::Jit

View File

@ -0,0 +1,850 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <cstring>
#include "common/assert.h"
#include "common/logging/log.h"
#include "register_mapping.h"
#include "x86_64_translator.h"
namespace Core::Jit {
X86_64Translator::X86_64Translator(Arm64CodeGenerator& codegen, RegisterMapper& reg_mapper)
: codegen(codegen), reg_mapper(reg_mapper) {}
bool X86_64Translator::TranslateInstruction(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands, VAddr address) {
switch (instruction.mnemonic) {
case ZYDIS_MNEMONIC_MOV:
return TranslateMov(instruction, operands);
case ZYDIS_MNEMONIC_ADD:
return TranslateAdd(instruction, operands);
case ZYDIS_MNEMONIC_SUB:
return TranslateSub(instruction, operands);
case ZYDIS_MNEMONIC_MUL:
return TranslateMul(instruction, operands);
case ZYDIS_MNEMONIC_DIV:
case ZYDIS_MNEMONIC_IDIV:
return TranslateDiv(instruction, operands);
case ZYDIS_MNEMONIC_AND:
return TranslateAnd(instruction, operands);
case ZYDIS_MNEMONIC_OR:
return TranslateOr(instruction, operands);
case ZYDIS_MNEMONIC_XOR:
return TranslateXor(instruction, operands);
case ZYDIS_MNEMONIC_NOT:
return TranslateNot(instruction, operands);
case ZYDIS_MNEMONIC_SHL:
return TranslateShl(instruction, operands);
case ZYDIS_MNEMONIC_SHR:
return TranslateShr(instruction, operands);
case ZYDIS_MNEMONIC_SAR:
return TranslateSar(instruction, operands);
case ZYDIS_MNEMONIC_PUSH:
return TranslatePush(instruction, operands);
case ZYDIS_MNEMONIC_POP:
return TranslatePop(instruction, operands);
case ZYDIS_MNEMONIC_CALL:
return TranslateCall(instruction, operands, address);
case ZYDIS_MNEMONIC_RET:
return TranslateRet(instruction, operands);
case ZYDIS_MNEMONIC_JMP:
return TranslateJmp(instruction, operands, address);
case ZYDIS_MNEMONIC_CMP:
return TranslateCmp(instruction, operands);
case ZYDIS_MNEMONIC_TEST:
return TranslateTest(instruction, operands);
case ZYDIS_MNEMONIC_LEA:
return TranslateLea(instruction, operands);
default:
LOG_ERROR(Core, "Unsupported instruction: {}",
ZydisMnemonicGetString(instruction.mnemonic));
return false;
}
}
X86_64Register X86_64Translator::ZydisToX86_64Register(ZydisRegister reg) {
if (reg >= ZYDIS_REGISTER_RAX && reg <= ZYDIS_REGISTER_R15) {
return static_cast<X86_64Register>(static_cast<int>(reg - ZYDIS_REGISTER_RAX));
} else if (reg >= ZYDIS_REGISTER_XMM0 && reg <= ZYDIS_REGISTER_XMM15) {
return static_cast<X86_64Register>(static_cast<int>(X86_64Register::XMM0) +
static_cast<int>(reg - ZYDIS_REGISTER_XMM0));
}
return X86_64Register::COUNT;
}
int X86_64Translator::GetArm64Register(const ZydisDecodedOperand& operand) {
if (operand.type != ZYDIS_OPERAND_TYPE_REGISTER) {
return -1;
}
X86_64Register x86_reg = ZydisToX86_64Register(operand.reg.value);
if (x86_reg == X86_64Register::COUNT) {
return -1;
}
return reg_mapper.MapX86_64ToArm64(x86_reg);
}
int X86_64Translator::GetArm64XmmRegister(const ZydisDecodedOperand& operand) {
if (operand.type != ZYDIS_OPERAND_TYPE_REGISTER) {
return -1;
}
X86_64Register x86_reg = ZydisToX86_64Register(operand.reg.value);
if (!reg_mapper.IsXmmRegister(x86_reg)) {
return -1;
}
return reg_mapper.MapX86_64XmmToArm64Neon(x86_reg);
}
void X86_64Translator::CalculateMemoryAddress(int dst_reg, const ZydisDecodedOperand& mem_op) {
ASSERT_MSG(mem_op.type == ZYDIS_OPERAND_TYPE_MEMORY, "Expected memory operand");
const auto& mem = mem_op.mem;
int base_reg = -1;
int index_reg = -1;
if (mem.base != ZYDIS_REGISTER_NONE && mem.base != ZYDIS_REGISTER_RIP) {
X86_64Register x86_base = ZydisToX86_64Register(mem.base);
if (x86_base != X86_64Register::COUNT) {
base_reg = reg_mapper.MapX86_64ToArm64(x86_base);
}
}
if (mem.index != ZYDIS_REGISTER_NONE) {
X86_64Register x86_index = ZydisToX86_64Register(mem.index);
if (x86_index != X86_64Register::COUNT) {
index_reg = reg_mapper.MapX86_64ToArm64(x86_index);
}
}
s64 displacement = mem.disp.value;
if (base_reg == -1 && index_reg == -1 && displacement == 0) {
codegen.mov(dst_reg, 0);
return;
}
if (index_reg == -1) {
if (base_reg != -1) {
if (displacement == 0) {
codegen.mov(dst_reg, base_reg);
} else if (displacement >= -256 && displacement < 256) {
codegen.mov(dst_reg, base_reg);
codegen.add_imm(dst_reg, dst_reg, static_cast<s32>(displacement));
} else {
codegen.mov(dst_reg, base_reg);
codegen.mov_imm(RegisterMapper::SCRATCH_REG, displacement);
codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
}
} else {
codegen.mov_imm(dst_reg, displacement);
}
return;
}
if (base_reg == -1) {
base_reg = 0;
}
int scale = mem.scale;
if (scale == 0) {
scale = 1;
}
if (scale == 1) {
if (displacement == 0) {
codegen.add(dst_reg, base_reg, index_reg);
} else if (displacement >= -256 && displacement < 256) {
codegen.add(dst_reg, base_reg, index_reg);
codegen.add_imm(dst_reg, dst_reg, static_cast<s32>(displacement));
} else {
codegen.add(dst_reg, base_reg, index_reg);
codegen.mov_imm(RegisterMapper::SCRATCH_REG, displacement);
codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
}
} else if (scale == 2 || scale == 4 || scale == 8) {
int shift = (scale == 2) ? 1 : (scale == 4) ? 2 : 3;
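// A power-of-two scale folds into the ADD as a left shift of the index register.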
if (displacement == 0) {
codegen.add(dst_reg, base_reg, index_reg, shift);
} else {
codegen.add(dst_reg, base_reg, index_reg, shift);
if (displacement >= -256 && displacement < 256) {
codegen.add_imm(dst_reg, dst_reg, static_cast<s32>(displacement));
} else {
codegen.mov_imm(RegisterMapper::SCRATCH_REG, displacement);
codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
}
}
} else {
codegen.mov(dst_reg, base_reg);
codegen.mov_imm(RegisterMapper::SCRATCH_REG, scale);
codegen.mul(RegisterMapper::SCRATCH_REG, index_reg, RegisterMapper::SCRATCH_REG);
codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
if (displacement != 0) {
if (displacement >= -256 && displacement < 256) {
codegen.add_imm(dst_reg, dst_reg, static_cast<s32>(displacement));
} else {
codegen.mov_imm(RegisterMapper::SCRATCH_REG, displacement);
codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
}
}
}
}
void X86_64Translator::LoadMemoryOperand(int dst_reg, const ZydisDecodedOperand& mem_op,
size_t size) {
CalculateMemoryAddress(RegisterMapper::SCRATCH_REG, mem_op);
if (mem_op.mem.base == ZYDIS_REGISTER_RIP) {
LOG_WARNING(Core, "RIP-relative addressing not fully supported in JIT");
}
switch (size) {
case 1:
codegen.ldrb(dst_reg, RegisterMapper::SCRATCH_REG, 0);
break;
case 2:
codegen.ldrh(dst_reg, RegisterMapper::SCRATCH_REG, 0);
break;
case 4:
case 8:
codegen.ldr(dst_reg, RegisterMapper::SCRATCH_REG, 0);
break;
default:
ASSERT_MSG(false, "Unsupported memory load size: {}", size);
}
}
void X86_64Translator::StoreMemoryOperand(int src_reg, const ZydisDecodedOperand& mem_op,
size_t size) {
CalculateMemoryAddress(RegisterMapper::SCRATCH_REG, mem_op);
if (mem_op.mem.base == ZYDIS_REGISTER_RIP) {
LOG_WARNING(Core, "RIP-relative addressing not fully supported in JIT");
}
switch (size) {
case 1:
codegen.strb(src_reg, RegisterMapper::SCRATCH_REG, 0);
break;
case 2:
codegen.strh(src_reg, RegisterMapper::SCRATCH_REG, 0);
break;
case 4:
case 8:
codegen.str(src_reg, RegisterMapper::SCRATCH_REG, 0);
break;
default:
ASSERT_MSG(false, "Unsupported memory store size: {}", size);
}
}
void X86_64Translator::LoadImmediate(int dst_reg, const ZydisDecodedOperand& imm_op) {
ASSERT_MSG(imm_op.type == ZYDIS_OPERAND_TYPE_IMMEDIATE, "Expected immediate operand");
s64 value = static_cast<s64>(imm_op.imm.value.s);
codegen.mov(dst_reg, value);
}
bool X86_64Translator::TranslateMov(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands) {
const auto& dst = operands[0];
const auto& src = operands[1];
if (dst.type == ZYDIS_OPERAND_TYPE_REGISTER) {
int dst_reg = GetArm64Register(dst);
if (dst_reg == -1) {
return false;
}
if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
int src_reg = GetArm64Register(src);
if (src_reg == -1) {
return false;
}
codegen.mov(dst_reg, src_reg);
} else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
LoadImmediate(dst_reg, src);
} else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
LoadMemoryOperand(dst_reg, src, instruction.operand_width / 8);
} else {
return false;
}
} else if (dst.type == ZYDIS_OPERAND_TYPE_MEMORY) {
int src_reg = -1;
if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
src_reg = GetArm64Register(src);
if (src_reg == -1) {
return false;
}
} else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
LoadImmediate(RegisterMapper::SCRATCH_REG, src);
src_reg = RegisterMapper::SCRATCH_REG;
} else {
return false;
}
StoreMemoryOperand(src_reg, dst, instruction.operand_width / 8);
} else {
return false;
}
return true;
}
bool X86_64Translator::TranslateAdd(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands) {
const auto& dst = operands[0];
const auto& src = operands[1];
int dst_reg = GetArm64Register(dst);
if (dst_reg == -1) {
return false;
}
if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
int src_reg = GetArm64Register(src);
if (src_reg == -1) {
return false;
}
codegen.add(dst_reg, dst_reg, src_reg);
} else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
s32 imm = static_cast<s32>(src.imm.value.s);
codegen.add_imm(dst_reg, dst_reg, imm);
} else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
LoadMemoryOperand(RegisterMapper::SCRATCH_REG, src, instruction.operand_width / 8);
codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
} else {
return false;
}
return true;
}
bool X86_64Translator::TranslateSub(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands) {
const auto& dst = operands[0];
const auto& src = operands[1];
int dst_reg = GetArm64Register(dst);
if (dst_reg == -1) {
return false;
}
if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
int src_reg = GetArm64Register(src);
if (src_reg == -1) {
return false;
}
codegen.sub(dst_reg, dst_reg, src_reg);
} else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
s32 imm = static_cast<s32>(src.imm.value.s);
codegen.sub_imm(dst_reg, dst_reg, imm);
} else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
LoadMemoryOperand(RegisterMapper::SCRATCH_REG, src, instruction.operand_width / 8);
codegen.sub(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
} else {
return false;
}
return true;
}
bool X86_64Translator::TranslateMul(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands) {
const auto& dst = operands[0];
int dst_reg = GetArm64Register(dst);
if (dst_reg == -1) {
return false;
}
if (operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER) {
int src_reg = GetArm64Register(operands[1]);
if (src_reg == -1) {
return false;
}
codegen.mul(dst_reg, dst_reg, src_reg);
} else if (operands[1].type == ZYDIS_OPERAND_TYPE_MEMORY) {
LoadMemoryOperand(RegisterMapper::SCRATCH_REG, operands[1], instruction.operand_width / 8);
codegen.mul(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
} else {
return false;
}
return true;
}
bool X86_64Translator::TranslateDiv(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands) {
LOG_WARNING(Core, "DIV instruction translation not fully implemented");
return false;
}
bool X86_64Translator::TranslateAnd(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands) {
const auto& dst = operands[0];
const auto& src = operands[1];
int dst_reg = GetArm64Register(dst);
if (dst_reg == -1) {
return false;
}
if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
int src_reg = GetArm64Register(src);
if (src_reg == -1) {
return false;
}
codegen.and_(dst_reg, dst_reg, src_reg);
} else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
u64 imm = static_cast<u64>(src.imm.value.u);
codegen.and_(dst_reg, dst_reg, imm);
} else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
LoadMemoryOperand(RegisterMapper::SCRATCH_REG, src, instruction.operand_width / 8);
codegen.and_(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
} else {
return false;
}
return true;
}
bool X86_64Translator::TranslateOr(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands) {
const auto& dst = operands[0];
const auto& src = operands[1];
int dst_reg = GetArm64Register(dst);
if (dst_reg == -1) {
return false;
}
if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
int src_reg = GetArm64Register(src);
if (src_reg == -1) {
return false;
}
codegen.orr(dst_reg, dst_reg, src_reg);
} else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
u64 imm = static_cast<u64>(src.imm.value.u);
codegen.orr(dst_reg, dst_reg, imm);
} else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
LoadMemoryOperand(RegisterMapper::SCRATCH_REG, src, instruction.operand_width / 8);
codegen.orr(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
} else {
return false;
}
return true;
}
bool X86_64Translator::TranslateXor(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands) {
const auto& dst = operands[0];
const auto& src = operands[1];
int dst_reg = GetArm64Register(dst);
if (dst_reg == -1) {
return false;
}
if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
int src_reg = GetArm64Register(src);
if (src_reg == -1) {
return false;
}
codegen.eor(dst_reg, dst_reg, src_reg);
} else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
u64 imm = static_cast<u64>(src.imm.value.u);
codegen.eor(dst_reg, dst_reg, imm);
} else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
LoadMemoryOperand(RegisterMapper::SCRATCH_REG, src, instruction.operand_width / 8);
codegen.eor(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
} else {
return false;
}
return true;
}
bool X86_64Translator::TranslateNot(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands) {
const auto& dst = operands[0];
int dst_reg = GetArm64Register(dst);
if (dst_reg == -1) {
return false;
}
codegen.mvn(dst_reg, dst_reg);
return true;
}
bool X86_64Translator::TranslateShl(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands) {
const auto& dst = operands[0];
const auto& src = operands[1];
int dst_reg = GetArm64Register(dst);
if (dst_reg == -1) {
return false;
}
if (src.type == ZYDIS_OPERAND_TYPE_REGISTER &&
(src.reg.value == ZYDIS_REGISTER_CL || src.reg.value == ZYDIS_REGISTER_RCX)) {
int cl_reg = reg_mapper.MapX86_64ToArm64(X86_64Register::RCX);
codegen.lsl(dst_reg, dst_reg, cl_reg);
} else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
u64 shift_val = src.imm.value.u;
if (shift_val < 64) {
codegen.lsl(dst_reg, dst_reg, static_cast<u8>(shift_val));
} else {
codegen.mov(dst_reg, 0);
}
} else {
return false;
}
return true;
}
bool X86_64Translator::TranslateShr(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands) {
const auto& dst = operands[0];
const auto& src = operands[1];
int dst_reg = GetArm64Register(dst);
if (dst_reg == -1) {
return false;
}
if (src.type == ZYDIS_OPERAND_TYPE_REGISTER &&
(src.reg.value == ZYDIS_REGISTER_CL || src.reg.value == ZYDIS_REGISTER_RCX)) {
int cl_reg = reg_mapper.MapX86_64ToArm64(X86_64Register::RCX);
codegen.lsr(dst_reg, dst_reg, cl_reg);
} else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
u64 shift_val = src.imm.value.u;
if (shift_val < 64) {
codegen.lsr(dst_reg, dst_reg, static_cast<u8>(shift_val));
} else {
codegen.mov(dst_reg, 0);
}
} else {
return false;
}
return true;
}
bool X86_64Translator::TranslateSar(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands) {
const auto& dst = operands[0];
const auto& src = operands[1];
int dst_reg = GetArm64Register(dst);
if (dst_reg == -1) {
return false;
}
if (src.type == ZYDIS_OPERAND_TYPE_REGISTER &&
(src.reg.value == ZYDIS_REGISTER_CL || src.reg.value == ZYDIS_REGISTER_RCX)) {
int cl_reg = reg_mapper.MapX86_64ToArm64(X86_64Register::RCX);
codegen.asr(dst_reg, dst_reg, cl_reg);
} else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
u64 shift_val = src.imm.value.u;
if (shift_val < 64) {
codegen.asr(dst_reg, dst_reg, static_cast<u8>(shift_val));
} else {
codegen.mov(dst_reg, 0);
}
} else {
return false;
}
return true;
}
bool X86_64Translator::TranslatePush(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands) {
const auto& src = operands[0];
int sp_reg = reg_mapper.MapX86_64ToArm64(X86_64Register::RSP);
codegen.sub(sp_reg, sp_reg, 8);
if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
int src_reg = GetArm64Register(src);
if (src_reg == -1) {
return false;
}
codegen.str(src_reg, sp_reg, 0);
} else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
LoadImmediate(RegisterMapper::SCRATCH_REG, src);
codegen.str(RegisterMapper::SCRATCH_REG, sp_reg, 0);
} else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
LoadMemoryOperand(RegisterMapper::SCRATCH_REG, src, instruction.operand_width / 8);
codegen.str(RegisterMapper::SCRATCH_REG, sp_reg, 0);
} else {
return false;
}
return true;
}
bool X86_64Translator::TranslatePop(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands) {
const auto& dst = operands[0];
int dst_reg = GetArm64Register(dst);
if (dst_reg == -1) {
return false;
}
int sp_reg = reg_mapper.MapX86_64ToArm64(X86_64Register::RSP);
codegen.ldr(dst_reg, sp_reg, 0);
codegen.add(sp_reg, sp_reg, 8);
return true;
}
bool X86_64Translator::TranslateCall(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands, VAddr address) {
const auto& target = operands[0];
VAddr target_address = 0;
VAddr return_address = address + instruction.length;
// Calculate target address based on operand type
if (target.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
// Direct relative call: CALL rel32
// Target = current_address + instruction.length + offset
s64 offset = static_cast<s64>(target.imm.value.s);
target_address = address + instruction.length + offset;
} else if (target.type == ZYDIS_OPERAND_TYPE_MEMORY) {
// Indirect call: CALL [mem]
// Load address from memory into scratch register
LoadMemoryOperand(RegisterMapper::SCRATCH_REG, target, 8);
// Push return address
int sp_reg = RegisterMapper::STACK_POINTER;
codegen.sub_imm(sp_reg, sp_reg, 8); // Decrement stack by 8 bytes
codegen.mov_imm(RegisterMapper::SCRATCH_REG2, return_address);
codegen.str(RegisterMapper::SCRATCH_REG2, sp_reg, 0); // Store return address
// Call via register
codegen.blr(RegisterMapper::SCRATCH_REG);
return true;
} else if (target.type == ZYDIS_OPERAND_TYPE_REGISTER) {
// Indirect call: CALL reg
int reg = GetArm64Register(target);
if (reg == -1) {
LOG_ERROR(Core, "Invalid register for CALL");
return false;
}
// Push return address
int sp_reg = RegisterMapper::STACK_POINTER;
codegen.sub_imm(sp_reg, sp_reg, 8); // Decrement stack by 8 bytes
codegen.mov_imm(RegisterMapper::SCRATCH_REG, return_address);
codegen.str(RegisterMapper::SCRATCH_REG, sp_reg, 0); // Store return address
// Call via register
codegen.blr(reg);
return true;
} else {
LOG_ERROR(Core, "Unsupported CALL operand type");
return false;
}
// For direct calls, push return address and branch to target
// Push return address onto stack
int sp_reg = RegisterMapper::STACK_POINTER;
codegen.sub_imm(sp_reg, sp_reg, 8); // Decrement stack by 8 bytes (x86_64 stack grows down)
codegen.mov_imm(RegisterMapper::SCRATCH_REG, return_address);
codegen.str(RegisterMapper::SCRATCH_REG, sp_reg, 0); // Store return address at [SP]
// Branch to target (will be linked later if target block is available)
void* placeholder_target = reinterpret_cast<void*>(target_address);
codegen.bl(placeholder_target); // Use bl (branch with link) for calls
return true;
}
bool X86_64Translator::TranslateRet(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands) {
// x86_64 RET pops return address from stack and jumps to it
int sp_reg = RegisterMapper::STACK_POINTER;
int scratch_reg = RegisterMapper::SCRATCH_REG;
// Load return address from stack
codegen.ldr(scratch_reg, sp_reg, 0); // Load return address from [SP]
codegen.add_imm(sp_reg, sp_reg, 8); // Increment stack by 8 bytes (pop)
// Jump to return address
codegen.br(scratch_reg);
return true;
}
bool X86_64Translator::TranslateJmp(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands, VAddr address) {
const auto& target = operands[0];
VAddr target_address = 0;
// Calculate target address based on operand type
if (target.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
// Direct relative jump: JMP rel32
// Target = current_address + instruction.length + offset
s64 offset = static_cast<s64>(target.imm.value.s);
target_address = address + instruction.length + offset;
} else if (target.type == ZYDIS_OPERAND_TYPE_MEMORY) {
// Indirect jump: JMP [mem]
// Load address from memory into scratch register
LoadMemoryOperand(RegisterMapper::SCRATCH_REG, target, 8);
// TODO: don't use a dispatcher
codegen.br(RegisterMapper::SCRATCH_REG);
return true;
} else if (target.type == ZYDIS_OPERAND_TYPE_REGISTER) {
// Indirect jump: JMP reg
int reg = GetArm64Register(target);
if (reg == -1) {
LOG_ERROR(Core, "Invalid register for JMP");
return false;
}
codegen.br(reg);
return true;
} else {
LOG_ERROR(Core, "Unsupported JMP operand type");
return false;
}
// For direct jumps, emit a placeholder branch against the raw guest target address.
// The target block may not be translated yet; block linking patches this site with a
// direct branch to the translated code once the target block exists.
void* placeholder_target = reinterpret_cast<void*>(target_address);
codegen.b(placeholder_target);
return true;
}
bool X86_64Translator::TranslateCmp(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands) {
const auto& dst = operands[0];
const auto& src = operands[1];
int dst_reg = GetArm64Register(dst);
if (dst_reg == -1) {
return false;
}
if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
int src_reg = GetArm64Register(src);
if (src_reg == -1) {
return false;
}
codegen.cmp(dst_reg, src_reg);
} else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
s32 imm = static_cast<s32>(src.imm.value.s);
codegen.cmp_imm(dst_reg, imm);
} else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
LoadMemoryOperand(RegisterMapper::SCRATCH_REG, src, instruction.operand_width / 8);
codegen.cmp(dst_reg, RegisterMapper::SCRATCH_REG);
} else {
return false;
}
return true;
}
bool X86_64Translator::TranslateTest(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands) {
const auto& dst = operands[0];
const auto& src = operands[1];
int dst_reg = GetArm64Register(dst);
if (dst_reg == -1) {
return false;
}
if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
int src_reg = GetArm64Register(src);
if (src_reg == -1) {
return false;
}
codegen.tst(dst_reg, src_reg);
} else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
u64 imm = static_cast<u64>(src.imm.value.u);
codegen.tst(dst_reg, imm);
} else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
LoadMemoryOperand(RegisterMapper::SCRATCH_REG, src, instruction.operand_width / 8);
codegen.tst(dst_reg, RegisterMapper::SCRATCH_REG);
} else {
return false;
}
return true;
}
bool X86_64Translator::TranslateLea(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands) {
const auto& dst = operands[0];
const auto& src = operands[1];
ASSERT_MSG(src.type == ZYDIS_OPERAND_TYPE_MEMORY, "LEA source must be memory");
int dst_reg = GetArm64Register(dst);
if (dst_reg == -1) {
return false;
}
CalculateMemoryAddress(dst_reg, src);
return true;
}
void X86_64Translator::UpdateFlagsForArithmetic(int result_reg, int src1_reg, int src2_reg,
bool is_subtract) {
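// Placeholder flag handling: only a compare against zero is emitted, the mapped FLAGS
// register (X11) is never written, and carry/overflow are not modeled yet.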
int flags_reg = reg_mapper.MapX86_64ToArm64(X86_64Register::FLAGS);
codegen.cmp(result_reg, 0);
codegen.mov(RegisterMapper::SCRATCH_REG, 0);
codegen.b_eq(codegen.getCurr());
codegen.mov(RegisterMapper::SCRATCH_REG, 1 << 6);
codegen.b(codegen.getCurr());
}
void X86_64Translator::UpdateFlagsForLogical(int result_reg) {
codegen.cmp(result_reg, 0);
}
void X86_64Translator::UpdateFlagsForShift(int result_reg, int shift_amount) {
codegen.cmp(result_reg, 0);
}
int X86_64Translator::GetConditionCode(ZydisMnemonic mnemonic) {
switch (mnemonic) {
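// Returns ARM64 condition codes: EQ=0, NE=1, HS=2, LO=3, HI=8, LS=9, GE=10, LT=11, GT=12, LE=13.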
case ZYDIS_MNEMONIC_JZ:
return 0;
case ZYDIS_MNEMONIC_JNZ:
return 1;
case ZYDIS_MNEMONIC_JL:
return 11;
case ZYDIS_MNEMONIC_JLE:
return 13;
case ZYDIS_MNEMONIC_JNLE:
return 12;
case ZYDIS_MNEMONIC_JNL:
return 10;
case ZYDIS_MNEMONIC_JB:
return 3;
case ZYDIS_MNEMONIC_JBE:
return 9;
case ZYDIS_MNEMONIC_JNBE:
return 8;
case ZYDIS_MNEMONIC_JNB:
return 2;
default:
return -1;
}
}
} // namespace Core::Jit

View File

@ -0,0 +1,80 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <Zydis/Zydis.h>
#include "arm64_codegen.h"
#include "common/types.h"
#include "register_mapping.h"
namespace Core::Jit {
class X86_64Translator {
public:
explicit X86_64Translator(Arm64CodeGenerator& codegen, RegisterMapper& reg_mapper);
~X86_64Translator() = default;
bool TranslateInstruction(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands, VAddr address);
bool TranslateMov(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands);
bool TranslateAdd(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands);
bool TranslateSub(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands);
bool TranslateMul(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands);
bool TranslateDiv(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands);
bool TranslateAnd(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands);
bool TranslateOr(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands);
bool TranslateXor(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands);
bool TranslateNot(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands);
bool TranslateShl(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands);
bool TranslateShr(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands);
bool TranslateSar(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands);
bool TranslatePush(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands);
bool TranslatePop(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands);
bool TranslateCall(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands, VAddr address);
bool TranslateRet(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands);
bool TranslateJmp(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands, VAddr address);
bool TranslateCmp(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands);
bool TranslateTest(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands);
bool TranslateLea(const ZydisDecodedInstruction& instruction,
const ZydisDecodedOperand* operands);
void UpdateFlagsForArithmetic(int result_reg, int src1_reg, int src2_reg, bool is_subtract);
void UpdateFlagsForLogical(int result_reg);
void UpdateFlagsForShift(int result_reg, int shift_amount);
int GetConditionCode(ZydisMnemonic mnemonic);
private:
int GetArm64Register(const ZydisDecodedOperand& operand);
int GetArm64XmmRegister(const ZydisDecodedOperand& operand);
void LoadMemoryOperand(int dst_reg, const ZydisDecodedOperand& mem_op, size_t size);
void StoreMemoryOperand(int src_reg, const ZydisDecodedOperand& mem_op, size_t size);
void LoadImmediate(int dst_reg, const ZydisDecodedOperand& imm_op);
void CalculateMemoryAddress(int dst_reg, const ZydisDecodedOperand& mem_op);
X86_64Register ZydisToX86_64Register(ZydisRegister reg);
Arm64CodeGenerator& codegen;
RegisterMapper& reg_mapper;
};
} // namespace Core::Jit

View File

@@ -3,6 +3,7 @@
#include "fiber.h"
#include "common/arch.h"
#include "common/elf_info.h"
#include "common/logging/log.h"
#include "core/libraries/fiber/fiber_error.h"
@@ -23,12 +24,34 @@ OrbisFiberContext* GetFiberContext() {
return Core::GetTcbBase()->tcb_fiber;
}
#ifdef ARCH_X86_64
extern "C" s32 PS4_SYSV_ABI _sceFiberSetJmp(OrbisFiberContext* ctx) asm("_sceFiberSetJmp");
extern "C" s32 PS4_SYSV_ABI _sceFiberLongJmp(OrbisFiberContext* ctx) asm("_sceFiberLongJmp");
extern "C" void PS4_SYSV_ABI _sceFiberSwitchEntry(OrbisFiberData* data,
bool set_fpu) asm("_sceFiberSwitchEntry");
#elif defined(ARCH_ARM64)
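// On ARM64 there is no assembly implementation yet; C++ stubs are defined further below.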
extern "C" s32 PS4_SYSV_ABI _sceFiberSetJmp(OrbisFiberContext* ctx);
extern "C" s32 PS4_SYSV_ABI _sceFiberLongJmp(OrbisFiberContext* ctx);
extern "C" void PS4_SYSV_ABI _sceFiberSwitchEntry(OrbisFiberData* data, bool set_fpu);
#endif
extern "C" void PS4_SYSV_ABI _sceFiberForceQuit(u64 ret) asm("_sceFiberForceQuit");
#ifdef ARCH_ARM64
extern "C" s32 PS4_SYSV_ABI _sceFiberSetJmp(OrbisFiberContext* ctx) {
UNREACHABLE_MSG("ARM64 fiber implementation not yet complete");
return 0;
}
extern "C" s32 PS4_SYSV_ABI _sceFiberLongJmp(OrbisFiberContext* ctx) {
UNREACHABLE_MSG("ARM64 fiber implementation not yet complete");
return 0;
}
extern "C" void PS4_SYSV_ABI _sceFiberSwitchEntry(OrbisFiberData* data, bool set_fpu) {
UNREACHABLE_MSG("ARM64 fiber implementation not yet complete");
}
#endif
extern "C" void PS4_SYSV_ABI _sceFiberForceQuit(u64 ret) {
OrbisFiberContext* g_ctx = GetFiberContext();
g_ctx->return_val = ret;

View File

@@ -318,8 +318,8 @@ void RegisterLib(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("Mv1zUObHvXI", "libkernel", 1, "libkernel", sceKernelGetSystemSwVersion);
LIB_FUNCTION("igMefp4SAv0", "libkernel", 1, "libkernel", get_authinfo);
LIB_FUNCTION("G-MYv5erXaU", "libkernel", 1, "libkernel", sceKernelGetAppInfo);
LIB_FUNCTION("PfccT7qURYE", "libkernel", 1, "libkernel", kernel_ioctl);
LIB_FUNCTION("wW+k21cmbwQ", "libkernel", 1, "libkernel", kernel_ioctl);
LIB_FUNCTION_VARIADIC("PfccT7qURYE", "libkernel", 1, "libkernel", kernel_ioctl);
LIB_FUNCTION_VARIADIC("wW+k21cmbwQ", "libkernel", 1, "libkernel", kernel_ioctl);
LIB_FUNCTION("JGfTMBOdUJo", "libkernel", 1, "libkernel", sceKernelGetFsSandboxRandomWord);
LIB_FUNCTION("6xVpy0Fdq+I", "libkernel", 1, "libkernel", _sigprocmask);
LIB_FUNCTION("Xjoosiw+XPI", "libkernel", 1, "libkernel", sceKernelUuidCreate);

View File

@@ -1,6 +1,7 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/arch.h"
#include "common/assert.h"
#include "core/libraries/kernel/orbis_error.h"
#include "core/libraries/kernel/threads/exception.h"
@@ -23,6 +24,7 @@ void SigactionHandler(int signum, siginfo_t* inf, ucontext_t* raw_context) {
if (handler) {
auto ctx = Ucontext{};
#ifdef __APPLE__
#ifdef ARCH_X86_64
const auto& regs = raw_context->uc_mcontext->__ss;
ctx.uc_mcontext.mc_r8 = regs.__r8;
ctx.uc_mcontext.mc_r9 = regs.__r9;
@@ -42,7 +44,13 @@ void SigactionHandler(int signum, siginfo_t* inf, ucontext_t* raw_context) {
ctx.uc_mcontext.mc_rsp = regs.__rsp;
ctx.uc_mcontext.mc_fs = regs.__fs;
ctx.uc_mcontext.mc_gs = regs.__gs;
#elif defined(ARCH_ARM64)
UNREACHABLE_MSG("ARM64 exception handling not yet implemented");
#else
#error "Unsupported architecture"
#endif
#else
#ifdef ARCH_X86_64
const auto& regs = raw_context->uc_mcontext.gregs;
ctx.uc_mcontext.mc_r8 = regs[REG_R8];
ctx.uc_mcontext.mc_r9 = regs[REG_R9];
@@ -62,6 +70,11 @@ void SigactionHandler(int signum, siginfo_t* inf, ucontext_t* raw_context) {
ctx.uc_mcontext.mc_rsp = regs[REG_RSP];
ctx.uc_mcontext.mc_fs = (regs[REG_CSGSFS] >> 32) & 0xFFFF;
ctx.uc_mcontext.mc_gs = (regs[REG_CSGSFS] >> 16) & 0xFFFF;
#elif defined(ARCH_ARM64)
UNREACHABLE_MSG("ARM64 exception handling not yet implemented");
#else
#error "Unsupported architecture"
#endif
#endif
handler(POSIX_SIGUSR1, &ctx);
}

View File

@@ -18,7 +18,13 @@ static std::mutex MutxStaticLock;
#define THR_ADAPTIVE_MUTEX_INITIALIZER ((PthreadMutex*)1)
#define THR_MUTEX_DESTROYED ((PthreadMutex*)2)
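// CPU relax hint for spin-wait loops: PAUSE on x86-64, YIELD on ARM64, no-op elsewhere.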
#ifdef ARCH_X86_64
#define CPU_SPINWAIT __asm__ volatile("pause")
#elif defined(ARCH_ARM64)
#define CPU_SPINWAIT __asm__ volatile("yield")
#else
#define CPU_SPINWAIT
#endif
#define CHECK_AND_INIT_MUTEX \
if (PthreadMutex* m = *mutex; m <= THR_MUTEX_DESTROYED) [[unlikely]] { \

View File

@@ -18,6 +18,7 @@ int PS4_SYSV_ABI internal_snprintf(char* s, size_t n, VA_ARGS) {
return snprintf_ctx(s, n, &ctx);
}
void RegisterlibSceLibcInternalIo(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("eLdDw6l0-bU", "libSceLibcInternal", 1, "libSceLibcInternal", internal_snprintf);
LIB_FUNCTION_VARIADIC("eLdDw6l0-bU", "libSceLibcInternal", 1, "libSceLibcInternal",
internal_snprintf);
}
} // namespace Libraries::LibcInternal

View File

@@ -19,6 +19,18 @@
sym->AddSymbol(sr, func); \
}
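// Variant of LIB_FUNCTION for variadic HLE functions (e.g. kernel_ioctl, internal_snprintf).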
#define LIB_FUNCTION_VARIADIC(nid, lib, libversion, mod, function) \
{ \
Core::Loader::SymbolResolver sr{}; \
sr.name = nid; \
sr.library = lib; \
sr.library_version = libversion; \
sr.module = mod; \
sr.type = Core::Loader::SymbolType::Function; \
auto func = reinterpret_cast<u64>(function); \
sym->AddSymbol(sr, func); \
}
#define LIB_OBJ(nid, lib, libversion, mod, obj) \
{ \
Core::Loader::SymbolResolver sr{}; \

View File

@@ -20,6 +20,9 @@
#include "core/memory.h"
#include "core/tls.h"
#include "ipc/ipc.h"
#ifdef ARCH_ARM64
#include "core/jit/execution_engine.h"
#endif
namespace Core {
@@ -49,6 +52,20 @@ static PS4_SYSV_ABI void* RunMainEntry [[noreturn]] (EntryParams* params) {
: "rax", "rsi", "rdi");
UNREACHABLE();
}
#elif defined(ARCH_ARM64)
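// On ARM64 the guest entry point is executed through the JIT engine instead of being jumped to directly.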
static PS4_SYSV_ABI void* RunMainEntry [[noreturn]] (EntryParams* params) {
auto* jit = Core::Jit::JitEngine::Instance();
if (jit) {
// JIT should already be initialized in Emulator::Run(), but check just in case
if (!jit->IsInitialized()) {
jit->Initialize();
}
jit->ExecuteBlock(params->entry_addr);
} else {
LOG_CRITICAL(Core_Linker, "JIT engine not available");
}
UNREACHABLE();
}
#endif
Linker::Linker() : memory{Memory::Instance()} {}

View File

@@ -6,6 +6,7 @@
#include "common/config.h"
#include "common/debug.h"
#include "core/file_sys/fs.h"
#include "core/jit/execution_engine.h"
#include "core/libraries/kernel/memory.h"
#include "core/libraries/kernel/orbis_error.h"
#include "core/libraries/kernel/process.h"
@@ -849,6 +850,15 @@ s64 MemoryManager::ProtectBytes(VAddr addr, VirtualMemoryArea& vma_base, u64 siz
impl.Protect(addr, size, perms);
#ifdef ARCH_ARM64
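// Code pages made CPU-writable may be modified by the guest, so stale JIT blocks for this range are dropped.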
if (True(prot & MemoryProt::CpuWrite) && vma_base.type == VMAType::Code) {
auto* jit = Core::Jit::JitEngine::Instance();
if (jit) {
jit->InvalidateRange(addr, addr + adjusted_size);
}
}
#endif
return adjusted_size;
}

View File

@@ -6,6 +6,9 @@
#include "common/decoder.h"
#include "common/signal_context.h"
#include "core/signals.h"
#ifdef ARCH_ARM64
#include "core/jit/execution_engine.h"
#endif
#ifdef _WIN32
#include <windows.h>
@@ -79,6 +82,15 @@ static void SignalHandler(int sig, siginfo_t* info, void* raw_context) {
case SIGSEGV:
case SIGBUS: {
const bool is_write = Common::IsWriteError(raw_context);
#ifdef ARCH_ARM64
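// A fault inside JIT-emitted code likely means the source block is stale; invalidate it so it gets retranslated.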
auto* jit = Core::Jit::JitEngine::Instance();
if (jit && jit->IsJitCode(code_address)) {
VAddr ps4_addr = jit->GetPs4AddressForJitCode(code_address);
if (ps4_addr != 0) {
jit->InvalidateBlock(ps4_addr);
}
}
#endif
if (!signals->DispatchAccessViolation(raw_context, info->si_addr)) {
UNREACHABLE_MSG(
"Unhandled access violation in thread '{}' at code address {}: {} address {}",
@@ -87,13 +99,20 @@ static void SignalHandler(int sig, siginfo_t* info, void* raw_context) {
}
break;
}
case SIGILL:
case SIGILL: {
#ifdef ARCH_ARM64
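// Report illegal instructions that originate from JIT-emitted code to help debug the translator.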
auto* jit = Core::Jit::JitEngine::Instance();
if (jit && jit->IsJitCode(code_address)) {
LOG_ERROR(Core, "Illegal instruction in JIT code at {}", fmt::ptr(code_address));
}
#endif
if (!signals->DispatchIllegalInstruction(raw_context)) {
UNREACHABLE_MSG("Unhandled illegal instruction in thread '{}' at code address {}: {}",
GetThreadName(), fmt::ptr(code_address),
DisassembleInstruction(code_address));
}
break;
}
case SIGUSR1: { // Sleep thread until signal is received
sigset_t sigset;
sigemptyset(&sigset);

View File

@@ -11,8 +11,10 @@
#include <csignal>
#include <pthread.h>
#include <unistd.h>
#ifdef ARCH_X86_64
#include <xmmintrin.h>
#endif
#endif
namespace Core {
@@ -126,8 +128,10 @@ void NativeThread::Exit() {
void NativeThread::Initialize() {
// Set MXCSR and FPUCW registers to the values used by Orbis.
#ifdef ARCH_X86_64
_mm_setcsr(ORBIS_MXCSR);
asm volatile("fldcw %0" : : "m"(ORBIS_FPUCW));
#endif
#if _WIN64
tid = GetCurrentThreadId();
#else

View File

@@ -30,6 +30,7 @@
#include "core/file_format/psf.h"
#include "core/file_format/trp.h"
#include "core/file_sys/fs.h"
#include "core/jit/execution_engine.h"
#include "core/libraries/disc_map/disc_map.h"
#include "core/libraries/font/font.h"
#include "core/libraries/font/fontft.h"
@@ -261,6 +262,19 @@ void Emulator::Run(std::filesystem::path file, std::vector<std::string> args,
controller = Common::Singleton<Input::GameController>::Instance();
linker = Common::Singleton<Core::Linker>::Instance();
#ifdef ARCH_ARM64
// Initialize JIT engine early for ARM64 builds
auto* jit = Core::Jit::JitEngine::Instance();
if (jit) {
try {
jit->Initialize();
LOG_INFO(Loader, "JIT Execution Engine initialized");
} catch (const std::bad_alloc& e) {
LOG_CRITICAL(Loader, "Failed to initialize JIT engine: {}", e.what());
}
}
#endif
// Load renderdoc module
VideoCore::LoadRenderDoc();

View File

@@ -4,9 +4,13 @@
#include <unordered_map>
#include <boost/container/flat_map.hpp>
#include "common/arch.h"
#ifdef ARCH_X86_64
#include <xbyak/xbyak.h>
#include <xbyak/xbyak_util.h>
#endif
#include "common/config.h"
#include "common/decoder.h"
#include "common/io_file.h"
#include "common/logging/log.h"
#include "common/path_util.h"
@@ -20,22 +24,28 @@
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/ir/srt_gvn_table.h"
#include "shader_recompiler/ir/value.h"
#include "src/common/arch.h"
#include "src/common/decoder.h"
#ifdef ARCH_X86_64
using namespace Xbyak::util;
static Xbyak::CodeGenerator g_srt_codegen(32_MB);
static const u8* g_srt_codegen_start = nullptr;
#endif
namespace Shader {
#ifdef ARCH_X86_64
PFN_SrtWalker RegisterWalkerCode(const u8* ptr, size_t size) {
const auto func_addr = (PFN_SrtWalker)g_srt_codegen.getCurr();
g_srt_codegen.db(ptr, size);
g_srt_codegen.ready();
return func_addr;
}
#else
PFN_SrtWalker RegisterWalkerCode(const u8* ptr, size_t size) {
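// SRT walker generation is x86-64 only for now; other hosts get no walker.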
return nullptr;
}
#endif
} // namespace Shader
@@ -69,6 +79,7 @@ static void DumpSrtProgram(const Shader::Info& info, const u8* code, size_t code
}
static bool SrtWalkerSignalHandler(void* context, void* fault_address) {
#ifdef ARCH_X86_64
// Only handle if the fault address is within the SRT code range
const u8* code_start = g_srt_codegen_start;
const u8* code_end = code_start + g_srt_codegen.getSize();
@@ -117,6 +128,9 @@ static bool SrtWalkerSignalHandler(void* context, void* fault_address) {
LOG_DEBUG(Render_Recompiler, "Patched SRT walker at {}", code);
return true;
#else
return false;
#endif
}
using namespace Shader;
@@ -159,6 +173,7 @@ namespace Shader::Optimization {
namespace {
#ifdef ARCH_X86_64
static inline void PushPtr(Xbyak::CodeGenerator& c, u32 off_dw) {
c.push(rdi);
c.mov(rdi, ptr[rdi + (off_dw << 2)]);
@@ -236,6 +251,9 @@ static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {
info.srt_info.flattened_bufsize_dw = pass_info.dst_off_dw;
}
#else
static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {}
#endif
}; // namespace
@@ -293,7 +311,9 @@ void FlattenExtendedUserdataPass(IR::Program& program) {
}
}
#ifdef ARCH_X86_64
GenerateSrtProgram(info, pass_info);
#endif
// Assign offsets to duplicate readconsts
for (IR::Inst* readconst : all_readconsts) {

63
tests/CMakeLists.txt Normal file
View File

@@ -0,0 +1,63 @@
# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
# SPDX-License-Identifier: GPL-2.0-or-later
add_executable(jit_tests
test_arm64_codegen.cpp
test_register_mapping.cpp
test_block_manager.cpp
test_execution_engine.cpp
test_block_linking.cpp
test_call_ret.cpp
test_hle_bridge.cpp
main.cpp
)
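# JIT sources are compiled into the test binary only on arm64 hosts.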
if (ARCHITECTURE STREQUAL "arm64")
target_sources(jit_tests PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/arm64_codegen.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/arm64_codegen.h
${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/register_mapping.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/register_mapping.h
${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/block_manager.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/block_manager.h
${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/x86_64_translator.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/x86_64_translator.h
${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/simd_translator.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/simd_translator.h
${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/calling_convention.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/calling_convention.h
${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/hle_bridge.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/hle_bridge.h
)
endif()
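# Sources and stubs shared by the tests on every host architecture.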
target_sources(jit_tests PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/../src/common/assert.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../src/common/decoder.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_logging_stub.cpp
)
target_link_libraries(jit_tests PRIVATE
GTest::gtest
GTest::gtest_main
GTest::gmock
Zydis::Zydis
fmt::fmt
)
target_include_directories(jit_tests PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/../src
${CMAKE_CURRENT_SOURCE_DIR}/../externals/zydis/include
)
target_compile_definitions(jit_tests PRIVATE
ARCH_ARM64
)
# to make ctest work
add_test(NAME JitTests COMMAND jit_tests)
set_tests_properties(JitTests PROPERTIES
TIMEOUT 60
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
)

9
tests/main.cpp Normal file
View File

@@ -0,0 +1,9 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <gtest/gtest.h>
int main(int argc, char **argv) {
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

View File

@@ -0,0 +1,111 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "core/jit/arm64_codegen.h"
#include <cstring>
#include <gtest/gtest.h>
#include <sys/mman.h>
using namespace Core::Jit;
class Arm64CodeGenTest : public ::testing::Test {
protected:
void SetUp() override { test_gen = std::make_unique<Arm64CodeGenerator>(); }
void TearDown() override { test_gen.reset(); }
std::unique_ptr<Arm64CodeGenerator> test_gen;
};
TEST_F(Arm64CodeGenTest, Constructor) {
EXPECT_NE(test_gen->getCode(), nullptr);
EXPECT_EQ(test_gen->getSize(), 0);
}
TEST_F(Arm64CodeGenTest, Reset) {
test_gen->add(0, 1, 2);
size_t size_after_add = test_gen->getSize();
EXPECT_GT(size_after_add, 0);
test_gen->reset();
EXPECT_EQ(test_gen->getSize(), 0);
}
TEST_F(Arm64CodeGenTest, AddInstruction) {
test_gen->add(0, 1, 2); // X0 = X1 + X2
EXPECT_GT(test_gen->getSize(), 0);
EXPECT_LE(test_gen->getSize(), 4); // Should be 4 bytes (one instruction)
}
TEST_F(Arm64CodeGenTest, AddImmediate) {
test_gen->add_imm(0, 1, 42); // X0 = X1 + 42
EXPECT_GT(test_gen->getSize(), 0);
}
TEST_F(Arm64CodeGenTest, MovRegister) {
test_gen->mov(0, 1); // X0 = X1
EXPECT_GT(test_gen->getSize(), 0);
}
TEST_F(Arm64CodeGenTest, MovImmediate) {
test_gen->mov(0, 0x1234LL); // X0 = 0x1234
EXPECT_GT(test_gen->getSize(), 0);
// Large immediate may require multiple instructions
EXPECT_LE(test_gen->getSize(),
16); // Up to 4 instructions for 64-bit immediate
}
TEST_F(Arm64CodeGenTest, LoadStore) {
test_gen->ldr(0, 1, 0); // X0 = [X1]
test_gen->str(0, 1, 0); // [X1] = X0
EXPECT_GE(test_gen->getSize(), 8); // At least 2 instructions
}
TEST_F(Arm64CodeGenTest, Branch) {
void *target = test_gen->getCode(); // Branch to start of code
test_gen->b(target);
EXPECT_GT(test_gen->getSize(), 0);
}
TEST_F(Arm64CodeGenTest, ConditionalBranch) {
void *target = test_gen->getCode(); // Branch to start of code
test_gen->b(0, target); // Branch if equal
EXPECT_GT(test_gen->getSize(), 0);
}
TEST_F(Arm64CodeGenTest, Compare) {
test_gen->cmp(0, 1); // Compare X0 and X1
EXPECT_GT(test_gen->getSize(), 0);
}
TEST_F(Arm64CodeGenTest, ArithmeticOperations) {
test_gen->add(0, 1, 2);
test_gen->sub(0, 1, 2);
test_gen->mul(0, 1, 2);
test_gen->and_(0, 1, 2);
test_gen->orr(0, 1, 2);
test_gen->eor(0, 1, 2);
EXPECT_GE(test_gen->getSize(), 24); // At least 6 instructions
}
TEST_F(Arm64CodeGenTest, SIMDOperations) {
test_gen->mov_v(0, 1); // V0 = V1
test_gen->add_v(0, 1, 2); // V0 = V1 + V2
test_gen->sub_v(0, 1, 2); // V0 = V1 - V2
test_gen->mul_v(0, 1, 2); // V0 = V1 * V2
EXPECT_GE(test_gen->getSize(), 16); // At least 4 instructions
}
TEST_F(Arm64CodeGenTest, SetSize) {
test_gen->add(0, 1, 2);
size_t original_size = test_gen->getSize();
EXPECT_GT(original_size, 0);
// Test setting size to 0
test_gen->setSize(0);
EXPECT_EQ(test_gen->getSize(), 0);
// Test setting size back (this should work without throwing)
test_gen->setSize(original_size);
EXPECT_EQ(test_gen->getSize(), original_size);
}

View File

@@ -0,0 +1,247 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/decoder.h"
#include "core/jit/arm64_codegen.h"
#include "core/jit/block_manager.h"
#include "core/jit/register_mapping.h"
#include "core/jit/x86_64_translator.h"
#include <gtest/gtest.h>
#include <sys/mman.h>
#if defined(__APPLE__) && defined(ARCH_ARM64)
#include <pthread.h>
#endif
using namespace Core::Jit;
class BlockLinkingTest : public ::testing::Test {
protected:
void SetUp() override {
// Allocate executable memory for test code
#if defined(__APPLE__) && defined(ARCH_ARM64)
test_code_buffer = mmap(nullptr, 64 * 1024, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
ASSERT_NE(test_code_buffer, MAP_FAILED)
<< "Failed to allocate executable memory for test";
pthread_jit_write_protect_np(0);
#else
test_code_buffer =
mmap(nullptr, 64 * 1024, PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
ASSERT_NE(test_code_buffer, MAP_FAILED)
<< "Failed to allocate executable memory for test";
#endif
codegen = std::make_unique<Arm64CodeGenerator>(64 * 1024, test_code_buffer);
register_mapper = std::make_unique<RegisterMapper>();
translator = std::make_unique<X86_64Translator>(*codegen, *register_mapper);
block_manager = std::make_unique<BlockManager>();
}
void TearDown() override {
translator.reset();
register_mapper.reset();
codegen.reset();
block_manager.reset();
if (test_code_buffer != MAP_FAILED) {
munmap(test_code_buffer, 64 * 1024);
}
}
void *test_code_buffer = MAP_FAILED;
std::unique_ptr<Arm64CodeGenerator> codegen;
std::unique_ptr<RegisterMapper> register_mapper;
std::unique_ptr<X86_64Translator> translator;
std::unique_ptr<BlockManager> block_manager;
};
// Test that JMP translation can handle direct immediate addresses
TEST_F(BlockLinkingTest, TranslateDirectJmp) {
// Create a simple x86_64 JMP instruction: JMP +0x1000 (relative jump)
// x86_64 encoding: E9 <offset> (near relative jump, 32-bit offset)
// E9 00 10 00 00 = JMP +0x1000
u8 x86_jmp[] = {0xE9, 0x00, 0x10, 0x00, 0x00};
ZydisDecodedInstruction instruction;
ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];
ZyanStatus status = Common::Decoder::Instance()->decodeInstruction(
instruction, operands, x86_jmp, sizeof(x86_jmp));
if (!ZYAN_SUCCESS(status)) {
GTEST_SKIP()
<< "Failed to decode JMP instruction - Zydis may not be available";
}
// JMP translation should succeed (even if target isn't linked yet)
bool result = translator->TranslateJmp(instruction, operands, 0x400000);
EXPECT_TRUE(result) << "JMP translation should succeed";
EXPECT_GT(codegen->getSize(), 0) << "JMP should generate ARM64 code";
}
// Test that we can create two blocks and link them
TEST_F(BlockLinkingTest, CreateAndLinkBlocks) {
VAddr block1_addr = 0x400000;
VAddr block2_addr = 0x401000;
// Allocate separate memory for each block to avoid issues
#if defined(__APPLE__) && defined(ARCH_ARM64)
void *block1_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
ASSERT_NE(block1_mem, MAP_FAILED);
pthread_jit_write_protect_np(0);
void *block2_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
ASSERT_NE(block2_mem, MAP_FAILED);
#else
void *block1_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
ASSERT_NE(block1_mem, MAP_FAILED);
void *block2_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
ASSERT_NE(block2_mem, MAP_FAILED);
#endif
// Write simple NOP instructions
u32 nop = 0xD503201F; // ARM64 NOP
*reinterpret_cast<u32 *>(block1_mem) = nop;
*reinterpret_cast<u32 *>(block2_mem) = nop;
#if defined(__APPLE__) && defined(ARCH_ARM64)
pthread_jit_write_protect_np(1);
mprotect(block1_mem, 4096, PROT_READ | PROT_EXEC);
mprotect(block2_mem, 4096, PROT_READ | PROT_EXEC);
#endif
// Create blocks
CodeBlock *block1 = block_manager->CreateBlock(block1_addr, block1_mem, 4, 1);
ASSERT_NE(block1, nullptr);
CodeBlock *block2 = block_manager->CreateBlock(block2_addr, block2_mem, 4, 1);
ASSERT_NE(block2, nullptr);
// Verify blocks exist
EXPECT_EQ(block_manager->GetBlockCount(), 2);
EXPECT_NE(block_manager->GetBlock(block1_addr), nullptr);
EXPECT_NE(block_manager->GetBlock(block2_addr), nullptr);
// Test that blocks can be retrieved
CodeBlock *retrieved_block1 = block_manager->GetBlock(block1_addr);
CodeBlock *retrieved_block2 = block_manager->GetBlock(block2_addr);
EXPECT_EQ(retrieved_block1, block1);
EXPECT_EQ(retrieved_block2, block2);
// Cleanup
munmap(block1_mem, 4096);
munmap(block2_mem, 4096);
}
// Test that block linking tracks dependencies
TEST_F(BlockLinkingTest, BlockDependencies) {
VAddr block1_addr = 0x400000;
VAddr block2_addr = 0x401000;
// Allocate memory for blocks
#if defined(__APPLE__) && defined(ARCH_ARM64)
void *block1_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
ASSERT_NE(block1_mem, MAP_FAILED);
pthread_jit_write_protect_np(0);
void *block2_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
ASSERT_NE(block2_mem, MAP_FAILED);
u32 nop = 0xD503201F;
*reinterpret_cast<u32 *>(block1_mem) = nop;
*reinterpret_cast<u32 *>(block2_mem) = nop;
pthread_jit_write_protect_np(1);
mprotect(block1_mem, 4096, PROT_READ | PROT_EXEC);
mprotect(block2_mem, 4096, PROT_READ | PROT_EXEC);
#else
void *block1_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
ASSERT_NE(block1_mem, MAP_FAILED);
void *block2_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
ASSERT_NE(block2_mem, MAP_FAILED);
u32 nop = 0xD503201F;
*reinterpret_cast<u32 *>(block1_mem) = nop;
*reinterpret_cast<u32 *>(block2_mem) = nop;
#endif
// Create blocks
CodeBlock *block1 = block_manager->CreateBlock(block1_addr, block1_mem, 4, 1);
CodeBlock *block2 = block_manager->CreateBlock(block2_addr, block2_mem, 4, 1);
// Add dependency: block1 depends on block2
block_manager->AddDependency(block1_addr, block2_addr);
// Verify dependency is tracked
EXPECT_EQ(block1->dependencies.count(block2_addr), 1);
// Cleanup
munmap(block1_mem, 4096);
munmap(block2_mem, 4096);
}
// Test that invalidating a block invalidates dependent blocks
TEST_F(BlockLinkingTest, InvalidateDependentBlocks) {
VAddr block1_addr = 0x400000;
VAddr block2_addr = 0x401000;
// Allocate memory for blocks
#if defined(__APPLE__) && defined(ARCH_ARM64)
void *block1_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
ASSERT_NE(block1_mem, MAP_FAILED);
pthread_jit_write_protect_np(0);
void *block2_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
ASSERT_NE(block2_mem, MAP_FAILED);
u32 nop = 0xD503201F;
*reinterpret_cast<u32 *>(block1_mem) = nop;
*reinterpret_cast<u32 *>(block2_mem) = nop;
pthread_jit_write_protect_np(1);
mprotect(block1_mem, 4096, PROT_READ | PROT_EXEC);
mprotect(block2_mem, 4096, PROT_READ | PROT_EXEC);
#else
void *block1_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
ASSERT_NE(block1_mem, MAP_FAILED);
void *block2_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
ASSERT_NE(block2_mem, MAP_FAILED);
u32 nop = 0xD503201F;
*reinterpret_cast<u32 *>(block1_mem) = nop;
*reinterpret_cast<u32 *>(block2_mem) = nop;
#endif
// Create blocks with dependency
CodeBlock *block1 = block_manager->CreateBlock(block1_addr, block1_mem, 4, 1);
CodeBlock *block2 = block_manager->CreateBlock(block2_addr, block2_mem, 4, 1);
block_manager->AddDependency(block1_addr, block2_addr);
// Invalidate block2
block_manager->InvalidateBlock(block2_addr);
// block2 should be removed
EXPECT_EQ(block_manager->GetBlock(block2_addr), nullptr);
// block1 should still exist (dependency tracking doesn't auto-invalidate)
// But in a real implementation, we might want to invalidate dependents
EXPECT_NE(block_manager->GetBlock(block1_addr), nullptr);
// Cleanup
munmap(block1_mem, 4096);
munmap(block2_mem, 4096);
}

View File

@@ -0,0 +1,180 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "core/jit/block_manager.h"
#include <gtest/gtest.h>
#include <sys/mman.h>
#if defined(__APPLE__) && defined(ARCH_ARM64)
#include <pthread.h>
#endif
using namespace Core::Jit;
class BlockManagerTest : public ::testing::Test {
protected:
void SetUp() override {
// Allocate executable memory for test code blocks
#if defined(__APPLE__) && defined(ARCH_ARM64)
// On macOS ARM64, use the JIT API approach
test_code = mmap(nullptr, 64 * 1024, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
ASSERT_NE(test_code, MAP_FAILED)
<< "Failed to allocate executable memory for test";
pthread_jit_write_protect_np(0); // Disable write protection for writing
// Will make executable later if needed
#else
test_code = mmap(nullptr, 64 * 1024, PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
ASSERT_NE(test_code, MAP_FAILED)
<< "Failed to allocate executable memory for test";
#endif
}
void TearDown() override {
if (test_code != MAP_FAILED) {
munmap(test_code, 64 * 1024);
}
}
void *test_code = MAP_FAILED;
};
TEST_F(BlockManagerTest, Constructor) {
BlockManager manager;
EXPECT_EQ(manager.GetBlockCount(), 0);
EXPECT_EQ(manager.GetTotalCodeSize(), 0);
}
TEST_F(BlockManagerTest, CreateBlock) {
BlockManager manager;
VAddr ps4_addr = 0x400000;
void *arm64_code = test_code;
size_t code_size = 1024;
size_t instruction_count = 10;
CodeBlock *block =
manager.CreateBlock(ps4_addr, arm64_code, code_size, instruction_count);
ASSERT_NE(block, nullptr);
EXPECT_EQ(block->ps4_address, ps4_addr);
EXPECT_EQ(block->arm64_code, arm64_code);
EXPECT_EQ(block->code_size, code_size);
EXPECT_EQ(block->instruction_count, instruction_count);
EXPECT_FALSE(block->is_linked);
EXPECT_EQ(manager.GetBlockCount(), 1);
EXPECT_EQ(manager.GetTotalCodeSize(), code_size);
}
TEST_F(BlockManagerTest, GetBlock) {
BlockManager manager;
VAddr ps4_addr = 0x400000;
void *arm64_code = test_code;
// Block doesn't exist yet
CodeBlock *block = manager.GetBlock(ps4_addr);
EXPECT_EQ(block, nullptr);
manager.CreateBlock(ps4_addr, arm64_code, 1024, 10);
// Now it should exist
block = manager.GetBlock(ps4_addr);
ASSERT_NE(block, nullptr);
EXPECT_EQ(block->ps4_address, ps4_addr);
}
TEST_F(BlockManagerTest, MultipleBlocks) {
BlockManager manager;
// Create multiple blocks
for (int i = 0; i < 10; ++i) {
VAddr ps4_addr = 0x400000 + (i * 0x1000);
void *arm64_code = static_cast<char *>(test_code) + (i * 1024);
manager.CreateBlock(ps4_addr, arm64_code, 1024, 10);
}
EXPECT_EQ(manager.GetBlockCount(), 10);
EXPECT_EQ(manager.GetTotalCodeSize(), 10 * 1024);
}
TEST_F(BlockManagerTest, InvalidateBlock) {
BlockManager manager;
VAddr ps4_addr = 0x400000;
// Create and verify block exists
manager.CreateBlock(ps4_addr, test_code, 1024, 10);
EXPECT_NE(manager.GetBlock(ps4_addr), nullptr);
// Invalidate block
manager.InvalidateBlock(ps4_addr);
// Block should no longer exist
EXPECT_EQ(manager.GetBlock(ps4_addr), nullptr);
EXPECT_EQ(manager.GetBlockCount(), 0);
EXPECT_EQ(manager.GetTotalCodeSize(), 0);
}
TEST_F(BlockManagerTest, InvalidateRange) {
BlockManager manager;
// Create blocks at different addresses
manager.CreateBlock(0x400000, test_code, 1024, 10);
manager.CreateBlock(0x401000, static_cast<char *>(test_code) + 1024, 1024,
10);
manager.CreateBlock(0x402000, static_cast<char *>(test_code) + 2048, 1024,
10);
manager.CreateBlock(0x500000, static_cast<char *>(test_code) + 3072, 1024,
10);
EXPECT_EQ(manager.GetBlockCount(), 4);
// Invalidate range that covers first 3 blocks
manager.InvalidateRange(0x400000, 0x403000);
// First 3 blocks should be gone, last one should remain
EXPECT_EQ(manager.GetBlock(0x400000), nullptr);
EXPECT_EQ(manager.GetBlock(0x401000), nullptr);
EXPECT_EQ(manager.GetBlock(0x402000), nullptr);
EXPECT_NE(manager.GetBlock(0x500000), nullptr);
EXPECT_EQ(manager.GetBlockCount(), 1);
}
TEST_F(BlockManagerTest, AddDependency) {
BlockManager manager;
VAddr block_addr = 0x400000;
VAddr dep_addr = 0x500000;
CodeBlock *block = manager.CreateBlock(block_addr, test_code, 1024, 10);
manager.AddDependency(block_addr, dep_addr);
EXPECT_EQ(block->dependencies.size(), 1);
EXPECT_NE(block->dependencies.find(dep_addr), block->dependencies.end());
}
TEST_F(BlockManagerTest, MultipleDependencies) {
BlockManager manager;
VAddr block_addr = 0x400000;
CodeBlock *block = manager.CreateBlock(block_addr, test_code, 1024, 10);
manager.AddDependency(block_addr, 0x500000);
manager.AddDependency(block_addr, 0x600000);
manager.AddDependency(block_addr, 0x700000);
EXPECT_EQ(block->dependencies.size(), 3);
}
TEST_F(BlockManagerTest, Clear) {
BlockManager manager;
// Create multiple blocks
for (int i = 0; i < 5; ++i) {
VAddr ps4_addr = 0x400000 + (i * 0x1000);
void *arm64_code = static_cast<char *>(test_code) + (i * 1024);
manager.CreateBlock(ps4_addr, arm64_code, 1024, 10);
}
EXPECT_EQ(manager.GetBlockCount(), 5);
manager.Clear();
EXPECT_EQ(manager.GetBlockCount(), 0);
EXPECT_EQ(manager.GetTotalCodeSize(), 0);
}

151
tests/test_call_ret.cpp Normal file
View File

@@ -0,0 +1,151 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/decoder.h"
#include "core/jit/arm64_codegen.h"
#include "core/jit/register_mapping.h"
#include "core/jit/x86_64_translator.h"
#include <gtest/gtest.h>
#include <sys/mman.h>
#if defined(__APPLE__) && defined(ARCH_ARM64)
#include <pthread.h>
#endif
using namespace Core::Jit;
class CallRetTest : public ::testing::Test {
protected:
void SetUp() override {
// Allocate executable memory for test code
#if defined(__APPLE__) && defined(ARCH_ARM64)
test_code_buffer = mmap(nullptr, 64 * 1024, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
ASSERT_NE(test_code_buffer, MAP_FAILED)
<< "Failed to allocate executable memory for test";
pthread_jit_write_protect_np(0);
#else
test_code_buffer =
mmap(nullptr, 64 * 1024, PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
ASSERT_NE(test_code_buffer, MAP_FAILED)
<< "Failed to allocate executable memory for test";
#endif
codegen = std::make_unique<Arm64CodeGenerator>(64 * 1024, test_code_buffer);
register_mapper = std::make_unique<RegisterMapper>();
translator = std::make_unique<X86_64Translator>(*codegen, *register_mapper);
}
void TearDown() override {
translator.reset();
register_mapper.reset();
codegen.reset();
if (test_code_buffer != MAP_FAILED) {
munmap(test_code_buffer, 64 * 1024);
}
}
void *test_code_buffer = MAP_FAILED;
std::unique_ptr<Arm64CodeGenerator> codegen;
std::unique_ptr<RegisterMapper> register_mapper;
std::unique_ptr<X86_64Translator> translator;
};
// Test that RET translation generates ARM64 code
TEST_F(CallRetTest, TranslateRet) {
// x86_64 RET instruction: C3
u8 x86_ret[] = {0xC3};
ZydisDecodedInstruction instruction;
ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];
ZyanStatus status = Common::Decoder::Instance()->decodeInstruction(
instruction, operands, x86_ret, sizeof(x86_ret));
if (!ZYAN_SUCCESS(status)) {
GTEST_SKIP()
<< "Failed to decode RET instruction - Zydis may not be available";
}
// RET translation should succeed
bool result = translator->TranslateRet(instruction, operands);
EXPECT_TRUE(result) << "RET translation should succeed";
EXPECT_GT(codegen->getSize(), 0) << "RET should generate ARM64 code";
}
// Test that CALL translation generates ARM64 code
TEST_F(CallRetTest, TranslateDirectCall) {
// x86_64 CALL instruction: E8 <offset> (near relative call, 32-bit offset)
// E8 00 10 00 00 = CALL +0x1000
u8 x86_call[] = {0xE8, 0x00, 0x10, 0x00, 0x00};
ZydisDecodedInstruction instruction;
ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];
ZyanStatus status = Common::Decoder::Instance()->decodeInstruction(
instruction, operands, x86_call, sizeof(x86_call));
if (!ZYAN_SUCCESS(status)) {
GTEST_SKIP()
<< "Failed to decode CALL instruction - Zydis may not be available";
}
// CALL translation should succeed
bool result = translator->TranslateCall(instruction, operands, 0x400000);
EXPECT_TRUE(result) << "CALL translation should succeed";
EXPECT_GT(codegen->getSize(), 0) << "CALL should generate ARM64 code";
}
// Test that CALL pushes return address to stack
TEST_F(CallRetTest, CallPushesReturnAddress) {
// Simulate a CALL instruction
// We need to verify that the stack pointer is decremented and the return
// address is stored. This is a simplified test - full implementation will
// need execution engine integration.
// For now, just verify CALL generates code
u8 x86_call[] = {0xE8, 0x00, 0x10, 0x00, 0x00};
ZydisDecodedInstruction instruction;
ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];
ZyanStatus status = Common::Decoder::Instance()->decodeInstruction(
instruction, operands, x86_call, sizeof(x86_call));
if (!ZYAN_SUCCESS(status)) {
GTEST_SKIP() << "Failed to decode CALL instruction";
}
size_t size_before = codegen->getSize();
bool result = translator->TranslateCall(instruction, operands, 0x400000);
size_t size_after = codegen->getSize();
EXPECT_TRUE(result);
EXPECT_GT(size_after, size_before) << "CALL should generate code";
// CALL should generate more code than a simple branch (needs stack
// manipulation)
EXPECT_GE(size_after - size_before, 4)
<< "CALL should generate multiple instructions";
}
// Test that RET pops return address from stack
TEST_F(CallRetTest, RetPopsReturnAddress) {
// RET instruction should pop return address and jump to it
u8 x86_ret[] = {0xC3};
ZydisDecodedInstruction instruction;
ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];
ZyanStatus status = Common::Decoder::Instance()->decodeInstruction(
instruction, operands, x86_ret, sizeof(x86_ret));
if (!ZYAN_SUCCESS(status)) {
GTEST_SKIP() << "Failed to decode RET instruction";
}
size_t size_before = codegen->getSize();
bool result = translator->TranslateRet(instruction, operands);
size_t size_after = codegen->getSize();
EXPECT_TRUE(result);
EXPECT_GT(size_after, size_before) << "RET should generate code";
}

View File

@@ -0,0 +1,49 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "core/jit/arm64_codegen.h"
#include "core/jit/block_manager.h"
#include "core/jit/register_mapping.h"
#include <gtest/gtest.h>
#include <sys/mman.h>
using namespace Core::Jit;
// NOTE: ExecutionEngine requires MemoryManager and AddressSpace which have
// heavy dependencies. These tests focus on the components that can be tested in
// isolation. Full integration tests would require the complete emulator system
// to be initialized. Let's just skip them for now.
class ExecutionEngineComponentTest : public ::testing::Test {
protected:
void SetUp() override {}
void TearDown() override {}
};
// Test that the components used by ExecutionEngine can be constructed
TEST_F(ExecutionEngineComponentTest, ComponentConstruction) {
BlockManager block_manager;
RegisterMapper register_mapper;
Arm64CodeGenerator code_generator;
// All components should construct successfully
EXPECT_EQ(block_manager.GetBlockCount(), 0);
EXPECT_NE(code_generator.getCode(), nullptr);
}
// Test block invalidation through BlockManager (used by ExecutionEngine)
TEST_F(ExecutionEngineComponentTest, BlockInvalidation) {
BlockManager block_manager;
VAddr test_addr = 0x400000;
// Invalidate should not crash even if block doesn't exist
EXPECT_NO_THROW(block_manager.InvalidateBlock(test_addr));
}
TEST_F(ExecutionEngineComponentTest, BlockInvalidateRange) {
BlockManager block_manager;
// Invalidate range should not crash
EXPECT_NO_THROW(block_manager.InvalidateRange(0x400000, 0x500000));
}

83
tests/test_hle_bridge.cpp Normal file
View File

@@ -0,0 +1,83 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "core/jit/arm64_codegen.h"
#include "core/jit/hle_bridge.h"
#include "core/jit/register_mapping.h"
#include <gtest/gtest.h>
#include <sys/mman.h>
#if defined(__APPLE__) && defined(ARCH_ARM64)
#include <pthread.h>
#endif
using namespace Core::Jit;
// Simple test HLE function
extern "C" PS4_SYSV_ABI u64 TestHleFunction(u64 arg1, u64 arg2) {
return arg1 + arg2;
}
class HleBridgeTest : public ::testing::Test {
protected:
void SetUp() override {
// Allocate executable memory for test code
#if defined(__APPLE__) && defined(ARCH_ARM64)
test_code_buffer = mmap(nullptr, 64 * 1024, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
ASSERT_NE(test_code_buffer, MAP_FAILED)
<< "Failed to allocate executable memory for test";
pthread_jit_write_protect_np(0);
#else
test_code_buffer =
mmap(nullptr, 64 * 1024, PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
ASSERT_NE(test_code_buffer, MAP_FAILED)
<< "Failed to allocate executable memory for test";
#endif
codegen = std::make_unique<Arm64CodeGenerator>(64 * 1024, test_code_buffer);
register_mapper = std::make_unique<RegisterMapper>();
hle_bridge = std::make_unique<HleBridge>(*codegen, *register_mapper);
}
void TearDown() override {
hle_bridge.reset();
register_mapper.reset();
codegen.reset();
if (test_code_buffer != MAP_FAILED) {
munmap(test_code_buffer, 64 * 1024);
}
}
void *test_code_buffer = MAP_FAILED;
std::unique_ptr<Arm64CodeGenerator> codegen;
std::unique_ptr<RegisterMapper> register_mapper;
std::unique_ptr<HleBridge> hle_bridge;
};
// Test that HLE bridge can be constructed
TEST_F(HleBridgeTest, Construction) { EXPECT_NE(hle_bridge, nullptr); }
// Test that we can generate a bridge to an HLE function
TEST_F(HleBridgeTest, GenerateBridge) {
void *hle_func = reinterpret_cast<void *>(TestHleFunction);
// Generate bridge code
hle_bridge->GenerateBridge(hle_func, 2); // 2 integer arguments
// Should generate some code
EXPECT_GT(codegen->getSize(), 0) << "HLE bridge should generate code";
}
// Test that bridge preserves caller-saved registers
TEST_F(HleBridgeTest, BridgePreservesRegisters) {
// This is a placeholder test - full register preservation testing
// would require execution, which is complex
void *hle_func = reinterpret_cast<void *>(TestHleFunction);
size_t size_before = codegen->getSize();
hle_bridge->GenerateBridge(hle_func, 2);
size_t size_after = codegen->getSize();
// Bridge should generate substantial code for register preservation
EXPECT_GT(size_after - size_before, 8) << "Bridge should preserve registers";
}

View File

@@ -0,0 +1,25 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/logging/types.h"
#include <fmt/format.h>
namespace Common::Log {
void FmtLogMessageImpl(Class log_class, Level log_level, const char *filename,
unsigned int line_num, const char *function,
const char *format, const fmt::format_args &args) {
// Stub implementation - just ignore logs in tests
(void)log_class;
(void)log_level;
(void)filename;
(void)line_num;
(void)function;
(void)format;
(void)args;
}
void Start() {}
void Stop() {}
} // namespace Common::Log

View File

@@ -0,0 +1,86 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "core/jit/register_mapping.h"
#include <gtest/gtest.h>
using namespace Core::Jit;
class RegisterMappingTest : public ::testing::Test {
protected:
RegisterMapper mapper;
};
TEST_F(RegisterMappingTest, MapGeneralPurposeRegisters) {
// Test mapping of common x86_64 registers
EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::RAX), 0); // X0
EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::RCX), 1); // X1
EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::RDX), 2); // X2
EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::RSI), 3); // X3
EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::RDI),
0); // X0 (same as RAX)
EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::R8), 4); // X4
EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::R9), 5); // X5
}
TEST_F(RegisterMappingTest, MapStackPointer) {
EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::RSP), 31); // SP
}
TEST_F(RegisterMappingTest, MapFramePointer) {
EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::RBP), 29); // FP
}
TEST_F(RegisterMappingTest, MapCalleeSavedRegisters) {
EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::RBX), 19); // X19
EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::R12), 20); // X20
EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::R13), 21); // X21
EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::R14), 22); // X22
EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::R15), 23); // X23
}
TEST_F(RegisterMappingTest, MapFlagsRegister) {
EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::FLAGS), 11); // X11
}
TEST_F(RegisterMappingTest, MapXMMRegisters) {
// Test mapping of XMM registers to NEON registers (V registers start at 32)
EXPECT_EQ(mapper.MapX86_64XmmToArm64Neon(X86_64Register::XMM0), 32); // V0
EXPECT_EQ(mapper.MapX86_64XmmToArm64Neon(X86_64Register::XMM1), 33); // V1
EXPECT_EQ(mapper.MapX86_64XmmToArm64Neon(X86_64Register::XMM2), 34); // V2
EXPECT_EQ(mapper.MapX86_64XmmToArm64Neon(X86_64Register::XMM3), 35); // V3
EXPECT_EQ(mapper.MapX86_64XmmToArm64Neon(X86_64Register::XMM4), 36); // V4
EXPECT_EQ(mapper.MapX86_64XmmToArm64Neon(X86_64Register::XMM5), 37); // V5
EXPECT_EQ(mapper.MapX86_64XmmToArm64Neon(X86_64Register::XMM6), 38); // V6
EXPECT_EQ(mapper.MapX86_64XmmToArm64Neon(X86_64Register::XMM7), 39); // V7
}
TEST_F(RegisterMappingTest, MapAllXMMRegisters) {
// Test all 16 XMM registers (V registers start at 32)
for (int i = 0; i < 16; ++i) {
X86_64Register xmm_reg =
static_cast<X86_64Register>(static_cast<int>(X86_64Register::XMM0) + i);
int neon_reg = mapper.MapX86_64XmmToArm64Neon(xmm_reg);
EXPECT_EQ(neon_reg, 32 + i) << "XMM" << i << " should map to V" << i
<< " (register number " << (32 + i) << ")";
}
}
TEST_F(RegisterMappingTest, InvalidRegister) {
// COUNT is not a valid register
// NOTE: The implementation uses ASSERT_MSG which will crash on invalid input
// This test verifies that valid registers work correctly
// Testing invalid registers would require a different implementation that
// returns error codes. For now, we just verify that the last valid register
// works.
int result = mapper.MapX86_64ToArm64(X86_64Register::XMM15);
EXPECT_GE(result, 0) << "Last valid register should map correctly";
}
TEST_F(RegisterMappingTest, RegisterMappingConsistency) {
// Test that register mappings are consistent
// RAX should always map to the same ARM64 register
int reg1 = mapper.MapX86_64ToArm64(X86_64Register::RAX);
int reg2 = mapper.MapX86_64ToArm64(X86_64Register::RAX);
EXPECT_EQ(reg1, reg2);
}