Merge b76e05d5dd into eae5e0ad55
This commit is contained in: commit 3dfa87415a

53  .github/workflows/build.yml  (vendored)
@@ -146,6 +146,57 @@ jobs:
          name: shadps4-macos-sdl-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}
          path: upload/

  macos-sdl-arm64:
    runs-on: macos-15
    needs: get-info
    steps:
      - uses: actions/checkout@v5
        with:
          submodules: recursive

      - name: Setup latest Xcode
        uses: maxim-lobanov/setup-xcode@v1
        with:
          xcode-version: latest

      - name: Cache CMake Configuration
        uses: actions/cache@v4
        env:
          cache-name: ${{ runner.os }}-sdl-arm64-cache-cmake-configuration
        with:
          path: |
            ${{github.workspace}}/build-arm64
          key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
          restore-keys: |
            ${{ env.cache-name }}-

      - name: Cache CMake Build
        uses: hendrikmuhs/ccache-action@v1.2.19
        env:
          cache-name: ${{runner.os}}-sdl-arm64-cache-cmake-build
        with:
          append-timestamp: false
          create-symlink: true
          key: ${{env.cache-name}}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
          variant: sccache

      - name: Configure CMake
        run: cmake --fresh -B ${{github.workspace}}/build-arm64 -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache

      - name: Build
        run: cmake --build ${{github.workspace}}/build-arm64 --config ${{env.BUILD_TYPE}} --parallel $(sysctl -n hw.ncpu)

      - name: Package and Upload macOS ARM64 SDL artifact
        run: |
          mkdir upload-arm64
          mv ${{github.workspace}}/build-arm64/shadps4 upload-arm64
          mv ${{github.workspace}}/build-arm64/MoltenVK_icd.json upload-arm64
          mv ${{github.workspace}}/build-arm64/libMoltenVK.dylib upload-arm64
      - uses: actions/upload-artifact@v4
        with:
          name: shadps4-macos-arm64-sdl-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}
          path: upload-arm64/

  linux-sdl:
    runs-on: ubuntu-24.04
    needs: get-info
@@ -245,7 +296,7 @@ jobs:

  pre-release:
    if: github.ref == 'refs/heads/main' && github.repository == 'shadps4-emu/shadPS4' && github.event_name == 'push'
-    needs: [get-info, windows-sdl, macos-sdl, linux-sdl]
    needs: [get-info, windows-sdl, macos-sdl, macos-sdl-arm64, linux-sdl]
    runs-on: ubuntu-latest
    steps:
      - name: Download all artifacts
3  .gitmodules  (vendored)

@@ -120,3 +120,6 @@
[submodule "externals/miniz"]
    path = externals/miniz
    url = https://github.com/richgel999/miniz
[submodule "externals/FEX"]
    path = externals/FEX
    url = https://github.com/FEX-Emu/FEX
@@ -17,6 +17,19 @@
                "CMAKE_INSTALL_PREFIX": "${sourceDir}/Build/${presetName}",
                "CMAKE_OSX_ARCHITECTURES": "x86_64"
            }
        },
        {
            "name": "arm64-Clang-Base",
            "hidden": true,
            "generator": "Ninja",
            "binaryDir": "${sourceDir}/Build/${presetName}",
            "cacheVariables": {
                "CMAKE_C_COMPILER": "/usr/bin/clang",
                "CMAKE_CXX_COMPILER": "/usr/bin/clang++",
                "CMAKE_INSTALL_PREFIX": "${sourceDir}/Build/${presetName}",
                "CMAKE_OSX_ARCHITECTURES": "arm64",
                "CMAKE_PREFIX_PATH": "/opt/homebrew"
            }
        }
    ]
}
@@ -19,6 +19,8 @@ endif()

project(shadPS4 CXX C ASM ${ADDITIONAL_LANGUAGES})

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Forcing PIE makes sure that the base address is high enough so that it doesn't clash with the PS4 memory.
if(UNIX AND NOT APPLE)
    set(CMAKE_POSITION_INDEPENDENT_CODE TRUE)
@@ -237,7 +239,6 @@ find_package(toml11 4.2.0 CONFIG)
find_package(tsl-robin-map 1.3.0 CONFIG)
find_package(VulkanHeaders 1.4.329 CONFIG)
find_package(VulkanMemoryAllocator 3.1.0 CONFIG)
-find_package(xbyak 7.07 CONFIG)
find_package(xxHash 0.8.2 MODULE)
find_package(ZLIB 1.3 MODULE)
find_package(Zydis 5.0.0 CONFIG)
@@ -550,13 +551,15 @@ set(USBD_LIB src/core/libraries/usbd/usbd.cpp
              src/core/libraries/usbd/emulated/skylander.h
)

-set(FIBER_LIB src/core/libraries/fiber/fiber_context.s
-              src/core/libraries/fiber/fiber.cpp
set(FIBER_LIB src/core/libraries/fiber/fiber.cpp
              src/core/libraries/fiber/fiber.h
              src/core/libraries/fiber/fiber_error.h
)

-set_source_files_properties(src/core/libraries/fiber/fiber_context.s PROPERTIES COMPILE_OPTIONS -Wno-unused-command-line-argument)
if(ARCHITECTURE STREQUAL "x86_64")
    list(APPEND FIBER_LIB src/core/libraries/fiber/fiber_context.s)
    set_source_files_properties(src/core/libraries/fiber/fiber_context.s PROPERTIES COMPILE_OPTIONS -Wno-unused-command-line-argument)
endif()

set(VDEC_LIB src/core/libraries/videodec/videodec2_impl.cpp
             src/core/libraries/videodec/videodec2_impl.h
@@ -844,6 +847,27 @@ if (ARCHITECTURE STREQUAL "x86_64")
        src/core/cpu_patches.h)
endif()

if (ARCHITECTURE STREQUAL "arm64")
    set(CORE ${CORE}
        src/core/jit/arm64_codegen.cpp
        src/core/jit/arm64_codegen.h
        src/core/jit/register_mapping.cpp
        src/core/jit/register_mapping.h
        src/core/jit/x86_64_translator.cpp
        src/core/jit/x86_64_translator.h
        src/core/jit/block_manager.cpp
        src/core/jit/block_manager.h
        src/core/jit/execution_engine.cpp
        src/core/jit/execution_engine.h
        src/core/jit/calling_convention.cpp
        src/core/jit/calling_convention.h
        src/core/jit/simd_translator.cpp
        src/core/jit/simd_translator.h
        src/core/jit/hle_bridge.cpp
        src/core/jit/hle_bridge.h
    )
endif()

set(SHADER_RECOMPILER src/shader_recompiler/profile.h
                      src/shader_recompiler/recompiler.cpp
                      src/shader_recompiler/recompiler.h
@@ -1083,7 +1107,10 @@ add_executable(shadps4

create_target_directory_groups(shadps4)

-target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient RenderDoc::API FFmpeg::ffmpeg Dear_ImGui gcn half::half ZLIB::ZLIB PNG::PNG)
target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map Tracy::TracyClient RenderDoc::API FFmpeg::ffmpeg Dear_ImGui gcn half::half ZLIB::ZLIB PNG::PNG)
if(ARCHITECTURE STREQUAL "x86_64")
    target_link_libraries(shadps4 PRIVATE xbyak::xbyak)
endif()
target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::glslang SDL3::SDL3 SDL3_mixer::SDL3_mixer pugixml::pugixml)
target_link_libraries(shadps4 PRIVATE stb::headers libusb::usb lfreist-hwinfo::hwinfo nlohmann_json::nlohmann_json miniz)
@@ -1218,3 +1245,22 @@ endif()

# Install rules
install(TARGETS shadps4 BUNDLE DESTINATION .)

# Testing
option(BUILD_TESTS "Build test suite" OFF)

if(BUILD_TESTS)
    enable_testing()

    include(FetchContent)
    FetchContent_Declare(
        googletest
        GIT_REPOSITORY https://github.com/google/googletest.git
        GIT_TAG v1.17.0
    )
    # For Windows: Prevent overriding the parent project's compiler/linker settings
    set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
    FetchContent_MakeAvailable(googletest)

    add_subdirectory(tests)
endif()
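For context, a test picked up by this wiring would be an ordinary GoogleTest translation unit under tests/; the file below is a hypothetical sketch, since the actual tests/ contents are not part of this diff:

// tests/align_test.cpp (hypothetical example, not from this PR)
#include <gtest/gtest.h>

// Re-implements the page-alignment helper used by the ARM64 code generator so
// that the test stays self-contained.
static size_t AlignUp(size_t value, size_t alignment) {
    return (value + alignment - 1) & ~(alignment - 1);
}

TEST(AlignUp, RoundsUpToPageSize) {
    EXPECT_EQ(AlignUp(1, 4096), 4096u);
    EXPECT_EQ(AlignUp(4096, 4096), 4096u);
    EXPECT_EQ(AlignUp(4097, 4096), 8192u);
}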
@@ -30,6 +30,30 @@
            "cacheVariables": {
                "CMAKE_BUILD_TYPE": "RelWithDebInfo"
            }
        },
        {
            "name": "arm64-Clang-Debug",
            "displayName": "Clang ARM64 Debug",
            "inherits": ["arm64-Clang-Base"],
            "cacheVariables": {
                "CMAKE_BUILD_TYPE": "Debug"
            }
        },
        {
            "name": "arm64-Clang-Release",
            "displayName": "Clang ARM64 Release",
            "inherits": ["arm64-Clang-Base"],
            "cacheVariables": {
                "CMAKE_BUILD_TYPE": "Release"
            }
        },
        {
            "name": "arm64-Clang-RelWithDebInfo",
            "displayName": "Clang ARM64 RelWithDebInfo",
            "inherits": ["arm64-Clang-Base"],
            "cacheVariables": {
                "CMAKE_BUILD_TYPE": "RelWithDebInfo"
            }
        }
    ]
}
6  externals/CMakeLists.txt  (vendored)

@@ -112,9 +112,11 @@ if (NOT TARGET tsl::robin_map)
    add_subdirectory(robin-map)
endif()

-# Xbyak
-if (NOT TARGET xbyak::xbyak)
# Xbyak (x86_64 only)
if (ARCHITECTURE STREQUAL "x86_64")
    if (NOT TARGET xbyak::xbyak)
        add_subdirectory(xbyak)
    endif()
endif()

# MagicEnum
1  externals/FEX  (vendored submodule)

@@ -0,0 +1 @@
Subproject commit e8591090f246c49631c14ef70f32c7df14b5646e
@@ -19,14 +19,22 @@ void* GetXmmPointer(void* ctx, u8 index) {
    case index: \
        return (void*)(&((EXCEPTION_POINTERS*)ctx)->ContextRecord->Xmm##index.Low)
#elif defined(__APPLE__)
#if defined(ARCH_X86_64)
#define CASE(index) \
    case index: \
        return (void*)(&((ucontext_t*)ctx)->uc_mcontext->__fs.__fpu_xmm##index);
#elif defined(ARCH_ARM64)
    UNREACHABLE_MSG("XMM registers not available on ARM64");
    return nullptr;
#else
#error "Unsupported architecture"
#endif
#else
#define CASE(index) \
    case index: \
        return (void*)(&((ucontext_t*)ctx)->uc_mcontext.fpregs->_xmm[index].element[0])
#endif
#if !defined(ARCH_ARM64) || !defined(__APPLE__)
    switch (index) {
        CASE(0);
        CASE(1);
@@ -50,13 +58,20 @@ void* GetXmmPointer(void* ctx, u8 index) {
    }
}
#undef CASE
#endif
}

void* GetRip(void* ctx) {
#if defined(_WIN32)
    return (void*)((EXCEPTION_POINTERS*)ctx)->ContextRecord->Rip;
#elif defined(__APPLE__)
#if defined(ARCH_X86_64)
    return (void*)((ucontext_t*)ctx)->uc_mcontext->__ss.__rip;
#elif defined(ARCH_ARM64)
    return (void*)((ucontext_t*)ctx)->uc_mcontext->__ss.__pc;
#else
#error "Unsupported architecture"
#endif
#else
    return (void*)((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RIP];
#endif
@@ -66,7 +81,13 @@ void IncrementRip(void* ctx, u64 length) {
#if defined(_WIN32)
    ((EXCEPTION_POINTERS*)ctx)->ContextRecord->Rip += length;
#elif defined(__APPLE__)
#if defined(ARCH_X86_64)
    ((ucontext_t*)ctx)->uc_mcontext->__ss.__rip += length;
#elif defined(ARCH_ARM64)
    ((ucontext_t*)ctx)->uc_mcontext->__ss.__pc += length;
#else
#error "Unsupported architecture"
#endif
#else
    ((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RIP] += length;
#endif
@@ -2,9 +2,16 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once

-#include <xmmintrin.h>
#include "common/arch.h"
#include "common/types.h"

#ifdef ARCH_X86_64
#include <xmmintrin.h>
#elif defined(ARCH_ARM64)
#include <cstdarg>
#endif

#ifdef ARCH_X86_64
#define VA_ARGS \
    uint64_t rdi, uint64_t rsi, uint64_t rdx, uint64_t rcx, uint64_t r8, uint64_t r9, \
        uint64_t overflow_arg_area, __m128 xmm0, __m128 xmm1, __m128 xmm2, __m128 xmm3, \
@@ -30,6 +37,17 @@
    (ctx).va_list.gp_offset = offsetof(::Common::VaRegSave, gp); \
    (ctx).va_list.fp_offset = offsetof(::Common::VaRegSave, fp); \
    (ctx).va_list.overflow_arg_area = &overflow_arg_area;
#elif defined(ARCH_ARM64)
#define VA_ARGS ...
#define VA_CTX(ctx) \
    alignas(16)::Common::VaCtx ctx{}; \
    (ctx).va_list.reg_save_area = nullptr; \
    (ctx).va_list.gp_offset = 0; \
    (ctx).va_list.fp_offset = 0; \
    (ctx).va_list.overflow_arg_area = nullptr;
#else
#error "Unsupported architecture"
#endif

namespace Common {

@@ -44,7 +62,9 @@ struct VaList {

struct VaRegSave {
    u64 gp[6];
#ifdef ARCH_X86_64
    __m128 fp[8];
#endif
};

struct VaCtx {
@@ -20,12 +20,16 @@
#include <sys/mman.h>
#endif

-#if defined(__APPLE__) && defined(ARCH_X86_64)
#if defined(__APPLE__) && (defined(ARCH_X86_64) || defined(ARCH_ARM64))
// Reserve space for the system address space using a zerofill section.
// Note: These assembly directives are x86_64-specific, but the memory layout constants
// below apply to both x86_64 and ARM64 on macOS.
#if defined(ARCH_X86_64)
asm(".zerofill SYSTEM_MANAGED,SYSTEM_MANAGED,__SYSTEM_MANAGED,0x7FFBFC000");
asm(".zerofill SYSTEM_RESERVED,SYSTEM_RESERVED,__SYSTEM_RESERVED,0x7C0004000");
asm(".zerofill USER_AREA,USER_AREA,__USER_AREA,0x5F9000000000");
#endif
#endif

namespace Core {

@@ -33,7 +37,7 @@ namespace Core {
constexpr VAddr SYSTEM_MANAGED_MIN = 0x400000ULL;
constexpr VAddr SYSTEM_MANAGED_MAX = 0x7FFFFBFFFULL;
constexpr VAddr SYSTEM_RESERVED_MIN = 0x7FFFFC000ULL;
-#if defined(__APPLE__) && defined(ARCH_X86_64)
#if defined(__APPLE__) && (defined(ARCH_X86_64) || defined(ARCH_ARM64))
// Commpage ranges from 0xFC0000000 - 0xFFFFFFFFF, so decrease the system reserved maximum.
constexpr VAddr SYSTEM_RESERVED_MAX = 0xFBFFFFFFFULL;
// GPU-reserved memory ranges from 0x1000000000 - 0x6FFFFFFFFF, so increase the user minimum.
@@ -512,11 +516,13 @@ struct AddressSpace::Impl {
        user_size = UserSize;

        constexpr int protection_flags = PROT_READ | PROT_WRITE;
#if defined(__APPLE__) && (defined(ARCH_X86_64) || defined(ARCH_ARM64))
        // On macOS (both x86_64 and ARM64), we run into limitations due to the commpage from
        // 0xFC0000000 - 0xFFFFFFFFF and the GPU carveout region from 0x1000000000 - 0x6FFFFFFFFF.
        // Because this creates gaps in the available virtual memory region, we map memory space
        // using three distinct parts.
#if defined(ARCH_X86_64)
        constexpr int map_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | MAP_FIXED;
-#if defined(__APPLE__) && defined(ARCH_X86_64)
-        // On ARM64 Macs, we run into limitations due to the commpage from 0xFC0000000 - 0xFFFFFFFFF
-        // and the GPU carveout region from 0x1000000000 - 0x6FFFFFFFFF. Because this creates gaps
-        // in the available virtual memory region, we map memory space using three distinct parts.
        system_managed_base =
            reinterpret_cast<u8*>(mmap(reinterpret_cast<void*>(SYSTEM_MANAGED_MIN),
                                       system_managed_size, protection_flags, map_flags, -1, 0));
@@ -525,9 +531,50 @@ struct AddressSpace::Impl {
                                       system_reserved_size, protection_flags, map_flags, -1, 0));
        user_base = reinterpret_cast<u8*>(
            mmap(reinterpret_cast<void*>(USER_MIN), user_size, protection_flags, map_flags, -1, 0));
#elif defined(ARCH_ARM64)
        // On ARM64 macOS, MAP_FIXED doesn't work at low addresses (0x400000) due to system
        // restrictions. Map memory wherever possible and use offset calculations. This is a
        // temporary solution until proper address translation is implemented for ARM64. Note: This
        // means the PS4 virtual addresses won't match host addresses, so instruction
        // translation/JIT will need to handle the offset.
        constexpr int map_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;

        // Map the three regions separately, but let the system choose addresses
        system_managed_base = reinterpret_cast<u8*>(
            mmap(nullptr, system_managed_size, protection_flags, map_flags, -1, 0));
        if (system_managed_base == MAP_FAILED) {
            LOG_CRITICAL(Kernel_Vmm, "mmap failed for system_managed_base: {}", strerror(errno));
            throw std::bad_alloc{};
        }

        system_reserved_base = reinterpret_cast<u8*>(
            mmap(nullptr, system_reserved_size, protection_flags, map_flags, -1, 0));
        if (system_reserved_base == MAP_FAILED) {
            LOG_CRITICAL(Kernel_Vmm, "mmap failed for system_reserved_base: {}", strerror(errno));
            throw std::bad_alloc{};
        }

        user_base =
            reinterpret_cast<u8*>(mmap(nullptr, user_size, protection_flags, map_flags, -1, 0));
        if (user_base == MAP_FAILED) {
            LOG_CRITICAL(Kernel_Vmm, "mmap failed for user_base: {}", strerror(errno));
            throw std::bad_alloc{};
        }

        LOG_WARNING(
            Kernel_Vmm,
            "ARM64 macOS: Using flexible memory layout. "
            "PS4 addresses will be offset from host addresses. "
            "system_managed: {} (expected {}), system_reserved: {} (expected {}), user: {} "
            "(expected {})",
            fmt::ptr(system_managed_base), fmt::ptr(reinterpret_cast<void*>(SYSTEM_MANAGED_MIN)),
            fmt::ptr(system_reserved_base), fmt::ptr(reinterpret_cast<void*>(SYSTEM_RESERVED_MIN)),
            fmt::ptr(user_base), fmt::ptr(reinterpret_cast<void*>(USER_MIN)));
#endif
#else
        const auto virtual_size = system_managed_size + system_reserved_size + user_size;
#if defined(ARCH_X86_64)
        constexpr int map_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | MAP_FIXED;
        const auto virtual_base =
            reinterpret_cast<u8*>(mmap(reinterpret_cast<void*>(SYSTEM_MANAGED_MIN), virtual_size,
                                       protection_flags, map_flags, -1, 0));
@@ -535,6 +582,7 @@ struct AddressSpace::Impl {
        system_reserved_base = reinterpret_cast<u8*>(SYSTEM_RESERVED_MIN);
        user_base = reinterpret_cast<u8*>(USER_MIN);
#else
        constexpr int map_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
        // Map memory wherever possible and instruction translation can handle offsetting to the
        // base.
        const auto virtual_base =
@@ -560,7 +608,7 @@ struct AddressSpace::Impl {
                    fmt::ptr(user_base + user_size - 1));

        const VAddr system_managed_addr = reinterpret_cast<VAddr>(system_managed_base);
-        const VAddr system_reserved_addr = reinterpret_cast<VAddr>(system_managed_base);
        const VAddr system_reserved_addr = reinterpret_cast<VAddr>(system_reserved_base);
        const VAddr user_addr = reinterpret_cast<VAddr>(user_base);
        m_free_regions.insert({system_managed_addr, system_managed_addr + system_managed_size});
        m_free_regions.insert({system_reserved_addr, system_reserved_addr + system_reserved_size});
@@ -607,8 +655,32 @@ struct AddressSpace::Impl {
        const int handle = phys_addr != -1 ? (fd == -1 ? backing_fd : fd) : -1;
        const off_t host_offset = phys_addr != -1 ? phys_addr : 0;
        const int flag = phys_addr != -1 ? MAP_SHARED : (MAP_ANONYMOUS | MAP_PRIVATE);

#if defined(__APPLE__) && defined(ARCH_ARM64)
        // On ARM64 macOS, translate PS4 virtual addresses to host addresses
        void* host_addr = nullptr;
        if (virtual_addr >= SYSTEM_MANAGED_MIN && virtual_addr <= SYSTEM_MANAGED_MAX) {
            // System managed region
            u64 offset = virtual_addr - SYSTEM_MANAGED_MIN;
            host_addr = system_managed_base + offset;
        } else if (virtual_addr >= SYSTEM_RESERVED_MIN && virtual_addr <= SYSTEM_RESERVED_MAX) {
            // System reserved region
            u64 offset = virtual_addr - SYSTEM_RESERVED_MIN;
            host_addr = system_reserved_base + offset;
        } else if (virtual_addr >= USER_MIN && virtual_addr <= USER_MAX) {
            // User region
            u64 offset = virtual_addr - USER_MIN;
            host_addr = user_base + offset;
        } else {
            LOG_CRITICAL(Kernel_Vmm, "Invalid virtual address for mapping: {:#x}", virtual_addr);
            return MAP_FAILED;
        }

        void* ret = mmap(host_addr, size, prot, MAP_FIXED | flag, handle, host_offset);
#else
        void* ret = mmap(reinterpret_cast<void*>(virtual_addr), size, prot, MAP_FIXED | flag,
                         handle, host_offset);
#endif
        ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno));
        return ret;
    }
@@ -628,9 +700,29 @@ struct AddressSpace::Impl {
        // Free the relevant region.
        m_free_regions.insert({start_address, end_address});

#if defined(__APPLE__) && defined(ARCH_ARM64)
        // On ARM64 macOS, translate PS4 virtual addresses to host addresses
        void* host_addr = nullptr;
        if (start_address >= SYSTEM_MANAGED_MIN && start_address <= SYSTEM_MANAGED_MAX) {
            u64 offset = start_address - SYSTEM_MANAGED_MIN;
            host_addr = system_managed_base + offset;
        } else if (start_address >= SYSTEM_RESERVED_MIN && start_address <= SYSTEM_RESERVED_MAX) {
            u64 offset = start_address - SYSTEM_RESERVED_MIN;
            host_addr = system_reserved_base + offset;
        } else if (start_address >= USER_MIN && start_address <= USER_MAX) {
            u64 offset = start_address - USER_MIN;
            host_addr = user_base + offset;
        } else {
            LOG_CRITICAL(Kernel_Vmm, "Invalid virtual address for unmapping: {:#x}", start_address);
            return;
        }
        void* ret = mmap(host_addr, end_address - start_address, PROT_NONE,
                         MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
#else
        // Return the adjusted pointers.
        void* ret = mmap(reinterpret_cast<void*>(start_address), end_address - start_address,
                         PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
#endif
        ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno));
    }
@@ -642,12 +734,31 @@ struct AddressSpace::Impl {
        if (write) {
            flags |= PROT_WRITE;
        }
-#ifdef ARCH_X86_64
#if defined(ARCH_X86_64)
        if (execute) {
            flags |= PROT_EXEC;
        }
#endif
#if defined(__APPLE__) && defined(ARCH_ARM64)
        // On ARM64 macOS, translate PS4 virtual addresses to host addresses
        void* host_addr = nullptr;
        if (virtual_addr >= SYSTEM_MANAGED_MIN && virtual_addr <= SYSTEM_MANAGED_MAX) {
            u64 offset = virtual_addr - SYSTEM_MANAGED_MIN;
            host_addr = system_managed_base + offset;
        } else if (virtual_addr >= SYSTEM_RESERVED_MIN && virtual_addr <= SYSTEM_RESERVED_MAX) {
            u64 offset = virtual_addr - SYSTEM_RESERVED_MIN;
            host_addr = system_reserved_base + offset;
        } else if (virtual_addr >= USER_MIN && virtual_addr <= USER_MAX) {
            u64 offset = virtual_addr - USER_MIN;
            host_addr = user_base + offset;
        } else {
            LOG_CRITICAL(Kernel_Vmm, "Invalid virtual address for protection: {:#x}", virtual_addr);
            return;
        }
        int ret = mprotect(host_addr, size, flags);
#else
        int ret = mprotect(reinterpret_cast<void*>(virtual_addr), size, flags);
#endif
        ASSERT_MSG(ret == 0, "mprotect failed: {}", strerror(errno));
    }
@@ -677,7 +788,7 @@ AddressSpace::~AddressSpace() = default;

void* AddressSpace::Map(VAddr virtual_addr, size_t size, u64 alignment, PAddr phys_addr,
                        bool is_exec) {
-#if ARCH_X86_64
#if defined(ARCH_X86_64)
    const auto prot = is_exec ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE;
#else
    // On non-native architectures, we can simplify things by ignoring the execute flag for the
@@ -747,4 +858,27 @@ boost::icl::interval_set<VAddr> AddressSpace::GetUsableRegions() {
#endif
}

void* AddressSpace::TranslateAddress(VAddr ps4_addr) const {
#if defined(ARCH_X86_64)
    // On x86_64, PS4 addresses are directly mapped, so we can cast them
    return reinterpret_cast<void*>(ps4_addr);
#elif defined(ARCH_ARM64) && defined(__APPLE__)
    // On ARM64 macOS, translate PS4 virtual addresses to host addresses.
    // The region base pointers live in the pimpl, so go through impl here.
    if (ps4_addr >= SYSTEM_MANAGED_MIN && ps4_addr <= SYSTEM_MANAGED_MAX) {
        u64 offset = ps4_addr - SYSTEM_MANAGED_MIN;
        return impl->system_managed_base + offset;
    } else if (ps4_addr >= SYSTEM_RESERVED_MIN && ps4_addr <= SYSTEM_RESERVED_MAX) {
        u64 offset = ps4_addr - SYSTEM_RESERVED_MIN;
        return impl->system_reserved_base + offset;
    } else if (ps4_addr >= USER_MIN && ps4_addr <= USER_MAX) {
        u64 offset = ps4_addr - USER_MIN;
        return impl->user_base + offset;
    }
    return nullptr;
#else
    // Generic ARM64 or other platforms
    return reinterpret_cast<void*>(ps4_addr);
#endif
}

} // namespace Core
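As a usage sketch (not part of this diff): on ARM64 macOS a caller would resolve guest pointers through TranslateAddress rather than casting. Worked example with the constants above: a guest address of USER_MIN + 0x1000 resolves to user_base + 0x1000. ReadGuestU32 below is a hypothetical helper:

#include <cstring>

// Hypothetical caller-side helper (assumes common/assert.h for ASSERT_MSG and
// the u32/VAddr aliases from common/types.h).
u32 ReadGuestU32(const Core::AddressSpace& address_space, VAddr guest_addr) {
    void* host = address_space.TranslateAddress(guest_addr);
    ASSERT_MSG(host != nullptr, "Guest address {:#x} is not in a mapped region", guest_addr);
    u32 value;
    std::memcpy(&value, host, sizeof(value));
    return value;
}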
@@ -88,6 +88,9 @@ public:
    // Returns an interval set containing all usable regions.
    boost::icl::interval_set<VAddr> GetUsableRegions();

    // Translate PS4 virtual address to host address (for ARM64)
    void* TranslateAddress(VAddr ps4_addr) const;

private:
    struct Impl;
    std::unique_ptr<Impl> impl;
567  src/core/jit/arm64_codegen.cpp  (new file)

@@ -0,0 +1,567 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include <cstring>
#include <sys/mman.h>
#include "arm64_codegen.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/types.h"
#if defined(__APPLE__) && defined(ARCH_ARM64)
#include <pthread.h>
#endif

namespace Core::Jit {

static constexpr size_t PAGE_SIZE = 4096;
static constexpr size_t ALIGNMENT = 16;

static size_t alignUp(size_t value, size_t alignment) {
    return (value + alignment - 1) & ~(alignment - 1);
}

static void* allocateExecutableMemory(size_t size) {
    size = alignUp(size, PAGE_SIZE);
#if defined(__APPLE__) && defined(ARCH_ARM64)
    // On macOS ARM64:
    // 1. Allocate with PROT_READ | PROT_WRITE (no PROT_EXEC initially)
    // 2. Use pthread_jit_write_protect_np to allow writing
    // 3. After writing, use mprotect to add PROT_EXEC
    void* ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (ptr == MAP_FAILED) {
        LOG_CRITICAL(Core, "Failed to allocate executable memory: {} (errno={})", strerror(errno),
                     errno);
        return nullptr;
    }
    // Initially disable write protection so we can write code
    pthread_jit_write_protect_np(0);
    return ptr;
#else
    void* ptr =
        mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (ptr == MAP_FAILED) {
        LOG_CRITICAL(Core, "Failed to allocate executable memory: {}", strerror(errno));
        return nullptr;
    }
    return ptr;
#endif
}
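One caveat worth noting: Apple documents pthread_jit_write_protect_np() as affecting only MAP_JIT mappings, so the RW-then-mprotect sequence above may not behave as intended in hardened processes. A minimal sketch of the documented W^X pattern, under the assumption that the process carries the JIT entitlement, would look like this (not the allocator this PR uses):

#include <libkern/OSCacheControl.h>
#include <pthread.h>
#include <sys/mman.h>

// Sketch only: Apple's documented per-thread W^X flow for JIT regions.
static void* AllocateJitRegion(size_t size) {
    void* ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC,
                     MAP_PRIVATE | MAP_ANONYMOUS | MAP_JIT, -1, 0);
    if (ptr == MAP_FAILED) {
        return nullptr;
    }
    pthread_jit_write_protect_np(0);  // this thread may now write, not execute
    // ... emit instructions into ptr ...
    pthread_jit_write_protect_np(1);  // back to execute-only for this thread
    sys_icache_invalidate(ptr, size); // flush the instruction cache before running
    return ptr;
}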
Arm64CodeGenerator::Arm64CodeGenerator(size_t buffer_size, void* code_ptr)
    : buffer_size(alignUp(buffer_size, PAGE_SIZE)), owns_buffer(code_ptr == nullptr) {
    if (code_ptr) {
        code_buffer = code_ptr;
        this->code_ptr = code_ptr;
    } else {
        code_buffer = allocateExecutableMemory(buffer_size);
        this->code_ptr = code_buffer;
    }
    if (!code_buffer) {
        throw std::bad_alloc();
    }
}

Arm64CodeGenerator::~Arm64CodeGenerator() {
    if (owns_buffer && code_buffer) {
        munmap(code_buffer, buffer_size);
    }
}

void Arm64CodeGenerator::reset() {
    code_ptr = code_buffer;
    fixups.clear();
}

void Arm64CodeGenerator::setSize(size_t offset) {
    code_ptr = static_cast<u8*>(code_buffer) + offset;
}

void Arm64CodeGenerator::emit32(u32 instruction) {
#if defined(__APPLE__) && defined(ARCH_ARM64)
    // On macOS ARM64, disable write protection before writing
    pthread_jit_write_protect_np(0);
#endif
    u8* curr = static_cast<u8*>(code_ptr);
    u8* end = static_cast<u8*>(code_buffer) + buffer_size;
    ASSERT_MSG(curr + 4 <= end, "Code buffer overflow");
    *reinterpret_cast<u32*>(curr) = instruction;
    code_ptr = curr + 4;
#if defined(__APPLE__) && defined(ARCH_ARM64)
    // Re-enable write protection after writing
    pthread_jit_write_protect_np(1);
#endif
}

void Arm64CodeGenerator::emit64(u64 instruction) {
    emit32(static_cast<u32>(instruction));
    emit32(static_cast<u32>(instruction >> 32));
}

void* Arm64CodeGenerator::allocateCode(size_t size) {
    size = alignUp(size, ALIGNMENT);
    void* result = code_ptr;
    u8* curr = static_cast<u8*>(code_ptr);
    u8* end = static_cast<u8*>(code_buffer) + buffer_size;
    code_ptr = curr + size;
    ASSERT_MSG(static_cast<u8*>(code_ptr) <= end, "Code buffer overflow");
    return result;
}

void Arm64CodeGenerator::makeExecutable() {
    size_t size = getSize();
    size = alignUp(size, PAGE_SIZE);
#if defined(__APPLE__) && defined(ARCH_ARM64)
    // On macOS ARM64, re-enable write protection before making executable
    pthread_jit_write_protect_np(1);
    // Flush instruction cache
    __builtin___clear_cache(static_cast<char*>(code_buffer),
                            static_cast<char*>(code_buffer) + size);
#endif
    if (mprotect(code_buffer, size, PROT_READ | PROT_EXEC) != 0) {
        LOG_CRITICAL(Core, "Failed to make code executable: {}", strerror(errno));
    }
}

// Memory operations
// X9 is used throughout as a scratch register for materializing absolute
// addresses and out-of-range offsets.
void Arm64CodeGenerator::ldr(int reg, void* addr) {
    movz(9, reinterpret_cast<u64>(addr) & 0xFFFF);
    movk(9, (reinterpret_cast<u64>(addr) >> 16) & 0xFFFF, 16);
    movk(9, (reinterpret_cast<u64>(addr) >> 32) & 0xFFFF, 32);
    movk(9, (reinterpret_cast<u64>(addr) >> 48) & 0xFFFF, 48);
    ldr(reg, 9, 0);
}

void Arm64CodeGenerator::ldr(int reg, int base_reg, s32 offset) {
    if (offset >= 0 && offset < 32768 && (offset % 8 == 0)) {
        emit32(0xF9400000 | (reg << 0) | (base_reg << 5) | ((offset / 8) << 10));
    } else {
        mov_imm(9, offset);
        add(9, base_reg, 9);
        ldr(reg, 9, 0);
    }
}

void Arm64CodeGenerator::ldrh(int reg, int base_reg, s32 offset) {
    if (offset >= 0 && offset < 8192 && (offset % 2 == 0)) {
        // The scaled imm12 field starts at bit 10, not bit 12.
        emit32(0x79400000 | (reg << 0) | (base_reg << 5) | ((offset / 2) << 10));
    } else {
        mov_imm(9, offset);
        add(9, base_reg, 9);
        ldrh(reg, 9, 0);
    }
}

void Arm64CodeGenerator::ldrb(int reg, int base_reg, s32 offset) {
    if (offset >= 0 && offset < 4096) {
        emit32(0x39400000 | (reg << 0) | (base_reg << 5) | (offset << 10));
    } else {
        mov_imm(9, offset);
        add(9, base_reg, 9);
        ldrb(reg, 9, 0);
    }
}

void Arm64CodeGenerator::ldp(int reg1, int reg2, int base_reg, s32 offset) {
    if (offset >= -256 && offset < 256 && (offset % 8 == 0)) {
        s32 scaled_offset = offset / 8;
        // imm7 is a 7-bit two's-complement field; masking handles negative offsets.
        u32 imm7 = static_cast<u32>(scaled_offset) & 0x7F;
        emit32(0xA9400000 | (reg1 << 0) | (reg2 << 10) | (base_reg << 5) | (imm7 << 15));
    } else {
        mov_imm(9, offset);
        add(9, base_reg, 9);
        ldp(reg1, reg2, 9, 0);
    }
}

void Arm64CodeGenerator::str(int reg, void* addr) {
    movz(9, reinterpret_cast<u64>(addr) & 0xFFFF);
    movk(9, (reinterpret_cast<u64>(addr) >> 16) & 0xFFFF, 16);
    movk(9, (reinterpret_cast<u64>(addr) >> 32) & 0xFFFF, 32);
    movk(9, (reinterpret_cast<u64>(addr) >> 48) & 0xFFFF, 48);
    str(reg, 9, 0);
}

void Arm64CodeGenerator::str(int reg, int base_reg, s32 offset) {
    if (offset >= 0 && offset < 32768 && (offset % 8 == 0)) {
        emit32(0xF9000000 | (reg << 0) | (base_reg << 5) | ((offset / 8) << 10));
    } else {
        mov_imm(9, offset);
        add(9, base_reg, 9);
        str(reg, 9, 0);
    }
}

void Arm64CodeGenerator::strh(int reg, int base_reg, s32 offset) {
    if (offset >= 0 && offset < 8192 && (offset % 2 == 0)) {
        emit32(0x79000000 | (reg << 0) | (base_reg << 5) | ((offset / 2) << 10));
    } else {
        mov_imm(9, offset);
        add(9, base_reg, 9);
        strh(reg, 9, 0);
    }
}

void Arm64CodeGenerator::strb(int reg, int base_reg, s32 offset) {
    if (offset >= 0 && offset < 4096) {
        emit32(0x39000000 | (reg << 0) | (base_reg << 5) | (offset << 10));
    } else {
        mov_imm(9, offset);
        add(9, base_reg, 9);
        strb(reg, 9, 0);
    }
}

void Arm64CodeGenerator::stp(int reg1, int reg2, int base_reg, s32 offset) {
    if (offset >= -256 && offset < 256 && (offset % 8 == 0)) {
        s32 scaled_offset = offset / 8;
        u32 imm7 = static_cast<u32>(scaled_offset) & 0x7F;
        emit32(0xA9000000 | (reg1 << 0) | (reg2 << 10) | (base_reg << 5) | (imm7 << 15));
    } else {
        mov_imm(9, offset);
        add(9, base_reg, 9);
        stp(reg1, reg2, 9, 0);
    }
}

// Arithmetic operations
void Arm64CodeGenerator::add(int dst, int src1, int src2) {
    emit32(0x8B000000 | (dst << 0) | (src1 << 5) | (src2 << 16));
}

void Arm64CodeGenerator::add(int dst, int src1, int src2, int shift) {
    ASSERT_MSG(shift >= 0 && shift <= 3, "Invalid shift amount");
    // The LSL amount of a shifted-register ADD lives in the imm6 field at bits [15:10].
    emit32(0x8B000000 | (dst << 0) | (src1 << 5) | (src2 << 16) | (shift << 10));
}

void Arm64CodeGenerator::add_imm(int dst, int src1, s32 imm) {
    if (imm >= 0 && imm < 4096) {
        emit32(0x91000000 | (dst << 0) | (src1 << 5) | (imm << 10));
    } else if (imm < 0 && imm > -4096) {
        sub_imm(dst, src1, -imm);
    } else {
        mov_imm(9, imm);
        add(dst, src1, 9);
    }
}

void Arm64CodeGenerator::sub(int dst, int src1, int src2) {
    emit32(0xCB000000 | (dst << 0) | (src1 << 5) | (src2 << 16));
}

void Arm64CodeGenerator::sub_imm(int dst, int src1, s32 imm) {
    if (imm >= 0 && imm < 4096) {
        emit32(0xD1000000 | (dst << 0) | (src1 << 5) | (imm << 10));
    } else if (imm < 0 && imm > -4096) {
        add_imm(dst, src1, -imm);
    } else {
        mov_imm(9, imm);
        sub(dst, src1, 9);
    }
}

void Arm64CodeGenerator::mul(int dst, int src1, int src2) {
    emit32(0x9B007C00 | (dst << 0) | (src1 << 5) | (src2 << 16));
}

void Arm64CodeGenerator::sdiv(int dst, int src1, int src2) {
    emit32(0x9AC00C00 | (dst << 0) | (src1 << 5) | (src2 << 16));
}

void Arm64CodeGenerator::udiv(int dst, int src1, int src2) {
    emit32(0x9AC00800 | (dst << 0) | (src1 << 5) | (src2 << 16));
}

void Arm64CodeGenerator::and_(int dst, int src1, int src2) {
    emit32(0x8A000000 | (dst << 0) | (src1 << 5) | (src2 << 16));
}

void Arm64CodeGenerator::and_(int dst, int src1, u64 imm) {
    // AND (immediate) uses the bitmask-immediate encoding, which cannot represent
    // arbitrary 12-bit values, so materialize the constant and use the register form.
    mov_imm(9, static_cast<s64>(imm));
    and_(dst, src1, 9);
}

void Arm64CodeGenerator::orr(int dst, int src1, int src2) {
    emit32(0xAA000000 | (dst << 0) | (src1 << 5) | (src2 << 16));
}

void Arm64CodeGenerator::orr(int dst, int src1, u64 imm) {
    // Same as and_: logical immediates need bitmask encoding, so go through X9.
    mov_imm(9, static_cast<s64>(imm));
    orr(dst, src1, 9);
}

void Arm64CodeGenerator::eor(int dst, int src1, int src2) {
    emit32(0xCA000000 | (dst << 0) | (src1 << 5) | (src2 << 16));
}

void Arm64CodeGenerator::eor(int dst, int src1, u64 imm) {
    mov_imm(9, static_cast<s64>(imm));
    eor(dst, src1, 9);
}

void Arm64CodeGenerator::mvn(int dst, int src) {
    // ORN dst, xzr, src -- Rn (bits [9:5]) must be the zero register.
    emit32(0xAA2003E0 | (dst << 0) | (src << 16));
}

void Arm64CodeGenerator::lsl(int dst, int src1, int src2) {
    emit32(0x9AC02000 | (dst << 0) | (src1 << 5) | (src2 << 16));
}

void Arm64CodeGenerator::lsl(int dst, int src1, u8 shift) {
    ASSERT_MSG(shift < 64, "Shift amount must be < 64");
    // LSL #s is an alias of UBFM dst, src1, #((64 - s) % 64), #(63 - s).
    u32 immr = (64 - shift) & 63;
    u32 imms = 63 - shift;
    emit32(0xD3400000 | (dst << 0) | (src1 << 5) | (immr << 16) | (imms << 10));
}

void Arm64CodeGenerator::lsr(int dst, int src1, int src2) {
    emit32(0x9AC02400 | (dst << 0) | (src1 << 5) | (src2 << 16));
}

void Arm64CodeGenerator::lsr(int dst, int src1, u8 shift) {
    ASSERT_MSG(shift < 64, "Shift amount must be < 64");
    // LSR #s is an alias of UBFM dst, src1, #s, #63.
    emit32(0xD3400000 | (dst << 0) | (src1 << 5) | (static_cast<u32>(shift) << 16) | (63u << 10));
}

void Arm64CodeGenerator::asr(int dst, int src1, int src2) {
    emit32(0x9AC02800 | (dst << 0) | (src1 << 5) | (src2 << 16));
}

void Arm64CodeGenerator::asr(int dst, int src1, u8 shift) {
    ASSERT_MSG(shift < 64, "Shift amount must be < 64");
    // ASR #s is an alias of SBFM dst, src1, #s, #63.
    emit32(0x93400000 | (dst << 0) | (src1 << 5) | (static_cast<u32>(shift) << 16) | (63u << 10));
}

// Move operations
void Arm64CodeGenerator::mov(int dst, int src) {
    if (dst != src) {
        emit32(0xAA0003E0 | (dst << 0) | (src << 16));
    }
}

void Arm64CodeGenerator::mov_imm(int dst, s64 imm) {
    if (imm >= 0 && imm <= 0xFFFF) {
        movz(dst, static_cast<u16>(imm));
    } else if (imm >= -0x10000 && imm < 0) {
        movn(dst, static_cast<u16>(-imm - 1));
    } else {
        movz(dst, imm & 0xFFFF);
        if ((imm >> 16) & 0xFFFF) {
            movk(dst, (imm >> 16) & 0xFFFF, 16);
        }
        if ((imm >> 32) & 0xFFFF) {
            movk(dst, (imm >> 32) & 0xFFFF, 32);
        }
        if ((imm >> 48) & 0xFFFF) {
            movk(dst, (imm >> 48) & 0xFFFF, 48);
        }
    }
}
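A worked example of the chunked materialization above:

// mov_imm(0, 0x0000123400005678) emits two instructions:
//   movz x0, #0x5678              // bits [15:0]; movz zeroes the rest of x0
//   movk x0, #0x1234, lsl #32     // bits [47:32]; the all-zero chunks are skipped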
void Arm64CodeGenerator::movz(int dst, u16 imm, u8 shift) {
    ASSERT_MSG(shift % 16 == 0 && shift < 64, "Shift must be multiple of 16 and < 64");
    emit32(0xD2800000 | (dst << 0) | (imm << 5) | ((shift / 16) << 21));
}

void Arm64CodeGenerator::movk(int dst, u16 imm, u8 shift) {
    ASSERT_MSG(shift % 16 == 0 && shift < 64, "Shift must be multiple of 16 and < 64");
    emit32(0xF2800000 | (dst << 0) | (imm << 5) | ((shift / 16) << 21));
}

void Arm64CodeGenerator::movn(int dst, u16 imm, u8 shift) {
    ASSERT_MSG(shift % 16 == 0 && shift < 64, "Shift must be multiple of 16 and < 64");
    emit32(0x92800000 | (dst << 0) | (imm << 5) | ((shift / 16) << 21));
}

// Compare operations
void Arm64CodeGenerator::cmp(int reg1, int reg2) {
    emit32(0xEB000000 | (31 << 0) | (reg1 << 5) | (reg2 << 16));
}

void Arm64CodeGenerator::cmp_imm(int reg, s32 imm) {
    if (imm >= 0 && imm < 4096) {
        emit32(0xF1000000 | (31 << 0) | (reg << 5) | (imm << 10));
    } else {
        mov_imm(9, imm);
        cmp(reg, 9);
    }
}

void Arm64CodeGenerator::tst(int reg1, int reg2) {
    emit32(0xEA000000 | (31 << 0) | (reg1 << 5) | (reg2 << 16));
}

void Arm64CodeGenerator::tst(int reg, u64 imm) {
    // ANDS (immediate) needs the bitmask-immediate encoding; use the register form.
    mov_imm(9, static_cast<s64>(imm));
    tst(reg, 9);
}

// Branch operations
void Arm64CodeGenerator::b(void* target) {
    s64 offset = reinterpret_cast<s64>(target) - reinterpret_cast<s64>(code_ptr);
    if (offset >= -0x8000000 && offset < 0x8000000) {
        s32 imm26 = static_cast<s32>(offset / 4);
        emit32(0x14000000 | (imm26 & 0x3FFFFFF));
    } else {
        movz(9, reinterpret_cast<u64>(target) & 0xFFFF);
        movk(9, (reinterpret_cast<u64>(target) >> 16) & 0xFFFF, 16);
        movk(9, (reinterpret_cast<u64>(target) >> 32) & 0xFFFF, 32);
        movk(9, (reinterpret_cast<u64>(target) >> 48) & 0xFFFF, 48);
        br(9);
    }
}

void Arm64CodeGenerator::b(int condition, void* target) {
    s64 offset = reinterpret_cast<s64>(target) - reinterpret_cast<s64>(code_ptr);
    // B.cond only carries a 19-bit word offset (+/-1 MiB).
    if (offset >= -0x100000 && offset < 0x100000) {
        s32 imm19 = static_cast<s32>(offset / 4);
        emit32(0x54000000 | (condition << 0) | ((imm19 & 0x7FFFF) << 5));
    } else {
        // Far branch: take an inverted-condition branch over the absolute jump below
        // (movz + 3x movk + br = 5 instructions, so skip 6 words including this one).
        emit32(0x54000000 | ((condition ^ 1) << 0) | (6 << 5));
        movz(9, reinterpret_cast<u64>(target) & 0xFFFF);
        movk(9, (reinterpret_cast<u64>(target) >> 16) & 0xFFFF, 16);
        movk(9, (reinterpret_cast<u64>(target) >> 32) & 0xFFFF, 32);
        movk(9, (reinterpret_cast<u64>(target) >> 48) & 0xFFFF, 48);
        br(9);
    }
}

void Arm64CodeGenerator::bl(void* target) {
    s64 offset = reinterpret_cast<s64>(target) - reinterpret_cast<s64>(code_ptr);
    if (offset >= -0x8000000 && offset < 0x8000000) {
        s32 imm26 = static_cast<s32>(offset / 4);
        emit32(0x94000000 | (imm26 & 0x3FFFFFF));
    } else {
        movz(9, reinterpret_cast<u64>(target) & 0xFFFF);
        movk(9, (reinterpret_cast<u64>(target) >> 16) & 0xFFFF, 16);
        movk(9, (reinterpret_cast<u64>(target) >> 32) & 0xFFFF, 32);
        movk(9, (reinterpret_cast<u64>(target) >> 48) & 0xFFFF, 48);
        blr(9);
    }
}

void Arm64CodeGenerator::br(int reg) {
    emit32(0xD61F0000 | (reg << 5));
}

void Arm64CodeGenerator::blr(int reg) {
    emit32(0xD63F0000 | (reg << 5));
}

void Arm64CodeGenerator::ret(int reg) {
    emit32(0xD65F0000 | (reg << 5));
}

// Conditional branches
void Arm64CodeGenerator::b_eq(void* target) {
    b(0, target);
}
void Arm64CodeGenerator::b_ne(void* target) {
    b(1, target);
}
void Arm64CodeGenerator::b_lt(void* target) {
    b(11, target);
}
void Arm64CodeGenerator::b_le(void* target) {
    b(13, target);
}
void Arm64CodeGenerator::b_gt(void* target) {
    b(12, target);
}
void Arm64CodeGenerator::b_ge(void* target) {
    b(10, target);
}
void Arm64CodeGenerator::b_lo(void* target) {
    b(3, target);
}
void Arm64CodeGenerator::b_ls(void* target) {
    b(9, target);
}
void Arm64CodeGenerator::b_hi(void* target) {
    b(8, target);
}
void Arm64CodeGenerator::b_hs(void* target) {
    b(2, target);
}

// Stack operations
void Arm64CodeGenerator::push(int reg) {
    // SP is register 31 only in the immediate forms, and 16 is a byte count
    // rather than a register, so sub_imm/add_imm must be used here.
    sub_imm(31, 31, 16);
    str(reg, 31, 0);
}

void Arm64CodeGenerator::push(int reg1, int reg2) {
    sub_imm(31, 31, 16);
    stp(reg1, reg2, 31, 0);
}

void Arm64CodeGenerator::pop(int reg) {
    ldr(reg, 31, 0);
    add_imm(31, 31, 16);
}

void Arm64CodeGenerator::pop(int reg1, int reg2) {
    ldp(reg1, reg2, 31, 0);
    add_imm(31, 31, 16);
}

// System operations
void Arm64CodeGenerator::nop() {
    emit32(0xD503201F);
}

void Arm64CodeGenerator::brk(u16 imm) {
    emit32(0xD4200000 | (imm << 5));
}

// NEON/SIMD operations
void Arm64CodeGenerator::ldr_v(int vreg, int base_reg, s32 offset) {
    if (offset >= 0 && offset < 4096 && (offset % 16 == 0)) {
        emit32(0x3DC00000 | (vreg << 0) | (base_reg << 5) | ((offset / 16) << 10));
    } else {
        mov_imm(9, offset);
        add(9, base_reg, 9);
        ldr_v(vreg, 9, 0);
    }
}

void Arm64CodeGenerator::str_v(int vreg, int base_reg, s32 offset) {
    if (offset >= 0 && offset < 4096 && (offset % 16 == 0)) {
        emit32(0x3D800000 | (vreg << 0) | (base_reg << 5) | ((offset / 16) << 10));
    } else {
        mov_imm(9, offset);
        add(9, base_reg, 9);
        str_v(vreg, 9, 0);
    }
}

void Arm64CodeGenerator::mov_v(int vdst, int vsrc) {
    // ORR Vd.16B, Vn.16B, Vn.16B -- the register move needs Rm == Rn.
    emit32(0x4EA01C00 | (vdst << 0) | (vsrc << 5) | (vsrc << 16));
}

void Arm64CodeGenerator::add_v(int vdst, int vsrc1, int vsrc2) {
    emit32(0x4E208400 | (vdst << 0) | (vsrc1 << 5) | (vsrc2 << 16));
}

void Arm64CodeGenerator::sub_v(int vdst, int vsrc1, int vsrc2) {
    // SUB (vector) sets the U bit; 0x4EA08400 would encode an ADD instead.
    emit32(0x6E208400 | (vdst << 0) | (vsrc1 << 5) | (vsrc2 << 16));
}

void Arm64CodeGenerator::mul_v(int vdst, int vsrc1, int vsrc2) {
    emit32(0x4E209C00 | (vdst << 0) | (vsrc1 << 5) | (vsrc2 << 16));
}

} // namespace Core::Jit
132  src/core/jit/arm64_codegen.h  (new file)

@@ -0,0 +1,132 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <utility>
#include <vector>
#include "common/types.h"

namespace Core::Jit {

class Arm64CodeGenerator {
public:
    explicit Arm64CodeGenerator(size_t buffer_size = 64_KB, void* code_ptr = nullptr);
    ~Arm64CodeGenerator();

    Arm64CodeGenerator(const Arm64CodeGenerator&) = delete;
    Arm64CodeGenerator& operator=(const Arm64CodeGenerator&) = delete;

    void* getCode() const {
        return code_buffer;
    }
    void* getCurr() const {
        return code_ptr;
    }
    size_t getSize() const {
        return static_cast<u8*>(code_ptr) - static_cast<u8*>(code_buffer);
    }

    void reset();
    void setSize(size_t offset);

    // Memory operations
    void ldr(int reg, void* addr);
    void ldr(int reg, int base_reg, s32 offset = 0);
    void ldrh(int reg, int base_reg, s32 offset = 0);
    void ldrb(int reg, int base_reg, s32 offset = 0);
    void ldp(int reg1, int reg2, int base_reg, s32 offset = 0);
    void str(int reg, void* addr);
    void str(int reg, int base_reg, s32 offset = 0);
    void strh(int reg, int base_reg, s32 offset = 0);
    void strb(int reg, int base_reg, s32 offset = 0);
    void stp(int reg1, int reg2, int base_reg, s32 offset = 0);

    // Arithmetic operations
    void add(int dst, int src1, int src2);
    void add(int dst, int src1, int src2, int shift);
    void add_imm(int dst, int src1, s32 imm);
    void sub(int dst, int src1, int src2);
    void sub_imm(int dst, int src1, s32 imm);
    void mul(int dst, int src1, int src2);
    void sdiv(int dst, int src1, int src2);
    void udiv(int dst, int src1, int src2);
    void and_(int dst, int src1, int src2);
    void and_(int dst, int src1, u64 imm);
    void orr(int dst, int src1, int src2);
    void orr(int dst, int src1, u64 imm);
    void eor(int dst, int src1, int src2);
    void eor(int dst, int src1, u64 imm);
    void mvn(int dst, int src);
    void lsl(int dst, int src1, int src2);
    void lsl(int dst, int src1, u8 shift);
    void lsr(int dst, int src1, int src2);
    void lsr(int dst, int src1, u8 shift);
    void asr(int dst, int src1, int src2);
    void asr(int dst, int src1, u8 shift);

    // Move operations
    void mov(int dst, int src);
    void mov_imm(int dst, s64 imm);
    void movz(int dst, u16 imm, u8 shift = 0);
    void movk(int dst, u16 imm, u8 shift = 0);
    void movn(int dst, u16 imm, u8 shift = 0);

    // Compare operations
    void cmp(int reg1, int reg2);
    void cmp_imm(int reg, s32 imm);
    void tst(int reg1, int reg2);
    void tst(int reg, u64 imm);

    // Branch operations
    void b(void* target);
    void b(int condition, void* target);
    void bl(void* target);
    void br(int reg);
    void blr(int reg);
    void ret(int reg = 30); // X30 is LR by default

    // Conditional branches
    void b_eq(void* target);
    void b_ne(void* target);
    void b_lt(void* target);
    void b_le(void* target);
    void b_gt(void* target);
    void b_ge(void* target);
    void b_lo(void* target); // unsigned lower
    void b_ls(void* target); // unsigned lower or same
    void b_hi(void* target); // unsigned higher
    void b_hs(void* target); // unsigned higher or same

    // Stack operations
    void push(int reg);
    void push(int reg1, int reg2);
    void pop(int reg);
    void pop(int reg1, int reg2);

    // System operations
    void nop();
    void brk(u16 imm = 0);

    // NEON/SIMD operations (for XMM registers)
    void ldr_v(int vreg, int base_reg, s32 offset = 0);
    void str_v(int vreg, int base_reg, s32 offset = 0);
    void mov_v(int vdst, int vsrc);
    void add_v(int vdst, int vsrc1, int vsrc2);
    void sub_v(int vdst, int vsrc1, int vsrc2);
    void mul_v(int vdst, int vsrc1, int vsrc2);

    void makeExecutable();

private:
    void emit32(u32 instruction);
    void emit64(u64 instruction);
    void* allocateCode(size_t size);

    void* code_buffer;
    void* code_ptr;
    size_t buffer_size;
    bool owns_buffer;
    std::vector<std::pair<void*, void*>> fixups; // (fixup_location, target_address)
};

} // namespace Core::Jit
126  src/core/jit/block_manager.cpp  (new file)

@@ -0,0 +1,126 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include <cstdint> // for UINTPTR_MAX
#include "block_manager.h"
#include "common/logging/log.h"

namespace Core::Jit {

BlockManager::BlockManager() = default;

BlockManager::~BlockManager() {
    Clear();
}

CodeBlock* BlockManager::GetBlock(VAddr ps4_address) {
    std::lock_guard<std::mutex> lock(mutex);
    auto it = blocks.find(ps4_address);
    if (it != blocks.end()) {
        return it->second.get();
    }
    return nullptr;
}

CodeBlock* BlockManager::CreateBlock(VAddr ps4_address, void* arm64_code, size_t code_size,
                                     size_t instruction_count) {
    std::lock_guard<std::mutex> lock(mutex);

    auto block = std::make_unique<CodeBlock>(ps4_address, arm64_code, code_size, instruction_count);
    CodeBlock* result = block.get();
    blocks[ps4_address] = std::move(block);

    LOG_DEBUG(Core, "Created code block at PS4 address {:#x}, ARM64 code: {}, size: {}",
              ps4_address, fmt::ptr(arm64_code), code_size);

    return result;
}

void BlockManager::InvalidateBlock(VAddr ps4_address) {
    std::lock_guard<std::mutex> lock(mutex);

    // Delink all links pointing to this block
    auto lower = block_links.lower_bound({ps4_address, nullptr});
    auto upper = block_links.upper_bound(
        {ps4_address, reinterpret_cast<ExitFunctionLinkData*>(UINTPTR_MAX)});
    for (auto it = lower; it != upper;) {
        it->second(it->first.host_link);
        it = block_links.erase(it);
    }

    blocks.erase(ps4_address);
    LOG_DEBUG(Core, "Invalidated code block at PS4 address {:#x}", ps4_address);
}

void BlockManager::InvalidateRange(VAddr start, VAddr end) {
    std::lock_guard<std::mutex> lock(mutex);

    // Delink all links pointing to blocks in this range
    auto link_it = block_links.begin();
    while (link_it != block_links.end()) {
        if (link_it->first.guest_destination >= start && link_it->first.guest_destination < end) {
            link_it->second(link_it->first.host_link);
            link_it = block_links.erase(link_it);
        } else {
            ++link_it;
        }
    }

    auto it = blocks.begin();
    while (it != blocks.end()) {
        VAddr block_addr = it->first;
        if (block_addr >= start && block_addr < end) {
            it = blocks.erase(it);
        } else {
            auto& deps = it->second->dependencies;
            bool has_dependency_in_range = false;
            for (VAddr dep : deps) {
                if (dep >= start && dep < end) {
                    has_dependency_in_range = true;
                    break;
                }
            }
            if (has_dependency_in_range) {
                it = blocks.erase(it);
            } else {
                ++it;
            }
        }
    }

    LOG_DEBUG(Core, "Invalidated code blocks in range {:#x} - {:#x}", start, end);
}

void BlockManager::AddDependency(VAddr block_address, VAddr dependency) {
    std::lock_guard<std::mutex> lock(mutex);
    auto it = blocks.find(block_address);
    if (it != blocks.end()) {
        it->second->dependencies.insert(dependency);
    }
}

void BlockManager::AddBlockLink(VAddr guest_dest, ExitFunctionLinkData* link_data,
                                BlockDelinkerFunc delinker) {
    std::lock_guard<std::mutex> lock(mutex);
    block_links[{guest_dest, link_data}] = delinker;
}

void BlockManager::Clear() {
    std::lock_guard<std::mutex> lock(mutex);
    // Delink all links before clearing
    for (auto& [tag, delinker] : block_links) {
        delinker(tag.host_link);
    }
    block_links.clear();
    blocks.clear();
}

size_t BlockManager::GetTotalCodeSize() const {
    std::lock_guard<std::mutex> lock(mutex);
    size_t total = 0;
    for (const auto& [addr, block] : blocks) {
        total += block->code_size;
    }
    return total;
}

} // namespace Core::Jit
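A hypothetical dispatch fragment showing how these pieces would fit together (driver code like this is not in the diff, and Translate() stands in for whatever entry point the x86_64 translator exposes):

#include "block_manager.h"

// Assumed translator entry point, declared here for illustration only.
void* Translate(VAddr guest_rip, size_t* code_size, size_t* instruction_count);

void* GetOrTranslate(Core::Jit::BlockManager& manager, VAddr guest_rip) {
    if (Core::Jit::CodeBlock* block = manager.GetBlock(guest_rip)) {
        return block->arm64_code; // cache hit: reuse the generated ARM64 code
    }
    size_t code_size = 0;
    size_t instruction_count = 0;
    void* code = Translate(guest_rip, &code_size, &instruction_count);
    return manager.CreateBlock(guest_rip, code, code_size, instruction_count)->arm64_code;
}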
85
src/core/jit/block_manager.h
Normal file
85
src/core/jit/block_manager.h
Normal file
@ -0,0 +1,85 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <atomic>
#include <functional>
#include <map>
#include <memory>
#include <mutex>
#include <set>
#include <unordered_map>
#include "common/types.h"

namespace Core::Jit {

struct ExitFunctionLinkData {
    void* host_code;
    VAddr guest_rip;
    void* caller_address;
    u32 original_instruction;
};

using BlockDelinkerFunc = std::function<void(ExitFunctionLinkData*)>;

struct BlockLinkTag {
    VAddr guest_destination;
    ExitFunctionLinkData* host_link;

    bool operator<(const BlockLinkTag& other) const {
        if (guest_destination < other.guest_destination) {
            return true;
        } else if (guest_destination == other.guest_destination) {
            return host_link < other.host_link;
        } else {
            return false;
        }
    }
};

struct CodeBlock {
    VAddr ps4_address;
    void* arm64_code;
    size_t code_size;
    size_t instruction_count;
    std::set<VAddr> dependencies;
    bool is_linked;

    // Control flow targets for linking
    VAddr fallthrough_target;    // Next sequential address (if block doesn't end with branch)
    VAddr branch_target;         // Direct branch target (JMP)
    void* branch_patch_location; // Location in ARM64 code to patch for direct branch

    CodeBlock(VAddr addr, void* code, size_t size, size_t count)
        : ps4_address(addr), arm64_code(code), code_size(size), instruction_count(count),
          is_linked(false), fallthrough_target(0), branch_target(0),
          branch_patch_location(nullptr) {}
};

class BlockManager {
public:
    BlockManager();
    ~BlockManager();

    CodeBlock* GetBlock(VAddr ps4_address);
    CodeBlock* CreateBlock(VAddr ps4_address, void* arm64_code, size_t code_size,
                           size_t instruction_count);
    void InvalidateBlock(VAddr ps4_address);
    void InvalidateRange(VAddr start, VAddr end);
    void AddDependency(VAddr block_address, VAddr dependency);
    void AddBlockLink(VAddr guest_dest, ExitFunctionLinkData* link_data,
                      BlockDelinkerFunc delinker);
    void Clear();

    size_t GetBlockCount() const {
        return blocks.size();
    }
    size_t GetTotalCodeSize() const;

    std::unordered_map<VAddr, std::unique_ptr<CodeBlock>> blocks;
    std::map<BlockLinkTag, BlockDelinkerFunc> block_links;
    mutable std::mutex mutex;
};

} // namespace Core::Jit
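The hand-rolled operator< above is a plain lexicographic compare on (guest_destination, host_link). For readers, the same ordering is usually written with std::tie; an equivalent sketch, not what the patch itself uses:

#include <tuple>

// Drop-in equivalent for BlockLinkTag::operator<.
bool operator<(const BlockLinkTag& other) const {
    return std::tie(guest_destination, host_link) <
           std::tie(other.guest_destination, other.host_link);
}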

63
src/core/jit/calling_convention.cpp
Normal file
@ -0,0 +1,63 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "calling_convention.h"
#include "common/assert.h"

namespace Core::Jit {

CallingConvention::CallingConvention(Arm64CodeGenerator& codegen, RegisterMapper& reg_mapper)
    : codegen(codegen), reg_mapper(reg_mapper) {}

void CallingConvention::PrepareCall(int arg_count, const std::vector<int>& arg_regs) {
    ASSERT_MSG(arg_count <= MAX_INT_ARGS, "Too many arguments");
    ASSERT_MSG(arg_regs.size() >= static_cast<size_t>(arg_count), "Not enough argument registers");

    for (int i = 0; i < arg_count && i < MAX_INT_ARGS; i++) {
        int arm64_arg_reg = i;
        int x86_arg_reg = arg_regs[i];
        int mapped_reg = reg_mapper.MapX86_64ToArm64(static_cast<X86_64Register>(x86_arg_reg));
        if (mapped_reg != arm64_arg_reg) {
            codegen.mov(arm64_arg_reg, mapped_reg);
        }
    }
}

void CallingConvention::CallFunction(void* function_ptr) {
    // Materialize the 64-bit target in x16 (IP0, the intra-procedure-call scratch register),
    // 16 bits at a time, then branch-and-link through it.
    codegen.movz(16, reinterpret_cast<u64>(function_ptr) & 0xFFFF);
    codegen.movk(16, (reinterpret_cast<u64>(function_ptr) >> 16) & 0xFFFF, 16);
    codegen.movk(16, (reinterpret_cast<u64>(function_ptr) >> 32) & 0xFFFF, 32);
    codegen.movk(16, (reinterpret_cast<u64>(function_ptr) >> 48) & 0xFFFF, 48);
    codegen.blr(16);
}

void CallingConvention::CallFunction(int reg) {
    codegen.blr(reg);
}

void CallingConvention::Return(int return_reg) {
    if (return_reg >= 0) {
        int arm64_return = reg_mapper.MapX86_64ToArm64(X86_64Register::RAX);
        if (return_reg != arm64_return) {
            codegen.mov(arm64_return, return_reg);
        }
    }
    codegen.ret();
}

void CallingConvention::SaveCallerSavedRegisters() {
    saved_registers.clear();
    for (int i = 0; i < 8; i++) {
        codegen.push(i);
        saved_registers.push_back(i);
    }
}

void CallingConvention::RestoreCallerSavedRegisters() {
    for (auto it = saved_registers.rbegin(); it != saved_registers.rend(); ++it) {
        codegen.pop(*it);
    }
    saved_registers.clear();
}

} // namespace Core::Jit
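The movz/movk chain in CallFunction(void*) is the standard A64 idiom for loading an arbitrary 64-bit immediate: movz writes one 16-bit half-word and zeroes the other three, and each movk patches one higher half-word in place. A standalone worked example with a made-up address, just verifying the decomposition the emitter relies on:

#include <cassert>
#include <cstdint>

int main() {
    const std::uint64_t addr = 0x00007f12345689abULL; // hypothetical target
    const std::uint16_t h0 = addr & 0xFFFF;           // movz x16, #0x89ab
    const std::uint16_t h1 = (addr >> 16) & 0xFFFF;   // movk x16, #0x3456, lsl #16
    const std::uint16_t h2 = (addr >> 32) & 0xFFFF;   // movk x16, #0x7f12, lsl #32
    const std::uint16_t h3 = (addr >> 48) & 0xFFFF;   // movk x16, #0x0000, lsl #48
    const std::uint64_t rebuilt = static_cast<std::uint64_t>(h0) |
                                  (static_cast<std::uint64_t>(h1) << 16) |
                                  (static_cast<std::uint64_t>(h2) << 32) |
                                  (static_cast<std::uint64_t>(h3) << 48);
    assert(rebuilt == addr); // the four instructions reassemble the full pointer
}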

33
src/core/jit/calling_convention.h
Normal file
@ -0,0 +1,33 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <vector>
#include "arm64_codegen.h"
#include "register_mapping.h"

namespace Core::Jit {

class CallingConvention {
public:
    explicit CallingConvention(Arm64CodeGenerator& codegen, RegisterMapper& reg_mapper);

    void PrepareCall(int arg_count, const std::vector<int>& arg_regs);
    void CallFunction(void* function_ptr);
    void CallFunction(int reg);
    void Return(int return_reg = -1);

    void SaveCallerSavedRegisters();
    void RestoreCallerSavedRegisters();

    static constexpr int MAX_INT_ARGS = 8;
    static constexpr int MAX_FLOAT_ARGS = 8;

private:
    Arm64CodeGenerator& codegen;
    RegisterMapper& reg_mapper;
    std::vector<int> saved_registers;
};

} // namespace Core::Jit
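A usage sketch of this interface, under the assumption that arg_regs carries X86_64Register values as plain ints (which is how PrepareCall casts them); the function name is hypothetical:

#include <vector>
#include "core/jit/calling_convention.h"

void EmitThreeArgCall(Core::Jit::CallingConvention& cc, void* fn) {
    using Core::Jit::X86_64Register;
    // SysV integer argument order: RDI, RSI, RDX -> AAPCS64 X0, X1, X2.
    std::vector<int> args = {static_cast<int>(X86_64Register::RDI),
                             static_cast<int>(X86_64Register::RSI),
                             static_cast<int>(X86_64Register::RDX)};
    cc.SaveCallerSavedRegisters();
    cc.PrepareCall(3, args); // moves each mapped guest register into X0..X2
    cc.CallFunction(fn);     // movz/movk + blr x16
    cc.RestoreCallerSavedRegisters();
}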

391
src/core/jit/execution_engine.cpp
Normal file
@ -0,0 +1,391 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include <cstring>
#include <sys/mman.h>
#include "common/decoder.h"
#include "common/logging/log.h"
#include "core/memory.h"
#include "execution_engine.h"
#if defined(__APPLE__) && defined(ARCH_ARM64)
#include <pthread.h>
#endif

namespace Core::Jit {

static size_t alignUp(size_t value, size_t alignment) {
    return (value + alignment - 1) & ~(alignment - 1);
}

static void* AllocateExecutableMemory(size_t size) {
    size = alignUp(size, 4096);
#if defined(__APPLE__) && defined(ARCH_ARM64)
    // On macOS ARM64:
    // 1. Allocate with PROT_READ | PROT_WRITE (no PROT_EXEC initially) and MAP_JIT
    //    (pthread_jit_write_protect_np only applies to MAP_JIT regions)
    // 2. Use pthread_jit_write_protect_np to allow writing
    // 3. After writing, use mprotect to add PROT_EXEC
    void* ptr =
        mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_JIT, -1, 0);
    if (ptr == MAP_FAILED) {
        LOG_CRITICAL(Core, "Failed to allocate executable memory: {} (errno={})", strerror(errno),
                     errno);
        return nullptr;
    }
    // Initially disable write protection so we can write code
    pthread_jit_write_protect_np(0);
    return ptr;
#else
    void* ptr =
        mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (ptr == MAP_FAILED) {
        LOG_CRITICAL(Core, "Failed to allocate executable memory: {}", strerror(errno));
        return nullptr;
    }
    return ptr;
#endif
}

ExecutionEngine::ExecutionEngine()
    : code_buffer(nullptr), code_buffer_size(DEFAULT_CODE_BUFFER_SIZE), code_buffer_used(0) {
    block_manager = std::make_unique<BlockManager>();
    register_mapper = std::make_unique<RegisterMapper>();
}

ExecutionEngine::~ExecutionEngine() {
    Shutdown();
}

void ExecutionEngine::Initialize() {
    if (IsInitialized()) {
        LOG_DEBUG(Core, "JIT Execution Engine already initialized");
        return;
    }

    code_buffer = AllocateExecutableMemory(code_buffer_size);
    if (!code_buffer) {
        throw std::bad_alloc();
    }

    code_generator = std::make_unique<Arm64CodeGenerator>(code_buffer_size, code_buffer);
    translator = std::make_unique<X86_64Translator>(*code_generator, *register_mapper);

    LOG_INFO(Core, "JIT Execution Engine initialized");
}

void ExecutionEngine::Shutdown() {
    if (code_buffer) {
#if defined(__APPLE__) && defined(ARCH_ARM64)
        // On macOS ARM64, ensure write protection is enabled before unmapping
        pthread_jit_write_protect_np(1);
#endif
        munmap(code_buffer, code_buffer_size);
        code_buffer = nullptr;
    }
    code_generator.reset();
    translator.reset();
    block_manager.reset();
    register_mapper.reset();
}

void* ExecutionEngine::AllocateCodeBuffer(size_t size) {
    size = (size + 15) & ~15;
    if (code_buffer_used + size > code_buffer_size) {
        LOG_WARNING(Core, "Code buffer exhausted, need to allocate more");
        return nullptr;
    }
    void* result = static_cast<u8*>(code_buffer) + code_buffer_used;
    code_buffer_used += size;
    return result;
}

CodeBlock* ExecutionEngine::TranslateBasicBlock(VAddr start_address, size_t max_instructions) {
    auto* memory = Core::Memory::Instance();
    auto& address_space = memory->GetAddressSpace();
    void* ps4_code_ptr = address_space.TranslateAddress(start_address);
    if (!ps4_code_ptr) {
        LOG_ERROR(Core, "Invalid PS4 address for translation: {:#x}", start_address);
        return nullptr;
    }

    code_generator->reset();
    void* block_start = code_generator->getCurr();

    VAddr current_address = start_address;
    size_t instruction_count = 0;
    bool block_end = false;
    VAddr fallthrough_target = 0;
    VAddr branch_target = 0;
    void* branch_patch_location = nullptr;

    while (instruction_count < max_instructions && !block_end) {
        ZydisDecodedInstruction instruction;
        ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];

        void* code_ptr = address_space.TranslateAddress(current_address);
        if (!code_ptr) {
            break;
        }

        ZyanStatus status =
            Common::Decoder::Instance()->decodeInstruction(instruction, operands, code_ptr, 15);
        if (!ZYAN_SUCCESS(status)) {
            LOG_WARNING(Core, "Failed to decode instruction at {:#x}", current_address);
            break;
        }

        // Track branch/call target before translation
        if (instruction.mnemonic == ZYDIS_MNEMONIC_JMP &&
            operands[0].type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
            s64 offset = static_cast<s64>(operands[0].imm.value.s);
            branch_target = current_address + instruction.length + offset;
            branch_patch_location = code_generator->getCurr();
        } else if (instruction.mnemonic == ZYDIS_MNEMONIC_CALL &&
                   operands[0].type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
            // Track CALL target for potential linking (though CALL typically goes to HLE)
            s64 offset = static_cast<s64>(operands[0].imm.value.s);
            branch_target = current_address + instruction.length + offset;
            branch_patch_location = code_generator->getCurr();
        }

        bool translated = translator->TranslateInstruction(instruction, operands, current_address);
        if (!translated) {
            LOG_WARNING(Core, "Failed to translate instruction at {:#x}", current_address);
            break;
        }

        instruction_count++;
        VAddr next_address = current_address + instruction.length;

        switch (instruction.mnemonic) {
        case ZYDIS_MNEMONIC_RET:
        case ZYDIS_MNEMONIC_CALL:
            block_end = true;
            break;
        case ZYDIS_MNEMONIC_JMP:
            block_end = true;
            break;
        default:
            // Check for conditional branches (they don't end the block, but we track them)
            if (instruction.mnemonic >= ZYDIS_MNEMONIC_JO &&
                instruction.mnemonic <= ZYDIS_MNEMONIC_JZ) {
                // Conditional branch - block continues with fallthrough
                // TODO: Track conditional branch targets for linking
            }
            break;
        }

        current_address = next_address;
    }

    if (instruction_count == 0) {
        return nullptr;
    }

    // Record a fallthrough target when the block didn't end in a branch, or ended without a
    // direct branch target (e.g. RET or a translation failure mid-block)
    if (!block_end || branch_target == 0) {
        fallthrough_target = current_address;
    }

    size_t code_size = code_generator->getSize();
    code_generator->makeExecutable();
    CodeBlock* block =
        block_manager->CreateBlock(start_address, block_start, code_size, instruction_count);

    // Store control flow information
    block->fallthrough_target = fallthrough_target;
    block->branch_target = branch_target;
    block->branch_patch_location = branch_patch_location;

    LOG_DEBUG(Core,
              "Translated basic block at {:#x}, {} instructions, {} bytes, fallthrough: {:#x}, "
              "branch: {:#x}",
              start_address, instruction_count, code_size, fallthrough_target, branch_target);

    // Try to link blocks if targets are available
    if (branch_target != 0) {
        CodeBlock* target_block = block_manager->GetBlock(branch_target);
        if (target_block) {
            LinkBlock(block, branch_target);
        } else {
            // Add dependency for later linking
            block_manager->AddDependency(start_address, branch_target);
        }
    }

    if (fallthrough_target != 0 && branch_target == 0) {
        // Try to link fallthrough
        CodeBlock* target_block = block_manager->GetBlock(fallthrough_target);
        if (target_block) {
            // For fallthrough, we need to append a branch at the end
            // This will be handled by linking logic
            block_manager->AddDependency(start_address, fallthrough_target);
        }
    }

    return block;
}

CodeBlock* ExecutionEngine::TranslateBlock(VAddr ps4_address) {
    CodeBlock* existing = block_manager->GetBlock(ps4_address);
    if (existing) {
        return existing;
    }

    CodeBlock* new_block = TranslateBasicBlock(ps4_address);
    if (!new_block) {
        return nullptr;
    }

    // After creating a new block, check if any existing blocks can link to it
    // This handles the case where we translate a target block after the source
    for (auto& [addr, block] : block_manager->blocks) {
        if (block->branch_target == ps4_address && !block->is_linked) {
            LinkBlock(block.get(), ps4_address);
        }
        if (block->fallthrough_target == ps4_address && block->branch_target == 0 &&
            !block->is_linked) {
            LinkBlock(block.get(), ps4_address);
        }
    }

    return new_block;
}

static void DirectBlockDelinker(ExitFunctionLinkData* record, bool is_call) {
    void* caller_addr = record->caller_address;
    u32 original_inst = record->original_instruction;

    std::atomic_ref<u32>(*reinterpret_cast<u32*>(caller_addr))
        .store(original_inst, std::memory_order::relaxed);
#if defined(__APPLE__) && defined(ARCH_ARM64)
    __builtin___clear_cache(static_cast<char*>(caller_addr), static_cast<char*>(caller_addr) + 4);
#endif
    delete record;
}

void ExecutionEngine::LinkBlock(CodeBlock* block, VAddr target_address) {
    CodeBlock* target_block = block_manager->GetBlock(target_address);
    if (!target_block) {
        return;
    }

    // Patch the branch instruction if we have a patch location
    if (block->branch_patch_location && block->branch_target == target_address) {
#if defined(__APPLE__) && defined(ARCH_ARM64)
        pthread_jit_write_protect_np(0);
#endif
        void* caller_address = block->branch_patch_location;
        s64 offset =
            reinterpret_cast<s64>(target_block->arm64_code) - reinterpret_cast<s64>(caller_address);

        // Check if we can use a relative branch (within ±128MB)
        if (offset >= -0x8000000 && offset < 0x8000000) {
            s32 imm26 = static_cast<s32>(offset / 4);
            u32* patch_ptr = reinterpret_cast<u32*>(caller_address);
            u32 branch_inst = 0x14000000 | (imm26 & 0x3FFFFFF);

            u32 original_inst = *patch_ptr;
            std::atomic_ref<u32>(*patch_ptr).store(branch_inst, std::memory_order::relaxed);

            // Register delinker
            ExitFunctionLinkData* link_data = new ExitFunctionLinkData{
                target_block->arm64_code, target_address, caller_address, original_inst};
            block_manager->AddBlockLink(target_address, link_data, [](ExitFunctionLinkData* r) {
                DirectBlockDelinker(r, false);
            });
        } else {
            // Far branch - need to use indirect branch via thunk
            LOG_DEBUG(Core, "Branch target too far for direct linking: offset={}", offset);
        }
#if defined(__APPLE__) && defined(ARCH_ARM64)
        pthread_jit_write_protect_np(1);
        __builtin___clear_cache(static_cast<char*>(caller_address),
                                static_cast<char*>(caller_address) + 4);
#endif
        block->is_linked = true;
        LOG_DEBUG(Core, "Linked block {:#x} to {:#x}", block->ps4_address, target_address);
    } else if (block->fallthrough_target == target_address && block->branch_target == 0) {
        // For fallthrough, append a branch at the end of the block
#if defined(__APPLE__) && defined(ARCH_ARM64)
        pthread_jit_write_protect_np(0);
#endif
        void* link_location = static_cast<u8*>(block->arm64_code) + block->code_size;
        s64 offset =
            reinterpret_cast<s64>(target_block->arm64_code) - reinterpret_cast<s64>(link_location);

        if (offset >= -0x8000000 && offset < 0x8000000) {
            s32 imm26 = static_cast<s32>(offset / 4);
            u32* patch_ptr = reinterpret_cast<u32*>(link_location);
            u32 branch_inst = 0x14000000 | (imm26 & 0x3FFFFFF);
            u32 original_inst = 0x14000002; // encodes "b #8" (restored when the link is removed)

            std::atomic_ref<u32>(*patch_ptr).store(branch_inst, std::memory_order::relaxed);

            // Register delinker
            ExitFunctionLinkData* link_data = new ExitFunctionLinkData{
                target_block->arm64_code, target_address, link_location, original_inst};
            block_manager->AddBlockLink(target_address, link_data, [](ExitFunctionLinkData* r) {
                DirectBlockDelinker(r, false);
            });

            block->code_size += 4;
        }
#if defined(__APPLE__) && defined(ARCH_ARM64)
        pthread_jit_write_protect_np(1);
        __builtin___clear_cache(static_cast<char*>(link_location),
                                static_cast<char*>(link_location) + 4);
#endif
        block->is_linked = true;
        LOG_DEBUG(Core, "Linked fallthrough from block {:#x} to {:#x}", block->ps4_address,
                  target_address);
    }
}

bool ExecutionEngine::ExecuteBlock(VAddr ps4_address) {
    CodeBlock* block = TranslateBlock(ps4_address);
    if (!block) {
        LOG_ERROR(Core, "Failed to translate or find block at {:#x}", ps4_address);
        return false;
    }

    using BlockFunc = void (*)();
    BlockFunc func = reinterpret_cast<BlockFunc>(block->arm64_code);
    func();

    return true;
}

void ExecutionEngine::InvalidateBlock(VAddr ps4_address) {
    block_manager->InvalidateBlock(ps4_address);
}

void ExecutionEngine::InvalidateRange(VAddr start, VAddr end) {
    block_manager->InvalidateRange(start, end);
}

bool ExecutionEngine::IsJitCode(void* code_ptr) const {
    if (!code_buffer) {
        return false;
    }
    u8* ptr = static_cast<u8*>(code_ptr);
    u8* start = static_cast<u8*>(code_buffer);
    u8* end = start + code_buffer_size;
    return ptr >= start && ptr < end;
}

VAddr ExecutionEngine::GetPs4AddressForJitCode(void* code_ptr) const {
    if (!IsJitCode(code_ptr)) {
        return 0;
    }
    std::lock_guard<std::mutex> lock(block_manager->mutex);
    for (const auto& [ps4_addr, block] : block_manager->blocks) {
        u8* block_start = static_cast<u8*>(block->arm64_code);
        u8* block_end = block_start + block->code_size;
        u8* ptr = static_cast<u8*>(code_ptr);
        if (ptr >= block_start && ptr < block_end) {
            return ps4_addr;
        }
    }
    return 0;
}

} // namespace Core::Jit
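Both linking paths above hand-encode an A64 B instruction: opcode 0x14000000 in the top six bits plus a signed 26-bit word offset, which is exactly what limits direct links to ±128 MiB. A standalone sketch of the same encoding and range check, for reference:

#include <cstdint>
#include <optional>

// Encode "b <target>" for a branch instruction located at `from`,
// the way LinkBlock patches it. Addresses are host code addresses.
std::optional<std::uint32_t> EncodeB(std::int64_t from, std::int64_t target) {
    const std::int64_t offset = target - from;
    if (offset < -0x8000000 || offset >= 0x8000000 || (offset & 3) != 0) {
        return std::nullopt; // out of ±128 MiB range, or not 4-byte aligned
    }
    const std::int32_t imm26 = static_cast<std::int32_t>(offset / 4);
    return 0x14000000u | (static_cast<std::uint32_t>(imm26) & 0x3FFFFFF);
}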

56
src/core/jit/execution_engine.h
Normal file
@ -0,0 +1,56 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <memory>
#include "arm64_codegen.h"
#include "block_manager.h"
#include "common/singleton.h"
#include "common/types.h"
#include "register_mapping.h"
#include "x86_64_translator.h"

namespace Core::Jit {

class ExecutionEngine {
public:
    ExecutionEngine();
    ~ExecutionEngine();

    bool ExecuteBlock(VAddr ps4_address);
    CodeBlock* TranslateBlock(VAddr ps4_address);
    void InvalidateBlock(VAddr ps4_address);
    void InvalidateRange(VAddr start, VAddr end);

    bool IsJitCode(void* code_ptr) const;
    VAddr GetPs4AddressForJitCode(void* code_ptr) const;

    void Initialize();
    void Shutdown();
    bool IsInitialized() const {
        return code_buffer != nullptr;
    }

private:
    CodeBlock* TranslateBasicBlock(VAddr start_address, size_t max_instructions = 100);
    void* AllocateCodeBuffer(size_t size);
    void LinkBlock(CodeBlock* block, VAddr target_address);

    std::unique_ptr<BlockManager> block_manager;
    std::unique_ptr<RegisterMapper> register_mapper;
    std::unique_ptr<Arm64CodeGenerator> code_generator;
    std::unique_ptr<X86_64Translator> translator;

    void* code_buffer;
    size_t code_buffer_size;
    size_t code_buffer_used;

    static constexpr size_t DEFAULT_CODE_BUFFER_SIZE = 64_MB;

    friend class BlockManager;
};

using JitEngine = Common::Singleton<ExecutionEngine>;

} // namespace Core::Jit
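A usage sketch of the JitEngine alias, assuming Common::Singleton exposes the usual Instance() accessor; the guest entry address is a placeholder, not a value from the patch:

#include "core/jit/execution_engine.h"

void RunGuestEntry() {
    auto* jit = Core::Jit::JitEngine::Instance();
    jit->Initialize(); // maps the 64 MB code buffer and builds the translator
    if (!jit->ExecuteBlock(/*ps4_address=*/0x400000)) { // hypothetical entry point
        // Translation or lookup failed; caller would fall back or abort here.
    }
    jit->Shutdown();
}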

139
src/core/jit/hle_bridge.cpp
Normal file
@ -0,0 +1,139 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include <cstring>
#include "common/logging/log.h"
#include "hle_bridge.h"

namespace Core::Jit {

HleBridge::HleBridge(Arm64CodeGenerator& codegen, RegisterMapper& reg_mapper)
    : codegen(codegen), reg_mapper(reg_mapper), calling_conv(codegen, reg_mapper) {}

void HleBridge::GenerateBridge(void* hle_func, int int_arg_count, int float_arg_count) {
    // Save caller-saved registers (x86_64: RAX, RCX, RDX, RSI, RDI, R8-R11)
    // These correspond to ARM64: X0-X7, X9-X15 (some are callee-saved, but we save all to be safe)
    SaveCallerSavedRegisters();

    // Map x86_64 arguments to the ARM64 calling convention
    // x86_64 System V ABI: RDI, RSI, RDX, RCX, R8, R9 (integer), XMM0-XMM7 (float)
    // ARM64: X0-X7 (integer), V0-V7 (float)
    MapArguments(int_arg_count, float_arg_count);

    // Call the HLE function
    calling_conv.CallFunction(hle_func);

    // Map return value from ARM64 X0 to x86_64 RAX
    MapReturnValue();

    // Restore caller-saved registers
    RestoreCallerSavedRegisters();
}

void HleBridge::SaveCallerSavedRegisters() {
    // x86_64 caller-saved registers: RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11
    // Map to ARM64 and save them
    // Note: We need to be careful about which registers are actually caller-saved in ARM64
    // ARM64 caller-saved: X0-X7, X9-X15, V0-V7, V16-V31
    // We save the x86_64 registers that map to ARM64 caller-saved registers

    // Save integer registers that are caller-saved
    // RAX -> X0, RCX -> X1, RDX -> X2, RSI -> X3, RDI -> X0 (reused), R8 -> X4, R9 -> X5
    // We save X0-X7 to be safe (they're all caller-saved in ARM64)
    for (int i = 0; i < 8; ++i) {
        codegen.push(i); // Save X0-X7
    }

    // Save XMM registers (V0-V7 in ARM64)
    // x86_64 XMM0-XMM7 map to ARM64 V0-V7
    for (int i = 0; i < 8; ++i) {
        codegen.sub_imm(31, 31, 16); // Decrement stack pointer by 16 bytes
        codegen.str_v(i, 31, 0);     // Store V0-V7
    }
}

void HleBridge::RestoreCallerSavedRegisters() {
    // Restore XMM registers first (reverse order)
    for (int i = 7; i >= 0; --i) {
        codegen.ldr_v(i, 31, 0);     // Load V0-V7
        codegen.add_imm(31, 31, 16); // Increment stack pointer by 16 bytes
    }

    // Restore integer registers (reverse order)
    for (int i = 7; i >= 0; --i) {
        codegen.pop(i); // Restore X0-X7
    }
}

void HleBridge::MapArguments(int int_arg_count, int float_arg_count) {
    // x86_64 System V ABI argument registers:
    // Integer: RDI (arg1), RSI (arg2), RDX (arg3), RCX (arg4), R8 (arg5), R9 (arg6)
    // Float:   XMM0 (arg1) through XMM7 (arg8)
    //
    // ARM64 calling convention:
    // Integer: X0 (arg1) through X7 (arg8)
    // Float:   V0 (arg1) through V7 (arg8)

    // Map integer arguments
    static constexpr X86_64Register x86_int_args[] = {
        X86_64Register::RDI, // arg1
        X86_64Register::RSI, // arg2
        X86_64Register::RDX, // arg3
        X86_64Register::RCX, // arg4
        X86_64Register::R8,  // arg5
        X86_64Register::R9,  // arg6
    };

    for (int i = 0; i < int_arg_count && i < 6; ++i) {
        int mapped_reg = reg_mapper.MapX86_64ToArm64(x86_int_args[i]);
        int arm64_arg_reg = i; // X0, X1, X2, etc.
        if (mapped_reg != arm64_arg_reg) {
            codegen.mov(arm64_arg_reg, mapped_reg);
        }
    }

    // Map floating point arguments
    static constexpr X86_64Register x86_float_args[] = {
        X86_64Register::XMM0, // arg1
        X86_64Register::XMM1, // arg2
        X86_64Register::XMM2, // arg3
        X86_64Register::XMM3, // arg4
        X86_64Register::XMM4, // arg5
        X86_64Register::XMM5, // arg6
        X86_64Register::XMM6, // arg7
        X86_64Register::XMM7, // arg8
    };

    for (int i = 0; i < float_arg_count && i < 8; ++i) {
        int mapped_vreg = reg_mapper.MapX86_64XmmToArm64Neon(x86_float_args[i]);
        int arm64_arg_reg = i; // V0, V1, V2, etc.
        if (mapped_vreg != arm64_arg_reg) {
            codegen.mov_v(arm64_arg_reg, mapped_vreg);
        }
    }
}

void HleBridge::MapReturnValue() {
    // Return value: ARM64 X0 -> x86_64 RAX
    int arm64_return = 0; // X0
    int mapped_rax = reg_mapper.MapX86_64ToArm64(X86_64Register::RAX);
    if (mapped_rax != arm64_return) {
        codegen.mov(mapped_rax, arm64_return);
    }
}

bool HleBridge::IsHleAddress(VAddr address) {
    // TODO: Implement HLE address lookup
    (void)address;
    return false;
}

void* HleBridge::GetHleFunction(VAddr address) {
    // TODO: Implement HLE function lookup
    (void)address;
    return nullptr;
}

} // namespace Core::Jit
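Concretely, for a three-integer-argument HLE call the bridge only needs one move under the mapping that RegisterMapper's constructor builds further down in this patch: RDI already lives in X0, RDX in X2, and only RSI (mapped to X3) has to travel to X1; the return value needs no move since RAX aliases X0. A sketch that checks those assumptions against the mapper (the function name is hypothetical):

#include <cassert>
#include "core/jit/register_mapping.h"

void CheckThreeArgShuffle() {
    using namespace Core::Jit;
    RegisterMapper m;
    assert(m.MapX86_64ToArm64(X86_64Register::RDI) == 0); // arg1 already in X0
    assert(m.MapX86_64ToArm64(X86_64Register::RSI) == 3); // arg2 needs mov x1, x3
    assert(m.MapX86_64ToArm64(X86_64Register::RDX) == 2); // arg3 already in X2
    assert(m.MapX86_64ToArm64(X86_64Register::RAX) == 0); // return: no move needed
}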

40
src/core/jit/hle_bridge.h
Normal file
@ -0,0 +1,40 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include "arm64_codegen.h"
#include "calling_convention.h"
#include "register_mapping.h"

namespace Core::Jit {

class HleBridge {
public:
    explicit HleBridge(Arm64CodeGenerator& codegen, RegisterMapper& reg_mapper);
    ~HleBridge() = default;

    // Generate bridge code to call an HLE function
    // hle_func: Pointer to the HLE function
    // int_arg_count: Number of integer arguments (0-6 for x86_64 System V ABI)
    // float_arg_count: Number of floating point arguments (0-8 for x86_64 System V ABI)
    void GenerateBridge(void* hle_func, int int_arg_count = 0, int float_arg_count = 0);

    // Check if an address is an HLE function
    static bool IsHleAddress(VAddr address);

    // Get HLE function pointer for an address
    static void* GetHleFunction(VAddr address);

private:
    void SaveCallerSavedRegisters();
    void RestoreCallerSavedRegisters();
    void MapArguments(int int_arg_count, int float_arg_count);
    void MapReturnValue();

    Arm64CodeGenerator& codegen;
    RegisterMapper& reg_mapper;
    CallingConvention calling_conv;
};

} // namespace Core::Jit

268
src/core/jit/register_mapping.cpp
Normal file
@ -0,0 +1,268 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include <cstring>
#include "arm64_codegen.h"
#include "common/assert.h"
#include "register_mapping.h"

namespace Core::Jit {

RegisterMapper::RegisterMapper() : register_save_area(nullptr) {
    x86_to_arm64_map.fill(INVALID_MAPPING);
    spilled_registers.fill(false);

    x86_to_arm64_map[static_cast<size_t>(X86_64Register::RAX)] =
        GetArm64RegisterNumber(Arm64Register::X0);
    x86_to_arm64_map[static_cast<size_t>(X86_64Register::RCX)] =
        GetArm64RegisterNumber(Arm64Register::X1);
    x86_to_arm64_map[static_cast<size_t>(X86_64Register::RDX)] =
        GetArm64RegisterNumber(Arm64Register::X2);
    x86_to_arm64_map[static_cast<size_t>(X86_64Register::RBX)] =
        GetArm64RegisterNumber(Arm64Register::X19);
    x86_to_arm64_map[static_cast<size_t>(X86_64Register::RSP)] =
        GetArm64RegisterNumber(Arm64Register::SP);
    x86_to_arm64_map[static_cast<size_t>(X86_64Register::RBP)] =
        GetArm64RegisterNumber(Arm64Register::X29);
    x86_to_arm64_map[static_cast<size_t>(X86_64Register::RSI)] =
        GetArm64RegisterNumber(Arm64Register::X3);
    // Note: RDI shares X0 with RAX (SysV arg1 and the return value alias the same register).
    x86_to_arm64_map[static_cast<size_t>(X86_64Register::RDI)] =
        GetArm64RegisterNumber(Arm64Register::X0);
    x86_to_arm64_map[static_cast<size_t>(X86_64Register::R8)] =
        GetArm64RegisterNumber(Arm64Register::X4);
    x86_to_arm64_map[static_cast<size_t>(X86_64Register::R9)] =
        GetArm64RegisterNumber(Arm64Register::X5);
    x86_to_arm64_map[static_cast<size_t>(X86_64Register::R10)] =
        GetArm64RegisterNumber(Arm64Register::X6);
    x86_to_arm64_map[static_cast<size_t>(X86_64Register::R11)] =
        GetArm64RegisterNumber(Arm64Register::X7);
    x86_to_arm64_map[static_cast<size_t>(X86_64Register::R12)] =
        GetArm64RegisterNumber(Arm64Register::X20);
    x86_to_arm64_map[static_cast<size_t>(X86_64Register::R13)] =
        GetArm64RegisterNumber(Arm64Register::X21);
    x86_to_arm64_map[static_cast<size_t>(X86_64Register::R14)] =
        GetArm64RegisterNumber(Arm64Register::X22);
    x86_to_arm64_map[static_cast<size_t>(X86_64Register::R15)] =
        GetArm64RegisterNumber(Arm64Register::X23);

    x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM0)] =
        GetArm64RegisterNumber(Arm64Register::V0);
    x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM1)] =
        GetArm64RegisterNumber(Arm64Register::V1);
    x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM2)] =
        GetArm64RegisterNumber(Arm64Register::V2);
    x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM3)] =
        GetArm64RegisterNumber(Arm64Register::V3);
    x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM4)] =
        GetArm64RegisterNumber(Arm64Register::V4);
    x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM5)] =
        GetArm64RegisterNumber(Arm64Register::V5);
    x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM6)] =
        GetArm64RegisterNumber(Arm64Register::V6);
    x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM7)] =
        GetArm64RegisterNumber(Arm64Register::V7);
    x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM8)] =
        GetArm64RegisterNumber(Arm64Register::V8);
    x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM9)] =
        GetArm64RegisterNumber(Arm64Register::V9);
    x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM10)] =
        GetArm64RegisterNumber(Arm64Register::V10);
    x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM11)] =
        GetArm64RegisterNumber(Arm64Register::V11);
    x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM12)] =
        GetArm64RegisterNumber(Arm64Register::V12);
    x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM13)] =
        GetArm64RegisterNumber(Arm64Register::V13);
    x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM14)] =
        GetArm64RegisterNumber(Arm64Register::V14);
    x86_to_arm64_map[static_cast<size_t>(X86_64Register::XMM15)] =
        GetArm64RegisterNumber(Arm64Register::V15);

    x86_to_arm64_map[static_cast<size_t>(X86_64Register::FLAGS)] =
        GetArm64RegisterNumber(Arm64Register::X11);
}

int RegisterMapper::MapX86_64ToArm64(X86_64Register x86_reg) {
    size_t index = static_cast<size_t>(x86_reg);
    ASSERT_MSG(index < static_cast<size_t>(X86_64Register::COUNT), "Invalid x86_64 register");
    return x86_to_arm64_map[index];
}

int RegisterMapper::MapX86_64XmmToArm64Neon(X86_64Register xmm_reg) {
    if (!IsXmmRegister(xmm_reg)) {
        return INVALID_MAPPING;
    }
    return MapX86_64ToArm64(xmm_reg);
}

bool RegisterMapper::IsXmmRegister(X86_64Register reg) {
    return reg >= X86_64Register::XMM0 && reg <= X86_64Register::XMM15;
}

void RegisterMapper::SpillRegister(X86_64Register x86_reg) {
    size_t index = static_cast<size_t>(x86_reg);
    ASSERT_MSG(index < static_cast<size_t>(X86_64Register::COUNT), "Invalid x86_64 register");
    spilled_registers[index] = true;
}

void RegisterMapper::ReloadRegister(X86_64Register x86_reg) {
    size_t index = static_cast<size_t>(x86_reg);
    ASSERT_MSG(index < static_cast<size_t>(X86_64Register::COUNT), "Invalid x86_64 register");
    spilled_registers[index] = false;
}

bool RegisterMapper::IsRegisterSpilled(X86_64Register x86_reg) const {
    size_t index = static_cast<size_t>(x86_reg);
    ASSERT_MSG(index < static_cast<size_t>(X86_64Register::COUNT), "Invalid x86_64 register");
    return spilled_registers[index];
}

void RegisterMapper::SaveRegister(Arm64CodeGenerator& codegen, X86_64Register x86_reg,
                                  RegisterContext* ctx) {
    if (!ctx) {
        return;
    }

    int arm64_reg = MapX86_64ToArm64(x86_reg);
    if (arm64_reg == INVALID_MAPPING) {
        return;
    }

    size_t index = static_cast<size_t>(x86_reg);
    if (IsXmmRegister(x86_reg)) {
        int vreg = MapX86_64XmmToArm64Neon(x86_reg);
        if (vreg != INVALID_MAPPING) {
            codegen.movz(SCRATCH_REG,
                         reinterpret_cast<u64>(&ctx->xmm_regs[index - 16][0]) & 0xFFFF);
            codegen.movk(SCRATCH_REG,
                         (reinterpret_cast<u64>(&ctx->xmm_regs[index - 16][0]) >> 16) & 0xFFFF, 16);
            codegen.movk(SCRATCH_REG,
                         (reinterpret_cast<u64>(&ctx->xmm_regs[index - 16][0]) >> 32) & 0xFFFF, 32);
            codegen.movk(SCRATCH_REG,
                         (reinterpret_cast<u64>(&ctx->xmm_regs[index - 16][0]) >> 48) & 0xFFFF, 48);
            codegen.str_v(vreg, SCRATCH_REG, 0);
        }
    } else if (x86_reg == X86_64Register::FLAGS) {
        codegen.movz(SCRATCH_REG, reinterpret_cast<u64>(&ctx->flags) & 0xFFFF);
        codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->flags) >> 16) & 0xFFFF, 16);
        codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->flags) >> 32) & 0xFFFF, 32);
        codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->flags) >> 48) & 0xFFFF, 48);
        codegen.str(arm64_reg, SCRATCH_REG, 0);
    } else if (x86_reg == X86_64Register::RSP || x86_reg == X86_64Register::RBP) {
        if (arm64_reg == STACK_POINTER) {
            codegen.mov(SCRATCH_REG, STACK_POINTER);
            codegen.movz(SCRATCH_REG2, reinterpret_cast<u64>(&ctx->rsp) & 0xFFFF);
            codegen.movk(SCRATCH_REG2, (reinterpret_cast<u64>(&ctx->rsp) >> 16) & 0xFFFF, 16);
            codegen.movk(SCRATCH_REG2, (reinterpret_cast<u64>(&ctx->rsp) >> 32) & 0xFFFF, 32);
            codegen.movk(SCRATCH_REG2, (reinterpret_cast<u64>(&ctx->rsp) >> 48) & 0xFFFF, 48);
            codegen.str(SCRATCH_REG, SCRATCH_REG2, 0);
        } else {
            codegen.movz(SCRATCH_REG, reinterpret_cast<u64>(&ctx->rbp) & 0xFFFF);
            codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->rbp) >> 16) & 0xFFFF, 16);
            codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->rbp) >> 32) & 0xFFFF, 32);
            codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->rbp) >> 48) & 0xFFFF, 48);
            codegen.str(arm64_reg, SCRATCH_REG, 0);
        }
    } else {
        if (index < 16) {
            codegen.movz(SCRATCH_REG, reinterpret_cast<u64>(&ctx->gp_regs[index]) & 0xFFFF);
            codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->gp_regs[index]) >> 16) & 0xFFFF,
                         16);
            codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->gp_regs[index]) >> 32) & 0xFFFF,
                         32);
            codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->gp_regs[index]) >> 48) & 0xFFFF,
                         48);
            codegen.str(arm64_reg, SCRATCH_REG, 0);
        }
    }
}

void RegisterMapper::RestoreRegister(Arm64CodeGenerator& codegen, X86_64Register x86_reg,
                                     RegisterContext* ctx) {
    if (!ctx) {
        return;
    }

    int arm64_reg = MapX86_64ToArm64(x86_reg);
    if (arm64_reg == INVALID_MAPPING) {
        return;
    }

    size_t index = static_cast<size_t>(x86_reg);
    if (IsXmmRegister(x86_reg)) {
        int vreg = MapX86_64XmmToArm64Neon(x86_reg);
        if (vreg != INVALID_MAPPING) {
            codegen.movz(SCRATCH_REG,
                         reinterpret_cast<u64>(&ctx->xmm_regs[index - 16][0]) & 0xFFFF);
            codegen.movk(SCRATCH_REG,
                         (reinterpret_cast<u64>(&ctx->xmm_regs[index - 16][0]) >> 16) & 0xFFFF, 16);
            codegen.movk(SCRATCH_REG,
                         (reinterpret_cast<u64>(&ctx->xmm_regs[index - 16][0]) >> 32) & 0xFFFF, 32);
            codegen.movk(SCRATCH_REG,
                         (reinterpret_cast<u64>(&ctx->xmm_regs[index - 16][0]) >> 48) & 0xFFFF, 48);
            codegen.ldr_v(vreg, SCRATCH_REG, 0);
        }
    } else if (x86_reg == X86_64Register::FLAGS) {
        codegen.movz(SCRATCH_REG, reinterpret_cast<u64>(&ctx->flags) & 0xFFFF);
        codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->flags) >> 16) & 0xFFFF, 16);
        codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->flags) >> 32) & 0xFFFF, 32);
        codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->flags) >> 48) & 0xFFFF, 48);
        codegen.ldr(arm64_reg, SCRATCH_REG, 0);
    } else if (x86_reg == X86_64Register::RSP || x86_reg == X86_64Register::RBP) {
        if (arm64_reg == STACK_POINTER) {
            codegen.movz(SCRATCH_REG, reinterpret_cast<u64>(&ctx->rsp) & 0xFFFF);
            codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->rsp) >> 16) & 0xFFFF, 16);
            codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->rsp) >> 32) & 0xFFFF, 32);
            codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->rsp) >> 48) & 0xFFFF, 48);
            codegen.ldr(SCRATCH_REG2, SCRATCH_REG, 0);
            codegen.mov(STACK_POINTER, SCRATCH_REG2);
        } else {
            codegen.movz(SCRATCH_REG, reinterpret_cast<u64>(&ctx->rbp) & 0xFFFF);
            codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->rbp) >> 16) & 0xFFFF, 16);
            codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->rbp) >> 32) & 0xFFFF, 32);
            codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->rbp) >> 48) & 0xFFFF, 48);
            codegen.ldr(arm64_reg, SCRATCH_REG, 0);
        }
    } else {
        if (index < 16) {
            codegen.movz(SCRATCH_REG, reinterpret_cast<u64>(&ctx->gp_regs[index]) & 0xFFFF);
            codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->gp_regs[index]) >> 16) & 0xFFFF,
                         16);
            codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->gp_regs[index]) >> 32) & 0xFFFF,
                         32);
            codegen.movk(SCRATCH_REG, (reinterpret_cast<u64>(&ctx->gp_regs[index]) >> 48) & 0xFFFF,
                         48);
            codegen.ldr(arm64_reg, SCRATCH_REG, 0);
        }
    }
}

void RegisterMapper::SaveAllRegisters(Arm64CodeGenerator& codegen, RegisterContext* ctx) {
    if (!ctx) {
        return;
    }

    for (int i = 0; i < 16; i++) {
        SaveRegister(codegen, static_cast<X86_64Register>(i), ctx);
    }
    for (int i = 16; i < 32; i++) {
        SaveRegister(codegen, static_cast<X86_64Register>(i), ctx);
    }
    SaveRegister(codegen, X86_64Register::FLAGS, ctx);
}

void RegisterMapper::RestoreAllRegisters(Arm64CodeGenerator& codegen, RegisterContext* ctx) {
    if (!ctx) {
        return;
    }

    RestoreRegister(codegen, X86_64Register::FLAGS, ctx);
    for (int i = 16; i < 32; i++) {
        RestoreRegister(codegen, static_cast<X86_64Register>(i), ctx);
    }
    for (int i = 0; i < 16; i++) {
        RestoreRegister(codegen, static_cast<X86_64Register>(i), ctx);
    }
}

} // namespace Core::Jit
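SaveRegister and RestoreRegister repeat the same four-instruction address-materialization sequence for every RegisterContext field. A small helper could collapse those chains; a sketch under the assumption that Arm64CodeGenerator's movz/movk take (reg, imm16, shift) exactly as used above (the helper itself is hypothetical, not part of the patch):

#include "common/types.h"
#include "core/jit/arm64_codegen.h"

// Hypothetical helper: load an arbitrary 64-bit host pointer into `reg`,
// matching the movz/movk pattern used throughout register_mapping.cpp.
static void EmitLoadAddress(Core::Jit::Arm64CodeGenerator& codegen, int reg, const void* p) {
    const u64 addr = reinterpret_cast<u64>(p);
    codegen.movz(reg, addr & 0xFFFF);
    codegen.movk(reg, (addr >> 16) & 0xFFFF, 16);
    codegen.movk(reg, (addr >> 32) & 0xFFFF, 32);
    codegen.movk(reg, (addr >> 48) & 0xFFFF, 48);
}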

147
src/core/jit/register_mapping.h
Normal file
@ -0,0 +1,147 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <array>
#include "common/types.h"
#include "core/jit/arm64_codegen.h"

namespace Core::Jit {

enum class X86_64Register : u8 {
    RAX = 0, RCX = 1, RDX = 2, RBX = 3, RSP = 4, RBP = 5, RSI = 6, RDI = 7,
    R8 = 8, R9 = 9, R10 = 10, R11 = 11, R12 = 12, R13 = 13, R14 = 14, R15 = 15,
    XMM0 = 16, XMM1 = 17, XMM2 = 18, XMM3 = 19, XMM4 = 20, XMM5 = 21, XMM6 = 22, XMM7 = 23,
    XMM8 = 24, XMM9 = 25, XMM10 = 26, XMM11 = 27, XMM12 = 28, XMM13 = 29, XMM14 = 30, XMM15 = 31,
    FLAGS = 32,
    COUNT = 33
};

enum class Arm64Register : u8 {
    X0 = 0, X1 = 1, X2 = 2, X3 = 3, X4 = 4, X5 = 5, X6 = 6, X7 = 7,
    X8 = 8, X9 = 9, X10 = 10, X11 = 11, X12 = 12, X13 = 13, X14 = 14, X15 = 15,
    X16 = 16, X17 = 17, X18 = 18, X19 = 19, X20 = 20, X21 = 21, X22 = 22, X23 = 23,
    X24 = 24, X25 = 25, X26 = 26, X27 = 27, X28 = 28, X29 = 29, X30 = 30, SP = 31,
    V0 = 32, V1 = 33, V2 = 34, V3 = 35, V4 = 36, V5 = 37, V6 = 38, V7 = 39,
    V8 = 40, V9 = 41, V10 = 42, V11 = 43, V12 = 44, V13 = 45, V14 = 46, V15 = 47,
    COUNT = 48
};

struct RegisterContext {
    u64 gp_regs[16];
    u64 xmm_regs[16][2];
    u64 flags;
    u64 rsp;
    u64 rbp;
};

class RegisterMapper {
public:
    RegisterMapper();

    int MapX86_64ToArm64(X86_64Register x86_reg);
    int MapX86_64XmmToArm64Neon(X86_64Register xmm_reg);
    bool IsXmmRegister(X86_64Register reg);

    void SpillRegister(X86_64Register x86_reg);
    void ReloadRegister(X86_64Register x86_reg);
    bool IsRegisterSpilled(X86_64Register x86_reg) const;

    void SaveAllRegisters(Arm64CodeGenerator& codegen, RegisterContext* ctx);
    void RestoreAllRegisters(Arm64CodeGenerator& codegen, RegisterContext* ctx);
    void SaveRegister(Arm64CodeGenerator& codegen, X86_64Register x86_reg, RegisterContext* ctx);
    void RestoreRegister(Arm64CodeGenerator& codegen, X86_64Register x86_reg, RegisterContext* ctx);

    static constexpr int SCRATCH_REG = 9;
    static constexpr int SCRATCH_REG2 = 10;
    static constexpr int FLAGS_REG = 11;
    static constexpr int STACK_POINTER = 31;

private:
    static constexpr int INVALID_MAPPING = -1;

    std::array<int, static_cast<size_t>(X86_64Register::COUNT)> x86_to_arm64_map;
    std::array<bool, static_cast<size_t>(X86_64Register::COUNT)> spilled_registers;
    void* register_save_area;
};

inline int GetArm64RegisterNumber(Arm64Register reg) {
    return static_cast<int>(reg);
}

inline int GetX86_64RegisterNumber(X86_64Register reg) {
    return static_cast<int>(reg);
}

} // namespace Core::Jit
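A quick check of the table the RegisterMapper constructor builds; note that in this scheme the NEON registers are numbered after the GPRs, so V0 comes back as 32, not 0 (the function name is hypothetical):

#include <cassert>
#include "core/jit/register_mapping.h"

void CheckMappings() {
    using namespace Core::Jit;
    RegisterMapper m;
    assert(m.MapX86_64ToArm64(X86_64Register::RBX) == 19);         // RBX -> X19 (callee-saved)
    assert(m.MapX86_64ToArm64(X86_64Register::RSP) == 31);         // RSP -> SP
    assert(m.MapX86_64XmmToArm64Neon(X86_64Register::XMM0) == 32); // XMM0 -> V0 (= 32 in the enum)
    assert(m.MapX86_64XmmToArm64Neon(X86_64Register::RAX) == -1);  // not an XMM register
}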

206
src/core/jit/simd_translator.cpp
Normal file
@ -0,0 +1,206 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/assert.h"
#include "common/logging/log.h"
#include "register_mapping.h"
#include "simd_translator.h"

namespace Core::Jit {

SimdTranslator::SimdTranslator(Arm64CodeGenerator& codegen, RegisterMapper& reg_mapper)
    : codegen(codegen), reg_mapper(reg_mapper) {}

int SimdTranslator::GetArm64NeonRegister(const ZydisDecodedOperand& operand) {
    if (operand.type != ZYDIS_OPERAND_TYPE_REGISTER) {
        return -1;
    }
    if (operand.reg.value < ZYDIS_REGISTER_XMM0 || operand.reg.value > ZYDIS_REGISTER_XMM15) {
        return -1;
    }
    X86_64Register xmm_reg =
        static_cast<X86_64Register>(static_cast<int>(X86_64Register::XMM0) +
                                    static_cast<int>(operand.reg.value - ZYDIS_REGISTER_XMM0));
    return reg_mapper.MapX86_64XmmToArm64Neon(xmm_reg);
}

void SimdTranslator::LoadMemoryOperandV(int vreg, const ZydisDecodedOperand& mem_op) {
    ASSERT_MSG(mem_op.type == ZYDIS_OPERAND_TYPE_MEMORY, "Expected memory operand");

    // Only base + displacement addressing is handled here; index/scale are ignored.
    int addr_reg = RegisterMapper::SCRATCH_REG;
    codegen.mov(addr_reg, 0);

    if (mem_op.mem.base != ZYDIS_REGISTER_NONE && mem_op.mem.base != ZYDIS_REGISTER_RIP) {
        if (mem_op.mem.base >= ZYDIS_REGISTER_RAX && mem_op.mem.base <= ZYDIS_REGISTER_R15) {
            X86_64Register x86_base =
                static_cast<X86_64Register>(mem_op.mem.base - ZYDIS_REGISTER_RAX);
            if (x86_base < X86_64Register::COUNT) {
                int base_reg = reg_mapper.MapX86_64ToArm64(x86_base);
                codegen.mov(addr_reg, base_reg);
            }
        }
    }

    if (mem_op.mem.disp.value != 0) {
        codegen.add(addr_reg, addr_reg, static_cast<s32>(mem_op.mem.disp.value));
    }

    codegen.ldr_v(vreg, addr_reg, 0);
}

void SimdTranslator::StoreMemoryOperandV(int vreg, const ZydisDecodedOperand& mem_op) {
    ASSERT_MSG(mem_op.type == ZYDIS_OPERAND_TYPE_MEMORY, "Expected memory operand");

    // Same base + displacement scheme as LoadMemoryOperandV.
    int addr_reg = RegisterMapper::SCRATCH_REG;
    codegen.mov(addr_reg, 0);

    if (mem_op.mem.base != ZYDIS_REGISTER_NONE) {
        if (mem_op.mem.base >= ZYDIS_REGISTER_RAX && mem_op.mem.base <= ZYDIS_REGISTER_R15) {
            X86_64Register x86_base =
                static_cast<X86_64Register>(mem_op.mem.base - ZYDIS_REGISTER_RAX);
            if (x86_base < X86_64Register::COUNT) {
                int base_reg = reg_mapper.MapX86_64ToArm64(x86_base);
                codegen.mov(addr_reg, base_reg);
            }
        }
    }

    if (mem_op.mem.disp.value != 0) {
        codegen.add(addr_reg, addr_reg, static_cast<s32>(mem_op.mem.disp.value));
    }

    codegen.str_v(vreg, addr_reg, 0);
}

bool SimdTranslator::TranslateSseInstruction(const ZydisDecodedInstruction& instruction,
                                             const ZydisDecodedOperand* operands) {
    switch (instruction.mnemonic) {
    case ZYDIS_MNEMONIC_MOVAPS:
        return TranslateMovaps(instruction, operands);
    case ZYDIS_MNEMONIC_MOVUPS:
        return TranslateMovups(instruction, operands);
    case ZYDIS_MNEMONIC_ADDPS:
        return TranslateAddps(instruction, operands);
    case ZYDIS_MNEMONIC_SUBPS:
        return TranslateSubps(instruction, operands);
    case ZYDIS_MNEMONIC_MULPS:
        return TranslateMulps(instruction, operands);
    default:
        LOG_WARNING(Core, "Unsupported SSE instruction: {}",
                    ZydisMnemonicGetString(instruction.mnemonic));
        return false;
    }
}

bool SimdTranslator::TranslateMovaps(const ZydisDecodedInstruction& instruction,
                                     const ZydisDecodedOperand* operands) {
    const auto& dst = operands[0];
    const auto& src = operands[1];

    int dst_vreg = GetArm64NeonRegister(dst);
    if (dst_vreg == -1) {
        return false;
    }

    if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
        int src_vreg = GetArm64NeonRegister(src);
        if (src_vreg == -1) {
            return false;
        }
        codegen.mov_v(dst_vreg, src_vreg);
    } else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
        LoadMemoryOperandV(dst_vreg, src);
    } else {
        return false;
    }

    return true;
}

bool SimdTranslator::TranslateMovups(const ZydisDecodedInstruction& instruction,
                                     const ZydisDecodedOperand* operands) {
    // Unaligned moves take the same register/load path as MOVAPS here; ARM64 LDR/STR
    // on NEON registers does not require 16-byte alignment.
    return TranslateMovaps(instruction, operands);
}

bool SimdTranslator::TranslateAddps(const ZydisDecodedInstruction& instruction,
                                    const ZydisDecodedOperand* operands) {
    const auto& dst = operands[0];
    const auto& src = operands[1];

    int dst_vreg = GetArm64NeonRegister(dst);
    if (dst_vreg == -1) {
        return false;
    }

    if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
        int src_vreg = GetArm64NeonRegister(src);
        if (src_vreg == -1) {
            return false;
        }
        codegen.add_v(dst_vreg, dst_vreg, src_vreg);
    } else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
        int scratch_vreg = 8;
        LoadMemoryOperandV(scratch_vreg, src);
        codegen.add_v(dst_vreg, dst_vreg, scratch_vreg);
    } else {
        return false;
    }

    return true;
}

bool SimdTranslator::TranslateSubps(const ZydisDecodedInstruction& instruction,
                                    const ZydisDecodedOperand* operands) {
    const auto& dst = operands[0];
    const auto& src = operands[1];

    int dst_vreg = GetArm64NeonRegister(dst);
    if (dst_vreg == -1) {
        return false;
    }

    if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
        int src_vreg = GetArm64NeonRegister(src);
        if (src_vreg == -1) {
            return false;
        }
        codegen.sub_v(dst_vreg, dst_vreg, src_vreg);
    } else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
        int scratch_vreg = 8;
        LoadMemoryOperandV(scratch_vreg, src);
        codegen.sub_v(dst_vreg, dst_vreg, scratch_vreg);
    } else {
        return false;
    }

    return true;
}

bool SimdTranslator::TranslateMulps(const ZydisDecodedInstruction& instruction,
                                    const ZydisDecodedOperand* operands) {
    const auto& dst = operands[0];
    const auto& src = operands[1];

    int dst_vreg = GetArm64NeonRegister(dst);
    if (dst_vreg == -1) {
        return false;
    }

    if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
        int src_vreg = GetArm64NeonRegister(src);
        if (src_vreg == -1) {
            return false;
        }
        codegen.mul_v(dst_vreg, dst_vreg, src_vreg);
    } else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
        int scratch_vreg = 8;
        LoadMemoryOperandV(scratch_vreg, src);
        codegen.mul_v(dst_vreg, dst_vreg, scratch_vreg);
    } else {
        return false;
    }

    return true;
}

} // namespace Core::Jit
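The register-register paths above lean on a one-to-one correspondence between packed-single SSE ops and NEON: ADDPS, SUBPS, and MULPS become FADD, FSUB, and FMUL on 4S vectors. The same mapping expressed in NEON intrinsics, for reference only (not part of the patch):

#include <arm_neon.h>

// addps xmm0, xmm1  ==  fadd v0.4s, v0.4s, v1.4s
float32x4_t AddPs(float32x4_t a, float32x4_t b) { return vaddq_f32(a, b); }
// subps xmm0, xmm1  ==  fsub v0.4s, v0.4s, v1.4s
float32x4_t SubPs(float32x4_t a, float32x4_t b) { return vsubq_f32(a, b); }
// mulps xmm0, xmm1  ==  fmul v0.4s, v0.4s, v1.4s
float32x4_t MulPs(float32x4_t a, float32x4_t b) { return vmulq_f32(a, b); }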

39
src/core/jit/simd_translator.h
Normal file
@ -0,0 +1,39 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <Zydis/Zydis.h>
#include "arm64_codegen.h"
#include "register_mapping.h"

namespace Core::Jit {

class SimdTranslator {
public:
    explicit SimdTranslator(Arm64CodeGenerator& codegen, RegisterMapper& reg_mapper);

    bool TranslateSseInstruction(const ZydisDecodedInstruction& instruction,
                                 const ZydisDecodedOperand* operands);

    bool TranslateMovaps(const ZydisDecodedInstruction& instruction,
                         const ZydisDecodedOperand* operands);
    bool TranslateMovups(const ZydisDecodedInstruction& instruction,
                         const ZydisDecodedOperand* operands);
    bool TranslateAddps(const ZydisDecodedInstruction& instruction,
                        const ZydisDecodedOperand* operands);
    bool TranslateSubps(const ZydisDecodedInstruction& instruction,
                        const ZydisDecodedOperand* operands);
    bool TranslateMulps(const ZydisDecodedInstruction& instruction,
                        const ZydisDecodedOperand* operands);

private:
    int GetArm64NeonRegister(const ZydisDecodedOperand& operand);
    void LoadMemoryOperandV(int vreg, const ZydisDecodedOperand& mem_op);
    void StoreMemoryOperandV(int vreg, const ZydisDecodedOperand& mem_op);

    Arm64CodeGenerator& codegen;
    RegisterMapper& reg_mapper;
};

} // namespace Core::Jit

850
src/core/jit/x86_64_translator.cpp
Normal file
@ -0,0 +1,850 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <cstring>
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "register_mapping.h"
|
||||
#include "x86_64_translator.h"
|
||||
|
||||
namespace Core::Jit {
|
||||
|
||||
X86_64Translator::X86_64Translator(Arm64CodeGenerator& codegen, RegisterMapper& reg_mapper)
|
||||
: codegen(codegen), reg_mapper(reg_mapper) {}
|
||||
|
||||
bool X86_64Translator::TranslateInstruction(const ZydisDecodedInstruction& instruction,
|
||||
const ZydisDecodedOperand* operands, VAddr address) {
|
||||
switch (instruction.mnemonic) {
|
||||
case ZYDIS_MNEMONIC_MOV:
|
||||
return TranslateMov(instruction, operands);
|
||||
case ZYDIS_MNEMONIC_ADD:
|
||||
return TranslateAdd(instruction, operands);
|
||||
case ZYDIS_MNEMONIC_SUB:
|
||||
return TranslateSub(instruction, operands);
|
||||
case ZYDIS_MNEMONIC_MUL:
|
||||
return TranslateMul(instruction, operands);
|
||||
case ZYDIS_MNEMONIC_DIV:
|
||||
case ZYDIS_MNEMONIC_IDIV:
|
||||
return TranslateDiv(instruction, operands);
|
||||
case ZYDIS_MNEMONIC_AND:
|
||||
return TranslateAnd(instruction, operands);
|
||||
case ZYDIS_MNEMONIC_OR:
|
||||
return TranslateOr(instruction, operands);
|
||||
case ZYDIS_MNEMONIC_XOR:
|
||||
return TranslateXor(instruction, operands);
|
||||
case ZYDIS_MNEMONIC_NOT:
|
||||
return TranslateNot(instruction, operands);
|
||||
case ZYDIS_MNEMONIC_SHL:
|
||||
return TranslateShl(instruction, operands);
|
||||
case ZYDIS_MNEMONIC_SHR:
|
||||
return TranslateShr(instruction, operands);
|
||||
case ZYDIS_MNEMONIC_SAR:
|
||||
return TranslateSar(instruction, operands);
|
||||
case ZYDIS_MNEMONIC_PUSH:
|
||||
return TranslatePush(instruction, operands);
|
||||
case ZYDIS_MNEMONIC_POP:
|
||||
return TranslatePop(instruction, operands);
|
||||
case ZYDIS_MNEMONIC_CALL:
|
||||
return TranslateCall(instruction, operands, address);
|
||||
case ZYDIS_MNEMONIC_RET:
|
||||
return TranslateRet(instruction, operands);
|
||||
case ZYDIS_MNEMONIC_JMP:
|
||||
return TranslateJmp(instruction, operands, address);
|
||||
case ZYDIS_MNEMONIC_CMP:
|
||||
return TranslateCmp(instruction, operands);
|
||||
case ZYDIS_MNEMONIC_TEST:
|
||||
return TranslateTest(instruction, operands);
|
||||
case ZYDIS_MNEMONIC_LEA:
|
||||
return TranslateLea(instruction, operands);
|
||||
default:
|
||||
LOG_ERROR(Core, "Unsupported instruction: {}",
|
||||
ZydisMnemonicGetString(instruction.mnemonic));
|
||||
return false;
|
||||
}
|
||||
}

X86_64Register X86_64Translator::ZydisToX86_64Register(ZydisRegister reg) {
    if (reg >= ZYDIS_REGISTER_RAX && reg <= ZYDIS_REGISTER_R15) {
        return static_cast<X86_64Register>(static_cast<int>(reg - ZYDIS_REGISTER_RAX));
    } else if (reg >= ZYDIS_REGISTER_XMM0 && reg <= ZYDIS_REGISTER_XMM15) {
        return static_cast<X86_64Register>(static_cast<int>(X86_64Register::XMM0) +
                                           static_cast<int>(reg - ZYDIS_REGISTER_XMM0));
    }
    return X86_64Register::COUNT;
}

int X86_64Translator::GetArm64Register(const ZydisDecodedOperand& operand) {
    if (operand.type != ZYDIS_OPERAND_TYPE_REGISTER) {
        return -1;
    }
    X86_64Register x86_reg = ZydisToX86_64Register(operand.reg.value);
    if (x86_reg == X86_64Register::COUNT) {
        return -1;
    }
    return reg_mapper.MapX86_64ToArm64(x86_reg);
}

int X86_64Translator::GetArm64XmmRegister(const ZydisDecodedOperand& operand) {
    if (operand.type != ZYDIS_OPERAND_TYPE_REGISTER) {
        return -1;
    }
    X86_64Register x86_reg = ZydisToX86_64Register(operand.reg.value);
    if (!reg_mapper.IsXmmRegister(x86_reg)) {
        return -1;
    }
    return reg_mapper.MapX86_64XmmToArm64Neon(x86_reg);
}

void X86_64Translator::CalculateMemoryAddress(int dst_reg, const ZydisDecodedOperand& mem_op) {
    ASSERT_MSG(mem_op.type == ZYDIS_OPERAND_TYPE_MEMORY, "Expected memory operand");

    const auto& mem = mem_op.mem;
    int base_reg = -1;
    int index_reg = -1;

    if (mem.base != ZYDIS_REGISTER_NONE && mem.base != ZYDIS_REGISTER_RIP) {
        X86_64Register x86_base = ZydisToX86_64Register(mem.base);
        if (x86_base != X86_64Register::COUNT) {
            base_reg = reg_mapper.MapX86_64ToArm64(x86_base);
        }
    }

    if (mem.index != ZYDIS_REGISTER_NONE) {
        X86_64Register x86_index = ZydisToX86_64Register(mem.index);
        if (x86_index != X86_64Register::COUNT) {
            index_reg = reg_mapper.MapX86_64ToArm64(x86_index);
        }
    }

    s64 displacement = mem.disp.value;

    if (base_reg == -1 && index_reg == -1 && displacement == 0) {
        codegen.mov_imm(dst_reg, 0);
        return;
    }

    if (index_reg == -1) {
        if (base_reg != -1) {
            if (displacement == 0) {
                codegen.mov(dst_reg, base_reg);
            } else if (displacement >= -256 && displacement < 256) {
                codegen.mov(dst_reg, base_reg);
                codegen.add_imm(dst_reg, dst_reg, static_cast<s32>(displacement));
            } else {
                codegen.mov(dst_reg, base_reg);
                // Stage large displacements in the second scratch register so the
                // result stays correct when dst_reg is SCRATCH_REG itself, as it
                // is for Load/StoreMemoryOperand.
                codegen.mov_imm(RegisterMapper::SCRATCH_REG2, displacement);
                codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG2);
            }
        } else {
            codegen.mov_imm(dst_reg, displacement);
        }
        return;
    }

    if (base_reg == -1) {
        base_reg = 0;
    }

    int scale = mem.scale;
    if (scale == 0) {
        scale = 1;
    }

    if (scale == 1) {
        if (displacement == 0) {
            codegen.add(dst_reg, base_reg, index_reg);
        } else if (displacement >= -256 && displacement < 256) {
            codegen.add(dst_reg, base_reg, index_reg);
            codegen.add_imm(dst_reg, dst_reg, static_cast<s32>(displacement));
        } else {
            codegen.add(dst_reg, base_reg, index_reg);
            codegen.mov_imm(RegisterMapper::SCRATCH_REG2, displacement);
            codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG2);
        }
    } else if (scale == 2 || scale == 4 || scale == 8) {
        int shift = (scale == 2) ? 1 : (scale == 4) ? 2 : 3;
        if (displacement == 0) {
            codegen.add(dst_reg, base_reg, index_reg, shift);
        } else {
            codegen.add(dst_reg, base_reg, index_reg, shift);
            if (displacement >= -256 && displacement < 256) {
                codegen.add_imm(dst_reg, dst_reg, static_cast<s32>(displacement));
            } else {
                codegen.mov_imm(RegisterMapper::SCRATCH_REG2, displacement);
                codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG2);
            }
        }
    } else {
        codegen.mov(dst_reg, base_reg);
        codegen.mov_imm(RegisterMapper::SCRATCH_REG2, scale);
        codegen.mul(RegisterMapper::SCRATCH_REG2, index_reg, RegisterMapper::SCRATCH_REG2);
        codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG2);
        if (displacement != 0) {
            if (displacement >= -256 && displacement < 256) {
                codegen.add_imm(dst_reg, dst_reg, static_cast<s32>(displacement));
            } else {
                codegen.mov_imm(RegisterMapper::SCRATCH_REG2, displacement);
                codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG2);
            }
        }
    }
}
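
For reference, the sequences above all materialize the standard x86_64 effective address; a hypothetical scalar equivalent of what the emitted code computes would be:

// ea = base + index * scale + displacement, with scale in {1, 2, 4, 8}
static u64 EffectiveAddress(u64 base, u64 index, u32 scale, s64 disp) {
    return base + index * scale + static_cast<u64>(disp);
}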

void X86_64Translator::LoadMemoryOperand(int dst_reg, const ZydisDecodedOperand& mem_op,
                                         size_t size) {
    CalculateMemoryAddress(RegisterMapper::SCRATCH_REG, mem_op);

    if (mem_op.mem.base == ZYDIS_REGISTER_RIP) {
        LOG_WARNING(Core, "RIP-relative addressing not fully supported in JIT");
    }

    switch (size) {
    case 1:
        codegen.ldrb(dst_reg, RegisterMapper::SCRATCH_REG, 0);
        break;
    case 2:
        codegen.ldrh(dst_reg, RegisterMapper::SCRATCH_REG, 0);
        break;
    case 4:
    case 8:
        codegen.ldr(dst_reg, RegisterMapper::SCRATCH_REG, 0);
        break;
    default:
        ASSERT_MSG(false, "Unsupported memory load size: {}", size);
    }
}

void X86_64Translator::StoreMemoryOperand(int src_reg, const ZydisDecodedOperand& mem_op,
                                          size_t size) {
    CalculateMemoryAddress(RegisterMapper::SCRATCH_REG, mem_op);

    if (mem_op.mem.base == ZYDIS_REGISTER_RIP) {
        LOG_WARNING(Core, "RIP-relative addressing not fully supported in JIT");
    }

    switch (size) {
    case 1:
        codegen.strb(src_reg, RegisterMapper::SCRATCH_REG, 0);
        break;
    case 2:
        codegen.strh(src_reg, RegisterMapper::SCRATCH_REG, 0);
        break;
    case 4:
    case 8:
        codegen.str(src_reg, RegisterMapper::SCRATCH_REG, 0);
        break;
    default:
        ASSERT_MSG(false, "Unsupported memory store size: {}", size);
    }
}

void X86_64Translator::LoadImmediate(int dst_reg, const ZydisDecodedOperand& imm_op) {
    ASSERT_MSG(imm_op.type == ZYDIS_OPERAND_TYPE_IMMEDIATE, "Expected immediate operand");
    s64 value = static_cast<s64>(imm_op.imm.value.s);
    codegen.mov(dst_reg, value);
}

bool X86_64Translator::TranslateMov(const ZydisDecodedInstruction& instruction,
                                    const ZydisDecodedOperand* operands) {
    const auto& dst = operands[0];
    const auto& src = operands[1];

    if (dst.type == ZYDIS_OPERAND_TYPE_REGISTER) {
        int dst_reg = GetArm64Register(dst);
        if (dst_reg == -1) {
            return false;
        }

        if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
            int src_reg = GetArm64Register(src);
            if (src_reg == -1) {
                return false;
            }
            codegen.mov(dst_reg, src_reg);
        } else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
            LoadImmediate(dst_reg, src);
        } else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
            LoadMemoryOperand(dst_reg, src, instruction.operand_width / 8);
        } else {
            return false;
        }
    } else if (dst.type == ZYDIS_OPERAND_TYPE_MEMORY) {
        int src_reg = -1;
        if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
            src_reg = GetArm64Register(src);
            if (src_reg == -1) {
                return false;
            }
        } else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
            LoadImmediate(RegisterMapper::SCRATCH_REG, src);
            src_reg = RegisterMapper::SCRATCH_REG;
        } else {
            return false;
        }
        StoreMemoryOperand(src_reg, dst, instruction.operand_width / 8);
    } else {
        return false;
    }

    return true;
}
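
The operand forms the function above dispatches on, for reference (Intel syntax; the memory-destination immediate case is staged through the scratch register):

// mov rax, rbx      ; reg <- reg
// mov rax, 42       ; reg <- imm
// mov rax, [rbx+8]  ; reg <- mem
// mov [rbx+8], rax  ; mem <- reg
// mov [rbx+8], 42   ; mem <- imm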

bool X86_64Translator::TranslateAdd(const ZydisDecodedInstruction& instruction,
                                    const ZydisDecodedOperand* operands) {
    const auto& dst = operands[0];
    const auto& src = operands[1];

    int dst_reg = GetArm64Register(dst);
    if (dst_reg == -1) {
        return false;
    }

    if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
        int src_reg = GetArm64Register(src);
        if (src_reg == -1) {
            return false;
        }
        codegen.add(dst_reg, dst_reg, src_reg);
    } else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
        s32 imm = static_cast<s32>(src.imm.value.s);
        codegen.add_imm(dst_reg, dst_reg, imm);
    } else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
        LoadMemoryOperand(RegisterMapper::SCRATCH_REG, src, instruction.operand_width / 8);
        codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
    } else {
        return false;
    }

    return true;
}

bool X86_64Translator::TranslateSub(const ZydisDecodedInstruction& instruction,
                                    const ZydisDecodedOperand* operands) {
    const auto& dst = operands[0];
    const auto& src = operands[1];

    int dst_reg = GetArm64Register(dst);
    if (dst_reg == -1) {
        return false;
    }

    if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
        int src_reg = GetArm64Register(src);
        if (src_reg == -1) {
            return false;
        }
        codegen.sub(dst_reg, dst_reg, src_reg);
    } else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
        s32 imm = static_cast<s32>(src.imm.value.s);
        codegen.sub_imm(dst_reg, dst_reg, imm);
    } else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
        LoadMemoryOperand(RegisterMapper::SCRATCH_REG, src, instruction.operand_width / 8);
        codegen.sub(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
    } else {
        return false;
    }

    return true;
}

bool X86_64Translator::TranslateMul(const ZydisDecodedInstruction& instruction,
                                    const ZydisDecodedOperand* operands) {
    const auto& dst = operands[0];

    int dst_reg = GetArm64Register(dst);
    if (dst_reg == -1) {
        return false;
    }

    if (operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER) {
        int src_reg = GetArm64Register(operands[1]);
        if (src_reg == -1) {
            return false;
        }
        codegen.mul(dst_reg, dst_reg, src_reg);
    } else if (operands[1].type == ZYDIS_OPERAND_TYPE_MEMORY) {
        LoadMemoryOperand(RegisterMapper::SCRATCH_REG, operands[1], instruction.operand_width / 8);
        codegen.mul(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
    } else {
        return false;
    }

    return true;
}

bool X86_64Translator::TranslateDiv(const ZydisDecodedInstruction& instruction,
                                    const ZydisDecodedOperand* operands) {
    LOG_WARNING(Core, "DIV instruction translation not fully implemented");
    return false;
}

bool X86_64Translator::TranslateAnd(const ZydisDecodedInstruction& instruction,
                                    const ZydisDecodedOperand* operands) {
    const auto& dst = operands[0];
    const auto& src = operands[1];

    int dst_reg = GetArm64Register(dst);
    if (dst_reg == -1) {
        return false;
    }

    if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
        int src_reg = GetArm64Register(src);
        if (src_reg == -1) {
            return false;
        }
        codegen.and_(dst_reg, dst_reg, src_reg);
    } else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
        u64 imm = static_cast<u64>(src.imm.value.u);
        codegen.and_(dst_reg, dst_reg, imm);
    } else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
        LoadMemoryOperand(RegisterMapper::SCRATCH_REG, src, instruction.operand_width / 8);
        codegen.and_(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
    } else {
        return false;
    }

    return true;
}

bool X86_64Translator::TranslateOr(const ZydisDecodedInstruction& instruction,
                                   const ZydisDecodedOperand* operands) {
    const auto& dst = operands[0];
    const auto& src = operands[1];

    int dst_reg = GetArm64Register(dst);
    if (dst_reg == -1) {
        return false;
    }

    if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
        int src_reg = GetArm64Register(src);
        if (src_reg == -1) {
            return false;
        }
        codegen.orr(dst_reg, dst_reg, src_reg);
    } else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
        u64 imm = static_cast<u64>(src.imm.value.u);
        codegen.orr(dst_reg, dst_reg, imm);
    } else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
        LoadMemoryOperand(RegisterMapper::SCRATCH_REG, src, instruction.operand_width / 8);
        codegen.orr(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
    } else {
        return false;
    }

    return true;
}

bool X86_64Translator::TranslateXor(const ZydisDecodedInstruction& instruction,
                                    const ZydisDecodedOperand* operands) {
    const auto& dst = operands[0];
    const auto& src = operands[1];

    int dst_reg = GetArm64Register(dst);
    if (dst_reg == -1) {
        return false;
    }

    if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
        int src_reg = GetArm64Register(src);
        if (src_reg == -1) {
            return false;
        }
        codegen.eor(dst_reg, dst_reg, src_reg);
    } else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
        u64 imm = static_cast<u64>(src.imm.value.u);
        codegen.eor(dst_reg, dst_reg, imm);
    } else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
        LoadMemoryOperand(RegisterMapper::SCRATCH_REG, src, instruction.operand_width / 8);
        codegen.eor(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
    } else {
        return false;
    }

    return true;
}

bool X86_64Translator::TranslateNot(const ZydisDecodedInstruction& instruction,
                                    const ZydisDecodedOperand* operands) {
    const auto& dst = operands[0];

    int dst_reg = GetArm64Register(dst);
    if (dst_reg == -1) {
        return false;
    }

    codegen.mvn(dst_reg, dst_reg);

    return true;
}

bool X86_64Translator::TranslateShl(const ZydisDecodedInstruction& instruction,
                                    const ZydisDecodedOperand* operands) {
    const auto& dst = operands[0];
    const auto& src = operands[1];

    int dst_reg = GetArm64Register(dst);
    if (dst_reg == -1) {
        return false;
    }

    if (src.type == ZYDIS_OPERAND_TYPE_REGISTER &&
        (src.reg.value == ZYDIS_REGISTER_CL || src.reg.value == ZYDIS_REGISTER_RCX)) {
        int cl_reg = reg_mapper.MapX86_64ToArm64(X86_64Register::RCX);
        codegen.lsl(dst_reg, dst_reg, cl_reg);
    } else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
        u64 shift_val = src.imm.value.u;
        if (shift_val < 64) {
            codegen.lsl(dst_reg, dst_reg, static_cast<u8>(shift_val));
        } else {
            codegen.mov_imm(dst_reg, 0);
        }
    } else {
        return false;
    }

    return true;
}

bool X86_64Translator::TranslateShr(const ZydisDecodedInstruction& instruction,
                                    const ZydisDecodedOperand* operands) {
    const auto& dst = operands[0];
    const auto& src = operands[1];

    int dst_reg = GetArm64Register(dst);
    if (dst_reg == -1) {
        return false;
    }

    if (src.type == ZYDIS_OPERAND_TYPE_REGISTER &&
        (src.reg.value == ZYDIS_REGISTER_CL || src.reg.value == ZYDIS_REGISTER_RCX)) {
        int cl_reg = reg_mapper.MapX86_64ToArm64(X86_64Register::RCX);
        codegen.lsr(dst_reg, dst_reg, cl_reg);
    } else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
        u64 shift_val = src.imm.value.u;
        if (shift_val < 64) {
            codegen.lsr(dst_reg, dst_reg, static_cast<u8>(shift_val));
        } else {
            codegen.mov_imm(dst_reg, 0);
        }
    } else {
        return false;
    }

    return true;
}

bool X86_64Translator::TranslateSar(const ZydisDecodedInstruction& instruction,
                                    const ZydisDecodedOperand* operands) {
    const auto& dst = operands[0];
    const auto& src = operands[1];

    int dst_reg = GetArm64Register(dst);
    if (dst_reg == -1) {
        return false;
    }

    if (src.type == ZYDIS_OPERAND_TYPE_REGISTER &&
        (src.reg.value == ZYDIS_REGISTER_CL || src.reg.value == ZYDIS_REGISTER_RCX)) {
        int cl_reg = reg_mapper.MapX86_64ToArm64(X86_64Register::RCX);
        codegen.asr(dst_reg, dst_reg, cl_reg);
    } else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
        u64 shift_val = src.imm.value.u;
        if (shift_val < 64) {
            codegen.asr(dst_reg, dst_reg, static_cast<u8>(shift_val));
        } else {
            // Arithmetic shifts saturate to the sign bit rather than zero.
            codegen.asr(dst_reg, dst_reg, static_cast<u8>(63));
        }
    } else {
        return false;
    }

    return true;
}
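
Note that x86_64 masks 64-bit shift counts to the low six bits at execution time, so the shift_val >= 64 branches above can only be reached by unmasked raw immediates; a hardware-faithful variant would simply mask first:

u8 masked_count = static_cast<u8>(shift_val & 63); // mirrors the CPU's count masking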

bool X86_64Translator::TranslatePush(const ZydisDecodedInstruction& instruction,
                                     const ZydisDecodedOperand* operands) {
    const auto& src = operands[0];

    int sp_reg = reg_mapper.MapX86_64ToArm64(X86_64Register::RSP);
    codegen.sub_imm(sp_reg, sp_reg, 8);

    if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
        int src_reg = GetArm64Register(src);
        if (src_reg == -1) {
            return false;
        }
        codegen.str(src_reg, sp_reg, 0);
    } else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
        LoadImmediate(RegisterMapper::SCRATCH_REG, src);
        codegen.str(RegisterMapper::SCRATCH_REG, sp_reg, 0);
    } else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
        LoadMemoryOperand(RegisterMapper::SCRATCH_REG, src, instruction.operand_width / 8);
        codegen.str(RegisterMapper::SCRATCH_REG, sp_reg, 0);
    } else {
        return false;
    }

    return true;
}

bool X86_64Translator::TranslatePop(const ZydisDecodedInstruction& instruction,
                                    const ZydisDecodedOperand* operands) {
    const auto& dst = operands[0];

    int dst_reg = GetArm64Register(dst);
    if (dst_reg == -1) {
        return false;
    }

    int sp_reg = reg_mapper.MapX86_64ToArm64(X86_64Register::RSP);
    codegen.ldr(dst_reg, sp_reg, 0);
    codegen.add_imm(sp_reg, sp_reg, 8);

    return true;
}

bool X86_64Translator::TranslateCall(const ZydisDecodedInstruction& instruction,
                                     const ZydisDecodedOperand* operands, VAddr address) {
    const auto& target = operands[0];
    VAddr target_address = 0;
    VAddr return_address = address + instruction.length;

    // Calculate target address based on operand type
    if (target.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
        // Direct relative call: CALL rel32
        // Target = current_address + instruction.length + offset
        s64 offset = static_cast<s64>(target.imm.value.s);
        target_address = address + instruction.length + offset;
    } else if (target.type == ZYDIS_OPERAND_TYPE_MEMORY) {
        // Indirect call: CALL [mem]
        // Load address from memory into scratch register
        LoadMemoryOperand(RegisterMapper::SCRATCH_REG, target, 8);
        // Push return address
        int sp_reg = RegisterMapper::STACK_POINTER;
        codegen.sub_imm(sp_reg, sp_reg, 8); // Decrement stack by 8 bytes
        codegen.mov_imm(RegisterMapper::SCRATCH_REG2, return_address);
        codegen.str(RegisterMapper::SCRATCH_REG2, sp_reg, 0); // Store return address
        // Call via register
        codegen.blr(RegisterMapper::SCRATCH_REG);
        return true;
    } else if (target.type == ZYDIS_OPERAND_TYPE_REGISTER) {
        // Indirect call: CALL reg
        int reg = GetArm64Register(target);
        if (reg == -1) {
            LOG_ERROR(Core, "Invalid register for CALL");
            return false;
        }
        // Push return address
        int sp_reg = RegisterMapper::STACK_POINTER;
        codegen.sub_imm(sp_reg, sp_reg, 8); // Decrement stack by 8 bytes
        codegen.mov_imm(RegisterMapper::SCRATCH_REG, return_address);
        codegen.str(RegisterMapper::SCRATCH_REG, sp_reg, 0); // Store return address
        // Call via register
        codegen.blr(reg);
        return true;
    } else {
        LOG_ERROR(Core, "Unsupported CALL operand type");
        return false;
    }

    // For direct calls, push return address and branch to target
    // Push return address onto stack
    int sp_reg = RegisterMapper::STACK_POINTER;
    codegen.sub_imm(sp_reg, sp_reg, 8); // Decrement stack by 8 bytes (x86_64 stack grows down)
    codegen.mov_imm(RegisterMapper::SCRATCH_REG, return_address);
    codegen.str(RegisterMapper::SCRATCH_REG, sp_reg, 0); // Store return address at [SP]

    // Branch to target (will be linked later if target block is available)
    void* placeholder_target = reinterpret_cast<void*>(target_address);
    codegen.bl(placeholder_target); // Use bl (branch with link) for calls

    return true;
}
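
For reference, the rel32 arithmetic above follows the usual x86_64 rule that the offset is relative to the next instruction. With hypothetical values, a 5-byte CALL at 0x400000 whose immediate decodes to 0xFFB resolves as:

VAddr address = 0x400000;            // instruction address (hypothetical)
s64 offset = 0xFFB;                  // decoded rel32 immediate (hypothetical)
VAddr target = address + 5 + offset; // 0x401000 = next instruction + offset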

bool X86_64Translator::TranslateRet(const ZydisDecodedInstruction& instruction,
                                    const ZydisDecodedOperand* operands) {
    // x86_64 RET pops return address from stack and jumps to it
    int sp_reg = RegisterMapper::STACK_POINTER;
    int scratch_reg = RegisterMapper::SCRATCH_REG;

    // Load return address from stack
    codegen.ldr(scratch_reg, sp_reg, 0); // Load return address from [SP]
    codegen.add_imm(sp_reg, sp_reg, 8);  // Increment stack by 8 bytes (pop)

    // Jump to return address
    codegen.br(scratch_reg);

    return true;
}

bool X86_64Translator::TranslateJmp(const ZydisDecodedInstruction& instruction,
                                    const ZydisDecodedOperand* operands, VAddr address) {
    const auto& target = operands[0];
    VAddr target_address = 0;

    // Calculate target address based on operand type
    if (target.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
        // Direct relative jump: JMP rel32
        // Target = current_address + instruction.length + offset
        s64 offset = static_cast<s64>(target.imm.value.s);
        target_address = address + instruction.length + offset;
    } else if (target.type == ZYDIS_OPERAND_TYPE_MEMORY) {
        // Indirect jump: JMP [mem]
        // Load address from memory into scratch register
        LoadMemoryOperand(RegisterMapper::SCRATCH_REG, target, 8);
        // TODO: don't use a dispatcher
        codegen.br(RegisterMapper::SCRATCH_REG);
        return true;
    } else if (target.type == ZYDIS_OPERAND_TYPE_REGISTER) {
        // Indirect jump: JMP reg
        int reg = GetArm64Register(target);
        if (reg == -1) {
            LOG_ERROR(Core, "Invalid register for JMP");
            return false;
        }
        codegen.br(reg);
        return true;
    } else {
        LOG_ERROR(Core, "Unsupported JMP operand type");
        return false;
    }

    // For direct jumps, we need to branch to the target address.
    // Since the target block may not be translated yet, generate a
    // placeholder that can be patched later during block linking.
    // TODO: Implement proper block linking to patch this with a direct branch

    void* placeholder_target = reinterpret_cast<void*>(target_address);
    codegen.b(placeholder_target);

    return true;
}

bool X86_64Translator::TranslateCmp(const ZydisDecodedInstruction& instruction,
                                    const ZydisDecodedOperand* operands) {
    const auto& dst = operands[0];
    const auto& src = operands[1];

    int dst_reg = GetArm64Register(dst);
    if (dst_reg == -1) {
        return false;
    }

    if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
        int src_reg = GetArm64Register(src);
        if (src_reg == -1) {
            return false;
        }
        codegen.cmp(dst_reg, src_reg);
    } else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
        s32 imm = static_cast<s32>(src.imm.value.s);
        codegen.cmp_imm(dst_reg, imm);
    } else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
        LoadMemoryOperand(RegisterMapper::SCRATCH_REG, src, instruction.operand_width / 8);
        codegen.cmp(dst_reg, RegisterMapper::SCRATCH_REG);
    } else {
        return false;
    }

    return true;
}

bool X86_64Translator::TranslateTest(const ZydisDecodedInstruction& instruction,
                                     const ZydisDecodedOperand* operands) {
    const auto& dst = operands[0];
    const auto& src = operands[1];

    int dst_reg = GetArm64Register(dst);
    if (dst_reg == -1) {
        return false;
    }

    if (src.type == ZYDIS_OPERAND_TYPE_REGISTER) {
        int src_reg = GetArm64Register(src);
        if (src_reg == -1) {
            return false;
        }
        codegen.tst(dst_reg, src_reg);
    } else if (src.type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
        u64 imm = static_cast<u64>(src.imm.value.u);
        codegen.tst(dst_reg, imm);
    } else if (src.type == ZYDIS_OPERAND_TYPE_MEMORY) {
        LoadMemoryOperand(RegisterMapper::SCRATCH_REG, src, instruction.operand_width / 8);
        codegen.tst(dst_reg, RegisterMapper::SCRATCH_REG);
    } else {
        return false;
    }

    return true;
}

bool X86_64Translator::TranslateLea(const ZydisDecodedInstruction& instruction,
                                    const ZydisDecodedOperand* operands) {
    const auto& dst = operands[0];
    const auto& src = operands[1];

    ASSERT_MSG(src.type == ZYDIS_OPERAND_TYPE_MEMORY, "LEA source must be memory");

    int dst_reg = GetArm64Register(dst);
    if (dst_reg == -1) {
        return false;
    }

    CalculateMemoryAddress(dst_reg, src);

    return true;
}

void X86_64Translator::UpdateFlagsForArithmetic(int result_reg, int src1_reg, int src2_reg,
                                                bool is_subtract) {
    int flags_reg = reg_mapper.MapX86_64ToArm64(X86_64Register::FLAGS);

    codegen.cmp_imm(result_reg, 0);

    codegen.mov_imm(RegisterMapper::SCRATCH_REG, 0);

    codegen.b_eq(codegen.getCurr());
    codegen.mov_imm(RegisterMapper::SCRATCH_REG, 1 << 6);
    codegen.b(codegen.getCurr());
}

void X86_64Translator::UpdateFlagsForLogical(int result_reg) {
    codegen.cmp_imm(result_reg, 0);
}

void X86_64Translator::UpdateFlagsForShift(int result_reg, int shift_amount) {
    codegen.cmp_imm(result_reg, 0);
}
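
The (1 << 6) constant above is the architectural position of the zero flag in RFLAGS; a named constant would make the intent clearer:

constexpr u64 kZeroFlagBit = 1ULL << 6; // RFLAGS bit 6 = ZF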

int X86_64Translator::GetConditionCode(ZydisMnemonic mnemonic) {
    switch (mnemonic) {
    case ZYDIS_MNEMONIC_JZ:
        return 0;
    case ZYDIS_MNEMONIC_JNZ:
        return 1;
    case ZYDIS_MNEMONIC_JL:
        return 11;
    case ZYDIS_MNEMONIC_JLE:
        return 13;
    case ZYDIS_MNEMONIC_JNLE:
        return 12;
    case ZYDIS_MNEMONIC_JNL:
        return 10;
    case ZYDIS_MNEMONIC_JB:
        return 3;
    case ZYDIS_MNEMONIC_JBE:
        return 9;
    case ZYDIS_MNEMONIC_JNBE:
        return 8;
    case ZYDIS_MNEMONIC_JNB:
        return 2;
    default:
        return -1;
    }
}

} // namespace Core::Jit
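
The return values above are the standard AArch64 condition-code encodings, so the mapping reads directly:

// EQ = 0, NE = 1, HS = 2, LO = 3, HI = 8, LS = 9, GE = 10, LT = 11, GT = 12, LE = 13
// e.g. GetConditionCode(ZYDIS_MNEMONIC_JZ) == 0 selects a b.eq branch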

80
src/core/jit/x86_64_translator.h
Normal file
@ -0,0 +1,80 @@

// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <Zydis/Zydis.h>
#include "arm64_codegen.h"
#include "common/types.h"
#include "register_mapping.h"

namespace Core::Jit {

class X86_64Translator {
public:
    explicit X86_64Translator(Arm64CodeGenerator& codegen, RegisterMapper& reg_mapper);
    ~X86_64Translator() = default;

    bool TranslateInstruction(const ZydisDecodedInstruction& instruction,
                              const ZydisDecodedOperand* operands, VAddr address);

    bool TranslateMov(const ZydisDecodedInstruction& instruction,
                      const ZydisDecodedOperand* operands);
    bool TranslateAdd(const ZydisDecodedInstruction& instruction,
                      const ZydisDecodedOperand* operands);
    bool TranslateSub(const ZydisDecodedInstruction& instruction,
                      const ZydisDecodedOperand* operands);
    bool TranslateMul(const ZydisDecodedInstruction& instruction,
                      const ZydisDecodedOperand* operands);
    bool TranslateDiv(const ZydisDecodedInstruction& instruction,
                      const ZydisDecodedOperand* operands);
    bool TranslateAnd(const ZydisDecodedInstruction& instruction,
                      const ZydisDecodedOperand* operands);
    bool TranslateOr(const ZydisDecodedInstruction& instruction,
                     const ZydisDecodedOperand* operands);
    bool TranslateXor(const ZydisDecodedInstruction& instruction,
                      const ZydisDecodedOperand* operands);
    bool TranslateNot(const ZydisDecodedInstruction& instruction,
                      const ZydisDecodedOperand* operands);
    bool TranslateShl(const ZydisDecodedInstruction& instruction,
                      const ZydisDecodedOperand* operands);
    bool TranslateShr(const ZydisDecodedInstruction& instruction,
                      const ZydisDecodedOperand* operands);
    bool TranslateSar(const ZydisDecodedInstruction& instruction,
                      const ZydisDecodedOperand* operands);
    bool TranslatePush(const ZydisDecodedInstruction& instruction,
                       const ZydisDecodedOperand* operands);
    bool TranslatePop(const ZydisDecodedInstruction& instruction,
                      const ZydisDecodedOperand* operands);
    bool TranslateCall(const ZydisDecodedInstruction& instruction,
                       const ZydisDecodedOperand* operands, VAddr address);
    bool TranslateRet(const ZydisDecodedInstruction& instruction,
                      const ZydisDecodedOperand* operands);
    bool TranslateJmp(const ZydisDecodedInstruction& instruction,
                      const ZydisDecodedOperand* operands, VAddr address);
    bool TranslateCmp(const ZydisDecodedInstruction& instruction,
                      const ZydisDecodedOperand* operands);
    bool TranslateTest(const ZydisDecodedInstruction& instruction,
                       const ZydisDecodedOperand* operands);
    bool TranslateLea(const ZydisDecodedInstruction& instruction,
                      const ZydisDecodedOperand* operands);

    void UpdateFlagsForArithmetic(int result_reg, int src1_reg, int src2_reg, bool is_subtract);
    void UpdateFlagsForLogical(int result_reg);
    void UpdateFlagsForShift(int result_reg, int shift_amount);
    int GetConditionCode(ZydisMnemonic mnemonic);

private:
    int GetArm64Register(const ZydisDecodedOperand& operand);
    int GetArm64XmmRegister(const ZydisDecodedOperand& operand);
    void LoadMemoryOperand(int dst_reg, const ZydisDecodedOperand& mem_op, size_t size);
    void StoreMemoryOperand(int src_reg, const ZydisDecodedOperand& mem_op, size_t size);
    void LoadImmediate(int dst_reg, const ZydisDecodedOperand& imm_op);
    void CalculateMemoryAddress(int dst_reg, const ZydisDecodedOperand& mem_op);
    X86_64Register ZydisToX86_64Register(ZydisRegister reg);

    Arm64CodeGenerator& codegen;
    RegisterMapper& reg_mapper;
};

} // namespace Core::Jit
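
A minimal usage sketch for the interface above, assuming the default constructors exercised by the tests later in this commit:

Core::Jit::Arm64CodeGenerator codegen;
Core::Jit::RegisterMapper reg_mapper;
Core::Jit::X86_64Translator translator(codegen, reg_mapper);
// Instructions decoded via Zydis can now be fed to translator.TranslateInstruction().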

@ -3,6 +3,7 @@

#include "fiber.h"

#include "common/arch.h"
#include "common/elf_info.h"
#include "common/logging/log.h"
#include "core/libraries/fiber/fiber_error.h"
@ -23,12 +24,34 @@ OrbisFiberContext* GetFiberContext() {
    return Core::GetTcbBase()->tcb_fiber;
}

#ifdef ARCH_X86_64
extern "C" s32 PS4_SYSV_ABI _sceFiberSetJmp(OrbisFiberContext* ctx) asm("_sceFiberSetJmp");
extern "C" s32 PS4_SYSV_ABI _sceFiberLongJmp(OrbisFiberContext* ctx) asm("_sceFiberLongJmp");
extern "C" void PS4_SYSV_ABI _sceFiberSwitchEntry(OrbisFiberData* data,
                                                  bool set_fpu) asm("_sceFiberSwitchEntry");
#elif defined(ARCH_ARM64)
extern "C" s32 PS4_SYSV_ABI _sceFiberSetJmp(OrbisFiberContext* ctx);
extern "C" s32 PS4_SYSV_ABI _sceFiberLongJmp(OrbisFiberContext* ctx);
extern "C" void PS4_SYSV_ABI _sceFiberSwitchEntry(OrbisFiberData* data, bool set_fpu);
#endif
extern "C" void PS4_SYSV_ABI _sceFiberForceQuit(u64 ret) asm("_sceFiberForceQuit");

#ifdef ARCH_ARM64
extern "C" s32 PS4_SYSV_ABI _sceFiberSetJmp(OrbisFiberContext* ctx) {
    UNREACHABLE_MSG("ARM64 fiber implementation not yet complete");
    return 0;
}

extern "C" s32 PS4_SYSV_ABI _sceFiberLongJmp(OrbisFiberContext* ctx) {
    UNREACHABLE_MSG("ARM64 fiber implementation not yet complete");
    return 0;
}

extern "C" void PS4_SYSV_ABI _sceFiberSwitchEntry(OrbisFiberData* data, bool set_fpu) {
    UNREACHABLE_MSG("ARM64 fiber implementation not yet complete");
}
#endif

extern "C" void PS4_SYSV_ABI _sceFiberForceQuit(u64 ret) {
    OrbisFiberContext* g_ctx = GetFiberContext();
    g_ctx->return_val = ret;

@ -318,8 +318,8 @@ void RegisterLib(Core::Loader::SymbolsResolver* sym) {
    LIB_FUNCTION("Mv1zUObHvXI", "libkernel", 1, "libkernel", sceKernelGetSystemSwVersion);
    LIB_FUNCTION("igMefp4SAv0", "libkernel", 1, "libkernel", get_authinfo);
    LIB_FUNCTION("G-MYv5erXaU", "libkernel", 1, "libkernel", sceKernelGetAppInfo);
    LIB_FUNCTION("PfccT7qURYE", "libkernel", 1, "libkernel", kernel_ioctl);
    LIB_FUNCTION("wW+k21cmbwQ", "libkernel", 1, "libkernel", kernel_ioctl);
    LIB_FUNCTION_VARIADIC("PfccT7qURYE", "libkernel", 1, "libkernel", kernel_ioctl);
    LIB_FUNCTION_VARIADIC("wW+k21cmbwQ", "libkernel", 1, "libkernel", kernel_ioctl);
    LIB_FUNCTION("JGfTMBOdUJo", "libkernel", 1, "libkernel", sceKernelGetFsSandboxRandomWord);
    LIB_FUNCTION("6xVpy0Fdq+I", "libkernel", 1, "libkernel", _sigprocmask);
    LIB_FUNCTION("Xjoosiw+XPI", "libkernel", 1, "libkernel", sceKernelUuidCreate);

@ -1,6 +1,7 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/arch.h"
#include "common/assert.h"
#include "core/libraries/kernel/orbis_error.h"
#include "core/libraries/kernel/threads/exception.h"
@ -23,6 +24,7 @@ void SigactionHandler(int signum, siginfo_t* inf, ucontext_t* raw_context) {
    if (handler) {
        auto ctx = Ucontext{};
#ifdef __APPLE__
#ifdef ARCH_X86_64
        const auto& regs = raw_context->uc_mcontext->__ss;
        ctx.uc_mcontext.mc_r8 = regs.__r8;
        ctx.uc_mcontext.mc_r9 = regs.__r9;
@ -42,7 +44,13 @@ void SigactionHandler(int signum, siginfo_t* inf, ucontext_t* raw_context) {
        ctx.uc_mcontext.mc_rsp = regs.__rsp;
        ctx.uc_mcontext.mc_fs = regs.__fs;
        ctx.uc_mcontext.mc_gs = regs.__gs;
#elif defined(ARCH_ARM64)
        UNREACHABLE_MSG("ARM64 exception handling not yet implemented");
#else
#error "Unsupported architecture"
#endif
#else
#ifdef ARCH_X86_64
        const auto& regs = raw_context->uc_mcontext.gregs;
        ctx.uc_mcontext.mc_r8 = regs[REG_R8];
        ctx.uc_mcontext.mc_r9 = regs[REG_R9];
@ -62,6 +70,11 @@ void SigactionHandler(int signum, siginfo_t* inf, ucontext_t* raw_context) {
        ctx.uc_mcontext.mc_rsp = regs[REG_RSP];
        ctx.uc_mcontext.mc_fs = (regs[REG_CSGSFS] >> 32) & 0xFFFF;
        ctx.uc_mcontext.mc_gs = (regs[REG_CSGSFS] >> 16) & 0xFFFF;
#elif defined(ARCH_ARM64)
        UNREACHABLE_MSG("ARM64 exception handling not yet implemented");
#else
#error "Unsupported architecture"
#endif
#endif
        handler(POSIX_SIGUSR1, &ctx);
    }

@ -18,7 +18,13 @@ static std::mutex MutxStaticLock;
#define THR_ADAPTIVE_MUTEX_INITIALIZER ((PthreadMutex*)1)
#define THR_MUTEX_DESTROYED ((PthreadMutex*)2)

#ifdef ARCH_X86_64
#define CPU_SPINWAIT __asm__ volatile("pause")
#elif defined(ARCH_ARM64)
#define CPU_SPINWAIT __asm__ volatile("yield")
#else
#define CPU_SPINWAIT
#endif
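
A typical bounded spin before blocking, to show where CPU_SPINWAIT slots in (illustrative only; the flag variable is hypothetical, not part of this file):

while (!ready.load(std::memory_order_acquire)) {
    CPU_SPINWAIT; // x86_64: pause, ARM64: yield, otherwise a no-op
}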

#define CHECK_AND_INIT_MUTEX \
    if (PthreadMutex* m = *mutex; m <= THR_MUTEX_DESTROYED) [[unlikely]] { \

@ -18,6 +18,7 @@ int PS4_SYSV_ABI internal_snprintf(char* s, size_t n, VA_ARGS) {
    return snprintf_ctx(s, n, &ctx);
}
void RegisterlibSceLibcInternalIo(Core::Loader::SymbolsResolver* sym) {
    LIB_FUNCTION("eLdDw6l0-bU", "libSceLibcInternal", 1, "libSceLibcInternal", internal_snprintf);
    LIB_FUNCTION_VARIADIC("eLdDw6l0-bU", "libSceLibcInternal", 1, "libSceLibcInternal",
                          internal_snprintf);
}
} // namespace Libraries::LibcInternal
@ -19,6 +19,18 @@
        sym->AddSymbol(sr, func); \
    }

#define LIB_FUNCTION_VARIADIC(nid, lib, libversion, mod, function) \
    { \
        Core::Loader::SymbolResolver sr{}; \
        sr.name = nid; \
        sr.library = lib; \
        sr.library_version = libversion; \
        sr.module = mod; \
        sr.type = Core::Loader::SymbolType::Function; \
        auto func = reinterpret_cast<u64>(function); \
        sym->AddSymbol(sr, func); \
    }
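
The new macro is exercised by the registrations changed elsewhere in this commit, e.g.:

LIB_FUNCTION_VARIADIC("eLdDw6l0-bU", "libSceLibcInternal", 1, "libSceLibcInternal", internal_snprintf);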

#define LIB_OBJ(nid, lib, libversion, mod, obj) \
    { \
        Core::Loader::SymbolResolver sr{}; \

@ -20,6 +20,9 @@
#include "core/memory.h"
#include "core/tls.h"
#include "ipc/ipc.h"
#ifdef ARCH_ARM64
#include "core/jit/execution_engine.h"
#endif

namespace Core {

@ -49,6 +52,20 @@ static PS4_SYSV_ABI void* RunMainEntry [[noreturn]] (EntryParams* params) {
                 : "rax", "rsi", "rdi");
    UNREACHABLE();
}
#elif defined(ARCH_ARM64)
static PS4_SYSV_ABI void* RunMainEntry [[noreturn]] (EntryParams* params) {
    auto* jit = Core::Jit::JitEngine::Instance();
    if (jit) {
        // JIT should already be initialized in Emulator::Run(), but check just in case
        if (!jit->IsInitialized()) {
            jit->Initialize();
        }
        jit->ExecuteBlock(params->entry_addr);
    } else {
        LOG_CRITICAL(Core_Linker, "JIT engine not available");
    }
    UNREACHABLE();
}
#endif

Linker::Linker() : memory{Memory::Instance()} {}

@ -6,6 +6,7 @@
#include "common/config.h"
#include "common/debug.h"
#include "core/file_sys/fs.h"
#include "core/jit/execution_engine.h"
#include "core/libraries/kernel/memory.h"
#include "core/libraries/kernel/orbis_error.h"
#include "core/libraries/kernel/process.h"
@ -849,6 +850,15 @@ s64 MemoryManager::ProtectBytes(VAddr addr, VirtualMemoryArea& vma_base, u64 siz

    impl.Protect(addr, size, perms);

#ifdef ARCH_ARM64
    if (True(prot & MemoryProt::CpuWrite) && vma_base.type == VMAType::Code) {
        auto* jit = Core::Jit::JitEngine::Instance();
        if (jit) {
            jit->InvalidateRange(addr, addr + adjusted_size);
        }
    }
#endif

    return adjusted_size;
}

@ -6,6 +6,9 @@
#include "common/decoder.h"
#include "common/signal_context.h"
#include "core/signals.h"
#ifdef ARCH_ARM64
#include "core/jit/execution_engine.h"
#endif

#ifdef _WIN32
#include <windows.h>
@ -79,6 +82,15 @@ static void SignalHandler(int sig, siginfo_t* info, void* raw_context) {
    case SIGSEGV:
    case SIGBUS: {
        const bool is_write = Common::IsWriteError(raw_context);
#ifdef ARCH_ARM64
        auto* jit = Core::Jit::JitEngine::Instance();
        if (jit && jit->IsJitCode(code_address)) {
            VAddr ps4_addr = jit->GetPs4AddressForJitCode(code_address);
            if (ps4_addr != 0) {
                jit->InvalidateBlock(ps4_addr);
            }
        }
#endif
        if (!signals->DispatchAccessViolation(raw_context, info->si_addr)) {
            UNREACHABLE_MSG(
                "Unhandled access violation in thread '{}' at code address {}: {} address {}",
@ -87,13 +99,20 @@ static void SignalHandler(int sig, siginfo_t* info, void* raw_context) {
        }
        break;
    }
    case SIGILL:
    case SIGILL: {
#ifdef ARCH_ARM64
        auto* jit = Core::Jit::JitEngine::Instance();
        if (jit && jit->IsJitCode(code_address)) {
            LOG_ERROR(Core, "Illegal instruction in JIT code at {}", fmt::ptr(code_address));
        }
#endif
        if (!signals->DispatchIllegalInstruction(raw_context)) {
            UNREACHABLE_MSG("Unhandled illegal instruction in thread '{}' at code address {}: {}",
                            GetThreadName(), fmt::ptr(code_address),
                            DisassembleInstruction(code_address));
        }
        break;
    }
    case SIGUSR1: { // Sleep thread until signal is received
        sigset_t sigset;
        sigemptyset(&sigset);

@ -11,8 +11,10 @@
#include <csignal>
#include <pthread.h>
#include <unistd.h>
#ifdef ARCH_X86_64
#include <xmmintrin.h>
#endif
#endif

namespace Core {

@ -126,8 +128,10 @@ void NativeThread::Exit() {

void NativeThread::Initialize() {
    // Set MXCSR and FPUCW registers to the values used by Orbis.
#ifdef ARCH_X86_64
    _mm_setcsr(ORBIS_MXCSR);
    asm volatile("fldcw %0" : : "m"(ORBIS_FPUCW));
#endif
#if _WIN64
    tid = GetCurrentThreadId();
#else

@ -30,6 +30,7 @@
#include "core/file_format/psf.h"
#include "core/file_format/trp.h"
#include "core/file_sys/fs.h"
#include "core/jit/execution_engine.h"
#include "core/libraries/disc_map/disc_map.h"
#include "core/libraries/font/font.h"
#include "core/libraries/font/fontft.h"
@ -261,6 +262,19 @@ void Emulator::Run(std::filesystem::path file, std::vector<std::string> args,
    controller = Common::Singleton<Input::GameController>::Instance();
    linker = Common::Singleton<Core::Linker>::Instance();

#ifdef ARCH_ARM64
    // Initialize JIT engine early for ARM64 builds
    auto* jit = Core::Jit::JitEngine::Instance();
    if (jit) {
        try {
            jit->Initialize();
            LOG_INFO(Loader, "JIT Execution Engine initialized");
        } catch (const std::bad_alloc& e) {
            LOG_CRITICAL(Loader, "Failed to initialize JIT engine: {}", e.what());
        }
    }
#endif

    // Load renderdoc module
    VideoCore::LoadRenderDoc();

@ -4,9 +4,13 @@

#include <unordered_map>
#include <boost/container/flat_map.hpp>
#include "common/arch.h"
#ifdef ARCH_X86_64
#include <xbyak/xbyak.h>
#include <xbyak/xbyak_util.h>
#endif
#include "common/config.h"
#include "common/decoder.h"
#include "common/io_file.h"
#include "common/logging/log.h"
#include "common/path_util.h"
@ -20,22 +24,28 @@
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/ir/srt_gvn_table.h"
#include "shader_recompiler/ir/value.h"
#include "src/common/arch.h"
#include "src/common/decoder.h"

#ifdef ARCH_X86_64
using namespace Xbyak::util;

static Xbyak::CodeGenerator g_srt_codegen(32_MB);
static const u8* g_srt_codegen_start = nullptr;
#endif

namespace Shader {

#ifdef ARCH_X86_64
PFN_SrtWalker RegisterWalkerCode(const u8* ptr, size_t size) {
    const auto func_addr = (PFN_SrtWalker)g_srt_codegen.getCurr();
    g_srt_codegen.db(ptr, size);
    g_srt_codegen.ready();
    return func_addr;
}
#else
PFN_SrtWalker RegisterWalkerCode(const u8* ptr, size_t size) {
    return nullptr;
}
#endif

} // namespace Shader

@ -69,6 +79,7 @@ static void DumpSrtProgram(const Shader::Info& info, const u8* code, size_t code
}

static bool SrtWalkerSignalHandler(void* context, void* fault_address) {
#ifdef ARCH_X86_64
    // Only handle if the fault address is within the SRT code range
    const u8* code_start = g_srt_codegen_start;
    const u8* code_end = code_start + g_srt_codegen.getSize();
@ -117,6 +128,9 @@ static bool SrtWalkerSignalHandler(void* context, void* fault_address) {
    LOG_DEBUG(Render_Recompiler, "Patched SRT walker at {}", code);

    return true;
#else
    return false;
#endif
}

using namespace Shader;
@ -159,6 +173,7 @@ namespace Shader::Optimization {

namespace {

#ifdef ARCH_X86_64
static inline void PushPtr(Xbyak::CodeGenerator& c, u32 off_dw) {
    c.push(rdi);
    c.mov(rdi, ptr[rdi + (off_dw << 2)]);
@ -236,6 +251,9 @@ static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {

    info.srt_info.flattened_bufsize_dw = pass_info.dst_off_dw;
}
#else
static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {}
#endif

}; // namespace

@ -293,7 +311,9 @@ void FlattenExtendedUserdataPass(IR::Program& program) {
    }
}

#ifdef ARCH_X86_64
    GenerateSrtProgram(info, pass_info);
#endif

    // Assign offsets to duplicate readconsts
    for (IR::Inst* readconst : all_readconsts) {

63
tests/CMakeLists.txt
Normal file
@ -0,0 +1,63 @@

# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
# SPDX-License-Identifier: GPL-2.0-or-later

add_executable(jit_tests
    test_arm64_codegen.cpp
    test_register_mapping.cpp
    test_block_manager.cpp
    test_execution_engine.cpp
    test_block_linking.cpp
    test_call_ret.cpp
    test_hle_bridge.cpp
    main.cpp
)

if (ARCHITECTURE STREQUAL "arm64")
    target_sources(jit_tests PRIVATE
        ${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/arm64_codegen.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/arm64_codegen.h
        ${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/register_mapping.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/register_mapping.h
        ${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/block_manager.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/block_manager.h
        ${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/x86_64_translator.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/x86_64_translator.h
        ${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/simd_translator.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/simd_translator.h
        ${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/calling_convention.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/calling_convention.h
        ${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/hle_bridge.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/../src/core/jit/hle_bridge.h
    )
endif()

target_sources(jit_tests PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/../src/common/assert.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../src/common/decoder.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/test_logging_stub.cpp
)

target_link_libraries(jit_tests PRIVATE
    GTest::gtest
    GTest::gtest_main
    GTest::gmock
    Zydis::Zydis
    fmt::fmt
)

target_include_directories(jit_tests PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/../src
    ${CMAKE_CURRENT_SOURCE_DIR}/../externals/zydis/include
)

target_compile_definitions(jit_tests PRIVATE
    ARCH_ARM64
)

# to make ctest work
add_test(NAME JitTests COMMAND jit_tests)

set_tests_properties(JitTests PROPERTIES
    TIMEOUT 60
    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
)
9
tests/main.cpp
Normal file
@ -0,0 +1,9 @@

// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include <gtest/gtest.h>

int main(int argc, char** argv) {
    ::testing::InitGoogleTest(&argc, argv);
    return RUN_ALL_TESTS();
}
111
tests/test_arm64_codegen.cpp
Normal file
@ -0,0 +1,111 @@

// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "core/jit/arm64_codegen.h"
#include <cstring>
#include <gtest/gtest.h>
#include <sys/mman.h>

using namespace Core::Jit;

class Arm64CodeGenTest : public ::testing::Test {
protected:
    void SetUp() override { test_gen = std::make_unique<Arm64CodeGenerator>(); }

    void TearDown() override { test_gen.reset(); }

    std::unique_ptr<Arm64CodeGenerator> test_gen;
};

TEST_F(Arm64CodeGenTest, Constructor) {
    EXPECT_NE(test_gen->getCode(), nullptr);
    EXPECT_EQ(test_gen->getSize(), 0);
}

TEST_F(Arm64CodeGenTest, Reset) {
    test_gen->add(0, 1, 2);
    size_t size_after_add = test_gen->getSize();
    EXPECT_GT(size_after_add, 0);

    test_gen->reset();
    EXPECT_EQ(test_gen->getSize(), 0);
}

TEST_F(Arm64CodeGenTest, AddInstruction) {
    test_gen->add(0, 1, 2); // X0 = X1 + X2
    EXPECT_GT(test_gen->getSize(), 0);
    EXPECT_LE(test_gen->getSize(), 4); // Should be 4 bytes (one instruction)
}

TEST_F(Arm64CodeGenTest, AddImmediate) {
    test_gen->add_imm(0, 1, 42); // X0 = X1 + 42
    EXPECT_GT(test_gen->getSize(), 0);
}

TEST_F(Arm64CodeGenTest, MovRegister) {
    test_gen->mov(0, 1); // X0 = X1
    EXPECT_GT(test_gen->getSize(), 0);
}

TEST_F(Arm64CodeGenTest, MovImmediate) {
    test_gen->mov(0, 0x1234LL); // X0 = 0x1234
    EXPECT_GT(test_gen->getSize(), 0);
    // Large immediate may require multiple instructions
    EXPECT_LE(test_gen->getSize(), 16); // Up to 4 instructions for 64-bit immediate
}

TEST_F(Arm64CodeGenTest, LoadStore) {
    test_gen->ldr(0, 1, 0); // X0 = [X1]
    test_gen->str(0, 1, 0); // [X1] = X0
    EXPECT_GE(test_gen->getSize(), 8); // At least 2 instructions
}

TEST_F(Arm64CodeGenTest, Branch) {
    void* target = test_gen->getCode(); // Branch to start of code
    test_gen->b(target);
    EXPECT_GT(test_gen->getSize(), 0);
}

TEST_F(Arm64CodeGenTest, ConditionalBranch) {
    void* target = test_gen->getCode(); // Branch to start of code
    test_gen->b(0, target); // Branch if equal
    EXPECT_GT(test_gen->getSize(), 0);
}

TEST_F(Arm64CodeGenTest, Compare) {
    test_gen->cmp(0, 1); // Compare X0 and X1
    EXPECT_GT(test_gen->getSize(), 0);
}

TEST_F(Arm64CodeGenTest, ArithmeticOperations) {
    test_gen->add(0, 1, 2);
    test_gen->sub(0, 1, 2);
    test_gen->mul(0, 1, 2);
    test_gen->and_(0, 1, 2);
    test_gen->orr(0, 1, 2);
    test_gen->eor(0, 1, 2);
    EXPECT_GE(test_gen->getSize(), 24); // At least 6 instructions
}

TEST_F(Arm64CodeGenTest, SIMDOperations) {
    test_gen->mov_v(0, 1);    // V0 = V1
    test_gen->add_v(0, 1, 2); // V0 = V1 + V2
    test_gen->sub_v(0, 1, 2); // V0 = V1 - V2
    test_gen->mul_v(0, 1, 2); // V0 = V1 * V2
    EXPECT_GE(test_gen->getSize(), 16); // At least 4 instructions
}

TEST_F(Arm64CodeGenTest, SetSize) {
    test_gen->add(0, 1, 2);
    size_t original_size = test_gen->getSize();
    EXPECT_GT(original_size, 0);

    // Test setting size to 0
    test_gen->setSize(0);
    EXPECT_EQ(test_gen->getSize(), 0);

    // Test setting size back (this should work without throwing)
    test_gen->setSize(original_size);
    EXPECT_EQ(test_gen->getSize(), original_size);
}
247
tests/test_block_linking.cpp
Normal file
@ -0,0 +1,247 @@

// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/decoder.h"
#include "core/jit/arm64_codegen.h"
#include "core/jit/block_manager.h"
#include "core/jit/register_mapping.h"
#include "core/jit/x86_64_translator.h"
#include <gtest/gtest.h>
#include <sys/mman.h>
#if defined(__APPLE__) && defined(ARCH_ARM64)
#include <pthread.h>
#endif

using namespace Core::Jit;

class BlockLinkingTest : public ::testing::Test {
protected:
    void SetUp() override {
        // Allocate executable memory for test code
#if defined(__APPLE__) && defined(ARCH_ARM64)
        test_code_buffer = mmap(nullptr, 64 * 1024, PROT_READ | PROT_WRITE,
                                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        ASSERT_NE(test_code_buffer, MAP_FAILED) << "Failed to allocate executable memory for test";
        pthread_jit_write_protect_np(0);
#else
        test_code_buffer = mmap(nullptr, 64 * 1024, PROT_READ | PROT_WRITE | PROT_EXEC,
                                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        ASSERT_NE(test_code_buffer, MAP_FAILED) << "Failed to allocate executable memory for test";
#endif
        codegen = std::make_unique<Arm64CodeGenerator>(64 * 1024, test_code_buffer);
        register_mapper = std::make_unique<RegisterMapper>();
        translator = std::make_unique<X86_64Translator>(*codegen, *register_mapper);
        block_manager = std::make_unique<BlockManager>();
    }

    void TearDown() override {
        translator.reset();
        register_mapper.reset();
        codegen.reset();
        block_manager.reset();
        if (test_code_buffer != MAP_FAILED) {
            munmap(test_code_buffer, 64 * 1024);
        }
    }

    void* test_code_buffer = MAP_FAILED;
    std::unique_ptr<Arm64CodeGenerator> codegen;
    std::unique_ptr<RegisterMapper> register_mapper;
    std::unique_ptr<X86_64Translator> translator;
    std::unique_ptr<BlockManager> block_manager;
};
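
Context for the pthread_jit_write_protect_np calls in the fixture: Apple Silicon enforces W^X on JIT pages, so a thread toggles a region between writable and executable rather than holding both. The usual pattern (a sketch; strictly speaking it applies to MAP_JIT mappings) is:

pthread_jit_write_protect_np(0); // writable: emit or patch instructions
// ... write code ...
pthread_jit_write_protect_np(1); // executable again before running it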
|
||||
|
||||
// Test that JMP translation can handle direct immediate addresses
|
||||
TEST_F(BlockLinkingTest, TranslateDirectJmp) {
|
||||
// Create a simple x86_64 JMP instruction: JMP +0x1000 (relative jump)
|
||||
// x86_64 encoding: E9 <offset> (near relative jump, 32-bit offset)
|
||||
// E9 00 10 00 00 = JMP +0x1000
|
||||
u8 x86_jmp[] = {0xE9, 0x00, 0x10, 0x00, 0x00};
|
||||
|
||||
ZydisDecodedInstruction instruction;
|
||||
ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];
|
||||
|
||||
ZyanStatus status = Common::Decoder::Instance()->decodeInstruction(
|
||||
instruction, operands, x86_jmp, sizeof(x86_jmp));
|
||||
|
||||
if (!ZYAN_SUCCESS(status)) {
|
||||
GTEST_SKIP()
|
||||
<< "Failed to decode JMP instruction - Zydis may not be available";
|
||||
}
|
||||
|
||||
// JMP translation should succeed (even if target isn't linked yet)
|
||||
bool result = translator->TranslateJmp(instruction, operands, 0x400000);
|
||||
EXPECT_TRUE(result) << "JMP translation should succeed";
|
||||
EXPECT_GT(codegen->getSize(), 0) << "JMP should generate ARM64 code";
|
||||
}
|
||||

// Test that we can create two blocks and link them
TEST_F(BlockLinkingTest, CreateAndLinkBlocks) {
  VAddr block1_addr = 0x400000;
  VAddr block2_addr = 0x401000;

  // Allocate a separate mapping for each block so the two stay independent
#if defined(__APPLE__) && defined(ARCH_ARM64)
  void *block1_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE,
                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  ASSERT_NE(block1_mem, MAP_FAILED);
  pthread_jit_write_protect_np(0);

  void *block2_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE,
                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  ASSERT_NE(block2_mem, MAP_FAILED);
#else
  void *block1_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  ASSERT_NE(block1_mem, MAP_FAILED);

  void *block2_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  ASSERT_NE(block2_mem, MAP_FAILED);
#endif

  // Write simple NOP instructions
  u32 nop = 0xD503201F; // ARM64 NOP
  *reinterpret_cast<u32 *>(block1_mem) = nop;
  *reinterpret_cast<u32 *>(block2_mem) = nop;

#if defined(__APPLE__) && defined(ARCH_ARM64)
  pthread_jit_write_protect_np(1);
  mprotect(block1_mem, 4096, PROT_READ | PROT_EXEC);
  mprotect(block2_mem, 4096, PROT_READ | PROT_EXEC);
#endif

  // Create blocks
  CodeBlock *block1 = block_manager->CreateBlock(block1_addr, block1_mem, 4, 1);
  ASSERT_NE(block1, nullptr);

  CodeBlock *block2 = block_manager->CreateBlock(block2_addr, block2_mem, 4, 1);
  ASSERT_NE(block2, nullptr);

  // Verify blocks exist
  EXPECT_EQ(block_manager->GetBlockCount(), 2);
  EXPECT_NE(block_manager->GetBlock(block1_addr), nullptr);
  EXPECT_NE(block_manager->GetBlock(block2_addr), nullptr);

  // Test that blocks can be retrieved
  CodeBlock *retrieved_block1 = block_manager->GetBlock(block1_addr);
  CodeBlock *retrieved_block2 = block_manager->GetBlock(block2_addr);
  EXPECT_EQ(retrieved_block1, block1);
  EXPECT_EQ(retrieved_block2, block2);

  // Cleanup
  munmap(block1_mem, 4096);
  munmap(block2_mem, 4096);
}
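
// Conceptually (illustrative, not from the original source): "linking" patches
// the terminating branch of one translated block so it jumps straight into
// another block's ARM64 code instead of bouncing through the dispatcher:
//
//   // dispatcher exit:           linked exit:
//   //   MOV X0, #block2_addr       B block2_arm64_code
//   //   B  dispatcher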

// Test that block linking tracks dependencies
TEST_F(BlockLinkingTest, BlockDependencies) {
  VAddr block1_addr = 0x400000;
  VAddr block2_addr = 0x401000;

  // Allocate memory for blocks
#if defined(__APPLE__) && defined(ARCH_ARM64)
  void *block1_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE,
                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  ASSERT_NE(block1_mem, MAP_FAILED);
  pthread_jit_write_protect_np(0);

  void *block2_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE,
                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  ASSERT_NE(block2_mem, MAP_FAILED);

  u32 nop = 0xD503201F;
  *reinterpret_cast<u32 *>(block1_mem) = nop;
  *reinterpret_cast<u32 *>(block2_mem) = nop;

  pthread_jit_write_protect_np(1);
  mprotect(block1_mem, 4096, PROT_READ | PROT_EXEC);
  mprotect(block2_mem, 4096, PROT_READ | PROT_EXEC);
#else
  void *block1_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  ASSERT_NE(block1_mem, MAP_FAILED);

  void *block2_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  ASSERT_NE(block2_mem, MAP_FAILED);

  u32 nop = 0xD503201F;
  *reinterpret_cast<u32 *>(block1_mem) = nop;
  *reinterpret_cast<u32 *>(block2_mem) = nop;
#endif

  // Create blocks
  CodeBlock *block1 = block_manager->CreateBlock(block1_addr, block1_mem, 4, 1);
  CodeBlock *block2 = block_manager->CreateBlock(block2_addr, block2_mem, 4, 1);

  // Add dependency: block1 depends on block2
  block_manager->AddDependency(block1_addr, block2_addr);

  // Verify dependency is tracked
  EXPECT_EQ(block1->dependencies.count(block2_addr), 1);

  // Cleanup
  munmap(block1_mem, 4096);
  munmap(block2_mem, 4096);
}
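
// Reading of the API (an assumption, not stated in the original): "block1
// depends on block2" means block1 branches into block2, so invalidating
// block2 should eventually force block1 to be re-linked or re-translated.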

// Test that invalidating a block invalidates dependent blocks
TEST_F(BlockLinkingTest, InvalidateDependentBlocks) {
  VAddr block1_addr = 0x400000;
  VAddr block2_addr = 0x401000;

  // Allocate memory for blocks
#if defined(__APPLE__) && defined(ARCH_ARM64)
  void *block1_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE,
                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  ASSERT_NE(block1_mem, MAP_FAILED);
  pthread_jit_write_protect_np(0);

  void *block2_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE,
                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  ASSERT_NE(block2_mem, MAP_FAILED);

  u32 nop = 0xD503201F;
  *reinterpret_cast<u32 *>(block1_mem) = nop;
  *reinterpret_cast<u32 *>(block2_mem) = nop;

  pthread_jit_write_protect_np(1);
  mprotect(block1_mem, 4096, PROT_READ | PROT_EXEC);
  mprotect(block2_mem, 4096, PROT_READ | PROT_EXEC);
#else
  void *block1_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  ASSERT_NE(block1_mem, MAP_FAILED);

  void *block2_mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  ASSERT_NE(block2_mem, MAP_FAILED);

  u32 nop = 0xD503201F;
  *reinterpret_cast<u32 *>(block1_mem) = nop;
  *reinterpret_cast<u32 *>(block2_mem) = nop;
#endif

  // Create blocks with dependency
  CodeBlock *block1 = block_manager->CreateBlock(block1_addr, block1_mem, 4, 1);
  CodeBlock *block2 = block_manager->CreateBlock(block2_addr, block2_mem, 4, 1);

  block_manager->AddDependency(block1_addr, block2_addr);

  // Invalidate block2
  block_manager->InvalidateBlock(block2_addr);

  // block2 should be removed
  EXPECT_EQ(block_manager->GetBlock(block2_addr), nullptr);
  // block1 should still exist (dependency tracking doesn't auto-invalidate)
  // But in a real implementation, we might want to invalidate dependents
  EXPECT_NE(block_manager->GetBlock(block1_addr), nullptr);

  // Cleanup
  munmap(block1_mem, 4096);
  munmap(block2_mem, 4096);
}
180
tests/test_block_manager.cpp
Normal file
@ -0,0 +1,180 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "core/jit/block_manager.h"
#include <gtest/gtest.h>
#include <sys/mman.h>
#if defined(__APPLE__) && defined(ARCH_ARM64)
#include <pthread.h>
#endif

using namespace Core::Jit;

class BlockManagerTest : public ::testing::Test {
protected:
  void SetUp() override {
    // Allocate executable memory for test code blocks
#if defined(__APPLE__) && defined(ARCH_ARM64)
    // On macOS ARM64, use the JIT API approach
    test_code = mmap(nullptr, 64 * 1024, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    ASSERT_NE(test_code, MAP_FAILED)
        << "Failed to allocate executable memory for test";
    pthread_jit_write_protect_np(0); // Disable write protection for writing
    // Will make executable later if needed
#else
    test_code = mmap(nullptr, 64 * 1024, PROT_READ | PROT_WRITE | PROT_EXEC,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    ASSERT_NE(test_code, MAP_FAILED)
        << "Failed to allocate executable memory for test";
#endif
  }

  void TearDown() override {
    if (test_code != MAP_FAILED) {
      munmap(test_code, 64 * 1024);
    }
  }

  void *test_code = MAP_FAILED;
};

TEST_F(BlockManagerTest, Constructor) {
  BlockManager manager;
  EXPECT_EQ(manager.GetBlockCount(), 0);
  EXPECT_EQ(manager.GetTotalCodeSize(), 0);
}

TEST_F(BlockManagerTest, CreateBlock) {
  BlockManager manager;
  VAddr ps4_addr = 0x400000;
  void *arm64_code = test_code;
  size_t code_size = 1024;
  size_t instruction_count = 10;

  CodeBlock *block =
      manager.CreateBlock(ps4_addr, arm64_code, code_size, instruction_count);
  ASSERT_NE(block, nullptr);
  EXPECT_EQ(block->ps4_address, ps4_addr);
  EXPECT_EQ(block->arm64_code, arm64_code);
  EXPECT_EQ(block->code_size, code_size);
  EXPECT_EQ(block->instruction_count, instruction_count);
  EXPECT_FALSE(block->is_linked);
  EXPECT_EQ(manager.GetBlockCount(), 1);
  EXPECT_EQ(manager.GetTotalCodeSize(), code_size);
}
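
// For orientation (inferred from the assertions in these tests, not copied
// from the actual header): CodeBlock presumably looks roughly like
//
//   struct CodeBlock {
//     VAddr ps4_address;        // guest address the block was lifted from
//     void *arm64_code;         // host buffer holding the translation
//     size_t code_size;         // bytes of generated ARM64 code
//     size_t instruction_count; // guest instructions covered
//     bool is_linked;           // whether exits were patched to other blocks
//     std::unordered_set<VAddr> dependencies;
//   };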

TEST_F(BlockManagerTest, GetBlock) {
  BlockManager manager;
  VAddr ps4_addr = 0x400000;
  void *arm64_code = test_code;

  // Block doesn't exist yet
  CodeBlock *block = manager.GetBlock(ps4_addr);
  EXPECT_EQ(block, nullptr);

  manager.CreateBlock(ps4_addr, arm64_code, 1024, 10);

  // Now it should exist
  block = manager.GetBlock(ps4_addr);
  ASSERT_NE(block, nullptr);
  EXPECT_EQ(block->ps4_address, ps4_addr);
}

TEST_F(BlockManagerTest, MultipleBlocks) {
  BlockManager manager;

  // Create multiple blocks
  for (int i = 0; i < 10; ++i) {
    VAddr ps4_addr = 0x400000 + (i * 0x1000);
    void *arm64_code = static_cast<char *>(test_code) + (i * 1024);
    manager.CreateBlock(ps4_addr, arm64_code, 1024, 10);
  }

  EXPECT_EQ(manager.GetBlockCount(), 10);
  EXPECT_EQ(manager.GetTotalCodeSize(), 10 * 1024);
}

TEST_F(BlockManagerTest, InvalidateBlock) {
  BlockManager manager;
  VAddr ps4_addr = 0x400000;

  // Create and verify block exists
  manager.CreateBlock(ps4_addr, test_code, 1024, 10);
  EXPECT_NE(manager.GetBlock(ps4_addr), nullptr);

  // Invalidate block
  manager.InvalidateBlock(ps4_addr);

  // Block should no longer exist
  EXPECT_EQ(manager.GetBlock(ps4_addr), nullptr);
  EXPECT_EQ(manager.GetBlockCount(), 0);
  EXPECT_EQ(manager.GetTotalCodeSize(), 0);
}

TEST_F(BlockManagerTest, InvalidateRange) {
  BlockManager manager;

  // Create blocks at different addresses
  manager.CreateBlock(0x400000, test_code, 1024, 10);
  manager.CreateBlock(0x401000, static_cast<char *>(test_code) + 1024, 1024,
                      10);
  manager.CreateBlock(0x402000, static_cast<char *>(test_code) + 2048, 1024,
                      10);
  manager.CreateBlock(0x500000, static_cast<char *>(test_code) + 3072, 1024,
                      10);

  EXPECT_EQ(manager.GetBlockCount(), 4);

  // Invalidate range that covers the first 3 blocks
  manager.InvalidateRange(0x400000, 0x403000);

  // First 3 blocks should be gone, last one should remain
  EXPECT_EQ(manager.GetBlock(0x400000), nullptr);
  EXPECT_EQ(manager.GetBlock(0x401000), nullptr);
  EXPECT_EQ(manager.GetBlock(0x402000), nullptr);
  EXPECT_NE(manager.GetBlock(0x500000), nullptr);
  EXPECT_EQ(manager.GetBlockCount(), 1);
}
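
// Note (an assumption about the API, not stated in the original): passing
// 0x403000 as the end address suggests InvalidateRange treats the range as
// half-open, [start, end), keyed on each block's start address.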

TEST_F(BlockManagerTest, AddDependency) {
  BlockManager manager;
  VAddr block_addr = 0x400000;
  VAddr dep_addr = 0x500000;

  CodeBlock *block = manager.CreateBlock(block_addr, test_code, 1024, 10);
  manager.AddDependency(block_addr, dep_addr);

  EXPECT_EQ(block->dependencies.size(), 1);
  EXPECT_NE(block->dependencies.find(dep_addr), block->dependencies.end());
}

TEST_F(BlockManagerTest, MultipleDependencies) {
  BlockManager manager;
  VAddr block_addr = 0x400000;

  CodeBlock *block = manager.CreateBlock(block_addr, test_code, 1024, 10);
  manager.AddDependency(block_addr, 0x500000);
  manager.AddDependency(block_addr, 0x600000);
  manager.AddDependency(block_addr, 0x700000);

  EXPECT_EQ(block->dependencies.size(), 3);
}

TEST_F(BlockManagerTest, Clear) {
  BlockManager manager;

  // Create multiple blocks
  for (int i = 0; i < 5; ++i) {
    VAddr ps4_addr = 0x400000 + (i * 0x1000);
    void *arm64_code = static_cast<char *>(test_code) + (i * 1024);
    manager.CreateBlock(ps4_addr, arm64_code, 1024, 10);
  }

  EXPECT_EQ(manager.GetBlockCount(), 5);

  manager.Clear();

  EXPECT_EQ(manager.GetBlockCount(), 0);
  EXPECT_EQ(manager.GetTotalCodeSize(), 0);
}
151
tests/test_call_ret.cpp
Normal file
@ -0,0 +1,151 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/decoder.h"
#include "core/jit/arm64_codegen.h"
#include "core/jit/register_mapping.h"
#include "core/jit/x86_64_translator.h"
#include <gtest/gtest.h>
#include <sys/mman.h>
#if defined(__APPLE__) && defined(ARCH_ARM64)
#include <pthread.h>
#endif

using namespace Core::Jit;

class CallRetTest : public ::testing::Test {
protected:
  void SetUp() override {
    // Allocate executable memory for test code
#if defined(__APPLE__) && defined(ARCH_ARM64)
    test_code_buffer = mmap(nullptr, 64 * 1024, PROT_READ | PROT_WRITE,
                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    ASSERT_NE(test_code_buffer, MAP_FAILED)
        << "Failed to allocate executable memory for test";
    pthread_jit_write_protect_np(0);
#else
    test_code_buffer =
        mmap(nullptr, 64 * 1024, PROT_READ | PROT_WRITE | PROT_EXEC,
             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    ASSERT_NE(test_code_buffer, MAP_FAILED)
        << "Failed to allocate executable memory for test";
#endif
    codegen = std::make_unique<Arm64CodeGenerator>(64 * 1024, test_code_buffer);
    register_mapper = std::make_unique<RegisterMapper>();
    translator = std::make_unique<X86_64Translator>(*codegen, *register_mapper);
  }

  void TearDown() override {
    translator.reset();
    register_mapper.reset();
    codegen.reset();
    if (test_code_buffer != MAP_FAILED) {
      munmap(test_code_buffer, 64 * 1024);
    }
  }

  void *test_code_buffer = MAP_FAILED;
  std::unique_ptr<Arm64CodeGenerator> codegen;
  std::unique_ptr<RegisterMapper> register_mapper;
  std::unique_ptr<X86_64Translator> translator;
};

// Test that RET translation generates ARM64 code
TEST_F(CallRetTest, TranslateRet) {
  // x86_64 RET instruction: C3
  u8 x86_ret[] = {0xC3};

  ZydisDecodedInstruction instruction;
  ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];

  ZyanStatus status = Common::Decoder::Instance()->decodeInstruction(
      instruction, operands, x86_ret, sizeof(x86_ret));

  if (!ZYAN_SUCCESS(status)) {
    GTEST_SKIP()
        << "Failed to decode RET instruction - Zydis may not be available";
  }

  // RET translation should succeed
  bool result = translator->TranslateRet(instruction, operands);
  EXPECT_TRUE(result) << "RET translation should succeed";
  EXPECT_GT(codegen->getSize(), 0) << "RET should generate ARM64 code";
}

// Test that CALL translation generates ARM64 code
TEST_F(CallRetTest, TranslateDirectCall) {
  // x86_64 CALL instruction: E8 <offset> (near relative call, 32-bit offset)
  // E8 00 10 00 00 = CALL +0x1000
  u8 x86_call[] = {0xE8, 0x00, 0x10, 0x00, 0x00};

  ZydisDecodedInstruction instruction;
  ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];

  ZyanStatus status = Common::Decoder::Instance()->decodeInstruction(
      instruction, operands, x86_call, sizeof(x86_call));

  if (!ZYAN_SUCCESS(status)) {
    GTEST_SKIP()
        << "Failed to decode CALL instruction - Zydis may not be available";
  }

  // CALL translation should succeed
  bool result = translator->TranslateCall(instruction, operands, 0x400000);
  EXPECT_TRUE(result) << "CALL translation should succeed";
  EXPECT_GT(codegen->getSize(), 0) << "CALL should generate ARM64 code";
}
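
// For reference (illustrative): x86_64 CALL rel32 pushes the address of the
// *next* instruction, so for E8 00 10 00 00 decoded at `pc` the translator
// has to emulate, in effect:
//
//   RSP -= 8;
//   *reinterpret_cast<u64 *>(RSP) = pc + 5; // return address
//   goto pc + 5 + 0x1000;                   // call target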

// Test that CALL pushes return address to stack
TEST_F(CallRetTest, CallPushesReturnAddress) {
  // Simulate a CALL instruction.
  // We need to verify that the stack pointer is decremented and the return
  // address is stored. This is a simplified test; the full implementation
  // will need execution engine integration.

  // For now, just verify CALL generates code
  u8 x86_call[] = {0xE8, 0x00, 0x10, 0x00, 0x00};

  ZydisDecodedInstruction instruction;
  ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];

  ZyanStatus status = Common::Decoder::Instance()->decodeInstruction(
      instruction, operands, x86_call, sizeof(x86_call));

  if (!ZYAN_SUCCESS(status)) {
    GTEST_SKIP() << "Failed to decode CALL instruction";
  }

  size_t size_before = codegen->getSize();
  bool result = translator->TranslateCall(instruction, operands, 0x400000);
  size_t size_after = codegen->getSize();

  EXPECT_TRUE(result);
  EXPECT_GT(size_after, size_before) << "CALL should generate code";
  // CALL should generate more code than a simple branch (it needs stack
  // manipulation)
  EXPECT_GE(size_after - size_before, 4)
      << "CALL should generate multiple instructions";
}

// Test that RET pops return address from stack
TEST_F(CallRetTest, RetPopsReturnAddress) {
  // RET instruction should pop the return address and jump to it
  u8 x86_ret[] = {0xC3};

  ZydisDecodedInstruction instruction;
  ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];

  ZyanStatus status = Common::Decoder::Instance()->decodeInstruction(
      instruction, operands, x86_ret, sizeof(x86_ret));

  if (!ZYAN_SUCCESS(status)) {
    GTEST_SKIP() << "Failed to decode RET instruction";
  }

  size_t size_before = codegen->getSize();
  bool result = translator->TranslateRet(instruction, operands);
  size_t size_after = codegen->getSize();

  EXPECT_TRUE(result);
  EXPECT_GT(size_after, size_before) << "RET should generate code";
}
49
tests/test_execution_engine.cpp
Normal file
@ -0,0 +1,49 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "core/jit/arm64_codegen.h"
#include "core/jit/block_manager.h"
#include "core/jit/register_mapping.h"
#include <gtest/gtest.h>
#include <sys/mman.h>

using namespace Core::Jit;

// NOTE: ExecutionEngine requires MemoryManager and AddressSpace, which have
// heavy dependencies. These tests focus on the components that can be tested
// in isolation; full integration tests would require the complete emulator
// system to be initialized, so they are skipped for now.

class ExecutionEngineComponentTest : public ::testing::Test {
protected:
  void SetUp() override {}

  void TearDown() override {}
};

// Test that the components used by ExecutionEngine can be constructed
TEST_F(ExecutionEngineComponentTest, ComponentConstruction) {
  BlockManager block_manager;
  RegisterMapper register_mapper;
  Arm64CodeGenerator code_generator;

  // All components should construct successfully
  EXPECT_EQ(block_manager.GetBlockCount(), 0);
  EXPECT_NE(code_generator.getCode(), nullptr);
}

// Test block invalidation through BlockManager (used by ExecutionEngine)
TEST_F(ExecutionEngineComponentTest, BlockInvalidation) {
  BlockManager block_manager;
  VAddr test_addr = 0x400000;

  // Invalidation should not crash even if the block doesn't exist
  EXPECT_NO_THROW(block_manager.InvalidateBlock(test_addr));
}

TEST_F(ExecutionEngineComponentTest, BlockInvalidateRange) {
  BlockManager block_manager;

  // Invalidating a range should not crash
  EXPECT_NO_THROW(block_manager.InvalidateRange(0x400000, 0x500000));
}
83
tests/test_hle_bridge.cpp
Normal file
@ -0,0 +1,83 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "core/jit/arm64_codegen.h"
#include "core/jit/hle_bridge.h"
#include "core/jit/register_mapping.h"
#include <gtest/gtest.h>
#include <sys/mman.h>
#if defined(__APPLE__) && defined(ARCH_ARM64)
#include <pthread.h>
#endif

using namespace Core::Jit;

// Simple test HLE function
extern "C" PS4_SYSV_ABI u64 TestHleFunction(u64 arg1, u64 arg2) {
  return arg1 + arg2;
}

class HleBridgeTest : public ::testing::Test {
protected:
  void SetUp() override {
    // Allocate executable memory for test code
#if defined(__APPLE__) && defined(ARCH_ARM64)
    test_code_buffer = mmap(nullptr, 64 * 1024, PROT_READ | PROT_WRITE,
                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    ASSERT_NE(test_code_buffer, MAP_FAILED)
        << "Failed to allocate executable memory for test";
    pthread_jit_write_protect_np(0);
#else
    test_code_buffer =
        mmap(nullptr, 64 * 1024, PROT_READ | PROT_WRITE | PROT_EXEC,
             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    ASSERT_NE(test_code_buffer, MAP_FAILED)
        << "Failed to allocate executable memory for test";
#endif
    codegen = std::make_unique<Arm64CodeGenerator>(64 * 1024, test_code_buffer);
    register_mapper = std::make_unique<RegisterMapper>();
    hle_bridge = std::make_unique<HleBridge>(*codegen, *register_mapper);
  }

  void TearDown() override {
    hle_bridge.reset();
    register_mapper.reset();
    codegen.reset();
    if (test_code_buffer != MAP_FAILED) {
      munmap(test_code_buffer, 64 * 1024);
    }
  }

  void *test_code_buffer = MAP_FAILED;
  std::unique_ptr<Arm64CodeGenerator> codegen;
  std::unique_ptr<RegisterMapper> register_mapper;
  std::unique_ptr<HleBridge> hle_bridge;
};
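
// Conceptually (illustrative, not from the original source): the bridge adapts
// the guest's SysV AMD64 calling convention to the host's AAPCS64. For the
// two-argument function above it would, in effect, move the emulated RDI/RSI
// into X0/X1, save any live caller-saved state, call the host function, and
// write the X0 result back into the emulated RAX.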

// Test that HLE bridge can be constructed
TEST_F(HleBridgeTest, Construction) { EXPECT_NE(hle_bridge, nullptr); }

// Test that we can generate a bridge to an HLE function
TEST_F(HleBridgeTest, GenerateBridge) {
  void *hle_func = reinterpret_cast<void *>(TestHleFunction);

  // Generate bridge code
  hle_bridge->GenerateBridge(hle_func, 2); // 2 integer arguments

  // Should generate some code
  EXPECT_GT(codegen->getSize(), 0) << "HLE bridge should generate code";
}

// Test that bridge preserves caller-saved registers
TEST_F(HleBridgeTest, BridgePreservesRegisters) {
  // This is a placeholder test - full register preservation testing
  // would require execution, which is complex
  void *hle_func = reinterpret_cast<void *>(TestHleFunction);

  size_t size_before = codegen->getSize();
  hle_bridge->GenerateBridge(hle_func, 2);
  size_t size_after = codegen->getSize();

  // Bridge should generate substantial code for register preservation
  EXPECT_GT(size_after - size_before, 8) << "Bridge should preserve registers";
}
25
tests/test_logging_stub.cpp
Normal file
@ -0,0 +1,25 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/logging/types.h"
#include <fmt/format.h>
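
// Rationale (an inference, not stated in the original): the JIT sources call
// into Common::Log, and linking this stub instead of the real backend keeps
// the test binaries free of the full logging machinery.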

namespace Common::Log {

void FmtLogMessageImpl(Class log_class, Level log_level, const char *filename,
                       unsigned int line_num, const char *function,
                       const char *format, const fmt::format_args &args) {
  // Stub implementation - just ignore logs in tests
  (void)log_class;
  (void)log_level;
  (void)filename;
  (void)line_num;
  (void)function;
  (void)format;
  (void)args;
}

void Start() {}
void Stop() {}

} // namespace Common::Log
86
tests/test_register_mapping.cpp
Normal file
@ -0,0 +1,86 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "core/jit/register_mapping.h"
#include <gtest/gtest.h>

using namespace Core::Jit;

class RegisterMappingTest : public ::testing::Test {
protected:
  RegisterMapper mapper;
};

TEST_F(RegisterMappingTest, MapGeneralPurposeRegisters) {
  // Test mapping of common x86_64 registers
  EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::RAX), 0); // X0
  EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::RCX), 1); // X1
  EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::RDX), 2); // X2
  EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::RSI), 3); // X3
  EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::RDI), 0); // X0 (same as RAX)
  EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::R8), 4);  // X4
  EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::R9), 5);  // X5
}
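
// Observation (not from the original source): RAX and RDI sharing X0
// presumably mirrors the calling conventions (SysV passes the first argument
// in RDI and returns values in RAX, while AAPCS64 uses X0 for both), though it
// also means those two guest registers alias the same host register.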

TEST_F(RegisterMappingTest, MapStackPointer) {
  EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::RSP), 31); // SP
}

TEST_F(RegisterMappingTest, MapFramePointer) {
  EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::RBP), 29); // FP
}

TEST_F(RegisterMappingTest, MapCalleeSavedRegisters) {
  EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::RBX), 19); // X19
  EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::R12), 20); // X20
  EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::R13), 21); // X21
  EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::R14), 22); // X22
  EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::R15), 23); // X23
}

TEST_F(RegisterMappingTest, MapFlagsRegister) {
  EXPECT_EQ(mapper.MapX86_64ToArm64(X86_64Register::FLAGS), 11); // X11
}

TEST_F(RegisterMappingTest, MapXMMRegisters) {
  // Test mapping of XMM registers to NEON registers (V registers start at 32)
  EXPECT_EQ(mapper.MapX86_64XmmToArm64Neon(X86_64Register::XMM0), 32); // V0
  EXPECT_EQ(mapper.MapX86_64XmmToArm64Neon(X86_64Register::XMM1), 33); // V1
  EXPECT_EQ(mapper.MapX86_64XmmToArm64Neon(X86_64Register::XMM2), 34); // V2
  EXPECT_EQ(mapper.MapX86_64XmmToArm64Neon(X86_64Register::XMM3), 35); // V3
  EXPECT_EQ(mapper.MapX86_64XmmToArm64Neon(X86_64Register::XMM4), 36); // V4
  EXPECT_EQ(mapper.MapX86_64XmmToArm64Neon(X86_64Register::XMM5), 37); // V5
  EXPECT_EQ(mapper.MapX86_64XmmToArm64Neon(X86_64Register::XMM6), 38); // V6
  EXPECT_EQ(mapper.MapX86_64XmmToArm64Neon(X86_64Register::XMM7), 39); // V7
}

TEST_F(RegisterMappingTest, MapAllXMMRegisters) {
  // Test all 16 XMM registers (V registers start at 32)
  for (int i = 0; i < 16; ++i) {
    X86_64Register xmm_reg =
        static_cast<X86_64Register>(static_cast<int>(X86_64Register::XMM0) + i);
    int neon_reg = mapper.MapX86_64XmmToArm64Neon(xmm_reg);
    EXPECT_EQ(neon_reg, 32 + i) << "XMM" << i << " should map to V" << i
                                << " (register number " << (32 + i) << ")";
  }
}
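
// The XMM mapping exercised above could be implemented as simple arithmetic
// (a sketch under the enum layout these tests assume, not the project's
// actual code):
//
//   int MapX86_64XmmToArm64Neon(X86_64Register reg) {
//     int idx = static_cast<int>(reg) - static_cast<int>(X86_64Register::XMM0);
//     return 32 + idx; // V0..V15 are numbered 32..47 in this scheme
//   }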

TEST_F(RegisterMappingTest, InvalidRegister) {
  // COUNT is not a valid register.
  // NOTE: The implementation uses ASSERT_MSG, which will crash on invalid
  // input, so this test only verifies that valid registers work correctly.
  // Testing invalid registers would require a different implementation that
  // returns error codes. For now, we just verify that the last valid register
  // works.
  int result = mapper.MapX86_64ToArm64(X86_64Register::XMM15);
  EXPECT_GE(result, 0) << "Last valid register should map correctly";
}

TEST_F(RegisterMappingTest, RegisterMappingConsistency) {
  // Test that register mappings are consistent:
  // RAX should always map to the same ARM64 register
  int reg1 = mapper.MapX86_64ToArm64(X86_64Register::RAX);
  int reg2 = mapper.MapX86_64ToArm64(X86_64Register::RAX);
  EXPECT_EQ(reg1, reg2);
}