// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include <cerrno>
#include <cstring>
#include <new>
#include <sys/mman.h>

#include "arm64_codegen.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/types.h"

#if defined(__APPLE__) && defined(ARCH_ARM64)
#include <pthread.h>
#endif

namespace Core::Jit {

static constexpr size_t PAGE_SIZE = 4096;
static constexpr size_t ALIGNMENT = 16;

static size_t alignUp(size_t value, size_t alignment) {
    return (value + alignment - 1) & ~(alignment - 1);
}

static void* allocateExecutableMemory(size_t size) {
    size = alignUp(size, PAGE_SIZE);

#if defined(__APPLE__) && defined(ARCH_ARM64)
    // On macOS ARM64:
    // 1. Allocate with PROT_READ | PROT_WRITE (no PROT_EXEC initially)
    // 2. Use pthread_jit_write_protect_np to allow writing
    // 3. After writing, use mprotect to add PROT_EXEC
    void* ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (ptr == MAP_FAILED) {
        LOG_CRITICAL(Core, "Failed to allocate executable memory: {} (errno={})", strerror(errno),
                     errno);
        return nullptr;
    }
    // Initially disable write protection so we can write code
    pthread_jit_write_protect_np(0);
    return ptr;
#else
    void* ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (ptr == MAP_FAILED) {
        LOG_CRITICAL(Core, "Failed to allocate executable memory: {}", strerror(errno));
        return nullptr;
    }
    return ptr;
#endif
}

Arm64CodeGenerator::Arm64CodeGenerator(size_t buffer_size, void* code_ptr)
    : buffer_size(alignUp(buffer_size, PAGE_SIZE)), owns_buffer(code_ptr == nullptr) {
    if (code_ptr) {
        code_buffer = code_ptr;
        this->code_ptr = code_ptr;
    } else {
        code_buffer = allocateExecutableMemory(buffer_size);
        this->code_ptr = code_buffer;
    }
    if (!code_buffer) {
        throw std::bad_alloc();
    }
}

Arm64CodeGenerator::~Arm64CodeGenerator() {
    if (owns_buffer && code_buffer) {
        munmap(code_buffer, buffer_size);
    }
}

void Arm64CodeGenerator::reset() {
    code_ptr = code_buffer;
    fixups.clear();
}

void Arm64CodeGenerator::setSize(size_t offset) {
    code_ptr = static_cast<u8*>(code_buffer) + offset;
}

void Arm64CodeGenerator::emit32(u32 instruction) {
#if defined(__APPLE__) && defined(ARCH_ARM64)
    // On macOS ARM64, disable write protection before writing
    pthread_jit_write_protect_np(0);
#endif
    u8* curr = static_cast<u8*>(code_ptr);
    u8* end = static_cast<u8*>(code_buffer) + buffer_size;
    ASSERT_MSG(curr + 4 <= end, "Code buffer overflow");
    *reinterpret_cast<u32*>(curr) = instruction;
    code_ptr = curr + 4;
#if defined(__APPLE__) && defined(ARCH_ARM64)
    // Re-enable write protection after writing
    pthread_jit_write_protect_np(1);
#endif
}

void Arm64CodeGenerator::emit64(u64 instruction) {
    emit32(static_cast<u32>(instruction));
    emit32(static_cast<u32>(instruction >> 32));
}

void* Arm64CodeGenerator::allocateCode(size_t size) {
    size = alignUp(size, ALIGNMENT);
    void* result = code_ptr;
    u8* curr = static_cast<u8*>(code_ptr);
    u8* end = static_cast<u8*>(code_buffer) + buffer_size;
    code_ptr = curr + size;
    ASSERT_MSG(static_cast<u8*>(code_ptr) <= end, "Code buffer overflow");
    return result;
}

void Arm64CodeGenerator::makeExecutable() {
    size_t size = getSize();
    size = alignUp(size, PAGE_SIZE);

#if defined(__APPLE__) && defined(ARCH_ARM64)
    // On macOS ARM64, re-enable write protection before making executable
    pthread_jit_write_protect_np(1);
    // Flush instruction cache
    __builtin___clear_cache(static_cast<char*>(code_buffer),
                            static_cast<char*>(code_buffer) + size);
#endif

    if (mprotect(code_buffer, size, PROT_READ | PROT_EXEC) != 0) {
        LOG_CRITICAL(Core, "Failed to make code executable: {}", strerror(errno));
    }
}
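// Usage sketch (illustrative only): the typical flow is reset()/emit* -> makeExecutable() ->
// call into the buffer. The accessor used below to obtain the start of the generated code is
// hypothetical; use whatever arm64_codegen.h actually exposes for the buffer pointer.
//
//   Arm64CodeGenerator gen(16 * 1024, nullptr);
//   gen.mov_imm(0, 42);                    // x0 = 42
//   gen.ret(30);                           // return via x30
//   gen.makeExecutable();                  // drop write, add exec, flush i-cache
//   auto fn = reinterpret_cast<u64 (*)()>(gen.getCodeBuffer()); // hypothetical accessor
//   const u64 result = fn();               // result == 42
//
// The emitters below use x9 as a scratch register: out-of-range offsets and wide immediates
// are materialized into x9 first, so generated code must treat x9 as clobbered.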
// Memory operations

void Arm64CodeGenerator::ldr(int reg, void* addr) {
    // Materialize the absolute address in x9, then load through it
    const u64 address = reinterpret_cast<u64>(addr);
    movz(9, static_cast<u16>(address & 0xFFFF));
    movk(9, static_cast<u16>((address >> 16) & 0xFFFF), 16);
    movk(9, static_cast<u16>((address >> 32) & 0xFFFF), 32);
    movk(9, static_cast<u16>((address >> 48) & 0xFFFF), 48);
    ldr(reg, 9, 0);
}

void Arm64CodeGenerator::ldr(int reg, int base_reg, s32 offset) {
    if (offset >= 0 && offset < 32768 && (offset % 8 == 0)) {
        // LDR (immediate, unsigned offset), 64-bit: imm12 = offset / 8 at bits [21:10]
        emit32(0xF9400000 | (reg << 0) | (base_reg << 5) | ((offset / 8) << 10));
    } else {
        mov_imm(9, offset);
        add(9, base_reg, 9);
        ldr(reg, 9, 0);
    }
}

void Arm64CodeGenerator::ldrh(int reg, int base_reg, s32 offset) {
    if (offset >= 0 && offset < 8192 && (offset % 2 == 0)) {
        // LDRH (immediate, unsigned offset): imm12 = offset / 2 at bits [21:10]
        emit32(0x79400000 | (reg << 0) | (base_reg << 5) | ((offset / 2) << 10));
    } else {
        mov_imm(9, offset);
        add(9, base_reg, 9);
        ldrh(reg, 9, 0);
    }
}

void Arm64CodeGenerator::ldrb(int reg, int base_reg, s32 offset) {
    if (offset >= 0 && offset < 4096) {
        // LDRB (immediate, unsigned offset): imm12 = offset at bits [21:10]
        emit32(0x39400000 | (reg << 0) | (base_reg << 5) | (offset << 10));
    } else {
        mov_imm(9, offset);
        add(9, base_reg, 9);
        ldrb(reg, 9, 0);
    }
}

void Arm64CodeGenerator::ldp(int reg1, int reg2, int base_reg, s32 offset) {
    if (offset >= -256 && offset < 256 && (offset % 8 == 0)) {
        // LDP (signed offset), 64-bit: imm7 = offset / 8, two's complement, at bits [21:15]
        const s32 scaled_offset = offset / 8;
        const u32 imm7 = static_cast<u32>(scaled_offset) & 0x7F;
        emit32(0xA9400000 | (reg1 << 0) | (reg2 << 10) | (base_reg << 5) | (imm7 << 15));
    } else {
        mov_imm(9, offset);
        add(9, base_reg, 9);
        ldp(reg1, reg2, 9, 0);
    }
}

void Arm64CodeGenerator::str(int reg, void* addr) {
    // Materialize the absolute address in x9, then store through it
    const u64 address = reinterpret_cast<u64>(addr);
    movz(9, static_cast<u16>(address & 0xFFFF));
    movk(9, static_cast<u16>((address >> 16) & 0xFFFF), 16);
    movk(9, static_cast<u16>((address >> 32) & 0xFFFF), 32);
    movk(9, static_cast<u16>((address >> 48) & 0xFFFF), 48);
    str(reg, 9, 0);
}

void Arm64CodeGenerator::str(int reg, int base_reg, s32 offset) {
    if (offset >= 0 && offset < 32768 && (offset % 8 == 0)) {
        emit32(0xF9000000 | (reg << 0) | (base_reg << 5) | ((offset / 8) << 10));
    } else {
        mov_imm(9, offset);
        add(9, base_reg, 9);
        str(reg, 9, 0);
    }
}

void Arm64CodeGenerator::strh(int reg, int base_reg, s32 offset) {
    if (offset >= 0 && offset < 8192 && (offset % 2 == 0)) {
        emit32(0x79000000 | (reg << 0) | (base_reg << 5) | ((offset / 2) << 10));
    } else {
        mov_imm(9, offset);
        add(9, base_reg, 9);
        strh(reg, 9, 0);
    }
}

void Arm64CodeGenerator::strb(int reg, int base_reg, s32 offset) {
    if (offset >= 0 && offset < 4096) {
        emit32(0x39000000 | (reg << 0) | (base_reg << 5) | (offset << 10));
    } else {
        mov_imm(9, offset);
        add(9, base_reg, 9);
        strb(reg, 9, 0);
    }
}

void Arm64CodeGenerator::stp(int reg1, int reg2, int base_reg, s32 offset) {
    if (offset >= -256 && offset < 256 && (offset % 8 == 0)) {
        // STP (signed offset), 64-bit: imm7 = offset / 8, two's complement, at bits [21:15]
        const s32 scaled_offset = offset / 8;
        const u32 imm7 = static_cast<u32>(scaled_offset) & 0x7F;
        emit32(0xA9000000 | (reg1 << 0) | (reg2 << 10) | (base_reg << 5) | (imm7 << 15));
    } else {
        mov_imm(9, offset);
        add(9, base_reg, 9);
        stp(reg1, reg2, 9, 0);
    }
}

// Arithmetic operations
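// All arithmetic and logical helpers below emit the 64-bit (X register) forms of the
// instructions; W register variants are not provided. ADD/SUB take a plain 12-bit unsigned
// immediate, while logical immediates require ARM64 bitmask (N:immr:imms) encoding, which
// this generator does not implement, so they fall back to x9 and the register forms.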
void Arm64CodeGenerator::add(int dst, int src1, int src2) {
    emit32(0x8B000000 | (dst << 0) | (src1 << 5) | (src2 << 16));
}

void Arm64CodeGenerator::add(int dst, int src1, int src2, int shift) {
    ASSERT_MSG(shift >= 0 && shift <= 3, "Invalid shift amount");
    // ADD (shifted register) with LSL #shift; imm6 lives at bits [15:10]
    emit32(0x8B000000 | (dst << 0) | (src1 << 5) | (src2 << 16) | (shift << 10));
}

void Arm64CodeGenerator::add_imm(int dst, int src1, s32 imm) {
    if (imm >= 0 && imm < 4096) {
        emit32(0x91000000 | (dst << 0) | (src1 << 5) | (imm << 10));
    } else if (imm < 0 && imm > -4096) {
        sub_imm(dst, src1, -imm);
    } else {
        mov_imm(9, imm);
        add(dst, src1, 9);
    }
}

void Arm64CodeGenerator::sub(int dst, int src1, int src2) {
    emit32(0xCB000000 | (dst << 0) | (src1 << 5) | (src2 << 16));
}

void Arm64CodeGenerator::sub_imm(int dst, int src1, s32 imm) {
    if (imm >= 0 && imm < 4096) {
        emit32(0xD1000000 | (dst << 0) | (src1 << 5) | (imm << 10));
    } else if (imm < 0 && imm > -4096) {
        add_imm(dst, src1, -imm);
    } else {
        mov_imm(9, imm);
        sub(dst, src1, 9);
    }
}

void Arm64CodeGenerator::mul(int dst, int src1, int src2) {
    // MADD with XZR as the addend
    emit32(0x9B007C00 | (dst << 0) | (src1 << 5) | (src2 << 16));
}

void Arm64CodeGenerator::sdiv(int dst, int src1, int src2) {
    emit32(0x9AC00C00 | (dst << 0) | (src1 << 5) | (src2 << 16));
}

void Arm64CodeGenerator::udiv(int dst, int src1, int src2) {
    emit32(0x9AC00800 | (dst << 0) | (src1 << 5) | (src2 << 16));
}

void Arm64CodeGenerator::and_(int dst, int src1, int src2) {
    emit32(0x8A000000 | (dst << 0) | (src1 << 5) | (src2 << 16));
}

void Arm64CodeGenerator::and_(int dst, int src1, u64 imm) {
    // Logical immediates need bitmask encoding (not implemented); go through x9
    mov_imm(9, static_cast<s64>(imm));
    and_(dst, src1, 9);
}

void Arm64CodeGenerator::orr(int dst, int src1, int src2) {
    emit32(0xAA000000 | (dst << 0) | (src1 << 5) | (src2 << 16));
}

void Arm64CodeGenerator::orr(int dst, int src1, u64 imm) {
    // Go through x9 (bitmask immediate encoding not implemented)
    mov_imm(9, static_cast<s64>(imm));
    orr(dst, src1, 9);
}

void Arm64CodeGenerator::eor(int dst, int src1, int src2) {
    emit32(0xCA000000 | (dst << 0) | (src1 << 5) | (src2 << 16));
}

void Arm64CodeGenerator::eor(int dst, int src1, u64 imm) {
    // Go through x9 (bitmask immediate encoding not implemented)
    mov_imm(9, static_cast<s64>(imm));
    eor(dst, src1, 9);
}

void Arm64CodeGenerator::mvn(int dst, int src) {
    // MVN is an alias of ORN Xd, XZR, Xm
    emit32(0xAA2003E0 | (dst << 0) | (src << 16));
}

void Arm64CodeGenerator::lsl(int dst, int src1, int src2) {
    emit32(0x9AC02000 | (dst << 0) | (src1 << 5) | (src2 << 16));
}

void Arm64CodeGenerator::lsl(int dst, int src1, u8 shift) {
    ASSERT_MSG(shift < 64, "Shift amount must be < 64");
    // LSL #shift is an alias of UBFM Xd, Xn, #((64 - shift) % 64), #(63 - shift)
    const u32 immr = (64 - shift) & 0x3F;
    const u32 imms = 63 - shift;
    emit32(0xD3400000 | (dst << 0) | (src1 << 5) | (immr << 16) | (imms << 10));
}

void Arm64CodeGenerator::lsr(int dst, int src1, int src2) {
    emit32(0x9AC02400 | (dst << 0) | (src1 << 5) | (src2 << 16));
}

void Arm64CodeGenerator::lsr(int dst, int src1, u8 shift) {
    ASSERT_MSG(shift < 64, "Shift amount must be < 64");
    // LSR #shift is an alias of UBFM Xd, Xn, #shift, #63
    emit32(0xD3400000 | (dst << 0) | (src1 << 5) | (static_cast<u32>(shift) << 16) | (63u << 10));
}

void Arm64CodeGenerator::asr(int dst, int src1, int src2) {
    emit32(0x9AC02800 | (dst << 0) | (src1 << 5) | (src2 << 16));
}

void Arm64CodeGenerator::asr(int dst, int src1, u8 shift) {
    ASSERT_MSG(shift < 64, "Shift amount must be < 64");
    // ASR #shift is an alias of SBFM Xd, Xn, #shift, #63
    emit32(0x93400000 | (dst << 0) | (src1 << 5) | (static_cast<u32>(shift) << 16) | (63u << 10));
}

// Move operations
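// mov_imm builds 64-bit constants with MOVZ plus up to three MOVK instructions
// (one per non-zero 16-bit halfword); small negative values use a single MOVN.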
void Arm64CodeGenerator::mov(int dst, int src) {
    if (dst != src) {
        // MOV (register) is an alias of ORR Xd, XZR, Xm
        emit32(0xAA0003E0 | (dst << 0) | (src << 16));
    }
}

void Arm64CodeGenerator::mov_imm(int dst, s64 imm) {
    if (imm >= 0 && imm <= 0xFFFF) {
        movz(dst, static_cast<u16>(imm));
    } else if (imm >= -0x10000 && imm < 0) {
        movn(dst, static_cast<u16>(-imm - 1));
    } else {
        movz(dst, static_cast<u16>(imm & 0xFFFF));
        if ((imm >> 16) & 0xFFFF) {
            movk(dst, static_cast<u16>((imm >> 16) & 0xFFFF), 16);
        }
        if ((imm >> 32) & 0xFFFF) {
            movk(dst, static_cast<u16>((imm >> 32) & 0xFFFF), 32);
        }
        if ((imm >> 48) & 0xFFFF) {
            movk(dst, static_cast<u16>((imm >> 48) & 0xFFFF), 48);
        }
    }
}

void Arm64CodeGenerator::movz(int dst, u16 imm, u8 shift) {
    ASSERT_MSG(shift % 16 == 0 && shift < 64, "Shift must be multiple of 16 and < 64");
    emit32(0xD2800000 | (dst << 0) | (imm << 5) | ((shift / 16) << 21));
}

void Arm64CodeGenerator::movk(int dst, u16 imm, u8 shift) {
    ASSERT_MSG(shift % 16 == 0 && shift < 64, "Shift must be multiple of 16 and < 64");
    emit32(0xF2800000 | (dst << 0) | (imm << 5) | ((shift / 16) << 21));
}

void Arm64CodeGenerator::movn(int dst, u16 imm, u8 shift) {
    ASSERT_MSG(shift % 16 == 0 && shift < 64, "Shift must be multiple of 16 and < 64");
    emit32(0x92800000 | (dst << 0) | (imm << 5) | ((shift / 16) << 21));
}

// Compare operations

void Arm64CodeGenerator::cmp(int reg1, int reg2) {
    // CMP is an alias of SUBS XZR, Xn, Xm
    emit32(0xEB000000 | (31 << 0) | (reg1 << 5) | (reg2 << 16));
}

void Arm64CodeGenerator::cmp_imm(int reg, s32 imm) {
    if (imm >= 0 && imm < 4096) {
        emit32(0xF1000000 | (31 << 0) | (reg << 5) | (imm << 10));
    } else {
        mov_imm(9, imm);
        cmp(reg, 9);
    }
}

void Arm64CodeGenerator::tst(int reg1, int reg2) {
    // TST is an alias of ANDS XZR, Xn, Xm
    emit32(0xEA000000 | (31 << 0) | (reg1 << 5) | (reg2 << 16));
}

void Arm64CodeGenerator::tst(int reg, u64 imm) {
    // Logical immediates need bitmask encoding (not implemented); go through x9
    mov_imm(9, static_cast<s64>(imm));
    tst(reg, 9);
}

// Branch operations

void Arm64CodeGenerator::b(void* target) {
    const s64 offset = reinterpret_cast<s64>(target) - reinterpret_cast<s64>(code_ptr);
    if (offset >= -0x8000000 && offset < 0x8000000) {
        // B has a +/-128 MiB range (imm26 * 4)
        const s32 imm26 = static_cast<s32>(offset / 4);
        emit32(0x14000000 | (imm26 & 0x3FFFFFF));
    } else {
        // Out of range: load the target into x9 and branch indirectly
        const u64 target_addr = reinterpret_cast<u64>(target);
        movz(9, static_cast<u16>(target_addr & 0xFFFF));
        movk(9, static_cast<u16>((target_addr >> 16) & 0xFFFF), 16);
        movk(9, static_cast<u16>((target_addr >> 32) & 0xFFFF), 32);
        movk(9, static_cast<u16>((target_addr >> 48) & 0xFFFF), 48);
        br(9);
    }
}

void Arm64CodeGenerator::b(int condition, void* target) {
    const s64 offset = reinterpret_cast<s64>(target) - reinterpret_cast<s64>(code_ptr);
    if (offset >= -0x100000 && offset < 0x100000) {
        // B.cond has a +/-1 MiB range (imm19 * 4)
        const s32 imm19 = static_cast<s32>(offset / 4);
        emit32(0x54000000 | (condition << 0) | ((imm19 & 0x7FFFF) << 5));
    } else {
        // Out of range: skip the absolute-branch sequence below with the inverted
        // condition, then load the target into x9 and branch indirectly.
        emit32(0x54000000 | ((condition ^ 1) << 0) | (6 << 5)); // branch over the next 5 instructions
        const u64 target_addr = reinterpret_cast<u64>(target);
        movz(9, static_cast<u16>(target_addr & 0xFFFF));
        movk(9, static_cast<u16>((target_addr >> 16) & 0xFFFF), 16);
        movk(9, static_cast<u16>((target_addr >> 32) & 0xFFFF), 32);
        movk(9, static_cast<u16>((target_addr >> 48) & 0xFFFF), 48);
        br(9);
    }
}

void Arm64CodeGenerator::bl(void* target) {
    const s64 offset = reinterpret_cast<s64>(target) - reinterpret_cast<s64>(code_ptr);
    if (offset >= -0x8000000 && offset < 0x8000000) {
        // BL has a +/-128 MiB range (imm26 * 4)
        const s32 imm26 = static_cast<s32>(offset / 4);
        emit32(0x94000000 | (imm26 & 0x3FFFFFF));
    } else {
        // Out of range: load the target into x9 and branch-and-link indirectly
        const u64 target_addr = reinterpret_cast<u64>(target);
        movz(9, static_cast<u16>(target_addr & 0xFFFF));
        movk(9, static_cast<u16>((target_addr >> 16) & 0xFFFF), 16);
        movk(9, static_cast<u16>((target_addr >> 32) & 0xFFFF), 32);
        movk(9, static_cast<u16>((target_addr >> 48) & 0xFFFF), 48);
        blr(9);
    }
}

void Arm64CodeGenerator::br(int reg) {
    emit32(0xD61F0000 | (reg << 5));
}

void Arm64CodeGenerator::blr(int reg) {
    emit32(0xD63F0000 | (reg << 5));
}

void Arm64CodeGenerator::ret(int reg) {
    emit32(0xD65F0000 | (reg << 5));
}

// Conditional branches
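// ARM64 condition codes used by the helpers below:
// EQ=0, NE=1, HS/CS=2, LO/CC=3, MI=4, PL=5, VS=6, VC=7,
// HI=8, LS=9, GE=10, LT=11, GT=12, LE=13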
void Arm64CodeGenerator::b_eq(void* target) {
    b(0, target);
}

void Arm64CodeGenerator::b_ne(void* target) {
    b(1, target);
}

void Arm64CodeGenerator::b_lt(void* target) {
    b(11, target);
}

void Arm64CodeGenerator::b_le(void* target) {
    b(13, target);
}

void Arm64CodeGenerator::b_gt(void* target) {
    b(12, target);
}

void Arm64CodeGenerator::b_ge(void* target) {
    b(10, target);
}

void Arm64CodeGenerator::b_lo(void* target) {
    b(3, target);
}

void Arm64CodeGenerator::b_ls(void* target) {
    b(9, target);
}

void Arm64CodeGenerator::b_hi(void* target) {
    b(8, target);
}

void Arm64CodeGenerator::b_hs(void* target) {
    b(2, target);
}

// Stack operations
// Register 31 is interpreted as SP by ADD/SUB (immediate) and as the load/store base,
// so the immediate forms are used to adjust the stack pointer.

void Arm64CodeGenerator::push(int reg) {
    sub_imm(31, 31, 16);
    str(reg, 31, 0);
}

void Arm64CodeGenerator::push(int reg1, int reg2) {
    sub_imm(31, 31, 16);
    stp(reg1, reg2, 31, 0);
}

void Arm64CodeGenerator::pop(int reg) {
    ldr(reg, 31, 0);
    add_imm(31, 31, 16);
}

void Arm64CodeGenerator::pop(int reg1, int reg2) {
    ldp(reg1, reg2, 31, 0);
    add_imm(31, 31, 16);
}

// System operations

void Arm64CodeGenerator::nop() {
    emit32(0xD503201F);
}

void Arm64CodeGenerator::brk(u16 imm) {
    emit32(0xD4200000 | (imm << 5));
}

// NEON/SIMD operations

void Arm64CodeGenerator::ldr_v(int vreg, int base_reg, s32 offset) {
    if (offset >= 0 && offset < 4096 && (offset % 16 == 0)) {
        // LDR (immediate, SIMD&FP, 128-bit): imm12 = offset / 16 at bits [21:10]
        emit32(0x3DC00000 | (vreg << 0) | (base_reg << 5) | ((offset / 16) << 10));
    } else {
        mov_imm(9, offset);
        add(9, base_reg, 9);
        ldr_v(vreg, 9, 0);
    }
}

void Arm64CodeGenerator::str_v(int vreg, int base_reg, s32 offset) {
    if (offset >= 0 && offset < 4096 && (offset % 16 == 0)) {
        // STR (immediate, SIMD&FP, 128-bit): imm12 = offset / 16 at bits [21:10]
        emit32(0x3D800000 | (vreg << 0) | (base_reg << 5) | ((offset / 16) << 10));
    } else {
        mov_imm(9, offset);
        add(9, base_reg, 9);
        str_v(vreg, 9, 0);
    }
}

void Arm64CodeGenerator::mov_v(int vdst, int vsrc) {
    // MOV (vector) is an alias of ORR Vd.16B, Vn.16B, Vn.16B
    emit32(0x4EA01C00 | (vdst << 0) | (vsrc << 5) | (vsrc << 16));
}

void Arm64CodeGenerator::add_v(int vdst, int vsrc1, int vsrc2) {
    // ADD Vd.16B, Vn.16B, Vm.16B
    emit32(0x4E208400 | (vdst << 0) | (vsrc1 << 5) | (vsrc2 << 16));
}

void Arm64CodeGenerator::sub_v(int vdst, int vsrc1, int vsrc2) {
    // SUB Vd.16B, Vn.16B, Vm.16B
    emit32(0x6E208400 | (vdst << 0) | (vsrc1 << 5) | (vsrc2 << 16));
}

void Arm64CodeGenerator::mul_v(int vdst, int vsrc1, int vsrc2) {
    // MUL Vd.16B, Vn.16B, Vm.16B
    emit32(0x4E209C00 | (vdst << 0) | (vsrc1 << 5) | (vsrc2 << 16));
}

} // namespace Core::Jit