better block linking with delinkers; memory operand optims

This commit is contained in:
AlpinDale 2025-12-09 07:16:22 +04:30
parent c2c82646a6
commit b76e05d5dd
6 changed files with 174 additions and 27 deletions

View File

@ -228,6 +228,11 @@ void Arm64CodeGenerator::add(int dst, int src1, int src2) {
emit32(0x8B000000 | (dst << 0) | (src1 << 5) | (src2 << 16));
}
// ADD (shifted register, 64-bit): dst = src1 + (src2 << shift).
// Encoding 0x8B000000: sf=1, shift type LSL (bits 23:22 = 00), Rm at 16,
// imm6 (shift amount) at bits 15:10, Rn at 5, Rd at 0.
// shift is limited to 0-3 here because callers use it for x86 SIB scales
// (1/2/4/8 -> LSL #0/#1/#2/#3), even though imm6 could hold up to 63.
void Arm64CodeGenerator::add(int dst, int src1, int src2, int shift) {
    ASSERT_MSG(shift >= 0 && shift <= 3, "Invalid shift amount");
    // BUGFIX: imm6 lives at bits 15:10, so the amount must be shifted by 10.
    // The previous (shift << 12) encoded LSL #4/#8/#12 for shift = 1/2/3.
    emit32(0x8B000000 | (dst << 0) | (src1 << 5) | (src2 << 16) | (shift << 10));
}
void Arm64CodeGenerator::add_imm(int dst, int src1, s32 imm) {
if (imm >= 0 && imm < 4096) {
emit32(0x91000000 | (dst << 0) | (src1 << 5) | (imm << 10));

View File

@ -43,6 +43,7 @@ public:
// Arithmetic operations
void add(int dst, int src1, int src2);
void add(int dst, int src1, int src2, int shift);
void add_imm(int dst, int src1, s32 imm);
void sub(int dst, int src1, int src2);
void sub_imm(int dst, int src1, s32 imm);

View File

@ -37,6 +37,16 @@ CodeBlock* BlockManager::CreateBlock(VAddr ps4_address, void* arm64_code, size_t
// Invalidates the translated block at `ps4_address`: first runs and removes
// every registered link whose destination is this block (restoring the
// callers' original branch instructions), then drops the block itself.
void BlockManager::InvalidateBlock(VAddr ps4_address) {
    std::lock_guard<std::mutex> lock(mutex);
    // All links targeting this address are contiguous in the ordered map;
    // bracket them with the smallest and largest possible host_link values.
    const BlockLinkTag range_begin{ps4_address, nullptr};
    const BlockLinkTag range_end{ps4_address,
                                 reinterpret_cast<ExitFunctionLinkData*>(UINTPTR_MAX)};
    auto link_it = block_links.lower_bound(range_begin);
    const auto link_end = block_links.upper_bound(range_end);
    while (link_it != link_end) {
        // Run the delinker for this call site, then forget the link.
        link_it->second(link_it->first.host_link);
        link_it = block_links.erase(link_it);
    }
    blocks.erase(ps4_address);
    LOG_DEBUG(Core, "Invalidated code block at PS4 address {:#x}", ps4_address);
}
@ -44,6 +54,17 @@ void BlockManager::InvalidateBlock(VAddr ps4_address) {
void BlockManager::InvalidateRange(VAddr start, VAddr end) {
std::lock_guard<std::mutex> lock(mutex);
// Delink all links pointing to blocks in this range
auto link_it = block_links.begin();
while (link_it != block_links.end()) {
if (link_it->first.guest_destination >= start && link_it->first.guest_destination < end) {
link_it->second(link_it->first.host_link);
link_it = block_links.erase(link_it);
} else {
++link_it;
}
}
auto it = blocks.begin();
while (it != blocks.end()) {
VAddr block_addr = it->first;
@ -77,8 +98,19 @@ void BlockManager::AddDependency(VAddr block_address, VAddr dependency) {
}
}
// Registers a delinker for a patched call site. `guest_dest` is the guest
// address the site now branches to; `link_data` identifies the site and is
// used to disambiguate multiple links to the same destination. Re-registering
// the same (dest, link_data) pair replaces the stored delinker.
void BlockManager::AddBlockLink(VAddr guest_dest, ExitFunctionLinkData* link_data,
                                BlockDelinkerFunc delinker) {
    std::lock_guard<std::mutex> lock(mutex);
    const BlockLinkTag tag{guest_dest, link_data};
    block_links[tag] = delinker;
}
// Drops every translated block. Each registered delinker runs first so the
// patched call sites are restored before their bookkeeping disappears.
void BlockManager::Clear() {
    std::lock_guard<std::mutex> lock(mutex);
    for (auto it = block_links.begin(); it != block_links.end(); ++it) {
        it->second(it->first.host_link);
    }
    block_links.clear();
    blocks.clear();
}

View File

@ -3,6 +3,9 @@
#pragma once
#include <atomic>
#include <functional>
#include <map>
#include <memory>
#include <mutex>
#include <set>
@ -11,6 +14,30 @@
namespace Core::Jit {
// Bookkeeping for one direct block-to-block link patched into generated code.
// Created when a branch is patched (LinkBlock) and deleted by the delinker.
struct ExitFunctionLinkData {
    void* host_code;          // host code of the link target (target block's arm64_code)
    VAddr guest_rip;          // guest address the patched branch jumps to
    void* caller_address;     // address of the patched branch instruction in the caller
    u32 original_instruction; // instruction word restored when the link is undone
};
using BlockDelinkerFunc = std::function<void(ExitFunctionLinkData*)>;
// Key for the block-link map. Ordering is primarily by guest destination so
// every link targeting one address forms a contiguous range in the map
// (which InvalidateBlock exploits); the link-record pointer breaks ties so
// multiple call sites can link to the same destination.
struct BlockLinkTag {
    VAddr guest_destination;
    ExitFunctionLinkData* host_link;

    bool operator<(const BlockLinkTag& other) const {
        if (guest_destination != other.guest_destination) {
            return guest_destination < other.guest_destination;
        }
        return host_link < other.host_link;
    }
};
struct CodeBlock {
VAddr ps4_address;
void* arm64_code;
@ -41,6 +68,8 @@ public:
void InvalidateBlock(VAddr ps4_address);
void InvalidateRange(VAddr start, VAddr end);
void AddDependency(VAddr block_address, VAddr dependency);
void AddBlockLink(VAddr guest_dest, ExitFunctionLinkData* link_data,
BlockDelinkerFunc delinker);
void Clear();
size_t GetBlockCount() const {
@ -49,6 +78,7 @@ public:
size_t GetTotalCodeSize() const;
std::unordered_map<VAddr, std::unique_ptr<CodeBlock>> blocks;
std::map<BlockLinkTag, BlockDelinkerFunc> block_links;
mutable std::mutex mutex;
};

View File

@ -250,6 +250,18 @@ CodeBlock* ExecutionEngine::TranslateBlock(VAddr ps4_address) {
return new_block;
}
static void DirectBlockDelinker(ExitFunctionLinkData* record, bool is_call) {
void* caller_addr = record->caller_address;
u32 original_inst = record->original_instruction;
std::atomic_ref<u32>(*reinterpret_cast<u32*>(caller_addr))
.store(original_inst, std::memory_order::relaxed);
#if defined(__APPLE__) && defined(ARCH_ARM64)
__builtin___clear_cache(static_cast<char*>(caller_addr), static_cast<char*>(caller_addr) + 4);
#endif
delete record;
}
void ExecutionEngine::LinkBlock(CodeBlock* block, VAddr target_address) {
CodeBlock* target_block = block_manager->GetBlock(target_address);
if (!target_block) {
@ -261,25 +273,33 @@ void ExecutionEngine::LinkBlock(CodeBlock* block, VAddr target_address) {
#if defined(__APPLE__) && defined(ARCH_ARM64)
pthread_jit_write_protect_np(0);
#endif
// Calculate offset from patch location to target
s64 offset = reinterpret_cast<s64>(target_block->arm64_code) -
reinterpret_cast<s64>(block->branch_patch_location);
void* caller_address = block->branch_patch_location;
s64 offset =
reinterpret_cast<s64>(target_block->arm64_code) - reinterpret_cast<s64>(caller_address);
// Check if we can use a relative branch (within ±128MB)
if (offset >= -0x8000000 && offset < 0x8000000) {
s32 imm26 = static_cast<s32>(offset / 4);
u32* patch_ptr = reinterpret_cast<u32*>(block->branch_patch_location);
// Patch the branch instruction: 0x14000000 | (imm26 & 0x3FFFFFF)
*patch_ptr = 0x14000000 | (imm26 & 0x3FFFFFF);
u32* patch_ptr = reinterpret_cast<u32*>(caller_address);
u32 branch_inst = 0x14000000 | (imm26 & 0x3FFFFFF);
u32 original_inst = *patch_ptr;
std::atomic_ref<u32>(*patch_ptr).store(branch_inst, std::memory_order::relaxed);
// Register delinker
ExitFunctionLinkData* link_data = new ExitFunctionLinkData{
target_block->arm64_code, target_address, caller_address, original_inst};
block_manager->AddBlockLink(target_address, link_data, [](ExitFunctionLinkData* r) {
DirectBlockDelinker(r, false);
});
} else {
// Far branch - need to use indirect branch
// For now, leave as-is (will use the placeholder branch)
// Far branch - need to use indirect branch via thunk
LOG_DEBUG(Core, "Branch target too far for direct linking: offset={}", offset);
}
#if defined(__APPLE__) && defined(ARCH_ARM64)
pthread_jit_write_protect_np(1);
__builtin___clear_cache(static_cast<char*>(block->branch_patch_location),
static_cast<char*>(block->branch_patch_location) + 4);
__builtin___clear_cache(static_cast<char*>(caller_address),
static_cast<char*>(caller_address) + 4);
#endif
block->is_linked = true;
LOG_DEBUG(Core, "Linked block {:#x} to {:#x}", block->ps4_address, target_address);
@ -295,8 +315,19 @@ void ExecutionEngine::LinkBlock(CodeBlock* block, VAddr target_address) {
if (offset >= -0x8000000 && offset < 0x8000000) {
s32 imm26 = static_cast<s32>(offset / 4);
u32* patch_ptr = reinterpret_cast<u32*>(link_location);
*patch_ptr = 0x14000000 | (imm26 & 0x3FFFFFF);
block->code_size += 4; // Update block size
u32 branch_inst = 0x14000000 | (imm26 & 0x3FFFFFF);
u32 original_inst = 0x14000002;
std::atomic_ref<u32>(*patch_ptr).store(branch_inst, std::memory_order::relaxed);
// Register delinker
ExitFunctionLinkData* link_data = new ExitFunctionLinkData{
target_block->arm64_code, target_address, link_location, original_inst};
block_manager->AddBlockLink(target_address, link_data, [](ExitFunctionLinkData* r) {
DirectBlockDelinker(r, false);
});
block->code_size += 4;
}
#if defined(__APPLE__) && defined(ARCH_ARM64)
pthread_jit_write_protect_np(1);

View File

@ -116,29 +116,77 @@ void X86_64Translator::CalculateMemoryAddress(int dst_reg, const ZydisDecodedOpe
}
}
if (base_reg == -1 && index_reg == -1 && mem.disp.value == 0) {
s64 displacement = mem.disp.value;
if (base_reg == -1 && index_reg == -1 && displacement == 0) {
codegen.mov(dst_reg, 0);
return;
}
if (base_reg != -1) {
codegen.mov(dst_reg, base_reg);
} else {
codegen.mov(dst_reg, 0);
}
if (index_reg != -1) {
if (mem.scale > 0 && mem.scale <= 8) {
codegen.mov(RegisterMapper::SCRATCH_REG, static_cast<s64>(mem.scale));
codegen.mul(RegisterMapper::SCRATCH_REG, index_reg, RegisterMapper::SCRATCH_REG);
codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
if (index_reg == -1) {
if (base_reg != -1) {
if (displacement == 0) {
codegen.mov(dst_reg, base_reg);
} else if (displacement >= -256 && displacement < 256) {
codegen.mov(dst_reg, base_reg);
codegen.add_imm(dst_reg, dst_reg, static_cast<s32>(displacement));
} else {
codegen.mov(dst_reg, base_reg);
codegen.mov_imm(RegisterMapper::SCRATCH_REG, displacement);
codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
}
} else {
codegen.add(dst_reg, dst_reg, index_reg);
codegen.mov_imm(dst_reg, displacement);
}
return;
}
if (mem.disp.value != 0) {
codegen.add(dst_reg, dst_reg, static_cast<s32>(mem.disp.value));
if (base_reg == -1) {
base_reg = 0;
}
int scale = mem.scale;
if (scale == 0) {
scale = 1;
}
if (scale == 1) {
if (displacement == 0) {
codegen.add(dst_reg, base_reg, index_reg);
} else if (displacement >= -256 && displacement < 256) {
codegen.add(dst_reg, base_reg, index_reg);
codegen.add_imm(dst_reg, dst_reg, static_cast<s32>(displacement));
} else {
codegen.add(dst_reg, base_reg, index_reg);
codegen.mov_imm(RegisterMapper::SCRATCH_REG, displacement);
codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
}
} else if (scale == 2 || scale == 4 || scale == 8) {
int shift = (scale == 2) ? 1 : (scale == 4) ? 2 : 3;
if (displacement == 0) {
codegen.add(dst_reg, base_reg, index_reg, shift);
} else {
codegen.add(dst_reg, base_reg, index_reg, shift);
if (displacement >= -256 && displacement < 256) {
codegen.add_imm(dst_reg, dst_reg, static_cast<s32>(displacement));
} else {
codegen.mov_imm(RegisterMapper::SCRATCH_REG, displacement);
codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
}
}
} else {
codegen.mov(dst_reg, base_reg);
codegen.mov_imm(RegisterMapper::SCRATCH_REG, scale);
codegen.mul(RegisterMapper::SCRATCH_REG, index_reg, RegisterMapper::SCRATCH_REG);
codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
if (displacement != 0) {
if (displacement >= -256 && displacement < 256) {
codegen.add_imm(dst_reg, dst_reg, static_cast<s32>(displacement));
} else {
codegen.mov_imm(RegisterMapper::SCRATCH_REG, displacement);
codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
}
}
}
}