Mirror of https://github.com/shadps4-emu/shadPS4.git, synced 2025-12-16 12:09:07 +00:00.
better block linking with delinkers; memory operand optims
This commit is contained in:
parent
c2c82646a6
commit
b76e05d5dd
@ -228,6 +228,11 @@ void Arm64CodeGenerator::add(int dst, int src1, int src2) {
|
||||
emit32(0x8B000000 | (dst << 0) | (src1 << 5) | (src2 << 16));
|
||||
}
|
||||
|
||||
// Emits A64 ADD (shifted register), 64-bit: dst = src1 + (src2 LSL #shift).
// Field layout: Rd [4:0], Rn [9:5], imm6 shift amount [15:10], Rm [20:16],
// shift type LSL = 0b00 [23:22].
void Arm64CodeGenerator::add(int dst, int src1, int src2, int shift) {
    ASSERT_MSG(shift >= 0 && shift <= 3, "Invalid shift amount");
    // BUG FIX: the shift amount belongs in the imm6 field at bit 10, not bit 12.
    // The previous (shift << 12) encoded LSL #(4*shift), so scale-2/4/8 address
    // arithmetic computed src1 + (src2 << 4/8/12) instead of << 1/2/3.
    emit32(0x8B000000 | (dst << 0) | (src1 << 5) | (src2 << 16) | (shift << 10));
}
|
||||
|
||||
void Arm64CodeGenerator::add_imm(int dst, int src1, s32 imm) {
|
||||
if (imm >= 0 && imm < 4096) {
|
||||
emit32(0x91000000 | (dst << 0) | (src1 << 5) | (imm << 10));
|
||||
|
||||
@ -43,6 +43,7 @@ public:
|
||||
|
||||
// Arithmetic operations
|
||||
void add(int dst, int src1, int src2);
|
||||
void add(int dst, int src1, int src2, int shift);
|
||||
void add_imm(int dst, int src1, s32 imm);
|
||||
void sub(int dst, int src1, int src2);
|
||||
void sub_imm(int dst, int src1, s32 imm);
|
||||
|
||||
@ -37,6 +37,16 @@ CodeBlock* BlockManager::CreateBlock(VAddr ps4_address, void* arm64_code, size_t
|
||||
|
||||
// Removes the block translated for `ps4_address` and un-patches every call
// site that was directly linked to it.
void BlockManager::InvalidateBlock(VAddr ps4_address) {
    std::lock_guard<std::mutex> lock(mutex);

    // Links are keyed by {guest_destination, host_link}, so every link into
    // this block lies in the range [{addr, nullptr}, {addr, max-pointer}].
    auto it = block_links.lower_bound({ps4_address, nullptr});
    const auto range_end = block_links.upper_bound(
        {ps4_address, reinterpret_cast<ExitFunctionLinkData*>(UINTPTR_MAX)});
    while (it != range_end) {
        it->second(it->first.host_link); // Run the delinker for this call site.
        it = block_links.erase(it);
    }

    blocks.erase(ps4_address);
    LOG_DEBUG(Core, "Invalidated code block at PS4 address {:#x}", ps4_address);
}
|
||||
@ -44,6 +54,17 @@ void BlockManager::InvalidateBlock(VAddr ps4_address) {
|
||||
void BlockManager::InvalidateRange(VAddr start, VAddr end) {
|
||||
std::lock_guard<std::mutex> lock(mutex);
|
||||
|
||||
// Delink all links pointing to blocks in this range
|
||||
auto link_it = block_links.begin();
|
||||
while (link_it != block_links.end()) {
|
||||
if (link_it->first.guest_destination >= start && link_it->first.guest_destination < end) {
|
||||
link_it->second(link_it->first.host_link);
|
||||
link_it = block_links.erase(link_it);
|
||||
} else {
|
||||
++link_it;
|
||||
}
|
||||
}
|
||||
|
||||
auto it = blocks.begin();
|
||||
while (it != blocks.end()) {
|
||||
VAddr block_addr = it->first;
|
||||
@ -77,8 +98,19 @@ void BlockManager::AddDependency(VAddr block_address, VAddr dependency) {
|
||||
}
|
||||
}
|
||||
|
||||
// Registers a delinker for a patched call site so the patch can be undone
// when the destination block is invalidated.
void BlockManager::AddBlockLink(VAddr guest_dest, ExitFunctionLinkData* link_data,
                                BlockDelinkerFunc delinker) {
    std::lock_guard<std::mutex> lock(mutex);
    const BlockLinkTag tag{guest_dest, link_data};
    block_links.insert_or_assign(tag, delinker);
}
|
||||
|
||||
void BlockManager::Clear() {
|
||||
std::lock_guard<std::mutex> lock(mutex);
|
||||
// Delink all links before clearing
|
||||
for (auto& [tag, delinker] : block_links) {
|
||||
delinker(tag.host_link);
|
||||
}
|
||||
block_links.clear();
|
||||
blocks.clear();
|
||||
}
|
||||
|
||||
|
||||
@ -3,6 +3,9 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <functional>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <set>
|
||||
@ -11,6 +14,30 @@
|
||||
|
||||
namespace Core::Jit {
|
||||
|
||||
struct ExitFunctionLinkData {
|
||||
void* host_code;
|
||||
VAddr guest_rip;
|
||||
void* caller_address;
|
||||
u32 original_instruction;
|
||||
};
|
||||
|
||||
using BlockDelinkerFunc = std::function<void(ExitFunctionLinkData*)>;
|
||||
|
||||
struct BlockLinkTag {
|
||||
VAddr guest_destination;
|
||||
ExitFunctionLinkData* host_link;
|
||||
|
||||
bool operator<(const BlockLinkTag& other) const {
|
||||
if (guest_destination < other.guest_destination) {
|
||||
return true;
|
||||
} else if (guest_destination == other.guest_destination) {
|
||||
return host_link < other.host_link;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct CodeBlock {
|
||||
VAddr ps4_address;
|
||||
void* arm64_code;
|
||||
@ -41,6 +68,8 @@ public:
|
||||
void InvalidateBlock(VAddr ps4_address);
|
||||
void InvalidateRange(VAddr start, VAddr end);
|
||||
void AddDependency(VAddr block_address, VAddr dependency);
|
||||
void AddBlockLink(VAddr guest_dest, ExitFunctionLinkData* link_data,
|
||||
BlockDelinkerFunc delinker);
|
||||
void Clear();
|
||||
|
||||
size_t GetBlockCount() const {
|
||||
@ -49,6 +78,7 @@ public:
|
||||
size_t GetTotalCodeSize() const;
|
||||
|
||||
std::unordered_map<VAddr, std::unique_ptr<CodeBlock>> blocks;
|
||||
std::map<BlockLinkTag, BlockDelinkerFunc> block_links;
|
||||
mutable std::mutex mutex;
|
||||
};
|
||||
|
||||
|
||||
@ -250,6 +250,18 @@ CodeBlock* ExecutionEngine::TranslateBlock(VAddr ps4_address) {
|
||||
return new_block;
|
||||
}
|
||||
|
||||
static void DirectBlockDelinker(ExitFunctionLinkData* record, bool is_call) {
|
||||
void* caller_addr = record->caller_address;
|
||||
u32 original_inst = record->original_instruction;
|
||||
|
||||
std::atomic_ref<u32>(*reinterpret_cast<u32*>(caller_addr))
|
||||
.store(original_inst, std::memory_order::relaxed);
|
||||
#if defined(__APPLE__) && defined(ARCH_ARM64)
|
||||
__builtin___clear_cache(static_cast<char*>(caller_addr), static_cast<char*>(caller_addr) + 4);
|
||||
#endif
|
||||
delete record;
|
||||
}
|
||||
|
||||
void ExecutionEngine::LinkBlock(CodeBlock* block, VAddr target_address) {
|
||||
CodeBlock* target_block = block_manager->GetBlock(target_address);
|
||||
if (!target_block) {
|
||||
@ -261,25 +273,33 @@ void ExecutionEngine::LinkBlock(CodeBlock* block, VAddr target_address) {
|
||||
#if defined(__APPLE__) && defined(ARCH_ARM64)
|
||||
pthread_jit_write_protect_np(0);
|
||||
#endif
|
||||
// Calculate offset from patch location to target
|
||||
s64 offset = reinterpret_cast<s64>(target_block->arm64_code) -
|
||||
reinterpret_cast<s64>(block->branch_patch_location);
|
||||
void* caller_address = block->branch_patch_location;
|
||||
s64 offset =
|
||||
reinterpret_cast<s64>(target_block->arm64_code) - reinterpret_cast<s64>(caller_address);
|
||||
|
||||
// Check if we can use a relative branch (within ±128MB)
|
||||
if (offset >= -0x8000000 && offset < 0x8000000) {
|
||||
s32 imm26 = static_cast<s32>(offset / 4);
|
||||
u32* patch_ptr = reinterpret_cast<u32*>(block->branch_patch_location);
|
||||
// Patch the branch instruction: 0x14000000 | (imm26 & 0x3FFFFFF)
|
||||
*patch_ptr = 0x14000000 | (imm26 & 0x3FFFFFF);
|
||||
u32* patch_ptr = reinterpret_cast<u32*>(caller_address);
|
||||
u32 branch_inst = 0x14000000 | (imm26 & 0x3FFFFFF);
|
||||
|
||||
u32 original_inst = *patch_ptr;
|
||||
std::atomic_ref<u32>(*patch_ptr).store(branch_inst, std::memory_order::relaxed);
|
||||
|
||||
// Register delinker
|
||||
ExitFunctionLinkData* link_data = new ExitFunctionLinkData{
|
||||
target_block->arm64_code, target_address, caller_address, original_inst};
|
||||
block_manager->AddBlockLink(target_address, link_data, [](ExitFunctionLinkData* r) {
|
||||
DirectBlockDelinker(r, false);
|
||||
});
|
||||
} else {
|
||||
// Far branch - need to use indirect branch
|
||||
// For now, leave as-is (will use the placeholder branch)
|
||||
// Far branch - need to use indirect branch via thunk
|
||||
LOG_DEBUG(Core, "Branch target too far for direct linking: offset={}", offset);
|
||||
}
|
||||
#if defined(__APPLE__) && defined(ARCH_ARM64)
|
||||
pthread_jit_write_protect_np(1);
|
||||
__builtin___clear_cache(static_cast<char*>(block->branch_patch_location),
|
||||
static_cast<char*>(block->branch_patch_location) + 4);
|
||||
__builtin___clear_cache(static_cast<char*>(caller_address),
|
||||
static_cast<char*>(caller_address) + 4);
|
||||
#endif
|
||||
block->is_linked = true;
|
||||
LOG_DEBUG(Core, "Linked block {:#x} to {:#x}", block->ps4_address, target_address);
|
||||
@ -295,8 +315,19 @@ void ExecutionEngine::LinkBlock(CodeBlock* block, VAddr target_address) {
|
||||
if (offset >= -0x8000000 && offset < 0x8000000) {
|
||||
s32 imm26 = static_cast<s32>(offset / 4);
|
||||
u32* patch_ptr = reinterpret_cast<u32*>(link_location);
|
||||
*patch_ptr = 0x14000000 | (imm26 & 0x3FFFFFF);
|
||||
block->code_size += 4; // Update block size
|
||||
u32 branch_inst = 0x14000000 | (imm26 & 0x3FFFFFF);
|
||||
u32 original_inst = 0x14000002;
|
||||
|
||||
std::atomic_ref<u32>(*patch_ptr).store(branch_inst, std::memory_order::relaxed);
|
||||
|
||||
// Register delinker
|
||||
ExitFunctionLinkData* link_data = new ExitFunctionLinkData{
|
||||
target_block->arm64_code, target_address, link_location, original_inst};
|
||||
block_manager->AddBlockLink(target_address, link_data, [](ExitFunctionLinkData* r) {
|
||||
DirectBlockDelinker(r, false);
|
||||
});
|
||||
|
||||
block->code_size += 4;
|
||||
}
|
||||
#if defined(__APPLE__) && defined(ARCH_ARM64)
|
||||
pthread_jit_write_protect_np(1);
|
||||
|
||||
@ -116,29 +116,77 @@ void X86_64Translator::CalculateMemoryAddress(int dst_reg, const ZydisDecodedOpe
|
||||
}
|
||||
}
|
||||
|
||||
if (base_reg == -1 && index_reg == -1 && mem.disp.value == 0) {
|
||||
s64 displacement = mem.disp.value;
|
||||
|
||||
if (base_reg == -1 && index_reg == -1 && displacement == 0) {
|
||||
codegen.mov(dst_reg, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
if (index_reg == -1) {
|
||||
if (base_reg != -1) {
|
||||
if (displacement == 0) {
|
||||
codegen.mov(dst_reg, base_reg);
|
||||
} else if (displacement >= -256 && displacement < 256) {
|
||||
codegen.mov(dst_reg, base_reg);
|
||||
codegen.add_imm(dst_reg, dst_reg, static_cast<s32>(displacement));
|
||||
} else {
|
||||
codegen.mov(dst_reg, 0);
|
||||
codegen.mov(dst_reg, base_reg);
|
||||
codegen.mov_imm(RegisterMapper::SCRATCH_REG, displacement);
|
||||
codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
|
||||
}
|
||||
} else {
|
||||
codegen.mov_imm(dst_reg, displacement);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (index_reg != -1) {
|
||||
if (mem.scale > 0 && mem.scale <= 8) {
|
||||
codegen.mov(RegisterMapper::SCRATCH_REG, static_cast<s64>(mem.scale));
|
||||
if (base_reg == -1) {
|
||||
base_reg = 0;
|
||||
}
|
||||
|
||||
int scale = mem.scale;
|
||||
if (scale == 0) {
|
||||
scale = 1;
|
||||
}
|
||||
|
||||
if (scale == 1) {
|
||||
if (displacement == 0) {
|
||||
codegen.add(dst_reg, base_reg, index_reg);
|
||||
} else if (displacement >= -256 && displacement < 256) {
|
||||
codegen.add(dst_reg, base_reg, index_reg);
|
||||
codegen.add_imm(dst_reg, dst_reg, static_cast<s32>(displacement));
|
||||
} else {
|
||||
codegen.add(dst_reg, base_reg, index_reg);
|
||||
codegen.mov_imm(RegisterMapper::SCRATCH_REG, displacement);
|
||||
codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
|
||||
}
|
||||
} else if (scale == 2 || scale == 4 || scale == 8) {
|
||||
int shift = (scale == 2) ? 1 : (scale == 4) ? 2 : 3;
|
||||
if (displacement == 0) {
|
||||
codegen.add(dst_reg, base_reg, index_reg, shift);
|
||||
} else {
|
||||
codegen.add(dst_reg, base_reg, index_reg, shift);
|
||||
if (displacement >= -256 && displacement < 256) {
|
||||
codegen.add_imm(dst_reg, dst_reg, static_cast<s32>(displacement));
|
||||
} else {
|
||||
codegen.mov_imm(RegisterMapper::SCRATCH_REG, displacement);
|
||||
codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
codegen.mov(dst_reg, base_reg);
|
||||
codegen.mov_imm(RegisterMapper::SCRATCH_REG, scale);
|
||||
codegen.mul(RegisterMapper::SCRATCH_REG, index_reg, RegisterMapper::SCRATCH_REG);
|
||||
codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
|
||||
if (displacement != 0) {
|
||||
if (displacement >= -256 && displacement < 256) {
|
||||
codegen.add_imm(dst_reg, dst_reg, static_cast<s32>(displacement));
|
||||
} else {
|
||||
codegen.add(dst_reg, dst_reg, index_reg);
|
||||
codegen.mov_imm(RegisterMapper::SCRATCH_REG, displacement);
|
||||
codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
|
||||
}
|
||||
}
|
||||
|
||||
if (mem.disp.value != 0) {
|
||||
codegen.add(dst_reg, dst_reg, static_cast<s32>(mem.disp.value));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user