mirror of https://github.com/shadps4-emu/shadPS4.git (synced 2025-12-16 12:09:07 +00:00)
better block linking with delinkers; memory operand optims
This commit is contained in:
parent c2c82646a6
commit b76e05d5dd
@@ -228,6 +228,11 @@ void Arm64CodeGenerator::add(int dst, int src1, int src2) {
     emit32(0x8B000000 | (dst << 0) | (src1 << 5) | (src2 << 16));
 }
 
+void Arm64CodeGenerator::add(int dst, int src1, int src2, int shift) {
+    ASSERT_MSG(shift >= 0 && shift <= 3, "Invalid shift amount");
+    emit32(0x8B000000 | (dst << 0) | (src1 << 5) | (src2 << 16) | (shift << 12));
+}
+
 void Arm64CodeGenerator::add_imm(int dst, int src1, s32 imm) {
     if (imm >= 0 && imm < 4096) {
         emit32(0x91000000 | (dst << 0) | (src1 << 5) | (imm << 10));
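Note (illustration, not part of the diff): the new shifted-add overload is meant for lowering scaled-index addresses, where the shift argument encodes an LSL of 1, 2 or 3 for x86 scales of 2, 4 or 8, as the translator hunk further down shows. A minimal usage sketch with made-up register numbers:

// Illustration only; register numbers are invented.
int dst = 0, base = 1, index = 2;
codegen.add(dst, base, index, 2);      // dst = base + (index << 2), i.e. index * 4
codegen.add_imm(dst, dst, 16);         // then fold in a small displacement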
@@ -43,6 +43,7 @@ public:
 
     // Arithmetic operations
     void add(int dst, int src1, int src2);
+    void add(int dst, int src1, int src2, int shift);
    void add_imm(int dst, int src1, s32 imm);
     void sub(int dst, int src1, int src2);
     void sub_imm(int dst, int src1, s32 imm);
@@ -37,6 +37,16 @@ CodeBlock* BlockManager::CreateBlock(VAddr ps4_address, void* arm64_code, size_t
 
 void BlockManager::InvalidateBlock(VAddr ps4_address) {
     std::lock_guard<std::mutex> lock(mutex);
+
+    // Delink all links pointing to this block
+    auto lower = block_links.lower_bound({ps4_address, nullptr});
+    auto upper = block_links.upper_bound(
+        {ps4_address, reinterpret_cast<ExitFunctionLinkData*>(UINTPTR_MAX)});
+    for (auto it = lower; it != upper;) {
+        it->second(it->first.host_link);
+        it = block_links.erase(it);
+    }
+
     blocks.erase(ps4_address);
     LOG_DEBUG(Core, "Invalidated code block at PS4 address {:#x}", ps4_address);
 }
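Aside (self-contained sketch, not the emulator's code): the delinking scan above relies on block_links being an ordered map keyed by {guest_destination, host_link}, so every link aimed at one guest address sits in the contiguous range [lower_bound({addr, nullptr}), upper_bound({addr, UINTPTR_MAX})). With simplified stand-in types:

// Standalone illustration of the ordered-map range scan.
#include <cstdint>
#include <functional>
#include <iostream>
#include <map>

struct LinkData {};                 // stand-in for ExitFunctionLinkData
using VAddr = std::uint64_t;

struct BlockLinkTag {
    VAddr guest_destination;
    LinkData* host_link;
    bool operator<(const BlockLinkTag& other) const {
        // Same ordering idea as the real struct: destination first, then link pointer.
        if (guest_destination < other.guest_destination) return true;
        if (guest_destination == other.guest_destination) return host_link < other.host_link;
        return false;
    }
};

int main() {
    std::map<BlockLinkTag, std::function<void(LinkData*)>> block_links;
    LinkData a, b, c;
    block_links[{0x1000, &a}] = [](LinkData*) { std::cout << "delink a\n"; };
    block_links[{0x1000, &b}] = [](LinkData*) { std::cout << "delink b\n"; };
    block_links[{0x2000, &c}] = [](LinkData*) { std::cout << "delink c\n"; };

    // Everything aimed at 0x1000 lies in [lower, upper); the 0x2000 link is untouched.
    auto lower = block_links.lower_bound({0x1000, nullptr});
    auto upper = block_links.upper_bound({0x1000, reinterpret_cast<LinkData*>(UINTPTR_MAX)});
    for (auto it = lower; it != upper;) {
        it->second(it->first.host_link);
        it = block_links.erase(it);
    }
}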
@@ -44,6 +54,17 @@ void BlockManager::InvalidateBlock(VAddr ps4_address) {
 void BlockManager::InvalidateRange(VAddr start, VAddr end) {
     std::lock_guard<std::mutex> lock(mutex);
 
+    // Delink all links pointing to blocks in this range
+    auto link_it = block_links.begin();
+    while (link_it != block_links.end()) {
+        if (link_it->first.guest_destination >= start && link_it->first.guest_destination < end) {
+            link_it->second(link_it->first.host_link);
+            link_it = block_links.erase(link_it);
+        } else {
+            ++link_it;
+        }
+    }
+
     auto it = blocks.begin();
     while (it != blocks.end()) {
         VAddr block_addr = it->first;
@@ -77,8 +98,19 @@ void BlockManager::AddDependency(VAddr block_address, VAddr dependency) {
     }
 }
 
+void BlockManager::AddBlockLink(VAddr guest_dest, ExitFunctionLinkData* link_data,
+                                BlockDelinkerFunc delinker) {
+    std::lock_guard<std::mutex> lock(mutex);
+    block_links[{guest_dest, link_data}] = delinker;
+}
+
 void BlockManager::Clear() {
     std::lock_guard<std::mutex> lock(mutex);
+    // Delink all links before clearing
+    for (auto& [tag, delinker] : block_links) {
+        delinker(tag.host_link);
+    }
+    block_links.clear();
     blocks.clear();
 }
 
@@ -3,6 +3,9 @@
 
 #pragma once
 
+#include <atomic>
+#include <functional>
+#include <map>
 #include <memory>
 #include <mutex>
 #include <set>
@@ -11,6 +14,30 @@
 
 namespace Core::Jit {
 
+struct ExitFunctionLinkData {
+    void* host_code;
+    VAddr guest_rip;
+    void* caller_address;
+    u32 original_instruction;
+};
+
+using BlockDelinkerFunc = std::function<void(ExitFunctionLinkData*)>;
+
+struct BlockLinkTag {
+    VAddr guest_destination;
+    ExitFunctionLinkData* host_link;
+
+    bool operator<(const BlockLinkTag& other) const {
+        if (guest_destination < other.guest_destination) {
+            return true;
+        } else if (guest_destination == other.guest_destination) {
+            return host_link < other.host_link;
+        } else {
+            return false;
+        }
+    }
+};
+
 struct CodeBlock {
     VAddr ps4_address;
     void* arm64_code;
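Editorial aside: the hand-rolled operator< above is a plain lexicographic compare on (guest_destination, host_link); an equivalent formulation (shown only for clarity, it is not what the commit uses) would lean on std::tie from <tuple>:

// Equivalent ordering, illustration only:
bool operator<(const BlockLinkTag& other) const {
    return std::tie(guest_destination, host_link) <
           std::tie(other.guest_destination, other.host_link);
}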
@@ -41,6 +68,8 @@ public:
     void InvalidateBlock(VAddr ps4_address);
     void InvalidateRange(VAddr start, VAddr end);
     void AddDependency(VAddr block_address, VAddr dependency);
+    void AddBlockLink(VAddr guest_dest, ExitFunctionLinkData* link_data,
+                      BlockDelinkerFunc delinker);
     void Clear();
 
     size_t GetBlockCount() const {
@@ -49,6 +78,7 @@ public:
     size_t GetTotalCodeSize() const;
 
     std::unordered_map<VAddr, std::unique_ptr<CodeBlock>> blocks;
+    std::map<BlockLinkTag, BlockDelinkerFunc> block_links;
     mutable std::mutex mutex;
 };
 
@@ -250,6 +250,18 @@ CodeBlock* ExecutionEngine::TranslateBlock(VAddr ps4_address) {
     return new_block;
 }
 
+static void DirectBlockDelinker(ExitFunctionLinkData* record, bool is_call) {
+    void* caller_addr = record->caller_address;
+    u32 original_inst = record->original_instruction;
+
+    std::atomic_ref<u32>(*reinterpret_cast<u32*>(caller_addr))
+        .store(original_inst, std::memory_order::relaxed);
+#if defined(__APPLE__) && defined(ARCH_ARM64)
+    __builtin___clear_cache(static_cast<char*>(caller_addr), static_cast<char*>(caller_addr) + 4);
+#endif
+    delete record;
+}
+
 void ExecutionEngine::LinkBlock(CodeBlock* block, VAddr target_address) {
     CodeBlock* target_block = block_manager->GetBlock(target_address);
     if (!target_block) {
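Aside: DirectBlockDelinker undoes a link by atomically writing the saved instruction word back over the patched branch and, on Apple arm64, flushing the instruction cache for that word. A minimal sketch of the same patch/restore primitive on a plain buffer (assumes C++20 for std::atomic_ref; the function name is hypothetical):

#include <atomic>
#include <cstdint>

// Swap one 32-bit instruction word atomically so a concurrent reader never observes a torn word;
// the previous word is returned so it can later be written back, which is what the delinker replays.
std::uint32_t patch_word(std::uint32_t* slot, std::uint32_t new_inst) {
    std::uint32_t original = *slot;
    std::atomic_ref<std::uint32_t>(*slot).store(new_inst, std::memory_order::relaxed);
    return original;
}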
@@ -261,25 +273,33 @@ void ExecutionEngine::LinkBlock(CodeBlock* block, VAddr target_address) {
 #if defined(__APPLE__) && defined(ARCH_ARM64)
     pthread_jit_write_protect_np(0);
 #endif
-    // Calculate offset from patch location to target
-    s64 offset = reinterpret_cast<s64>(target_block->arm64_code) -
-                 reinterpret_cast<s64>(block->branch_patch_location);
+    void* caller_address = block->branch_patch_location;
+    s64 offset =
+        reinterpret_cast<s64>(target_block->arm64_code) - reinterpret_cast<s64>(caller_address);
 
     // Check if we can use a relative branch (within ±128MB)
     if (offset >= -0x8000000 && offset < 0x8000000) {
         s32 imm26 = static_cast<s32>(offset / 4);
-        u32* patch_ptr = reinterpret_cast<u32*>(block->branch_patch_location);
-        // Patch the branch instruction: 0x14000000 | (imm26 & 0x3FFFFFF)
-        *patch_ptr = 0x14000000 | (imm26 & 0x3FFFFFF);
+        u32* patch_ptr = reinterpret_cast<u32*>(caller_address);
+        u32 branch_inst = 0x14000000 | (imm26 & 0x3FFFFFF);
+        u32 original_inst = *patch_ptr;
+        std::atomic_ref<u32>(*patch_ptr).store(branch_inst, std::memory_order::relaxed);
+
+        // Register delinker
+        ExitFunctionLinkData* link_data = new ExitFunctionLinkData{
+            target_block->arm64_code, target_address, caller_address, original_inst};
+        block_manager->AddBlockLink(target_address, link_data, [](ExitFunctionLinkData* r) {
+            DirectBlockDelinker(r, false);
+        });
     } else {
-        // Far branch - need to use indirect branch
-        // For now, leave as-is (will use the placeholder branch)
+        // Far branch - need to use indirect branch via thunk
         LOG_DEBUG(Core, "Branch target too far for direct linking: offset={}", offset);
     }
 #if defined(__APPLE__) && defined(ARCH_ARM64)
     pthread_jit_write_protect_np(1);
-    __builtin___clear_cache(static_cast<char*>(block->branch_patch_location),
-                            static_cast<char*>(block->branch_patch_location) + 4);
+    __builtin___clear_cache(static_cast<char*>(caller_address),
+                            static_cast<char*>(caller_address) + 4);
 #endif
     block->is_linked = true;
     LOG_DEBUG(Core, "Linked block {:#x} to {:#x}", block->ps4_address, target_address);
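Worked example (illustrative values, not from the commit): the patch writes an AArch64 unconditional branch, B, whose 26-bit immediate is the word offset to the target, and the ±0x8000000-byte guard matches that encoding's ±128 MiB reach. For a forward offset of 0x400 bytes the patched word is 0x14000000 | (0x400 / 4) = 0x14000100; a sketch that mirrors the same arithmetic:

#include <cstdint>
#include <cstdio>

// Mirrors the branch encoding used above; the caller is assumed to have range-checked the offset.
std::uint32_t encode_b(std::int64_t byte_offset) {
    std::int32_t imm26 = static_cast<std::int32_t>(byte_offset / 4);
    return 0x14000000u | (static_cast<std::uint32_t>(imm26) & 0x3FFFFFFu);
}

int main() {
    std::printf("%#x\n", static_cast<unsigned>(encode_b(0x400)));  // prints 0x14000100
    std::printf("%#x\n", static_cast<unsigned>(encode_b(-8)));     // backward branch: 0x17fffffe
}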
@@ -295,8 +315,19 @@ void ExecutionEngine::LinkBlock(CodeBlock* block, VAddr target_address) {
     if (offset >= -0x8000000 && offset < 0x8000000) {
         s32 imm26 = static_cast<s32>(offset / 4);
         u32* patch_ptr = reinterpret_cast<u32*>(link_location);
-        *patch_ptr = 0x14000000 | (imm26 & 0x3FFFFFF);
-        block->code_size += 4; // Update block size
+        u32 branch_inst = 0x14000000 | (imm26 & 0x3FFFFFF);
+        u32 original_inst = 0x14000002;
+
+        std::atomic_ref<u32>(*patch_ptr).store(branch_inst, std::memory_order::relaxed);
+
+        // Register delinker
+        ExitFunctionLinkData* link_data = new ExitFunctionLinkData{
+            target_block->arm64_code, target_address, link_location, original_inst};
+        block_manager->AddBlockLink(target_address, link_data, [](ExitFunctionLinkData* r) {
+            DirectBlockDelinker(r, false);
+        });
+
+        block->code_size += 4;
     }
 #if defined(__APPLE__) && defined(ARCH_ARM64)
     pthread_jit_write_protect_np(1);
@@ -116,29 +116,77 @@ void X86_64Translator::CalculateMemoryAddress(int dst_reg, const ZydisDecodedOpe
         }
     }
 
-    if (base_reg == -1 && index_reg == -1 && mem.disp.value == 0) {
+    s64 displacement = mem.disp.value;
+
+    if (base_reg == -1 && index_reg == -1 && displacement == 0) {
         codegen.mov(dst_reg, 0);
         return;
     }
 
-    if (base_reg != -1) {
-        codegen.mov(dst_reg, base_reg);
-    } else {
-        codegen.mov(dst_reg, 0);
-    }
-
-    if (index_reg != -1) {
-        if (mem.scale > 0 && mem.scale <= 8) {
-            codegen.mov(RegisterMapper::SCRATCH_REG, static_cast<s64>(mem.scale));
-            codegen.mul(RegisterMapper::SCRATCH_REG, index_reg, RegisterMapper::SCRATCH_REG);
-            codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
-        } else {
-            codegen.add(dst_reg, dst_reg, index_reg);
-        }
-    }
-
-    if (mem.disp.value != 0) {
-        codegen.add(dst_reg, dst_reg, static_cast<s32>(mem.disp.value));
+    if (index_reg == -1) {
+        if (base_reg != -1) {
+            if (displacement == 0) {
+                codegen.mov(dst_reg, base_reg);
+            } else if (displacement >= -256 && displacement < 256) {
+                codegen.mov(dst_reg, base_reg);
+                codegen.add_imm(dst_reg, dst_reg, static_cast<s32>(displacement));
+            } else {
+                codegen.mov(dst_reg, base_reg);
+                codegen.mov_imm(RegisterMapper::SCRATCH_REG, displacement);
+                codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
+            }
+        } else {
+            codegen.mov_imm(dst_reg, displacement);
+        }
+        return;
+    }
+
+    if (base_reg == -1) {
+        base_reg = 0;
+    }
+
+    int scale = mem.scale;
+    if (scale == 0) {
+        scale = 1;
+    }
+
+    if (scale == 1) {
+        if (displacement == 0) {
+            codegen.add(dst_reg, base_reg, index_reg);
+        } else if (displacement >= -256 && displacement < 256) {
+            codegen.add(dst_reg, base_reg, index_reg);
+            codegen.add_imm(dst_reg, dst_reg, static_cast<s32>(displacement));
+        } else {
+            codegen.add(dst_reg, base_reg, index_reg);
+            codegen.mov_imm(RegisterMapper::SCRATCH_REG, displacement);
+            codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
+        }
+    } else if (scale == 2 || scale == 4 || scale == 8) {
+        int shift = (scale == 2) ? 1 : (scale == 4) ? 2 : 3;
+        if (displacement == 0) {
+            codegen.add(dst_reg, base_reg, index_reg, shift);
+        } else {
+            codegen.add(dst_reg, base_reg, index_reg, shift);
+            if (displacement >= -256 && displacement < 256) {
+                codegen.add_imm(dst_reg, dst_reg, static_cast<s32>(displacement));
+            } else {
+                codegen.mov_imm(RegisterMapper::SCRATCH_REG, displacement);
+                codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
+            }
+        }
+    } else {
+        codegen.mov(dst_reg, base_reg);
+        codegen.mov_imm(RegisterMapper::SCRATCH_REG, scale);
+        codegen.mul(RegisterMapper::SCRATCH_REG, index_reg, RegisterMapper::SCRATCH_REG);
+        codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
+        if (displacement != 0) {
+            if (displacement >= -256 && displacement < 256) {
+                codegen.add_imm(dst_reg, dst_reg, static_cast<s32>(displacement));
+            } else {
+                codegen.mov_imm(RegisterMapper::SCRATCH_REG, displacement);
+                codegen.add(dst_reg, dst_reg, RegisterMapper::SCRATCH_REG);
+            }
+        }
     }
 }
 
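To make the new fast paths concrete, here is a standalone sketch (not the emulator's code; the printed mnemonics are only approximate AArch64) of the decisions the rewritten address calculation makes for an operand of the form base + index*scale + disp: power-of-two scales fold into the shifted add, small displacements use add_imm, and anything else goes through the scratch register.

#include <cstdint>
#include <cstdio>

// Approximate the instruction shapes chosen above for base + index*scale + disp.
void describe_lowering(int scale, std::int64_t disp) {
    if (scale == 0) {
        scale = 1;
    }
    if (scale == 1) {
        std::printf("add dst, base, index\n");
    } else if (scale == 2 || scale == 4 || scale == 8) {
        int shift = (scale == 2) ? 1 : (scale == 4) ? 2 : 3;
        std::printf("add dst, base, index, lsl #%d\n", shift);
    } else {
        // Non-power-of-two scale: multiply through the scratch register.
        std::printf("mov dst, base\nmov tmp, #%d\nmul tmp, index, tmp\nadd dst, dst, tmp\n", scale);
    }
    if (disp != 0) {
        if (disp >= -256 && disp < 256) {
            std::printf("add dst, dst, #%lld\n", static_cast<long long>(disp));
        } else {
            std::printf("mov tmp, #%lld\nadd dst, dst, tmp\n", static_cast<long long>(disp));
        }
    }
}

int main() {
    describe_lowering(4, 16);    // e.g. [base + index*4 + 16]: one shifted add plus a small immediate
    describe_lowering(8, 4096);  // a large displacement falls back to the scratch register
}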