rsx/cfg: Implement dependency injection pass with branches

This commit is contained in:
kd-11 2025-12-03 14:16:37 +03:00 committed by kd-11
parent 8ff3dda5e8
commit 856eaac1b6
2 changed files with 208 additions and 6 deletions

View File

@ -3,6 +3,7 @@
#include "Emu/RSX/Program/Assembler/FPOpcodes.h" #include "Emu/RSX/Program/Assembler/FPOpcodes.h"
#include "Emu/RSX/Program/RSXFragmentProgram.h" #include "Emu/RSX/Program/RSXFragmentProgram.h"
#include <unordered_map>
#include <unordered_set> #include <unordered_set>
namespace rsx::assembler::FP namespace rsx::assembler::FP
@ -13,6 +14,14 @@ namespace rsx::assembler::FP
static constexpr char content_float16 = 'H'; static constexpr char content_float16 = 'H';
static constexpr char content_dual = 'D'; static constexpr char content_dual = 'D';
using register_file_t = std::array<char, register_file_length>;
struct DependencyPassContext
{
std::unordered_map<BasicBlock*, register_file_t> exec_register_map;
std::unordered_map<BasicBlock*, register_file_t> sync_register_map;
};
std::vector<RegisterRef> decode_lanes16(const std::unordered_set<u32>& lanes) std::vector<RegisterRef> decode_lanes16(const std::unordered_set<u32>& lanes)
{ {
std::vector<RegisterRef> result; std::vector<RegisterRef> result;
@ -193,9 +202,23 @@ namespace rsx::assembler::FP
return result; return result;
} }
// Builds the barrier instructions needed to synchronize a set of register-file lanes.
// `lanes` is decoded into register references with decode_lanes16 when `f16` is true and
// decode_lanes32 otherwise; the output of build_barrier16/build_barrier32 for each
// reference is appended to the returned list in decode order.
void insert_dependency_barriers(BasicBlock* block) std::vector<Instruction> resolve_dependencies(const std::unordered_set<u32>& lanes, bool f16)
{
std::array<char, register_file_length> register_file; std::vector<Instruction> result;
// Pick the lane decoder matching the access width.
const auto regs = (f16 ? decode_lanes16 : decode_lanes32)(lanes);
for (const auto& ref : regs)
{
// One barrier sequence per decoded register reference.
auto instructions = (f16 ? build_barrier16 : build_barrier32)(ref);
result.insert(result.end(), instructions.begin(), instructions.end());
}
return result;
}
void insert_dependency_barriers(DependencyPassContext& ctx, BasicBlock* block)
{
register_file_t& register_file = ctx.exec_register_map[block];
std::memset(register_file.data(), content_unknown, register_file_length); std::memset(register_file.data(), content_unknown, register_file_length);
std::unordered_set<u32> barrier16; std::unordered_set<u32> barrier16;
@ -275,14 +298,109 @@ namespace rsx::assembler::FP
} }
} }
// Inserts cross-block barriers for the register-file lanes that `block` reads.
//
// Every predecessor edge of `block` is examined against the register file recorded for
// that predecessor in ctx.exec_register_map:
//  - lanes whose recorded contents have the opposite width of the read (32-bit contents
//    for an f16 read, 16-bit contents otherwise) get barrier instructions appended to the
//    predecessor's epilogue. ctx.sync_register_map remembers such lanes so each lane is
//    synchronized at most once per predecessor.
//  - lanes the predecessor left as content_unknown are searched for further up the graph.
//
// ctx   - pass state; exec_register_map must already contain every predecessor visited.
// block - block whose predecessors are examined; nothing happens when it has none.
// lanes - register-file lane indices read by the block.
// f16   - true when the read is a half-precision access.
void insert_block_register_dependency(DependencyPassContext& ctx, BasicBlock* block, const std::unordered_set<u32>& lanes, bool f16)
{
	if (block->pred.empty())
	{
		return;
	}

	// The content type that invalidates this read. It only depends on the access width.
	const auto clobber_type = f16 ? content_float32 : content_float16;

	std::unordered_set<u32> clobbered_lanes;
	std::unordered_set<u32> lanes_to_search;

	for (auto& back_edge : block->pred)
	{
		auto target = back_edge.from;

		// Did this target even clobber our register?
		const auto exec_entry = ctx.exec_register_map.find(target);
		ensure(exec_entry != ctx.exec_register_map.end(), "Block has not been pre-processed");
		const register_file_t& exec_register_file = exec_entry->second;

		// Create the sync record on first use; a fresh record starts with every lane unknown.
		const auto [sync_entry, is_new_entry] = ctx.sync_register_map.try_emplace(target);
		register_file_t& sync_register_file = sync_entry->second;
		if (is_new_entry)
		{
			std::memset(sync_register_file.data(), content_unknown, register_file_length);
		}

		lanes_to_search.clear();
		clobbered_lanes.clear();

		for (const auto& lane : lanes)
		{
			const char contents = exec_register_file[lane];

			if (contents == clobber_type &&
				sync_register_file[lane] == content_unknown)
			{
				clobbered_lanes.insert(lane);
				sync_register_file[lane] = content_dual;
				continue;
			}

			if (contents == content_unknown)
			{
				lanes_to_search.insert(lane);
			}
		}

		if (!clobbered_lanes.empty())
		{
			const auto instructions = resolve_dependencies(clobbered_lanes, f16);
			target->epilogue.insert(target->epilogue.end(), instructions.begin(), instructions.end());
		}

		if (lanes_to_search.empty())
		{
			// Nothing left to look for above this predecessor. The remaining predecessors
			// are separate control-flow paths into `block` and may still hold clobbered
			// lanes, so keep iterating instead of abandoning the whole loop.
			continue;
		}

		// We have some missing lanes. Search upwards
		if (!target->pred.empty())
		{
			// We only need to search the last predecessor which is the true "root" of the branch
			// NOTE(review): the recursive call inspects the predecessors of `parent`, not
			// `parent` itself. That is only sufficient if `parent` is also a direct
			// predecessor of `block` - confirm for shapes other than a plain IF.
			auto parent = target->pred.back().from;
			insert_block_register_dependency(ctx, parent, lanes_to_search, f16);
		}
	}
}
// Resolves cross-block dependencies for every register the block takes as input.
// Each input reference is expanded into the set of register-file lanes it covers,
// and that set is handed to insert_block_register_dependency together with the
// width (f16 or not) of the reference.
void insert_block_dependencies(DependencyPassContext& ctx, BasicBlock* block)
{
	for (auto& input : block->input_list)
	{
		const RegisterRef& input_ref = input;

		// Collect the lanes covered by this input register.
		const auto file_range = get_register_file_range(input_ref);
		std::unordered_set<u32> input_lanes;
		for (const auto& lane : file_range)
		{
			input_lanes.insert(lane);
		}

		insert_block_register_dependency(ctx, block, input_lanes, input.reg.f16);
	}
}
// Runs the register dependency pass over the whole flow graph in two stages that share
// one DependencyPassContext:
//  1. every block is processed on its own by insert_dependency_barriers;
//  2. the blocks are walked back to front and insert_block_dependencies is run on each.
void RegisterDependencyPass::run(FlowGraph& graph) void RegisterDependencyPass::run(FlowGraph& graph)
{ {
// Per-run scratch state; discarded when the pass returns.
DependencyPassContext ctx{};
// First, run intra-block dependency // First, run intra-block dependency
for (auto& block : graph.blocks) for (auto& block : graph.blocks)
{ {
insert_dependency_barriers(&block); insert_dependency_barriers(ctx, &block);
} }
// TODO: Create prologue/epilogue instructions // Then, create prologue/epilogue instructions
// Traverse the list in reverse order to bubble up dependencies correctly.
// Stage 1 must be complete for all blocks first: the cross-block stage asserts that
// each predecessor it visits already has an entry in ctx.exec_register_map.
for (auto it = graph.blocks.rbegin(); it != graph.blocks.rend(); ++it)
{
insert_block_dependencies(ctx, &(*it));
}
} }
} }

View File

@ -48,6 +48,15 @@ namespace rsx::assembler
bb.instructions = ir.build(); bb.instructions = ir.build();
return graph; return graph;
} }
// Test helper: assembles `asm_` and appends the resulting instructions to `graph`
// as a new basic block. Returns a pointer to the block stored inside graph->blocks.
// NOTE(review): callers hold several returned pointers at once, which presumably relies
// on graph->blocks keeping element addresses stable across appends - confirm its type.
static BasicBlock* BB_from_source(FlowGraph* graph, const std::string& asm_)
{
	auto program = FPIR::from_source(asm_);

	BasicBlock& appended = graph->blocks.emplace_back();
	appended.instructions = program.build();
	return &appended;
}
TEST(TestFPIR, FromSource) TEST(TestFPIR, FromSource)
{ {
auto ir = FPIR::from_source(R"( auto ir = FPIR::from_source(R"(
@ -232,8 +241,83 @@ namespace rsx::assembler
EXPECT_EQ(SRC0{ .HEX = block.instructions[3].bytecode[1] }.swizzle_y, 3); EXPECT_EQ(SRC0{ .HEX = block.instructions[3].bytecode[1] }.swizzle_y, 3);
} }
// Checks cross-block barrier placement for a single IF: bb0 is the ancestor, bb1 the IF body
// and bb2 the merge block that reads R3. The barrier is expected in bb1's epilogue.
TEST(TestFPIR, RegisterDependencyPass_Complex) TEST(TestFPIR, RegisterDependencyPass_Complex_IF_BothPredecessorsClobber)
{ {
// TODO: Multi-level block structure with nested IFs/LOOPs // Multi-level but only single IF
// Mockup of a simple lighting function, R0 = Light vector, R1 = Decompressed normal. DP4 used for simplicity.
// Data hazards sprinkled in for testing. R3 is clobbered in the ancestor and the IF branch.
// Barrier should go in the IF branch here.
FlowGraph graph;
// bb0: ancestor block, writes R3 and ends with the branch
BasicBlock* bb0 = BB_from_source(&graph, R"(
DP4 R2, R0, R1
SFL R3
SGT R3, R2, R0
IF.GE
)");
// bb1: IF body, writes H6
BasicBlock* bb1 = BB_from_source(&graph, R"(
ADD R0, R0, R2
MOV H6, #{ 0.25 }
)");
// bb2: merge block, reads R3
BasicBlock* bb2 = BB_from_source(&graph, R"(
ADD R0, R0, R3
MOV R1, R0
)");
// Front edges
bb0->insert_succ(bb1, EdgeType::IF);
bb0->insert_succ(bb2, EdgeType::ENDIF);
bb1->insert_succ(bb2, EdgeType::ENDIF);
// Back edges
// bb0 is inserted last into bb2's predecessor list
bb2->insert_pred(bb1, EdgeType::ENDIF);
bb2->insert_pred(bb0, EdgeType::ENDIF);
bb1->insert_pred(bb0, EdgeType::IF);
RSXFragmentProgram prog{};
FP::RegisterAnnotationPass annotation_pass{ prog };
FP::RegisterDependencyPass deps_pass{};
// Annotation runs first, then the dependency pass under test
annotation_pass.run(graph);
deps_pass.run(graph);
// Instruction counts are expected to be unchanged by the passes
ASSERT_EQ(bb0->instructions.size(), 4);
ASSERT_EQ(bb1->instructions.size(), 2);
ASSERT_EQ(bb2->instructions.size(), 2);
// bb1 has an epilogue
ASSERT_EQ(bb1->epilogue.size(), 2);
// bb1 epilogue updates R3.xy
// R3.x = packHalf2(H6.xy)
EXPECT_EQ(OPDEST{ .HEX = bb1->epilogue[0].bytecode[0] }.opcode, RSX_FP_OPCODE_PK2);
EXPECT_EQ(OPDEST{ .HEX = bb1->epilogue[0].bytecode[0] }.fp16, 0);
EXPECT_EQ(OPDEST{ .HEX = bb1->epilogue[0].bytecode[0] }.dest_reg, 3);
EXPECT_EQ(OPDEST{ .HEX = bb1->epilogue[0].bytecode[0] }.mask_x, true);
EXPECT_EQ(OPDEST{ .HEX = bb1->epilogue[0].bytecode[0] }.mask_y, false);
EXPECT_EQ(OPDEST{ .HEX = bb1->epilogue[0].bytecode[0] }.mask_z, false);
EXPECT_EQ(OPDEST{ .HEX = bb1->epilogue[0].bytecode[0] }.mask_w, false);
EXPECT_EQ(SRC0{ .HEX = bb1->epilogue[0].bytecode[1] }.reg_type, RSX_FP_REGISTER_TYPE_TEMP);
EXPECT_EQ(SRC0{ .HEX = bb1->epilogue[0].bytecode[1] }.tmp_reg_index, 6);
EXPECT_EQ(SRC0{ .HEX = bb1->epilogue[0].bytecode[1] }.fp16, 1);
EXPECT_EQ(SRC0{ .HEX = bb1->epilogue[0].bytecode[1] }.swizzle_x, 0);
EXPECT_EQ(SRC0{ .HEX = bb1->epilogue[0].bytecode[1] }.swizzle_y, 1);
// R3.y = packHalf2(H6.zw)
EXPECT_EQ(OPDEST{ .HEX = bb1->epilogue[1].bytecode[0] }.opcode, RSX_FP_OPCODE_PK2);
EXPECT_EQ(OPDEST{ .HEX = bb1->epilogue[1].bytecode[0] }.fp16, 0);
EXPECT_EQ(OPDEST{ .HEX = bb1->epilogue[1].bytecode[0] }.dest_reg, 3);
EXPECT_EQ(OPDEST{ .HEX = bb1->epilogue[1].bytecode[0] }.mask_x, false);
EXPECT_EQ(OPDEST{ .HEX = bb1->epilogue[1].bytecode[0] }.mask_y, true);
EXPECT_EQ(OPDEST{ .HEX = bb1->epilogue[1].bytecode[0] }.mask_z, false);
EXPECT_EQ(OPDEST{ .HEX = bb1->epilogue[1].bytecode[0] }.mask_w, false);
EXPECT_EQ(SRC0{ .HEX = bb1->epilogue[1].bytecode[1] }.reg_type, RSX_FP_REGISTER_TYPE_TEMP);
EXPECT_EQ(SRC0{ .HEX = bb1->epilogue[1].bytecode[1] }.tmp_reg_index, 6);
EXPECT_EQ(SRC0{ .HEX = bb1->epilogue[1].bytecode[1] }.fp16, 1);
EXPECT_EQ(SRC0{ .HEX = bb1->epilogue[1].bytecode[1] }.swizzle_x, 2);
EXPECT_EQ(SRC0{ .HEX = bb1->epilogue[1].bytecode[1] }.swizzle_y, 3);
} }
} }