rsx/cfg/fp: Fix back-traversal for IF-ELSE pairs

This commit is contained in:
kd-11 2025-12-09 02:03:18 +03:00 committed by kd-11
parent 4dc52fde82
commit a412e34fa6
2 changed files with 152 additions and 8 deletions

View File

@ -1,4 +1,5 @@
#include "stdafx.h"
#include "RegisterDependencyPass.h"
#include "Emu/RSX/Program/Assembler/FPOpcodes.h"
#include "Emu/RSX/Program/RSXFragmentProgram.h"
@ -377,11 +378,6 @@ namespace rsx::assembler::FP
void insert_block_register_dependency(DependencyPassContext& ctx, BasicBlock* block, const std::unordered_set<u32>& lanes, bool f16)
{
if (block->pred.empty())
{
return;
}
std::unordered_set<u32> clobbered_lanes;
std::unordered_set<u32> lanes_to_search;
@ -389,6 +385,18 @@ namespace rsx::assembler::FP
{
auto target = back_edge.from;
// Quick check - if we've reached an IF-ELSE anchor, don't traverse upwards.
// The IF and ELSE edges are already a complete set and will bre processed before this node.
if (back_edge.type == EdgeType::ENDIF &&
&back_edge == &block->pred.back() &&
target->succ.size() == 3 &&
target->succ[1].type == EdgeType::ELSE &&
target->succ[2].type == EdgeType::ENDIF &&
target->succ[2].to == block)
{
return;
}
// Did this target even clobber our register?
ensure(ctx.exec_register_map.find(target) != ctx.exec_register_map.end(), "Block has not been pre-processed");
@ -429,15 +437,14 @@ namespace rsx::assembler::FP
if (lanes_to_search.empty())
{
break;
continue;
}
// We have some missing lanes. Search upwards
if (!target->pred.empty())
{
// We only need to search the last predecessor which is the true "root" of the branch
auto parent = target->pred.back().from;
insert_block_register_dependency(ctx, parent, lanes_to_search, f16);
insert_block_register_dependency(ctx, target, lanes_to_search, f16);
}
}
}

View File

@ -594,4 +594,141 @@ namespace rsx::assembler
// Delay slot detection will cause no dependency injection
ASSERT_EQ(block.instructions.size(), 3);
}
TEST(TestFPIR, RegisterDependencyPass_Skip_IF_ELSE_Ancestors)
{
// R4/H8 is clobbered but an IF-ELSE chain follows it.
// Merge block reads H8, but since both IF-ELSE legs resolve the dependency, we do not need a barrier for H8.
// H6 is included as a control.
auto ir = FPIR::from_source(R"(
MOV R4, #{ 0.25 }
MOV H6.x, #{ 0.125 }
IF.LT
MOV H8, #{ 0.0 }
ELSE
MOV H8, #{ 0.25 }
ENDIF
ADD R0, R3, H8
)");
auto bytecode = ir.compile();
RSXFragmentProgram prog{};
prog.data = bytecode.data();
auto graph = deconstruct_fragment_program(prog);
// Verify state before
ASSERT_EQ(graph.blocks.size(), 4);
EXPECT_EQ(get_graph_block(graph, 0)->instructions.size(), 3);
EXPECT_EQ(get_graph_block(graph, 1)->instructions.size(), 1);
EXPECT_EQ(get_graph_block(graph, 2)->instructions.size(), 1);
EXPECT_EQ(get_graph_block(graph, 3)->instructions.size(), 1);
FP::RegisterAnnotationPass annotation_pass{ prog, {.skip_delay_slots = true } };
FP::RegisterDependencyPass deps_pass{};
annotation_pass.run(graph);
deps_pass.run(graph);
// We get one barrier on R3 (H6) but nont for R4 (H8)
EXPECT_EQ(get_graph_block(graph, 0)->epilogue.size(), 1);
// No intra-block barriers
EXPECT_EQ(get_graph_block(graph, 0)->instructions.size(), 3);
EXPECT_EQ(get_graph_block(graph, 1)->instructions.size(), 1);
EXPECT_EQ(get_graph_block(graph, 2)->instructions.size(), 1);
EXPECT_EQ(get_graph_block(graph, 3)->instructions.size(), 1);
}
TEST(TestFPIR, RegisterDependencyPass_Process_IF_Ancestors)
{
// H8.x is clobbered but only an IF sequence follows with no ELSE.
// Merge block reads r4.x, but since both IF-ELSE legs resolve the dependency, we do not need a barrier.
auto ir = FPIR::from_source(R"(
MOV H8.x, #{ 0.25 }
IF.LT
MOV R4.x, #{ 0.0 }
ENDIF
MOV R0, R4
)");
auto bytecode = ir.compile();
RSXFragmentProgram prog{};
prog.data = bytecode.data();
auto graph = deconstruct_fragment_program(prog);
// Verify state before
ASSERT_EQ(graph.blocks.size(), 3);
EXPECT_EQ(get_graph_block(graph, 0)->instructions.size(), 2);
EXPECT_EQ(get_graph_block(graph, 1)->instructions.size(), 1);
EXPECT_EQ(get_graph_block(graph, 2)->instructions.size(), 1);
FP::RegisterAnnotationPass annotation_pass{ prog, {.skip_delay_slots = true } };
FP::RegisterDependencyPass deps_pass{};
annotation_pass.run(graph);
deps_pass.run(graph);
// A barrier will be inserted into block 0 epilogue
EXPECT_EQ(get_graph_block(graph, 0)->instructions.size(), 2);
EXPECT_EQ(get_graph_block(graph, 1)->instructions.size(), 1);
EXPECT_EQ(get_graph_block(graph, 2)->instructions.size(), 1);
EXPECT_EQ(get_graph_block(graph, 0)->epilogue.size(), 1);
EXPECT_EQ(get_graph_block(graph, 1)->epilogue.size(), 0);
EXPECT_EQ(get_graph_block(graph, 2)->epilogue.size(), 0);
}
TEST(TestFPIR, RegisterDependencyPass_Complex_IF_ELSE_Ancestor_Clobber)
{
// 2 clobbered registers up the chain.
// 1 full barrier is needed for R4 (4 instructions)
auto ir = FPIR::from_source(R"(
MOV R4, #{ 0.0 }
IF.LT
MOV H9, #{ 0.25 }
ENDIF
MOV H8, #{ 0.25 }
IF.LT
IF.GT
ADD R0, R0, R0
ELSE
ADD R0, R1, R0
ENDIF
ENDIF
ADD R0, R0, R4
)");
auto bytecode = ir.compile();
RSXFragmentProgram prog{};
prog.data = bytecode.data();
auto graph = deconstruct_fragment_program(prog);
// Verify state before
ASSERT_EQ(graph.blocks.size(), 7);
EXPECT_EQ(get_graph_block(graph, 0)->instructions.size(), 2);
EXPECT_EQ(get_graph_block(graph, 1)->instructions.size(), 1);
EXPECT_EQ(get_graph_block(graph, 2)->instructions.size(), 2);
EXPECT_EQ(get_graph_block(graph, 3)->instructions.size(), 1);
EXPECT_EQ(get_graph_block(graph, 4)->instructions.size(), 1);
EXPECT_EQ(get_graph_block(graph, 5)->instructions.size(), 1);
EXPECT_EQ(get_graph_block(graph, 6)->instructions.size(), 1);
FP::RegisterAnnotationPass annotation_pass{ prog, {.skip_delay_slots = true } };
FP::RegisterDependencyPass deps_pass{};
annotation_pass.run(graph);
deps_pass.run(graph);
// Full-lane barrier on writing blocks
EXPECT_EQ(get_graph_block(graph, 1)->epilogue.size(), 2);
EXPECT_EQ(get_graph_block(graph, 2)->epilogue.size(), 2);
EXPECT_EQ(get_graph_block(graph, 0)->instructions.size(), 2);
EXPECT_EQ(get_graph_block(graph, 1)->instructions.size(), 1);
EXPECT_EQ(get_graph_block(graph, 2)->instructions.size(), 2);
EXPECT_EQ(get_graph_block(graph, 3)->instructions.size(), 1);
EXPECT_EQ(get_graph_block(graph, 4)->instructions.size(), 1);
EXPECT_EQ(get_graph_block(graph, 5)->instructions.size(), 1);
EXPECT_EQ(get_graph_block(graph, 6)->instructions.size(), 1);
}
}