From a412e34fa6f2364df74f8396be07c489fde41e6c Mon Sep 17 00:00:00 2001 From: kd-11 Date: Tue, 9 Dec 2025 02:03:18 +0300 Subject: [PATCH] rsx/cfg/fp: Fix back-traversal for IF-ELSE pairs --- .../Passes/FP/RegisterDependencyPass.cpp | 23 ++- rpcs3/tests/test_rsx_fp_asm.cpp | 137 ++++++++++++++++++ 2 files changed, 152 insertions(+), 8 deletions(-) diff --git a/rpcs3/Emu/RSX/Program/Assembler/Passes/FP/RegisterDependencyPass.cpp b/rpcs3/Emu/RSX/Program/Assembler/Passes/FP/RegisterDependencyPass.cpp index 01cfc6ece6..971f485d62 100644 --- a/rpcs3/Emu/RSX/Program/Assembler/Passes/FP/RegisterDependencyPass.cpp +++ b/rpcs3/Emu/RSX/Program/Assembler/Passes/FP/RegisterDependencyPass.cpp @@ -1,4 +1,5 @@ #include "stdafx.h" + #include "RegisterDependencyPass.h" #include "Emu/RSX/Program/Assembler/FPOpcodes.h" #include "Emu/RSX/Program/RSXFragmentProgram.h" @@ -377,11 +378,6 @@ namespace rsx::assembler::FP void insert_block_register_dependency(DependencyPassContext& ctx, BasicBlock* block, const std::unordered_set& lanes, bool f16) { - if (block->pred.empty()) - { - return; - } - std::unordered_set clobbered_lanes; std::unordered_set lanes_to_search; @@ -389,6 +385,18 @@ namespace rsx::assembler::FP { auto target = back_edge.from; + // Quick check - if we've reached an IF-ELSE anchor, don't traverse upwards. + // The IF and ELSE edges are already a complete set and will bre processed before this node. + if (back_edge.type == EdgeType::ENDIF && + &back_edge == &block->pred.back() && + target->succ.size() == 3 && + target->succ[1].type == EdgeType::ELSE && + target->succ[2].type == EdgeType::ENDIF && + target->succ[2].to == block) + { + return; + } + // Did this target even clobber our register? ensure(ctx.exec_register_map.find(target) != ctx.exec_register_map.end(), "Block has not been pre-processed"); @@ -429,15 +437,14 @@ namespace rsx::assembler::FP if (lanes_to_search.empty()) { - break; + continue; } // We have some missing lanes. Search upwards if (!target->pred.empty()) { // We only need to search the last predecessor which is the true "root" of the branch - auto parent = target->pred.back().from; - insert_block_register_dependency(ctx, parent, lanes_to_search, f16); + insert_block_register_dependency(ctx, target, lanes_to_search, f16); } } } diff --git a/rpcs3/tests/test_rsx_fp_asm.cpp b/rpcs3/tests/test_rsx_fp_asm.cpp index c30f5ff172..d14dfacae4 100644 --- a/rpcs3/tests/test_rsx_fp_asm.cpp +++ b/rpcs3/tests/test_rsx_fp_asm.cpp @@ -594,4 +594,141 @@ namespace rsx::assembler // Delay slot detection will cause no dependency injection ASSERT_EQ(block.instructions.size(), 3); } + + TEST(TestFPIR, RegisterDependencyPass_Skip_IF_ELSE_Ancestors) + { + // R4/H8 is clobbered but an IF-ELSE chain follows it. + // Merge block reads H8, but since both IF-ELSE legs resolve the dependency, we do not need a barrier for H8. + // H6 is included as a control. + auto ir = FPIR::from_source(R"( + MOV R4, #{ 0.25 } + MOV H6.x, #{ 0.125 } + IF.LT + MOV H8, #{ 0.0 } + ELSE + MOV H8, #{ 0.25 } + ENDIF + ADD R0, R3, H8 + )"); + + auto bytecode = ir.compile(); + RSXFragmentProgram prog{}; + prog.data = bytecode.data(); + auto graph = deconstruct_fragment_program(prog); + + // Verify state before + ASSERT_EQ(graph.blocks.size(), 4); + EXPECT_EQ(get_graph_block(graph, 0)->instructions.size(), 3); + EXPECT_EQ(get_graph_block(graph, 1)->instructions.size(), 1); + EXPECT_EQ(get_graph_block(graph, 2)->instructions.size(), 1); + EXPECT_EQ(get_graph_block(graph, 3)->instructions.size(), 1); + + FP::RegisterAnnotationPass annotation_pass{ prog, {.skip_delay_slots = true } }; + FP::RegisterDependencyPass deps_pass{}; + + annotation_pass.run(graph); + deps_pass.run(graph); + + // We get one barrier on R3 (H6) but nont for R4 (H8) + EXPECT_EQ(get_graph_block(graph, 0)->epilogue.size(), 1); + + // No intra-block barriers + EXPECT_EQ(get_graph_block(graph, 0)->instructions.size(), 3); + EXPECT_EQ(get_graph_block(graph, 1)->instructions.size(), 1); + EXPECT_EQ(get_graph_block(graph, 2)->instructions.size(), 1); + EXPECT_EQ(get_graph_block(graph, 3)->instructions.size(), 1); + } + + TEST(TestFPIR, RegisterDependencyPass_Process_IF_Ancestors) + { + // H8.x is clobbered but only an IF sequence follows with no ELSE. + // Merge block reads r4.x, but since both IF-ELSE legs resolve the dependency, we do not need a barrier. + auto ir = FPIR::from_source(R"( + MOV H8.x, #{ 0.25 } + IF.LT + MOV R4.x, #{ 0.0 } + ENDIF + MOV R0, R4 + )"); + + auto bytecode = ir.compile(); + RSXFragmentProgram prog{}; + prog.data = bytecode.data(); + auto graph = deconstruct_fragment_program(prog); + + // Verify state before + ASSERT_EQ(graph.blocks.size(), 3); + EXPECT_EQ(get_graph_block(graph, 0)->instructions.size(), 2); + EXPECT_EQ(get_graph_block(graph, 1)->instructions.size(), 1); + EXPECT_EQ(get_graph_block(graph, 2)->instructions.size(), 1); + + FP::RegisterAnnotationPass annotation_pass{ prog, {.skip_delay_slots = true } }; + FP::RegisterDependencyPass deps_pass{}; + + annotation_pass.run(graph); + deps_pass.run(graph); + + // A barrier will be inserted into block 0 epilogue + EXPECT_EQ(get_graph_block(graph, 0)->instructions.size(), 2); + EXPECT_EQ(get_graph_block(graph, 1)->instructions.size(), 1); + EXPECT_EQ(get_graph_block(graph, 2)->instructions.size(), 1); + + EXPECT_EQ(get_graph_block(graph, 0)->epilogue.size(), 1); + EXPECT_EQ(get_graph_block(graph, 1)->epilogue.size(), 0); + EXPECT_EQ(get_graph_block(graph, 2)->epilogue.size(), 0); + } + + TEST(TestFPIR, RegisterDependencyPass_Complex_IF_ELSE_Ancestor_Clobber) + { + // 2 clobbered registers up the chain. + // 1 full barrier is needed for R4 (4 instructions) + auto ir = FPIR::from_source(R"( + MOV R4, #{ 0.0 } + IF.LT + MOV H9, #{ 0.25 } + ENDIF + MOV H8, #{ 0.25 } + IF.LT + IF.GT + ADD R0, R0, R0 + ELSE + ADD R0, R1, R0 + ENDIF + ENDIF + ADD R0, R0, R4 + )"); + + auto bytecode = ir.compile(); + RSXFragmentProgram prog{}; + prog.data = bytecode.data(); + auto graph = deconstruct_fragment_program(prog); + + // Verify state before + ASSERT_EQ(graph.blocks.size(), 7); + EXPECT_EQ(get_graph_block(graph, 0)->instructions.size(), 2); + EXPECT_EQ(get_graph_block(graph, 1)->instructions.size(), 1); + EXPECT_EQ(get_graph_block(graph, 2)->instructions.size(), 2); + EXPECT_EQ(get_graph_block(graph, 3)->instructions.size(), 1); + EXPECT_EQ(get_graph_block(graph, 4)->instructions.size(), 1); + EXPECT_EQ(get_graph_block(graph, 5)->instructions.size(), 1); + EXPECT_EQ(get_graph_block(graph, 6)->instructions.size(), 1); + + FP::RegisterAnnotationPass annotation_pass{ prog, {.skip_delay_slots = true } }; + FP::RegisterDependencyPass deps_pass{}; + + annotation_pass.run(graph); + deps_pass.run(graph); + + // Full-lane barrier on writing blocks + EXPECT_EQ(get_graph_block(graph, 1)->epilogue.size(), 2); + EXPECT_EQ(get_graph_block(graph, 2)->epilogue.size(), 2); + + EXPECT_EQ(get_graph_block(graph, 0)->instructions.size(), 2); + EXPECT_EQ(get_graph_block(graph, 1)->instructions.size(), 1); + EXPECT_EQ(get_graph_block(graph, 2)->instructions.size(), 2); + EXPECT_EQ(get_graph_block(graph, 3)->instructions.size(), 1); + EXPECT_EQ(get_graph_block(graph, 4)->instructions.size(), 1); + EXPECT_EQ(get_graph_block(graph, 5)->instructions.size(), 1); + EXPECT_EQ(get_graph_block(graph, 6)->instructions.size(), 1); + } }