mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-12-16 04:09:07 +00:00
rsx/cfg/fp: Fix back-traversal for IF-ELSE pairs
This commit is contained in:
parent
4dc52fde82
commit
a412e34fa6
@ -1,4 +1,5 @@
|
|||||||
#include "stdafx.h"
|
#include "stdafx.h"
|
||||||
|
|
||||||
#include "RegisterDependencyPass.h"
|
#include "RegisterDependencyPass.h"
|
||||||
#include "Emu/RSX/Program/Assembler/FPOpcodes.h"
|
#include "Emu/RSX/Program/Assembler/FPOpcodes.h"
|
||||||
#include "Emu/RSX/Program/RSXFragmentProgram.h"
|
#include "Emu/RSX/Program/RSXFragmentProgram.h"
|
||||||
@ -377,11 +378,6 @@ namespace rsx::assembler::FP
|
|||||||
|
|
||||||
void insert_block_register_dependency(DependencyPassContext& ctx, BasicBlock* block, const std::unordered_set<u32>& lanes, bool f16)
|
void insert_block_register_dependency(DependencyPassContext& ctx, BasicBlock* block, const std::unordered_set<u32>& lanes, bool f16)
|
||||||
{
|
{
|
||||||
if (block->pred.empty())
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::unordered_set<u32> clobbered_lanes;
|
std::unordered_set<u32> clobbered_lanes;
|
||||||
std::unordered_set<u32> lanes_to_search;
|
std::unordered_set<u32> lanes_to_search;
|
||||||
|
|
||||||
@ -389,6 +385,18 @@ namespace rsx::assembler::FP
|
|||||||
{
|
{
|
||||||
auto target = back_edge.from;
|
auto target = back_edge.from;
|
||||||
|
|
||||||
|
// Quick check - if we've reached an IF-ELSE anchor, don't traverse upwards.
|
||||||
|
// The IF and ELSE edges are already a complete set and will bre processed before this node.
|
||||||
|
if (back_edge.type == EdgeType::ENDIF &&
|
||||||
|
&back_edge == &block->pred.back() &&
|
||||||
|
target->succ.size() == 3 &&
|
||||||
|
target->succ[1].type == EdgeType::ELSE &&
|
||||||
|
target->succ[2].type == EdgeType::ENDIF &&
|
||||||
|
target->succ[2].to == block)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// Did this target even clobber our register?
|
// Did this target even clobber our register?
|
||||||
ensure(ctx.exec_register_map.find(target) != ctx.exec_register_map.end(), "Block has not been pre-processed");
|
ensure(ctx.exec_register_map.find(target) != ctx.exec_register_map.end(), "Block has not been pre-processed");
|
||||||
|
|
||||||
@ -429,15 +437,14 @@ namespace rsx::assembler::FP
|
|||||||
|
|
||||||
if (lanes_to_search.empty())
|
if (lanes_to_search.empty())
|
||||||
{
|
{
|
||||||
break;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// We have some missing lanes. Search upwards
|
// We have some missing lanes. Search upwards
|
||||||
if (!target->pred.empty())
|
if (!target->pred.empty())
|
||||||
{
|
{
|
||||||
// We only need to search the last predecessor which is the true "root" of the branch
|
// We only need to search the last predecessor which is the true "root" of the branch
|
||||||
auto parent = target->pred.back().from;
|
insert_block_register_dependency(ctx, target, lanes_to_search, f16);
|
||||||
insert_block_register_dependency(ctx, parent, lanes_to_search, f16);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -594,4 +594,141 @@ namespace rsx::assembler
|
|||||||
// Delay slot detection will cause no dependency injection
|
// Delay slot detection will cause no dependency injection
|
||||||
ASSERT_EQ(block.instructions.size(), 3);
|
ASSERT_EQ(block.instructions.size(), 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(TestFPIR, RegisterDependencyPass_Skip_IF_ELSE_Ancestors)
|
||||||
|
{
|
||||||
|
// R4/H8 is clobbered but an IF-ELSE chain follows it.
|
||||||
|
// Merge block reads H8, but since both IF-ELSE legs resolve the dependency, we do not need a barrier for H8.
|
||||||
|
// H6 is included as a control.
|
||||||
|
auto ir = FPIR::from_source(R"(
|
||||||
|
MOV R4, #{ 0.25 }
|
||||||
|
MOV H6.x, #{ 0.125 }
|
||||||
|
IF.LT
|
||||||
|
MOV H8, #{ 0.0 }
|
||||||
|
ELSE
|
||||||
|
MOV H8, #{ 0.25 }
|
||||||
|
ENDIF
|
||||||
|
ADD R0, R3, H8
|
||||||
|
)");
|
||||||
|
|
||||||
|
auto bytecode = ir.compile();
|
||||||
|
RSXFragmentProgram prog{};
|
||||||
|
prog.data = bytecode.data();
|
||||||
|
auto graph = deconstruct_fragment_program(prog);
|
||||||
|
|
||||||
|
// Verify state before
|
||||||
|
ASSERT_EQ(graph.blocks.size(), 4);
|
||||||
|
EXPECT_EQ(get_graph_block(graph, 0)->instructions.size(), 3);
|
||||||
|
EXPECT_EQ(get_graph_block(graph, 1)->instructions.size(), 1);
|
||||||
|
EXPECT_EQ(get_graph_block(graph, 2)->instructions.size(), 1);
|
||||||
|
EXPECT_EQ(get_graph_block(graph, 3)->instructions.size(), 1);
|
||||||
|
|
||||||
|
FP::RegisterAnnotationPass annotation_pass{ prog, {.skip_delay_slots = true } };
|
||||||
|
FP::RegisterDependencyPass deps_pass{};
|
||||||
|
|
||||||
|
annotation_pass.run(graph);
|
||||||
|
deps_pass.run(graph);
|
||||||
|
|
||||||
|
// We get one barrier on R3 (H6) but nont for R4 (H8)
|
||||||
|
EXPECT_EQ(get_graph_block(graph, 0)->epilogue.size(), 1);
|
||||||
|
|
||||||
|
// No intra-block barriers
|
||||||
|
EXPECT_EQ(get_graph_block(graph, 0)->instructions.size(), 3);
|
||||||
|
EXPECT_EQ(get_graph_block(graph, 1)->instructions.size(), 1);
|
||||||
|
EXPECT_EQ(get_graph_block(graph, 2)->instructions.size(), 1);
|
||||||
|
EXPECT_EQ(get_graph_block(graph, 3)->instructions.size(), 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(TestFPIR, RegisterDependencyPass_Process_IF_Ancestors)
|
||||||
|
{
|
||||||
|
// H8.x is clobbered but only an IF sequence follows with no ELSE.
|
||||||
|
// Merge block reads r4.x, but since both IF-ELSE legs resolve the dependency, we do not need a barrier.
|
||||||
|
auto ir = FPIR::from_source(R"(
|
||||||
|
MOV H8.x, #{ 0.25 }
|
||||||
|
IF.LT
|
||||||
|
MOV R4.x, #{ 0.0 }
|
||||||
|
ENDIF
|
||||||
|
MOV R0, R4
|
||||||
|
)");
|
||||||
|
|
||||||
|
auto bytecode = ir.compile();
|
||||||
|
RSXFragmentProgram prog{};
|
||||||
|
prog.data = bytecode.data();
|
||||||
|
auto graph = deconstruct_fragment_program(prog);
|
||||||
|
|
||||||
|
// Verify state before
|
||||||
|
ASSERT_EQ(graph.blocks.size(), 3);
|
||||||
|
EXPECT_EQ(get_graph_block(graph, 0)->instructions.size(), 2);
|
||||||
|
EXPECT_EQ(get_graph_block(graph, 1)->instructions.size(), 1);
|
||||||
|
EXPECT_EQ(get_graph_block(graph, 2)->instructions.size(), 1);
|
||||||
|
|
||||||
|
FP::RegisterAnnotationPass annotation_pass{ prog, {.skip_delay_slots = true } };
|
||||||
|
FP::RegisterDependencyPass deps_pass{};
|
||||||
|
|
||||||
|
annotation_pass.run(graph);
|
||||||
|
deps_pass.run(graph);
|
||||||
|
|
||||||
|
// A barrier will be inserted into block 0 epilogue
|
||||||
|
EXPECT_EQ(get_graph_block(graph, 0)->instructions.size(), 2);
|
||||||
|
EXPECT_EQ(get_graph_block(graph, 1)->instructions.size(), 1);
|
||||||
|
EXPECT_EQ(get_graph_block(graph, 2)->instructions.size(), 1);
|
||||||
|
|
||||||
|
EXPECT_EQ(get_graph_block(graph, 0)->epilogue.size(), 1);
|
||||||
|
EXPECT_EQ(get_graph_block(graph, 1)->epilogue.size(), 0);
|
||||||
|
EXPECT_EQ(get_graph_block(graph, 2)->epilogue.size(), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(TestFPIR, RegisterDependencyPass_Complex_IF_ELSE_Ancestor_Clobber)
|
||||||
|
{
|
||||||
|
// 2 clobbered registers up the chain.
|
||||||
|
// 1 full barrier is needed for R4 (4 instructions)
|
||||||
|
auto ir = FPIR::from_source(R"(
|
||||||
|
MOV R4, #{ 0.0 }
|
||||||
|
IF.LT
|
||||||
|
MOV H9, #{ 0.25 }
|
||||||
|
ENDIF
|
||||||
|
MOV H8, #{ 0.25 }
|
||||||
|
IF.LT
|
||||||
|
IF.GT
|
||||||
|
ADD R0, R0, R0
|
||||||
|
ELSE
|
||||||
|
ADD R0, R1, R0
|
||||||
|
ENDIF
|
||||||
|
ENDIF
|
||||||
|
ADD R0, R0, R4
|
||||||
|
)");
|
||||||
|
|
||||||
|
auto bytecode = ir.compile();
|
||||||
|
RSXFragmentProgram prog{};
|
||||||
|
prog.data = bytecode.data();
|
||||||
|
auto graph = deconstruct_fragment_program(prog);
|
||||||
|
|
||||||
|
// Verify state before
|
||||||
|
ASSERT_EQ(graph.blocks.size(), 7);
|
||||||
|
EXPECT_EQ(get_graph_block(graph, 0)->instructions.size(), 2);
|
||||||
|
EXPECT_EQ(get_graph_block(graph, 1)->instructions.size(), 1);
|
||||||
|
EXPECT_EQ(get_graph_block(graph, 2)->instructions.size(), 2);
|
||||||
|
EXPECT_EQ(get_graph_block(graph, 3)->instructions.size(), 1);
|
||||||
|
EXPECT_EQ(get_graph_block(graph, 4)->instructions.size(), 1);
|
||||||
|
EXPECT_EQ(get_graph_block(graph, 5)->instructions.size(), 1);
|
||||||
|
EXPECT_EQ(get_graph_block(graph, 6)->instructions.size(), 1);
|
||||||
|
|
||||||
|
FP::RegisterAnnotationPass annotation_pass{ prog, {.skip_delay_slots = true } };
|
||||||
|
FP::RegisterDependencyPass deps_pass{};
|
||||||
|
|
||||||
|
annotation_pass.run(graph);
|
||||||
|
deps_pass.run(graph);
|
||||||
|
|
||||||
|
// Full-lane barrier on writing blocks
|
||||||
|
EXPECT_EQ(get_graph_block(graph, 1)->epilogue.size(), 2);
|
||||||
|
EXPECT_EQ(get_graph_block(graph, 2)->epilogue.size(), 2);
|
||||||
|
|
||||||
|
EXPECT_EQ(get_graph_block(graph, 0)->instructions.size(), 2);
|
||||||
|
EXPECT_EQ(get_graph_block(graph, 1)->instructions.size(), 1);
|
||||||
|
EXPECT_EQ(get_graph_block(graph, 2)->instructions.size(), 2);
|
||||||
|
EXPECT_EQ(get_graph_block(graph, 3)->instructions.size(), 1);
|
||||||
|
EXPECT_EQ(get_graph_block(graph, 4)->instructions.size(), 1);
|
||||||
|
EXPECT_EQ(get_graph_block(graph, 5)->instructions.size(), 1);
|
||||||
|
EXPECT_EQ(get_graph_block(graph, 6)->instructions.size(), 1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user