mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-12-16 04:09:07 +00:00
rsx/cfg: Implement dependency injection pass with branches
This commit is contained in:
parent
8ff3dda5e8
commit
856eaac1b6
@ -3,6 +3,7 @@
|
||||
#include "Emu/RSX/Program/Assembler/FPOpcodes.h"
|
||||
#include "Emu/RSX/Program/RSXFragmentProgram.h"
|
||||
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
|
||||
namespace rsx::assembler::FP
|
||||
@ -13,6 +14,14 @@ namespace rsx::assembler::FP
|
||||
static constexpr char content_float16 = 'H';
|
||||
static constexpr char content_dual = 'D';
|
||||
|
||||
using register_file_t = std::array<char, register_file_length>;
|
||||
|
||||
struct DependencyPassContext
|
||||
{
|
||||
std::unordered_map<BasicBlock*, register_file_t> exec_register_map;
|
||||
std::unordered_map<BasicBlock*, register_file_t> sync_register_map;
|
||||
};
|
||||
|
||||
std::vector<RegisterRef> decode_lanes16(const std::unordered_set<u32>& lanes)
|
||||
{
|
||||
std::vector<RegisterRef> result;
|
||||
@ -193,9 +202,23 @@ namespace rsx::assembler::FP
|
||||
return result;
|
||||
}
|
||||
|
||||
void insert_dependency_barriers(BasicBlock* block)
|
||||
std::vector<Instruction> resolve_dependencies(const std::unordered_set<u32>& lanes, bool f16)
|
||||
{
|
||||
std::array<char, register_file_length> register_file;
|
||||
std::vector<Instruction> result;
|
||||
|
||||
const auto regs = (f16 ? decode_lanes16 : decode_lanes32)(lanes);
|
||||
for (const auto& ref : regs)
|
||||
{
|
||||
auto instructions = (f16 ? build_barrier16 : build_barrier32)(ref);
|
||||
result.insert(result.end(), instructions.begin(), instructions.end());
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void insert_dependency_barriers(DependencyPassContext& ctx, BasicBlock* block)
|
||||
{
|
||||
register_file_t& register_file = ctx.exec_register_map[block];
|
||||
std::memset(register_file.data(), content_unknown, register_file_length);
|
||||
|
||||
std::unordered_set<u32> barrier16;
|
||||
@ -275,14 +298,109 @@ namespace rsx::assembler::FP
|
||||
}
|
||||
}
|
||||
|
||||
// Resolves cross-block hazards for the given lanes as seen from 'block'.
//
// Every predecessor edge of 'block' is inspected. Lanes that the predecessor
// left in the opposite precision (and that were not synchronised before) get
// barrier instructions appended to that predecessor's epilogue. Lanes the
// predecessor did not touch at all are searched for further up the graph.
//
// ctx:   pass state; exec_register_map must already contain every visited
//        predecessor, sync_register_map is filled in lazily here.
// block: block whose predecessors are examined. Nothing happens if it has none.
// lanes: register file lane indices read by the consumer.
// f16:   true when the consumer reads the lanes as 16-bit registers.
void insert_block_register_dependency(DependencyPassContext& ctx, BasicBlock* block, const std::unordered_set<u32>& lanes, bool f16)
{
    if (block->pred.empty())
    {
        return;
    }

    // A lane is clobbered when it was last written in the other precision.
    const auto clobber_type = f16 ? content_float32 : content_float16;

    std::unordered_set<u32> clobbered_lanes;
    std::unordered_set<u32> lanes_to_search;

    for (auto& back_edge : block->pred)
    {
        auto target = back_edge.from;

        // Did this target even clobber our register?
        const auto exec_it = ctx.exec_register_map.find(target);
        ensure(exec_it != ctx.exec_register_map.end(), "Block has not been pre-processed");
        const register_file_t& exec_register_file = exec_it->second;

        // Single lookup: create the sync record on first visit, reuse it afterwards.
        // References into an unordered_map stay valid when the recursion below inserts.
        const auto [sync_it, inserted] = ctx.sync_register_map.try_emplace(target);
        register_file_t& sync_register_file = sync_it->second;
        if (inserted)
        {
            std::memset(sync_register_file.data(), content_unknown, register_file_length);
        }

        lanes_to_search.clear();
        clobbered_lanes.clear();

        for (const auto& lane : lanes)
        {
            if (exec_register_file[lane] == clobber_type &&
                sync_register_file[lane] == content_unknown)
            {
                // Remember the lane as synchronised so it gets only one barrier.
                clobbered_lanes.insert(lane);
                sync_register_file[lane] = content_dual;
                continue;
            }

            if (exec_register_file[lane] == content_unknown)
            {
                // Not written by this predecessor; the value comes from further up.
                lanes_to_search.insert(lane);
            }
        }

        if (!clobbered_lanes.empty())
        {
            const auto instructions = resolve_dependencies(clobbered_lanes, f16);
            target->epilogue.insert(target->epilogue.end(), instructions.begin(), instructions.end());
        }

        if (lanes_to_search.empty())
        {
            // NOTE(review): this also stops scanning the remaining predecessors of
            // 'block'. Confirm that a later predecessor can never hold clobbered lanes.
            break;
        }

        // We have some missing lanes. Search upwards
        if (!target->pred.empty())
        {
            // We only need to search the last predecessor which is the true "root" of the branch
            // NOTE(review): the recursive call inspects the predecessors of 'parent',
            // not the register file of 'parent' itself. Confirm that is intended.
            auto parent = target->pred.back().from;
            insert_block_register_dependency(ctx, parent, lanes_to_search, f16);
        }
    }
}
|
||||
|
||||
// Runs the cross-block dependency search once for every register listed in
// block->input_list, using the register's own precision flag.
void insert_block_dependencies(DependencyPassContext& ctx, BasicBlock* block)
{
    // Collects the register file range of a reference into a lane set.
    const auto lanes_of = [](const RegisterRef& ref)
    {
        std::unordered_set<u32> lanes;

        const auto file_range = get_register_file_range(ref);
        for (const auto& lane : file_range)
        {
            lanes.insert(lane);
        }

        return lanes;
    };

    for (auto& input : block->input_list)
    {
        const auto input_lanes = lanes_of(input);
        insert_block_register_dependency(ctx, block, input_lanes, input.reg.f16);
    }
}
|
||||
|
||||
// Runs the register dependency pass over every block of the graph.
//
// Stage 1 processes each block on its own and records its register file in
// the pass context. Stage 2 walks the blocks back to front and resolves
// dependencies between a block's inputs and its predecessors.
void RegisterDependencyPass::run(FlowGraph& graph)
{
    DependencyPassContext ctx{};

    // First, run intra-block dependency
    for (auto& block : graph.blocks)
    {
        insert_dependency_barriers(ctx, &block);
    }

    // Then, resolve inter-block dependencies.
    // Traverse the list in reverse order to bubble up dependencies correctly.
    for (auto it = graph.blocks.rbegin(); it != graph.blocks.rend(); ++it)
    {
        insert_block_dependencies(ctx, &(*it));
    }
}
|
||||
}
|
||||
|
||||
@ -48,6 +48,15 @@ namespace rsx::assembler
|
||||
bb.instructions = ir.build();
|
||||
return graph;
|
||||
}
|
||||
|
||||
// Assembles 'asm_' and appends the result to 'graph' as a new basic block.
// Returns a pointer to the appended block, which is owned by the graph.
// NOTE(review): callers keep the returned pointer across later appends; this
// assumes graph->blocks does not relocate its elements - confirm container type.
static BasicBlock* BB_from_source(FlowGraph* graph, const std::string& asm_)
{
    auto program = FPIR::from_source(asm_);

    graph->blocks.push_back({});
    BasicBlock* const appended = &graph->blocks.back();
    appended->instructions = program.build();

    return appended;
}
|
||||
TEST(TestFPIR, FromSource)
|
||||
{
|
||||
auto ir = FPIR::from_source(R"(
|
||||
@ -232,8 +241,83 @@ namespace rsx::assembler
|
||||
EXPECT_EQ(SRC0{ .HEX = block.instructions[3].bytecode[1] }.swizzle_y, 3);
|
||||
}
|
||||
|
||||
TEST(TestFPIR, RegisterDependencyPass_Complex)
|
||||
TEST(TestFPIR, RegisterDependencyPass_Complex_IF_BothPredecessorsClobber)
{
    // Multi-level graph with a single IF; nested IFs/LOOPs are not covered here.
    // Mockup of a simple lighting function, R0 = Light vector, R1 = Decompressed normal. DP4 used for simplicity.
    // Data hazards sprinkled in for testing. R3 is written in the ancestor (bb0) and in the IF branch (bb1).
    // The barrier is expected in the IF branch.
    FlowGraph graph;

    BasicBlock* bb0 = BB_from_source(&graph, R"(
        DP4 R2, R0, R1
        SFL R3
        SGT R3, R2, R0
        IF.GE
    )");

    BasicBlock* bb1 = BB_from_source(&graph, R"(
        ADD R0, R0, R2
        MOV H6, #{ 0.25 }
    )");

    BasicBlock* bb2 = BB_from_source(&graph, R"(
        ADD R0, R0, R3
        MOV R1, R0
    )");

    // Successor edges
    bb0->insert_succ(bb1, EdgeType::IF);
    bb0->insert_succ(bb2, EdgeType::ENDIF);
    bb1->insert_succ(bb2, EdgeType::ENDIF);

    // Predecessor edges
    bb2->insert_pred(bb1, EdgeType::ENDIF);
    bb2->insert_pred(bb0, EdgeType::ENDIF);
    bb1->insert_pred(bb0, EdgeType::IF);

    RSXFragmentProgram prog{};

    FP::RegisterAnnotationPass annotation_pass{ prog };
    FP::RegisterDependencyPass deps_pass{};

    annotation_pass.run(graph);
    deps_pass.run(graph);

    // The pass must leave the block bodies untouched.
    ASSERT_EQ(bb0->instructions.size(), 4);
    ASSERT_EQ(bb1->instructions.size(), 2);
    ASSERT_EQ(bb2->instructions.size(), 2);

    // bb1 receives an epilogue of two instructions that update R3.xy
    ASSERT_EQ(bb1->epilogue.size(), 2);

    // R3.x = packHalf2(H6.xy)
    const OPDEST pack_x_dst{ .HEX = bb1->epilogue[0].bytecode[0] };
    const SRC0 pack_x_src{ .HEX = bb1->epilogue[0].bytecode[1] };

    EXPECT_EQ(pack_x_dst.opcode, RSX_FP_OPCODE_PK2);
    EXPECT_EQ(pack_x_dst.fp16, 0);
    EXPECT_EQ(pack_x_dst.dest_reg, 3);
    EXPECT_EQ(pack_x_dst.mask_x, true);
    EXPECT_EQ(pack_x_dst.mask_y, false);
    EXPECT_EQ(pack_x_dst.mask_z, false);
    EXPECT_EQ(pack_x_dst.mask_w, false);
    EXPECT_EQ(pack_x_src.reg_type, RSX_FP_REGISTER_TYPE_TEMP);
    EXPECT_EQ(pack_x_src.tmp_reg_index, 6);
    EXPECT_EQ(pack_x_src.fp16, 1);
    EXPECT_EQ(pack_x_src.swizzle_x, 0);
    EXPECT_EQ(pack_x_src.swizzle_y, 1);

    // R3.y = packHalf2(H6.zw)
    const OPDEST pack_y_dst{ .HEX = bb1->epilogue[1].bytecode[0] };
    const SRC0 pack_y_src{ .HEX = bb1->epilogue[1].bytecode[1] };

    EXPECT_EQ(pack_y_dst.opcode, RSX_FP_OPCODE_PK2);
    EXPECT_EQ(pack_y_dst.fp16, 0);
    EXPECT_EQ(pack_y_dst.dest_reg, 3);
    EXPECT_EQ(pack_y_dst.mask_x, false);
    EXPECT_EQ(pack_y_dst.mask_y, true);
    EXPECT_EQ(pack_y_dst.mask_z, false);
    EXPECT_EQ(pack_y_dst.mask_w, false);
    EXPECT_EQ(pack_y_src.reg_type, RSX_FP_REGISTER_TYPE_TEMP);
    EXPECT_EQ(pack_y_src.tmp_reg_index, 6);
    EXPECT_EQ(pack_y_src.fp16, 1);
    EXPECT_EQ(pack_y_src.swizzle_x, 2);
    EXPECT_EQ(pack_y_src.swizzle_y, 3);
}
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user