mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-12-16 04:09:07 +00:00
rsx/cfg: Implement dependency injection pass with branches
This commit is contained in:
parent
8ff3dda5e8
commit
856eaac1b6
@ -3,6 +3,7 @@
|
|||||||
#include "Emu/RSX/Program/Assembler/FPOpcodes.h"
|
#include "Emu/RSX/Program/Assembler/FPOpcodes.h"
|
||||||
#include "Emu/RSX/Program/RSXFragmentProgram.h"
|
#include "Emu/RSX/Program/RSXFragmentProgram.h"
|
||||||
|
|
||||||
|
#include <unordered_map>
|
||||||
#include <unordered_set>
|
#include <unordered_set>
|
||||||
|
|
||||||
namespace rsx::assembler::FP
|
namespace rsx::assembler::FP
|
||||||
@ -13,6 +14,14 @@ namespace rsx::assembler::FP
|
|||||||
static constexpr char content_float16 = 'H';
|
static constexpr char content_float16 = 'H';
|
||||||
static constexpr char content_dual = 'D';
|
static constexpr char content_dual = 'D';
|
||||||
|
|
||||||
|
using register_file_t = std::array<char, register_file_length>;
|
||||||
|
|
||||||
|
// Shared state for one run of the register dependency pass.
// Both maps are keyed by basic block and hold one content tag per register file lane.
struct DependencyPassContext
|
||||||
|
{
|
||||||
|
// Per-block register file owned by the intra-block pass (insert_dependency_barriers),
// which resets every lane to content_unknown before processing the block.
// The inter-block pass reads it to see how a predecessor left each lane.
std::unordered_map<BasicBlock*, register_file_t> exec_register_map;
|
||||||
|
// Per-block record of lanes that already had a barrier queued in that block's epilogue.
// Entries are created lazily as content_unknown and flipped to content_dual once a
// barrier is emitted, so the same lane is not synchronized twice.
std::unordered_map<BasicBlock*, register_file_t> sync_register_map;
|
||||||
|
};
|
||||||
|
|
||||||
std::vector<RegisterRef> decode_lanes16(const std::unordered_set<u32>& lanes)
|
std::vector<RegisterRef> decode_lanes16(const std::unordered_set<u32>& lanes)
|
||||||
{
|
{
|
||||||
std::vector<RegisterRef> result;
|
std::vector<RegisterRef> result;
|
||||||
@ -193,9 +202,23 @@ namespace rsx::assembler::FP
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
void insert_dependency_barriers(BasicBlock* block)
|
std::vector<Instruction> resolve_dependencies(const std::unordered_set<u32>& lanes, bool f16)
|
||||||
{
|
{
|
||||||
std::array<char, register_file_length> register_file;
|
std::vector<Instruction> result;
|
||||||
|
|
||||||
|
const auto regs = (f16 ? decode_lanes16 : decode_lanes32)(lanes);
|
||||||
|
for (const auto& ref : regs)
|
||||||
|
{
|
||||||
|
auto instructions = (f16 ? build_barrier16 : build_barrier32)(ref);
|
||||||
|
result.insert(result.end(), instructions.begin(), instructions.end());
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
void insert_dependency_barriers(DependencyPassContext& ctx, BasicBlock* block)
|
||||||
|
{
|
||||||
|
register_file_t& register_file = ctx.exec_register_map[block];
|
||||||
std::memset(register_file.data(), content_unknown, register_file_length);
|
std::memset(register_file.data(), content_unknown, register_file_length);
|
||||||
|
|
||||||
std::unordered_set<u32> barrier16;
|
std::unordered_set<u32> barrier16;
|
||||||
@ -275,14 +298,109 @@ namespace rsx::assembler::FP
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Inspects the predecessors of `block` for writes that clobber `lanes` with the opposite
// register width, and queues barrier instructions in the epilogue of each such predecessor.
// ctx:   pass state; exec_register_map must already contain every predecessor.
// block: block whose incoming edges are examined. Nothing happens if it has none.
// lanes: register file lanes read by the consumer.
// f16:   true when the consumer reads the lanes as 16-bit, false for 32-bit.
void insert_block_register_dependency(DependencyPassContext& ctx, BasicBlock* block, const std::unordered_set<u32>& lanes, bool f16)
{
	if (block->pred.empty())
	{
		return;
	}

	// A lane is clobbered when the predecessor left it in the width the consumer does NOT read.
	const auto clobber_type = f16 ? content_float32 : content_float16;

	std::unordered_set<u32> pending_barriers;
	std::unordered_set<u32> unresolved;

	for (auto& edge : block->pred)
	{
		auto source = edge.from;

		// The intra-block pass must have recorded this predecessor's final register state.
		const auto exec_entry = ctx.exec_register_map.find(source);
		ensure(exec_entry != ctx.exec_register_map.end(), "Block has not been pre-processed");
		const auto& exec_state = exec_entry->second;

		// Lazily create the "already synchronized" record for this predecessor.
		auto [sync_entry, created] = ctx.sync_register_map.try_emplace(source);
		if (created)
		{
			sync_entry->second.fill(content_unknown);
		}
		auto& sync_state = sync_entry->second;

		pending_barriers.clear();
		unresolved.clear();

		for (const u32 lane : lanes)
		{
			const char written_as = exec_state[lane];

			if (written_as == clobber_type && sync_state[lane] == content_unknown)
			{
				// Clobbered here and not yet synchronized: queue a barrier and remember it.
				pending_barriers.insert(lane);
				sync_state[lane] = content_dual;
			}
			else if (written_as == content_unknown)
			{
				// This predecessor never touched the lane; the writer may be further up.
				unresolved.insert(lane);
			}
		}

		if (!pending_barriers.empty())
		{
			const auto barrier_ops = resolve_dependencies(pending_barriers, f16);
			source->epilogue.insert(source->epilogue.end(), barrier_ops.begin(), barrier_ops.end());
		}

		if (unresolved.empty())
		{
			// NOTE(review): this leaves the loop, so any remaining predecessors of `block`
			// are not inspected - confirm that is intended rather than moving to the next edge.
			break;
		}

		if (source->pred.empty())
		{
			continue;
		}

		// Some lanes are still unaccounted for. Continue the search from the last
		// predecessor of this block, which is the true "root" of the branch.
		insert_block_register_dependency(ctx, source->pred.back().from, unresolved, f16);
	}
}
|
||||||
|
|
||||||
|
// Runs the inter-block dependency search for every register in the block's input list.
// ctx:   pass state shared across blocks.
// block: block whose inputs are resolved against its predecessors.
void insert_block_dependencies(DependencyPassContext& ctx, BasicBlock* block)
{
	for (auto& input : block->input_list)
	{
		const RegisterRef& ref = input;

		// Expand the register reference into the set of register file lanes it covers.
		std::unordered_set<u32> lanes;
		for (const auto& lane : get_register_file_range(ref))
		{
			lanes.insert(lane);
		}

		insert_block_register_dependency(ctx, block, lanes, input.reg.f16);
	}
}
|
||||||
|
|
||||||
// Entry point of the pass: resolves register dependencies inside each block, then
// across block boundaries.
void RegisterDependencyPass::run(FlowGraph& graph)
{
	DependencyPassContext ctx{};

	// Stage 1: intra-block dependencies. This also populates the per-block register state in ctx.
	for (BasicBlock& bb : graph.blocks)
	{
		insert_dependency_barriers(ctx, &bb);
	}

	// Stage 2: create prologue/epilogue instructions.
	// Blocks are visited last-to-first so dependencies bubble up correctly.
	auto it = graph.blocks.rbegin();
	while (it != graph.blocks.rend())
	{
		insert_block_dependencies(ctx, &(*it));
		++it;
	}
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -48,6 +48,15 @@ namespace rsx::assembler
|
|||||||
bb.instructions = ir.build();
|
bb.instructions = ir.build();
|
||||||
return graph;
|
return graph;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Test helper: assembles `asm_` and appends the result to `graph` as a new basic block.
// Returns a pointer to the appended block.
// NOTE(review): earlier returned pointers stay valid across later calls only if
// FlowGraph::blocks keeps element addresses stable on push_back - confirm the container type.
static BasicBlock* BB_from_source(FlowGraph* graph, const std::string& asm_)
{
	auto program = FPIR::from_source(asm_);

	graph->blocks.push_back({});
	BasicBlock* const appended = &graph->blocks.back();
	appended->instructions = program.build();

	return appended;
}
|
||||||
TEST(TestFPIR, FromSource)
|
TEST(TestFPIR, FromSource)
|
||||||
{
|
{
|
||||||
auto ir = FPIR::from_source(R"(
|
auto ir = FPIR::from_source(R"(
|
||||||
@ -232,8 +241,83 @@ namespace rsx::assembler
|
|||||||
EXPECT_EQ(SRC0{ .HEX = block.instructions[3].bytecode[1] }.swizzle_y, 3);
|
EXPECT_EQ(SRC0{ .HEX = block.instructions[3].bytecode[1] }.swizzle_y, 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(TestFPIR, RegisterDependencyPass_Complex)
|
TEST(TestFPIR, RegisterDependencyPass_Complex_IF_BothPredecessorsClobber)
{
	// Multi-level graph, but with a single IF.
	// Mock lighting function: R0 = light vector, R1 = decompressed normal (DP4 keeps it simple).
	// Data hazards are planted on purpose: R3 is clobbered both in the ancestor and in the
	// IF branch, and the barrier is expected to land in the IF branch.
	FlowGraph graph;

	BasicBlock* bb0 = BB_from_source(&graph, R"(
		DP4 R2, R0, R1
		SFL R3
		SGT R3, R2, R0
		IF.GE
	)");

	BasicBlock* bb1 = BB_from_source(&graph, R"(
		ADD R0, R0, R2
		MOV H6, #{ 0.25 }
	)");

	BasicBlock* bb2 = BB_from_source(&graph, R"(
		ADD R0, R0, R3
		MOV R1, R0
	)");

	// Front edges
	bb0->insert_succ(bb1, EdgeType::IF);
	bb0->insert_succ(bb2, EdgeType::ENDIF);
	bb1->insert_succ(bb2, EdgeType::ENDIF);

	// Back edges
	bb2->insert_pred(bb1, EdgeType::ENDIF);
	bb2->insert_pred(bb0, EdgeType::ENDIF);
	bb1->insert_pred(bb0, EdgeType::IF);

	RSXFragmentProgram prog{};

	FP::RegisterAnnotationPass annotation_pass{ prog };
	FP::RegisterDependencyPass deps_pass{};

	annotation_pass.run(graph);
	deps_pass.run(graph);

	// The pass must not touch the instruction streams themselves.
	ASSERT_EQ(bb0->instructions.size(), 4);
	ASSERT_EQ(bb1->instructions.size(), 2);
	ASSERT_EQ(bb2->instructions.size(), 2);

	// bb1 has an epilogue
	ASSERT_EQ(bb1->epilogue.size(), 2);

	// Checks one PK2 barrier: 32-bit write to R3 with the given x/y write mask,
	// sourced from H6 with the given x/y swizzle.
	const auto expect_pack_barrier = [](const auto& barrier, bool dst_x, bool dst_y, int swz_x, int swz_y)
	{
		const OPDEST dst{ .HEX = barrier.bytecode[0] };
		const SRC0 src{ .HEX = barrier.bytecode[1] };

		EXPECT_EQ(dst.opcode, RSX_FP_OPCODE_PK2);
		EXPECT_EQ(dst.fp16, 0);
		EXPECT_EQ(dst.dest_reg, 3);
		EXPECT_EQ(dst.mask_x, dst_x);
		EXPECT_EQ(dst.mask_y, dst_y);
		EXPECT_EQ(dst.mask_z, false);
		EXPECT_EQ(dst.mask_w, false);
		EXPECT_EQ(src.reg_type, RSX_FP_REGISTER_TYPE_TEMP);
		EXPECT_EQ(src.tmp_reg_index, 6);
		EXPECT_EQ(src.fp16, 1);
		EXPECT_EQ(src.swizzle_x, swz_x);
		EXPECT_EQ(src.swizzle_y, swz_y);
	};

	// bb1 epilogue updates R3.xy

	// R3.x = packHalf2(H6.xy)
	expect_pack_barrier(bb1->epilogue[0], true, false, 0, 1);

	// R3.y = packHalf2(H6.zw)
	expect_pack_barrier(bb1->epilogue[1], false, true, 2, 3);
}
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user