From 1ea3c121fa5036a28725560e11bd6ec8346d21c9 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Wed, 10 Dec 2025 01:40:26 +0300 Subject: [PATCH] rsx/cfg: Fix delay-slot detection when copying from the same register index but different precision. --- rpcs3/Emu/RSX/Program/Assembler/FPASM.cpp | 1 + .../Passes/FP/RegisterAnnotationPass.cpp | 22 +++++++++--- rpcs3/tests/test_rsx_fp_asm.cpp | 35 +++++++++++++++++++ 3 files changed, 54 insertions(+), 4 deletions(-) diff --git a/rpcs3/Emu/RSX/Program/Assembler/FPASM.cpp b/rpcs3/Emu/RSX/Program/Assembler/FPASM.cpp index ee8f4441cb..2d74fafc73 100644 --- a/rpcs3/Emu/RSX/Program/Assembler/FPASM.cpp +++ b/rpcs3/Emu/RSX/Program/Assembler/FPASM.cpp @@ -24,6 +24,7 @@ namespace rsx::assembler // Arithmetic { "NOP", { .op = RSX_FP_OPCODE_NOP, .exec_if_lt = true, .exec_if_eq = true, .exec_if_gt = true, .set_cond = false } }, { "MOV", { .op = RSX_FP_OPCODE_MOV, .exec_if_lt = true, .exec_if_eq = true, .exec_if_gt = true, .set_cond = false } }, + { "MUL", { .op = RSX_FP_OPCODE_MUL, .exec_if_lt = true, .exec_if_eq = true, .exec_if_gt = true, .set_cond = false } }, { "ADD", { .op = RSX_FP_OPCODE_ADD, .exec_if_lt = true, .exec_if_eq = true, .exec_if_gt = true, .set_cond = false } }, { "MAD", { .op = RSX_FP_OPCODE_MAD, .exec_if_lt = true, .exec_if_eq = true, .exec_if_gt = true, .set_cond = false } }, { "FMA", { .op = RSX_FP_OPCODE_MAD, .exec_if_lt = true, .exec_if_eq = true, .exec_if_gt = true, .set_cond = false } }, diff --git a/rpcs3/Emu/RSX/Program/Assembler/Passes/FP/RegisterAnnotationPass.cpp b/rpcs3/Emu/RSX/Program/Assembler/Passes/FP/RegisterAnnotationPass.cpp index 4f63f364b6..9b031f2a0e 100644 --- a/rpcs3/Emu/RSX/Program/Assembler/Passes/FP/RegisterAnnotationPass.cpp +++ b/rpcs3/Emu/RSX/Program/Assembler/Passes/FP/RegisterAnnotationPass.cpp @@ -1,4 +1,5 @@ #include "stdafx.h" + #include "RegisterAnnotationPass.h" #include "Emu/RSX/Program/Assembler/FPOpcodes.h" #include "Emu/RSX/Program/RSXFragmentProgram.h" @@ -24,14 +25,27 @@ namespace rsx::assembler::FP dst.no_dest || // Must have a sink src0.reg_type != RSX_FP_REGISTER_TYPE_TEMP || // Must read from reg dst.dest_reg != src0.tmp_reg_index || // Must be a write-to-self - dst.fp16 || // Always full lane. We need to collect more data on this but it won't matter - dst.saturate || // Precision modifier - (dst.prec != RSX_FP_PRECISION_REAL && - dst.prec != RSX_FP_PRECISION_UNKNOWN)) // Cannot have precision modifiers + dst.fp16 != src0.fp16 || // Must really be the same register + src0.abs || src0.neg || + dst.saturate) // Precision modifier { return false; } + switch (dst.prec) + { + case RSX_FP_PRECISION_REAL: + case RSX_FP_PRECISION_UNKNOWN: + break; + case RSX_FP_PRECISION_HALF: + if (!src0.fp16) return false; + break; + case RSX_FP_PRECISION_FIXED12: + case RSX_FP_PRECISION_FIXED9: + case RSX_FP_PRECISION_SATURATE: + return false; + } + // Check if we have precision modifiers on the source if (src0.abs || src0.neg || src1.scale) { diff --git a/rpcs3/tests/test_rsx_fp_asm.cpp b/rpcs3/tests/test_rsx_fp_asm.cpp index d14dfacae4..9be88db5c7 100644 --- a/rpcs3/tests/test_rsx_fp_asm.cpp +++ b/rpcs3/tests/test_rsx_fp_asm.cpp @@ -731,4 +731,39 @@ namespace rsx::assembler EXPECT_EQ(get_graph_block(graph, 5)->instructions.size(), 1); EXPECT_EQ(get_graph_block(graph, 6)->instructions.size(), 1); } + + TEST(TestFPIR, RegisterDependencyPass_SplinterCell_DelaySlot) + { + // Real shader pattern found in splinter cell blacklist. + // TEX instructions replaced with MOV for simplicity. + // There are no dependent reads here, no barriers are expected. + // In the game, instruction 4 was misclassified as a delay slot, causing a skipped clobber. + auto ir = FPIR::from_source(R"( + MOV R0.w, #{ 0.25 } + MOV H0, H8 + MUL R0.w, H0.w, R0.w + MOV R0.xyz, H0.xyz + MOV R1, #{ 0.25 } + FMA H0, R0, #{ 0.125 }, R1 + )"); + + auto bytecode = ir.compile(); + RSXFragmentProgram prog{}; + prog.data = bytecode.data(); + auto graph = deconstruct_fragment_program(prog); + + // Verify state before + ASSERT_EQ(graph.blocks.size(), 1); + EXPECT_EQ(get_graph_block(graph, 0)->instructions.size(), 6); + + FP::RegisterAnnotationPass annotation_pass{ prog, {.skip_delay_slots = true } }; + FP::RegisterDependencyPass deps_pass{}; + + annotation_pass.run(graph); + deps_pass.run(graph); + + // Verify state after + EXPECT_EQ(get_graph_block(graph, 0)->instructions.size(), 6); + EXPECT_EQ(get_graph_block(graph, 0)->epilogue.size(), 0); + } }