diff --git a/rpcs3/CMakeLists.txt b/rpcs3/CMakeLists.txt
index 3217cc38f5..796351e16c 100644
--- a/rpcs3/CMakeLists.txt
+++ b/rpcs3/CMakeLists.txt
@@ -192,6 +192,7 @@ if(BUILD_RPCS3_TESTS)
         tests/test_tuple.cpp
         tests/test_simple_array.cpp
         tests/test_address_range.cpp
+        tests/test_rsx_cfg.cpp
     )
 
     target_link_libraries(rpcs3_test
diff --git a/rpcs3/Emu/CMakeLists.txt b/rpcs3/Emu/CMakeLists.txt
index 139688947d..1a902b46df 100644
--- a/rpcs3/Emu/CMakeLists.txt
+++ b/rpcs3/Emu/CMakeLists.txt
@@ -516,6 +516,7 @@ target_sources(rpcs3_emu PRIVATE
     RSX/Overlays/overlay_video.cpp
     RSX/Overlays/Shaders/shader_loading_dialog.cpp
     RSX/Overlays/Shaders/shader_loading_dialog_native.cpp
+    RSX/Program/Assembler/FPToCFG.cpp
     RSX/Program/CgBinaryProgram.cpp
     RSX/Program/CgBinaryFragmentProgram.cpp
     RSX/Program/CgBinaryVertexProgram.cpp
diff --git a/rpcs3/Emu/RSX/Program/Assembler/CFG.h b/rpcs3/Emu/RSX/Program/Assembler/CFG.h
new file mode 100644
index 0000000000..b2d4ad75a8
--- /dev/null
+++ b/rpcs3/Emu/RSX/Program/Assembler/CFG.h
@@ -0,0 +1,45 @@
+#pragma once
+
+#include
+#include "IR.h"
+
+#include
+
+struct RSXFragmentProgram;
+
+namespace rsx::assembler
+{
+	// Control-flow graph of a program: the list of basic blocks plus the edges stored inside them.
+	struct FlowGraph
+	{
+		// Blocks are referenced by address: push() returns pointers to elements and edges store them.
+		std::list blocks;
+
+		// Appends a new block with id 'pc' and returns a pointer to it.
+		// If 'parent' is null and the graph already has blocks, the last block becomes the parent.
+		// When there is a parent, a successor edge is added to it and a predecessor edge to the new block.
+		BasicBlock* push(BasicBlock* parent = nullptr, u32 pc = 0)
+		{
+			if (!parent && !blocks.empty())
+			{
+				parent = &blocks.back();
+			}
+
+			blocks.push_back({});
+			BasicBlock* new_block = &blocks.back();
+
+			if (parent)
+			{
+				parent->insert_succ(new_block);
+				new_block->insert_pred(parent);
+			}
+
+			new_block->id = pc;
+			return new_block;
+		}
+	};
+
+	// Builds the flow graph of a fragment program.
+	FlowGraph deconstruct_fragment_program(const RSXFragmentProgram& prog);
+}
+
diff --git a/rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp b/rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp
new file mode 100644
index 0000000000..0f8cd2ea91
--- /dev/null
+++ b/rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp
@@ -0,0 +1,168 @@
+#include "stdafx.h"
+
+#include "CFG.h"
+
+#include "Emu/RSX/Common/simple_array.hpp"
+#include "Emu/RSX/Program/RSXFragmentProgram.h"
+#include "Emu/RSX/Program/ProgramStateCache.h"
+
+#include
+#include
+
+using namespace program_hash_util;
+
+namespace rsx::assembler
+{
+	// Returns the instruction with the two bytes of every 16-bit half-word swapped.
+	inline v128 decode_instruction(const v128& raw_inst)
+	{
+		// Fixup of RSX's weird half-word shuffle for FP instructions
+		// Convert input stream into LE u16 array
+		// The bitwise ops use the SSE2 si128 forms; the epi32 forms of and/or need AVX-512.
+		__m128i _mask0 = _mm_set1_epi32(0xff00ff00);
+		__m128i _mask1 = _mm_set1_epi32(0x00ff00ff);
+		__m128i a = _mm_slli_epi32(static_cast<__m128i>(raw_inst), 8);
+		__m128i b = _mm_srli_epi32(static_cast<__m128i>(raw_inst), 8);
+		__m128i ret = _mm_or_si128(
+			_mm_and_si128(_mask0, a),
+			_mm_and_si128(_mask1, b)
+		);
+		return v128::loadu(&ret);
+	}
+
+	// Splits a fragment program into basic blocks.
+	// The instruction stream is walked once from the first instruction until one with the end flag set.
+	// IFE, LOOP and REP open new blocks; every other non-NOP instruction is appended, decoded, to the current block.
+	// Block ids are pc values and the returned block list is sorted by id.
+	// Throws through fmt::throw_exception when a CAL instruction is encountered.
+	FlowGraph deconstruct_fragment_program(const RSXFragmentProgram& prog)
+	{
+		// For a flowgraph, we don't care at all about the actual contents, just flow control instructions.
+		OPDEST dst{};
+		SRC0 src0{};
+		SRC1 src1{};
+		SRC2 src2{};
+
+		u32 pc = 0; // Program counter
+		bool end = false;
+
+		// Flow control data
+		rsx::simple_array end_blocks;
+		rsx::simple_array else_blocks;
+
+		// Data block
+		u32* data = static_cast(prog.get_data());
+
+		// Output
+		FlowGraph graph{};
+		BasicBlock* bb = graph.push();
+
+		// Looks up an already created block by id; nullptr when there is none.
+		auto find_block_for_pc = [&](u32 id) -> BasicBlock*
+		{
+			auto found = std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == id));
+			if (found != graph.blocks.end())
+			{
+				return &(*found);
+			}
+			return nullptr;
+		};
+
+		// Connects 'parent' to the block with the given id, creating the block only if it does not exist yet.
+		auto safe_insert_block = [&](BasicBlock* parent, u32 id) -> BasicBlock*
+		{
+			if (auto found = find_block_for_pc(id))
+			{
+				parent->insert_succ(found);
+				found->insert_pred(parent);
+				return found;
+			}
+
+			return graph.push(parent, id);
+		};
+
+		while (!end)
+		{
+			// Once pc reaches the id of a registered end/else block, that block becomes the current one.
+			BasicBlock** found = end_blocks.find_if(FN(x->id == pc));
+
+			if (!found)
+			{
+				found = else_blocks.find_if(FN(x->id == pc));
+			}
+
+			if (found)
+			{
+				bb = *found;
+			}
+
+			const v128 raw_inst = v128::loadu(data, pc);
+			v128 decoded = decode_instruction(raw_inst);
+
+			dst.HEX = decoded._u32[0];
+			src0.HEX = decoded._u32[1];
+			src1.HEX = decoded._u32[2];
+			src2.HEX = decoded._u32[3];
+
+			// Record the end flag before the NOP early-out, so a trailing NOP still terminates the walk.
+			end = !!dst.end;
+
+			const u32 opcode = dst.opcode | (src1.opcode_is_branch << 6);
+
+			if (opcode == RSX_FP_OPCODE_NOP)
+			{
+				pc++;
+				continue;
+			}
+
+			bb->instructions.push_back({});
+			auto& ir_inst = bb->instructions.back();
+			std::memcpy(ir_inst.bytecode, &decoded._u32[0], 16);
+
+			switch (opcode)
+			{
+			case RSX_FP_OPCODE_CAL:
+				// Unimplemented. Also unused by the RSX compiler
+				fmt::throw_exception("Unimplemented FP CAL instruction.");
+				break;
+			case RSX_FP_OPCODE_RET:
+				// Outside a subroutine, this doesn't mean much. The main block can conditionally return to stop execution early.
+				// This will not alter flow control.
+				break;
+			case RSX_FP_OPCODE_IFE:
+			{
+				// Inserts if and else and end blocks
+				// Equal else and end offsets mean there is no else branch.
+				auto parent = bb;
+				bb = safe_insert_block(parent, pc + 1);
+				if (src2.end_offset != src1.else_offset)
+				{
+					else_blocks.push_back(safe_insert_block(parent, src1.else_offset >> 2));
+				}
+				end_blocks.push_back(safe_insert_block(parent, src2.end_offset >> 2));
+				break;
+			}
+			case RSX_FP_OPCODE_LOOP:
+			case RSX_FP_OPCODE_REP:
+			{
+				// Inserts for and end blocks
+				auto parent = bb;
+				bb = safe_insert_block(parent, pc + 1);
+				end_blocks.push_back(safe_insert_block(parent, src2.end_offset >> 2));
+				break;
+			}
+			default:
+				// One extra slot is skipped when a source operand is a constant. NOTE(review): presumably that slot holds the inline constant - confirm.
+				if (fragment_program_utils::is_any_src_constant(decoded))
+				{
+					pc++;
+				}
+			}
+
+			pc++;
+		}
+
+		graph.blocks.sort(FN(x.id < y.id));
+		return graph;
+	}
+}
diff --git a/rpcs3/Emu/RSX/Program/Assembler/IR.h b/rpcs3/Emu/RSX/Program/Assembler/IR.h
new file mode 100644
index 0000000000..53d6080a11
--- /dev/null
+++ b/rpcs3/Emu/RSX/Program/Assembler/IR.h
@@ -0,0 +1,77 @@
+#pragma once
+
+#include
+
+namespace rsx::assembler
+{
+	struct BasicBlock;
+
+	// Register identity: an index plus a flag marking it as f16.
+	struct Register
+	{
+		int id = 0;
+		bool f16 = false;
+	};
+
+	// A register together with a per-component (x/y/z/w) mask.
+	struct RegisterRef
+	{
+		Register reg{};
+
+		// Vector information
+		union
+		{
+			u32 mask;
+
+			struct
+			{
+				bool x : 1;
+				bool y : 1;
+				bool z : 1;
+				bool w : 1;
+			};
+		};
+	};
+
+	// One instruction: the raw words plus fields reserved for its decoded form.
+	struct Instruction
+	{
+		// Raw data. Every instruction is max 128 bits
+		u32 bytecode[4];
+
+		// Decoded
+		u32 opcode = 0;
+		std::vector srcs;
+		std::vector dsts;
+	};
+
+	// Directed edge between two basic blocks.
+	struct FlowEdge
+	{
+		BasicBlock* from = nullptr;
+		BasicBlock* to = nullptr;
+	};
+
+	// A run of instructions with its outgoing (succ) and incoming (pred) edges.
+	struct BasicBlock
+	{
+		u32 id = 0;
+		std::vector instructions;
+		std::vector succ; // [0] = if/loop, [1] = else
+		std::vector pred; // Back edge.
+
+		// Records an edge from this block to successor 'b'.
+		void insert_succ(BasicBlock* b)
+		{
+			FlowEdge e{ .from = this, .to = b };
+			succ.push_back(e);
+		}
+
+		// Records predecessor 'b'. The edge is stored as from = this, to = b, i.e. it points back at the predecessor.
+		void insert_pred(BasicBlock* b)
+		{
+			FlowEdge e{ .from = this, .to = b };
+			pred.push_back(e);
+		}
+	};
+}
diff --git a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h
index 467c6f3ac7..d3ef8643f1 100644
--- a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h
+++ b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h
@@ -3,6 +3,8 @@
 #include "FragmentProgramRegister.h"
 #include "RSXFragmentProgram.h"
 
+#include "Assembler/CFG.h"
+
 #include
 #include
diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj
index 0bbea41832..1b5716f01b 100644
--- a/rpcs3/emucore.vcxproj
+++ b/rpcs3/emucore.vcxproj
@@ -156,6 +156,7 @@
+
@@ -699,6 +700,8 @@
+
+
diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters
index 65cd509f85..23b7ef174d 100644
--- a/rpcs3/emucore.vcxproj.filters
+++ b/rpcs3/emucore.vcxproj.filters
@@ -133,6 +133,9 @@
 {ce6d6b90-8313-4273-b46c-d92bd450c002}
+
+ {d99df916-8a99-428b-869a-9f14ac0ab411}
+
@@ -1372,6 +1375,9 @@
 Emu\Io
+
+ Emu\GPU\RSX\Program\Assembler
+
@@ -2764,6 +2770,12 @@
 Utilities
+
+ Emu\GPU\RSX\Program\Assembler
+
+
+ Emu\GPU\RSX\Program\Assembler
+
diff --git a/rpcs3/tests/rpcs3_test.vcxproj b/rpcs3/tests/rpcs3_test.vcxproj
index 4f0d136a9a..22992e6a07 100644
--- a/rpcs3/tests/rpcs3_test.vcxproj
+++ b/rpcs3/tests/rpcs3_test.vcxproj
@@ -88,6 +88,7 @@
+
diff --git
a/rpcs3/tests/test_rsx_cfg.cpp b/rpcs3/tests/test_rsx_cfg.cpp
new file mode 100644
index 0000000000..f868965515
--- /dev/null
+++ b/rpcs3/tests/test_rsx_cfg.cpp
@@ -0,0 +1,225 @@
+#include
+
+#include "Emu/RSX/Common/simple_array.hpp"
+#include "Emu/RSX/Program/Assembler/CFG.h"
+#include "Emu/RSX/Program/RSXFragmentProgram.h"
+
+#include
+
+namespace rsx::assembler
+{
+	// Swaps the two bytes of each 16-bit half of 'dword'.
+	auto swap_bytes16 = [](u32 dword) -> u32
+	{
+		// Lazy encode, but good enough for what we need here.
+		union v32
+		{
+			u32 HEX;
+			u8 _v[4];
+		};
+
+		u8* src_bytes = reinterpret_cast(&dword);
+		v32 dst_bytes;
+
+		dst_bytes._v[0] = src_bytes[1];
+		dst_bytes._v[1] = src_bytes[0];
+		dst_bytes._v[2] = src_bytes[3];
+		dst_bytes._v[3] = src_bytes[2];
+
+		return dst_bytes.HEX;
+	};
+
+	// Instruction mocks because we don't have a working assembler (yet)
+	// Encodes an instruction that carries only an opcode and, optionally, the end flag.
+	auto encode_instruction = [](u32 opcode, bool end = false) -> v128
+	{
+		OPDEST dst{};
+		dst.opcode = opcode;
+
+		if (end)
+		{
+			dst.end = 1;
+		}
+
+		return v128::from32(swap_bytes16(dst.HEX), 0, 0, 0);
+	};
+
+	// Encodes an IFE instruction. 'end' and '_else' are instruction indices; with '_else' left at 0 the else offset equals the end offset.
+	auto create_if(u32 end, u32 _else = 0)
+	{
+		OPDEST dst{};
+		dst.opcode = RSX_FP_OPCODE_IFE;
+
+		SRC1 src1{};
+		src1.else_offset = (_else ? _else : end) << 2;
+		src1.opcode_is_branch = 1;
+
+		SRC2 src2{};
+		src2.end_offset = end << 2;
+
+		return v128::from32(swap_bytes16(dst.HEX), 0, swap_bytes16(src1.HEX), swap_bytes16(src2.HEX));
+	}
+
+	// A program without flow control yields a single block holding every instruction.
+	TEST(CFG, FpToCFG_Basic)
+	{
+		rsx::simple_array buffer = {
+			encode_instruction(RSX_FP_OPCODE_ADD),
+			encode_instruction(RSX_FP_OPCODE_MOV, true)
+		};
+
+		RSXFragmentProgram program{};
+		program.data = buffer.data();
+
+		FlowGraph graph = deconstruct_fragment_program(program);
+
+		EXPECT_EQ(graph.blocks.size(), 1);
+		EXPECT_EQ(graph.blocks.front().instructions.size(), 2);
+	}
+
+	// A single IF without ELSE splits the program into head, branch and merge blocks.
+	TEST(CFG, FpToCFG_IF)
+	{
+		rsx::simple_array buffer = {
+			encode_instruction(RSX_FP_OPCODE_ADD),       // 0
+			encode_instruction(RSX_FP_OPCODE_MOV),       // 1
+			create_if(4),                                // 2 (BR, 4)
+			encode_instruction(RSX_FP_OPCODE_ADD),       // 3
+			encode_instruction(RSX_FP_OPCODE_MOV, true), // 4 (Merge block)
+		};
+
+		const std::pair expected_block_data[3] = {
+			{ 0, 3 }, // Head
+			{ 3, 1 }, // Branch
+			{ 4, 1 }, // Merge
+		};
+
+		RSXFragmentProgram program{};
+		program.data = buffer.data();
+
+		FlowGraph graph = deconstruct_fragment_program(program);
+
+		ASSERT_EQ(graph.blocks.size(), 3);
+
+		int i = 0;
+		for (const auto& block : graph.blocks)
+		{
+			const auto& expected = expected_block_data[i++];
+			EXPECT_EQ(block.id, expected.first);
+			EXPECT_EQ(block.instructions.size(), expected.second);
+		}
+	}
+
+	// Nested IFs with distinct end offsets produce one merge block each.
+	TEST(CFG, FpToCFG_NestedIF)
+	{
+		rsx::simple_array buffer = {
+			encode_instruction(RSX_FP_OPCODE_ADD),       // 0
+			encode_instruction(RSX_FP_OPCODE_MOV),       // 1
+			create_if(8),                                // 2 (BR, 8)
+			encode_instruction(RSX_FP_OPCODE_ADD),       // 3
+			create_if(6),                                // 4 (BR, 6)
+			encode_instruction(RSX_FP_OPCODE_MOV),       // 5
+			encode_instruction(RSX_FP_OPCODE_MOV),       // 6 (merge block 1)
+			encode_instruction(RSX_FP_OPCODE_ADD),       // 7
+			encode_instruction(RSX_FP_OPCODE_MOV, true)  // 8 (merge block 2)
+		};
+
+		const std::pair expected_block_data[5] = {
+			{ 0, 3 }, // Head
+			{ 3, 2 }, // Branch 1
+			{ 5, 1 }, // Branch 2
+			{ 6, 2 }, // Merge 1
+			{ 8, 1 }, // Merge 2
+		};
+
+		RSXFragmentProgram program{};
+		program.data = buffer.data();
+
+		FlowGraph graph = deconstruct_fragment_program(program);
+
+		ASSERT_EQ(graph.blocks.size(), 5);
+
+		int i = 0;
+		for (const auto& block : graph.blocks)
+		{
+			const auto& expected = expected_block_data[i++];
+			EXPECT_EQ(block.id, expected.first);
+			EXPECT_EQ(block.instructions.size(), expected.second);
+		}
+	}
+
+	// Nested IFs that end at the same offset share a single merge block.
+	TEST(CFG, FpToCFG_NestedIF_MultiplePred)
+	{
+		rsx::simple_array buffer = {
+			encode_instruction(RSX_FP_OPCODE_ADD),       // 0
+			encode_instruction(RSX_FP_OPCODE_MOV),       // 1
+			create_if(6),                                // 2 (BR, 6)
+			encode_instruction(RSX_FP_OPCODE_ADD),       // 3
+			create_if(6),                                // 4 (BR, 6)
+			encode_instruction(RSX_FP_OPCODE_MOV),       // 5
+			encode_instruction(RSX_FP_OPCODE_MOV),       // 6 (merge block)
+			encode_instruction(RSX_FP_OPCODE_ADD),       // 7
+			encode_instruction(RSX_FP_OPCODE_MOV, true)  // 8
+		};
+
+		const std::pair expected_block_data[4] = {
+			{ 0, 3 }, // Head
+			{ 3, 2 }, // Branch 1
+			{ 5, 1 }, // Branch 2
+			{ 6, 3 }, // Merge
+		};
+
+		RSXFragmentProgram program{};
+		program.data = buffer.data();
+
+		FlowGraph graph = deconstruct_fragment_program(program);
+
+		ASSERT_EQ(graph.blocks.size(), 4);
+
+		int i = 0;
+		for (const auto& block : graph.blocks)
+		{
+			const auto& expected = expected_block_data[i++];
+			EXPECT_EQ(block.id, expected.first);
+			EXPECT_EQ(block.instructions.size(), expected.second);
+		}
+	}
+
+	// IF with ELSE adds a separate block that starts at the else offset.
+	TEST(CFG, FpToCFG_IF_ELSE)
+	{
+		rsx::simple_array buffer = {
+			encode_instruction(RSX_FP_OPCODE_ADD),       // 0
+			encode_instruction(RSX_FP_OPCODE_MOV),       // 1
+			create_if(6, 4),                             // 2 (BR, 6)
+			encode_instruction(RSX_FP_OPCODE_ADD),       // 3
+			encode_instruction(RSX_FP_OPCODE_MOV),       // 4 (Else)
+			encode_instruction(RSX_FP_OPCODE_ADD),       // 5
+			encode_instruction(RSX_FP_OPCODE_MOV, true), // 6 (Merge)
+		};
+
+		const std::pair expected_block_data[4] = {
+			{ 0, 3 }, // Head
+			{ 3, 1 }, // Branch positive
+			{ 4, 2 }, // Branch negative
+			{ 6, 1 }, // Merge
+		};
+
+		RSXFragmentProgram program{};
+		program.data = buffer.data();
+
+		FlowGraph graph = deconstruct_fragment_program(program);
+
+		ASSERT_EQ(graph.blocks.size(), 4);
+
+		int i = 0;
+		for (const auto& block : graph.blocks)
+		{
+			const auto& expected = expected_block_data[i++];
+			EXPECT_EQ(block.id, expected.first);
+			EXPECT_EQ(block.instructions.size(), expected.second);
+		}
+	}
+}
diff --git a/rpcs3/tests/test_simple_array.cpp b/rpcs3/tests/test_simple_array.cpp
index 90a0e73575..fcec3568f4 100644
--- a/rpcs3/tests/test_simple_array.cpp
+++ b/rpcs3/tests/test_simple_array.cpp
@@ -303,4 +303,30 @@ namespace rsx
 		EXPECT_EQ(data_ptr1 & 15, 0);
 		EXPECT_EQ(data_ptr2 & 127, 0);
 	}
+
+	TEST(SimpleArray, Find)
+	{
+		const rsx::simple_array arr{
+			0, 1, 2, 3, 4, 5, 6, 7, 8, 9
+		};
+
+		// Check for a hit before dereferencing, so a failed lookup fails the test instead of crashing it
+		const auto found = arr.find(8);
+		ASSERT_NE(found, nullptr);
+		EXPECT_EQ(*found, 8);
+		EXPECT_EQ(arr.find(99), nullptr);
+	}
+
+	TEST(SimpleArray, FindIf)
+	{
+		const rsx::simple_array arr{
+			0, 1, 2, 3, 4, 5, 6, 7, 8, 9
+		};
+
+		// Same guard as above: never dereference an unchecked lookup result
+		const auto found = arr.find_if(FN(x == 8));
+		ASSERT_NE(found, nullptr);
+		EXPECT_EQ(*found, 8);
+		EXPECT_EQ(arr.find_if(FN(x == 99)), nullptr);
+	}
 }