rpcs3/rpcs3/Emu/RSX/Program/Assembler/Passes/FP/RegisterAnnotationPass.cpp

186 lines
5.0 KiB
C++

#include "stdafx.h"
#include "RegisterAnnotationPass.h"
#include "Emu/RSX/Program/Assembler/FPOpcodes.h"
#include <cstring>
#include <span>
#include <unordered_map>
namespace rsx::assembler::FP
{
// Size of the register file in bytes.
// It is viewed either as 48 F16 registers of 8 bytes each or as 24 F32 registers of 16 bytes each.
static constexpr u32 register_file_length{ 48 * 8 };

// Per-byte tags stored in the register file while annotating a block.
static constexpr char content_unknown{ '\0' }; // Byte has not been touched
static constexpr char content_float32{ 'R' };  // Byte accessed through a 32-bit register
static constexpr char content_float16{ 'H' };  // Byte accessed through a 16-bit register
static constexpr char content_dual{ 'D' };     // Byte accessed through both register widths
std::vector<RegisterRef> compile_register_file(const std::array<char, 48 * 8>& file)
{
std::vector<RegisterRef> results;
// F16 register processing
for (int reg16 = 0; reg16 < 48; ++reg16)
{
const u32 offset = reg16 * 8;
auto word = *reinterpret_cast<const u64*>(&file[offset]);
if (!word) [[ likely ]]
{
// Trivial rejection, very commonly hit.
continue;
}
RegisterRef ref{ .reg {.id = reg16, .f16 = true } };
ref.x = (file[offset] == content_dual || file[offset] == content_float16);
ref.y = (file[offset + 2] == content_dual || file[offset + 2] == content_float16);
ref.z = (file[offset + 4] == content_dual || file[offset + 4] == content_float16);
ref.w = (file[offset + 6] == content_dual || file[offset + 6] == content_float16);
if (ref)
{
results.push_back(ref);
}
}
// Helper to check a span for 32-bit access
auto match_any_32 = [](const std::span<const char> lanes)
{
for (const auto& c : lanes)
{
if (c == content_dual || c == content_float32)
{
return true;
}
}
return false;
};
// F32 register processing
for (int reg32 = 0; reg32 < 24; ++reg32)
{
const u32 offset = reg32 * 16;
auto word0 = *reinterpret_cast<const u64*>(&file[offset]);
auto word1 = *reinterpret_cast<const u64*>(&file[offset + 8]);
if (!word0 && !word1) [[ likely ]]
{
// Trivial rejection, very commonly hit.
continue;
}
RegisterRef ref{ .reg {.id = reg32, .f16 = false } };
if (word0)
{
ref.x = match_any_32({ &file[offset], 4 });
ref.y = match_any_32({ &file[offset + 4], 4 });
}
if (word1)
{
ref.z = match_any_32({ &file[offset + 8], 4 });
ref.w = match_any_32({ &file[offset + 12], 4 });
}
if (ref)
{
results.push_back(ref);
}
}
return results;
}
// Decodes every instruction of the block into register references.
// Source operands are appended to the instruction's srcs list in operand order,
// and the destination (if any) is appended to its dsts list.
void annotate_instructions(BasicBlock* block, const RSXFragmentProgram& prog)
{
    for (auto& inst : block->instructions)
    {
        const auto opcode = static_cast<FP_opcode>(inst.opcode);
        const u32 num_operands = get_operand_count(opcode);

        for (u32 operand = 0; operand != num_operands; ++operand)
        {
            RegisterRef src = get_src_register(prog, &inst, operand);

            // Operands with an empty mask are skipped, they are likely literal constants
            if (src.mask)
            {
                inst.srcs.push_back(src);
            }
        }

        // Keep the destination only when the reference evaluates to true
        if (RegisterRef dst = get_dst_register(&inst); dst)
        {
            inst.dsts.push_back(dst);
        }
    }
}
// Computes the read list (input_list) and clobber list (clobber_list) of a block.
// Instructions are walked in order. A byte that is read before anything in the block wrote it
// counts as an input, and every byte written by an instruction's first destination counts as clobbered.
void annotate_block_io(BasicBlock* block)
{
    // One tag per register file byte.
    // We'll eventually replace this with a bitfield mask, but for ease of debugging, we use char for now.
    alignas(16) std::array<char, register_file_length> written_bytes;
    alignas(16) std::array<char, register_file_length> consumed_bytes;
    written_bytes.fill(content_unknown);
    consumed_bytes.fill(content_unknown);

    // Tag matching the width of the register being accessed
    const auto tag_for = [](const RegisterRef& ref) -> char
    {
        return ref.reg.f16 ? content_float16 : content_float32;
    };

    for (const auto& inst : block->instructions)
    {
        // Reads
        for (const auto& src : inst.srcs)
        {
            const char wanted = tag_for(src);
            const auto src_range = get_register_file_range(src);

            for (const auto& index : src_range)
            {
                if (written_bytes[index] != content_unknown)
                {
                    // Produced earlier inside this block, so not a block input
                    continue;
                }

                char& tag = consumed_bytes[index];
                if (tag == content_unknown)
                {
                    // First read of this byte, tag as input
                    tag = wanted;
                }
                else if (tag != wanted)
                {
                    // Byte was already read through the other register width
                    tag = content_dual;
                }
                // Otherwise the input is already known with the same width
            }
        }

        // Writes. Only the first destination is considered.
        if (inst.dsts.empty())
        {
            continue;
        }

        const auto& dst = inst.dsts.front();
        const char produced = tag_for(dst);
        const auto dst_range = get_register_file_range(dst);

        for (const auto& index : dst_range)
        {
            written_bytes[index] = produced;
        }
    }

    // Compile the tagged files into register references
    block->clobber_list = compile_register_file(written_bytes);
    block->input_list = compile_register_file(consumed_bytes);
}
// Runs the annotation pass over every block of the flow graph.
void RegisterAnnotationPass::run(FlowGraph& graph)
{
    for (auto& node : graph.blocks)
    {
        auto* const current = &node;

        // Operand decoding goes first: the I/O annotation reads the srcs/dsts lists it fills in
        annotate_instructions(current, m_prog);
        annotate_block_io(current);
    }
}
}