mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-12-16 04:09:07 +00:00
rsx/fp: Re-implement ROP output resolve
This commit is contained in:
parent
f2913e4692
commit
316e01995b
@ -525,7 +525,6 @@ target_sources(rpcs3_emu PRIVATE
|
|||||||
RSX/Program/CgBinaryFragmentProgram.cpp
|
RSX/Program/CgBinaryFragmentProgram.cpp
|
||||||
RSX/Program/CgBinaryVertexProgram.cpp
|
RSX/Program/CgBinaryVertexProgram.cpp
|
||||||
RSX/Program/FragmentProgramDecompiler.cpp
|
RSX/Program/FragmentProgramDecompiler.cpp
|
||||||
RSX/Program/FragmentProgramRegister.cpp
|
|
||||||
RSX/Program/GLSLCommon.cpp
|
RSX/Program/GLSLCommon.cpp
|
||||||
RSX/Program/ProgramStateCache.cpp
|
RSX/Program/ProgramStateCache.cpp
|
||||||
RSX/Program/program_util.cpp
|
RSX/Program/program_util.cpp
|
||||||
|
|||||||
@ -14,6 +14,8 @@ namespace rsx
|
|||||||
{
|
{
|
||||||
namespace fragment_program
|
namespace fragment_program
|
||||||
{
|
{
|
||||||
|
using namespace rsx::assembler;
|
||||||
|
|
||||||
static const std::string reg_table[] =
|
static const std::string reg_table[] =
|
||||||
{
|
{
|
||||||
"wpos",
|
"wpos",
|
||||||
@ -22,6 +24,28 @@ namespace rsx
|
|||||||
"tc0", "tc1", "tc2", "tc3", "tc4", "tc5", "tc6", "tc7", "tc8", "tc9",
|
"tc0", "tc1", "tc2", "tc3", "tc4", "tc5", "tc6", "tc7", "tc8", "tc9",
|
||||||
"ssa"
|
"ssa"
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static const std::vector<RegisterRef> s_fp32_output_set =
|
||||||
|
{
|
||||||
|
{.reg {.id = 0, .f16 = false }, .mask = 0xf },
|
||||||
|
{.reg {.id = 2, .f16 = false }, .mask = 0xf },
|
||||||
|
{.reg {.id = 3, .f16 = false }, .mask = 0xf },
|
||||||
|
{.reg {.id = 4, .f16 = false }, .mask = 0xf },
|
||||||
|
};
|
||||||
|
|
||||||
|
static const std::vector<RegisterRef> s_fp16_output_set =
|
||||||
|
{
|
||||||
|
{.reg {.id = 0, .f16 = true }, .mask = 0xf },
|
||||||
|
{.reg {.id = 4, .f16 = true }, .mask = 0xf },
|
||||||
|
{.reg {.id = 6, .f16 = true }, .mask = 0xf },
|
||||||
|
{.reg {.id = 8, .f16 = true }, .mask = 0xf },
|
||||||
|
};
|
||||||
|
|
||||||
|
// Register consumed by the ROP when the program exports depth:
// full-precision register 1, mask bit 2 only (presumably the z lane).
static const RegisterRef s_z_export_reg =
{
	.reg = { .id = 1, .f16 = false },
	.mask = (1u << 2)
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -37,6 +61,26 @@ enum VectorLane : u8
|
|||||||
W = 3,
|
W = 3,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Builds the list of registers that the ROP consumes for a fragment program.
//
// ctrl      - shader control word. CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS selects the
//             fp32 output table instead of the fp16 one; CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT
//             appends the depth export register.
// mrt_count - number of colour targets in use. Values larger than the output table are
//             clamped so that no default-constructed register reference is ever returned.
//
// Returns the colour registers for targets [0, mrt_count) followed by the optional
// depth export register.
std::vector<RegisterRef> get_fragment_program_output_set(u32 ctrl, u32 mrt_count)
{
	const bool export_depth = !!(ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT);

	std::vector<RegisterRef> result;
	if (mrt_count > 0)
	{
		const std::vector<RegisterRef>& color_outputs = (ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS)
			? s_fp32_output_set
			: s_fp16_output_set;

		// resize() past the end of the table would append value-initialized entries; clamp instead.
		const std::size_t color_count = (mrt_count < color_outputs.size())
			? static_cast<std::size_t>(mrt_count)
			: color_outputs.size();

		// One allocation covers the colour outputs plus the optional depth register.
		result.reserve(color_count + (export_depth ? 1 : 0));
		result.insert(result.end(), color_outputs.begin(), color_outputs.begin() + color_count);
	}

	if (export_depth)
	{
		result.push_back(s_z_export_reg);
	}

	return result;
}
|
||||||
|
|
||||||
FragmentProgramDecompiler::FragmentProgramDecompiler(const RSXFragmentProgram &prog, u32& size)
|
FragmentProgramDecompiler::FragmentProgramDecompiler(const RSXFragmentProgram &prog, u32& size)
|
||||||
: m_size(size)
|
: m_size(size)
|
||||||
, m_prog(prog)
|
, m_prog(prog)
|
||||||
@ -157,8 +201,6 @@ void FragmentProgramDecompiler::SetDst(std::string code, u32 flags)
|
|||||||
}
|
}
|
||||||
|
|
||||||
const u32 reg_index = dst.fp16 ? (dst.dest_reg >> 1) : dst.dest_reg;
|
const u32 reg_index = dst.fp16 ? (dst.dest_reg >> 1) : dst.dest_reg;
|
||||||
ensure(reg_index < temp_registers.size());
|
|
||||||
|
|
||||||
if (dst.opcode == RSX_FP_OPCODE_MOV &&
|
if (dst.opcode == RSX_FP_OPCODE_MOV &&
|
||||||
src0.reg_type == RSX_FP_REGISTER_TYPE_TEMP &&
|
src0.reg_type == RSX_FP_REGISTER_TYPE_TEMP &&
|
||||||
src0.tmp_reg_index == reg_index)
|
src0.tmp_reg_index == reg_index)
|
||||||
@ -171,8 +213,6 @@ void FragmentProgramDecompiler::SetDst(std::string code, u32 flags)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
temp_registers[reg_index].tag(dst.dest_reg, !!dst.fp16, dst.mask_x, dst.mask_y, dst.mask_z, dst.mask_w);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void FragmentProgramDecompiler::AddFlowOp(const std::string& code)
|
void FragmentProgramDecompiler::AddFlowOp(const std::string& code)
|
||||||
@ -528,26 +568,7 @@ template<typename T> std::string FragmentProgramDecompiler::GetSRC(T src)
|
|||||||
switch (src.reg_type)
|
switch (src.reg_type)
|
||||||
{
|
{
|
||||||
case RSX_FP_REGISTER_TYPE_TEMP:
|
case RSX_FP_REGISTER_TYPE_TEMP:
|
||||||
|
if (src.fp16 && precision_modifier == RSX_FP_PRECISION_HALF)
|
||||||
if (!src.fp16)
|
|
||||||
{
|
|
||||||
if (dst.opcode == RSX_FP_OPCODE_UP16 ||
|
|
||||||
dst.opcode == RSX_FP_OPCODE_UP2 ||
|
|
||||||
dst.opcode == RSX_FP_OPCODE_UP4 ||
|
|
||||||
dst.opcode == RSX_FP_OPCODE_UPB ||
|
|
||||||
dst.opcode == RSX_FP_OPCODE_UPG)
|
|
||||||
{
|
|
||||||
auto &reg = temp_registers[src.tmp_reg_index];
|
|
||||||
if (reg.requires_gather(src.swizzle_x))
|
|
||||||
{
|
|
||||||
properties.has_gather_op = true;
|
|
||||||
AddReg(src.tmp_reg_index, src.fp16);
|
|
||||||
ret = getFloatTypeName(4) + reg.gather_r();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (precision_modifier == RSX_FP_PRECISION_HALF)
|
|
||||||
{
|
{
|
||||||
// clamp16() is not a cheap operation when emulated; avoid at all costs
|
// clamp16() is not a cheap operation when emulated; avoid at all costs
|
||||||
precision_modifier = RSX_FP_PRECISION_REAL;
|
precision_modifier = RSX_FP_PRECISION_REAL;
|
||||||
@ -778,17 +799,6 @@ std::string FragmentProgramDecompiler::BuildCode()
|
|||||||
{
|
{
|
||||||
// Hw tests show that the depth export register is default-initialized to 0 and not wpos.z!!
|
// Hw tests show that the depth export register is default-initialized to 0 and not wpos.z!!
|
||||||
m_parr.AddParam(PF_PARAM_NONE, getFloatTypeName(4), "r1", init_value);
|
m_parr.AddParam(PF_PARAM_NONE, getFloatTypeName(4), "r1", init_value);
|
||||||
|
|
||||||
auto& r1 = temp_registers[1];
|
|
||||||
if (r1.requires_gather(VectorLane::Z))
|
|
||||||
{
|
|
||||||
// r1.zw was not written to
|
|
||||||
properties.has_gather_op = true;
|
|
||||||
main_epilogue << " r1.z = " << float4_type << r1.gather_r() << ".z;\n";
|
|
||||||
|
|
||||||
// Emit debug warning. Useful to diagnose regressions, but should be removed in future.
|
|
||||||
rsx_log.warning("ROP reads from shader depth without writing to it. Final value will be gathered.");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add the color output registers. They are statically written to and have guaranteed initialization (except r1.z which == wpos.z)
|
// Add the color output registers. They are statically written to and have guaranteed initialization (except r1.z which == wpos.z)
|
||||||
@ -816,33 +826,6 @@ std::string FragmentProgramDecompiler::BuildCode()
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto block_index = ouput_register_indices[n];
|
|
||||||
auto& r = temp_registers[block_index];
|
|
||||||
|
|
||||||
if (fp16_out)
|
|
||||||
{
|
|
||||||
// Check if we need a split/extract op
|
|
||||||
if (r.requires_split(0))
|
|
||||||
{
|
|
||||||
main_epilogue << " " << reg_name << " = " << float4_type << r.split_h0() << ";\n";
|
|
||||||
|
|
||||||
// Emit debug warning. Useful to diagnose regressions, but should be removed in future.
|
|
||||||
rsx_log.warning("ROP reads from %s without writing to it. Final value will be extracted from the 32-bit register.", reg_name);
|
|
||||||
}
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!r.requires_gather128())
|
|
||||||
{
|
|
||||||
// Nothing to do
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// We need to gather the data from existing registers
|
|
||||||
main_epilogue << " " << reg_name << " = " << float4_type << r.gather_r() << ";\n";
|
|
||||||
properties.has_gather_op = true;
|
|
||||||
|
|
||||||
// Emit debug warning. Useful to diagnose regressions, but should be removed in future.
|
// Emit debug warning. Useful to diagnose regressions, but should be removed in future.
|
||||||
rsx_log.warning("ROP reads from %s without writing to it. Final value will be gathered.", reg_name);
|
rsx_log.warning("ROP reads from %s without writing to it. Final value will be gathered.", reg_name);
|
||||||
}
|
}
|
||||||
@ -1030,28 +1013,6 @@ std::string FragmentProgramDecompiler::BuildCode()
|
|||||||
OS << Format(divsq_func);
|
OS << Format(divsq_func);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Declare register gather/merge if needed
|
|
||||||
if (properties.has_gather_op)
|
|
||||||
{
|
|
||||||
std::string float2 = getFloatTypeName(2);
|
|
||||||
|
|
||||||
OS << float4 << " gather(" << float4 << " _h0, " << float4 << " _h1)\n";
|
|
||||||
OS << "{\n";
|
|
||||||
OS << " float x = uintBitsToFloat(packHalf2x16(_h0.xy));\n";
|
|
||||||
OS << " float y = uintBitsToFloat(packHalf2x16(_h0.zw));\n";
|
|
||||||
OS << " float z = uintBitsToFloat(packHalf2x16(_h1.xy));\n";
|
|
||||||
OS << " float w = uintBitsToFloat(packHalf2x16(_h1.zw));\n";
|
|
||||||
OS << " return " << float4 << "(x, y, z, w);\n";
|
|
||||||
OS << "}\n\n";
|
|
||||||
|
|
||||||
OS << float2 << " gather(" << float4 << " _h)\n";
|
|
||||||
OS << "{\n";
|
|
||||||
OS << " float x = uintBitsToFloat(packHalf2x16(_h.xy));\n";
|
|
||||||
OS << " float y = uintBitsToFloat(packHalf2x16(_h.zw));\n";
|
|
||||||
OS << " return " << float2 << "(x, y);\n";
|
|
||||||
OS << "}\n\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
if (properties.has_dynamic_register_load)
|
if (properties.has_dynamic_register_load)
|
||||||
{
|
{
|
||||||
OS <<
|
OS <<
|
||||||
@ -1303,8 +1264,28 @@ std::string FragmentProgramDecompiler::Decompile()
|
|||||||
{
|
{
|
||||||
auto graph = deconstruct_fragment_program(m_prog);
|
auto graph = deconstruct_fragment_program(m_prog);
|
||||||
|
|
||||||
if (g_cfg.video.shader_precision != gpu_preset_level::low)
|
if (!graph.blocks.empty())
|
||||||
{
|
{
|
||||||
|
// The RSX CFG is missing the output block. We inject a fake tail block that ingests the ROP outputs.
|
||||||
|
BasicBlock* rop_block = nullptr;
|
||||||
|
BasicBlock* tail_block = &graph.blocks.back();
|
||||||
|
if (tail_block->instructions.size() == 0)
|
||||||
|
{
|
||||||
|
// Merge block. Use this directly
|
||||||
|
rop_block = tail_block;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
graph.blocks.push_back({});
|
||||||
|
rop_block = &graph.blocks.back();
|
||||||
|
|
||||||
|
tail_block->insert_succ(rop_block);
|
||||||
|
rop_block->insert_pred(tail_block);
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto rop_inputs = get_fragment_program_output_set(m_prog.ctrl, m_prog.mrt_buffers_count);
|
||||||
|
rop_block->input_list.insert(rop_block->input_list.end(), rop_inputs.begin(), rop_inputs.end());
|
||||||
|
|
||||||
FP::RegisterAnnotationPass annotation_pass{ m_prog };
|
FP::RegisterAnnotationPass annotation_pass{ m_prog };
|
||||||
FP::RegisterDependencyPass dependency_pass{};
|
FP::RegisterDependencyPass dependency_pass{};
|
||||||
|
|
||||||
@ -1376,6 +1357,9 @@ std::string FragmentProgramDecompiler::Decompile()
|
|||||||
case EdgeType::ENDLOOP:
|
case EdgeType::ENDLOOP:
|
||||||
// Pure merge block?
|
// Pure merge block?
|
||||||
break;
|
break;
|
||||||
|
case EdgeType::NONE:
|
||||||
|
ensure(block.instructions.empty());
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
fmt::throw_exception("Unhandled edge type %d", static_cast<int>(pred.type));
|
fmt::throw_exception("Unhandled edge type %d", static_cast<int>(pred.type));
|
||||||
break;
|
break;
|
||||||
|
|||||||
@ -1,6 +1,5 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
#include "ShaderParam.h"
|
#include "ShaderParam.h"
|
||||||
#include "FragmentProgramRegister.h"
|
|
||||||
#include "RSXFragmentProgram.h"
|
#include "RSXFragmentProgram.h"
|
||||||
|
|
||||||
#include "Assembler/CFG.h"
|
#include "Assembler/CFG.h"
|
||||||
@ -53,8 +52,6 @@ class FragmentProgramDecompiler
|
|||||||
int m_code_level;
|
int m_code_level;
|
||||||
std::unordered_map<u32, u32> m_constant_offsets;
|
std::unordered_map<u32, u32> m_constant_offsets;
|
||||||
|
|
||||||
std::array<rsx::MixedPrecisionRegister, 64> temp_registers;
|
|
||||||
|
|
||||||
std::string GetMask() const;
|
std::string GetMask() const;
|
||||||
|
|
||||||
void SetDst(std::string code, u32 flags = 0);
|
void SetDst(std::string code, u32 flags = 0);
|
||||||
@ -175,7 +172,6 @@ public:
|
|||||||
|
|
||||||
// Decoded properties (out)
|
// Decoded properties (out)
|
||||||
bool has_lit_op = false;
|
bool has_lit_op = false;
|
||||||
bool has_gather_op = false;
|
|
||||||
bool has_no_output = false;
|
bool has_no_output = false;
|
||||||
bool has_discard_op = false;
|
bool has_discard_op = false;
|
||||||
bool has_tex_op = false;
|
bool has_tex_op = false;
|
||||||
|
|||||||
@ -1,196 +0,0 @@
|
|||||||
#include "stdafx.h"
|
|
||||||
#include "FragmentProgramRegister.h"
|
|
||||||
|
|
||||||
namespace rsx
|
|
||||||
{
|
|
||||||
// Starts with every half-word slot marked as never written.
MixedPrecisionRegister::MixedPrecisionRegister()
{
	content_mask.fill(data_type_bits::undefined);
}
|
|
||||||
|
|
||||||
void MixedPrecisionRegister::tag_h0(bool x, bool y, bool z, bool w)
|
|
||||||
{
|
|
||||||
if (x) content_mask[0] = data_type_bits::f16;
|
|
||||||
if (y) content_mask[1] = data_type_bits::f16;
|
|
||||||
if (z) content_mask[2] = data_type_bits::f16;
|
|
||||||
if (w) content_mask[3] = data_type_bits::f16;
|
|
||||||
}
|
|
||||||
|
|
||||||
void MixedPrecisionRegister::tag_h1(bool x, bool y, bool z, bool w)
|
|
||||||
{
|
|
||||||
if (x) content_mask[4] = data_type_bits::f16;
|
|
||||||
if (y) content_mask[5] = data_type_bits::f16;
|
|
||||||
if (z) content_mask[6] = data_type_bits::f16;
|
|
||||||
if (w) content_mask[7] = data_type_bits::f16;
|
|
||||||
}
|
|
||||||
|
|
||||||
void MixedPrecisionRegister::tag_r(bool x, bool y, bool z, bool w)
|
|
||||||
{
|
|
||||||
if (x) content_mask[0] = content_mask[1] = data_type_bits::f32;
|
|
||||||
if (y) content_mask[2] = content_mask[3] = data_type_bits::f32;
|
|
||||||
if (z) content_mask[4] = content_mask[5] = data_type_bits::f32;
|
|
||||||
if (w) content_mask[6] = content_mask[7] = data_type_bits::f32;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Records a write to this register.
//
// index   - register number as encoded by the instruction; for fp16 writes this is the
//           half-register number, so the owning full register is index / 2.
// is_fp16 - true when the write targets a half register.
// x..w    - which lanes are written.
void MixedPrecisionRegister::tag(u32 index, bool is_fp16, bool x, bool y, bool z, bool w)
{
	// The first write binds this object to a register file slot.
	if (file_index == umax)
	{
		file_index = is_fp16 ? (index >> 1) : index;
	}

	if (!is_fp16)
	{
		tag_r(x, y, z, w);
		return;
	}

	// A half register must belong to the full register this object tracks.
	ensure((index / 2) == file_index);

	// Odd half-register numbers map to the upper half, even ones to the lower half.
	if (index & 1)
	{
		tag_h1(x, y, z, w);
	}
	else
	{
		tag_h0(x, y, z, w);
	}
}
|
|
||||||
|
|
||||||
std::string MixedPrecisionRegister::gather_r() const
|
|
||||||
{
|
|
||||||
const auto half_index = file_index << 1;
|
|
||||||
const std::string reg = "r" + std::to_string(file_index);
|
|
||||||
const std::string gather_half_regs[] = {
|
|
||||||
"gather(h" + std::to_string(half_index) + ")",
|
|
||||||
"gather(h" + std::to_string(half_index + 1) + ")"
|
|
||||||
};
|
|
||||||
|
|
||||||
std::string outputs[4];
|
|
||||||
for (int ch = 0; ch < 4; ++ch)
|
|
||||||
{
|
|
||||||
// FIXME: This approach ignores mixed register bits. Not ideal!!!!
|
|
||||||
const auto channel0 = content_mask[ch * 2];
|
|
||||||
const auto is_fp16_ch = channel0 == content_mask[ch * 2 + 1] && channel0 == data_type_bits::f16;
|
|
||||||
outputs[ch] = is_fp16_ch ? gather_half_regs[ch / 2] : reg;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Grouping. Only replace relevant bits...
|
|
||||||
if (outputs[0] == outputs[1]) outputs[0] = "";
|
|
||||||
if (outputs[2] == outputs[3]) outputs[2] = "";
|
|
||||||
|
|
||||||
// Assemble
|
|
||||||
bool group = false;
|
|
||||||
std::string result = "";
|
|
||||||
constexpr std::string_view swz_mask = "xyzw";
|
|
||||||
|
|
||||||
for (int ch = 0; ch < 4; ++ch)
|
|
||||||
{
|
|
||||||
if (outputs[ch].empty())
|
|
||||||
{
|
|
||||||
group = true;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!result.empty())
|
|
||||||
{
|
|
||||||
result += ", ";
|
|
||||||
}
|
|
||||||
|
|
||||||
if (group)
|
|
||||||
{
|
|
||||||
ensure(ch > 0);
|
|
||||||
group = false;
|
|
||||||
|
|
||||||
if (outputs[ch] == reg)
|
|
||||||
{
|
|
||||||
result += reg + "." + swz_mask[ch - 1] + swz_mask[ch];
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
result += outputs[ch];
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
const int subch = outputs[ch] == reg ? ch : (ch % 2); // Avoid .xyxy.z and other such ugly swizzles
|
|
||||||
result += outputs[ch] + "." + swz_mask[subch];
|
|
||||||
}
|
|
||||||
|
|
||||||
// Optimize dual-gather (128-bit gather) to use special function
|
|
||||||
const std::string double_gather = gather_half_regs[0] + ", " + gather_half_regs[1];
|
|
||||||
if (result == double_gather)
|
|
||||||
{
|
|
||||||
result = "gather(h" + std::to_string(half_index) + ", h" + std::to_string(half_index + 1) + ")";
|
|
||||||
}
|
|
||||||
|
|
||||||
return "(" + result + ")";
|
|
||||||
}
|
|
||||||
|
|
||||||
// Produces the shader expression (wrapped in parentheses) that reads half register
// `word_index` (0 = lower, 1 = upper) of this register. Lanes whose slot was last written
// as fp32 are extracted from the full register with unpackHalf2x16(); all other lanes are
// read from the half register itself.
//
// word_index must be 0 or 1; this is checked before it is used to index anything.
std::string MixedPrecisionRegister::fetch_halfreg(u32 word_index) const
{
	// Validate first: word_index is used below to index the 4-character swizzle table,
	// so checking it afterwards would come too late.
	ensure(word_index <= 1);

	constexpr std::string_view swz_mask = "xyzw";
	const std::string reg = "r" + std::to_string(file_index);
	const std::string hreg = "h" + std::to_string(file_index * 2 + word_index);

	// The half register overlays two 32-bit lanes of the full register.
	const std::string word0_bits = "floatBitsToUint(" + reg + "." + swz_mask[word_index * 2] + ")";
	const std::string word1_bits = "floatBitsToUint(" + reg + "." + swz_mask[word_index * 2 + 1] + ")";
	const std::string words[] = {
		"unpackHalf2x16(" + word0_bits + ")",
		"unpackHalf2x16(" + word1_bits + ")"
	};

	// Choose a source per half-word lane
	std::string outputs[4];

	const int word_offset = word_index * 4;
	for (int ch = 0; ch < 4; ++ch)
	{
		outputs[ch] = content_mask[ch + word_offset] == data_type_bits::f32
			? words[ch / 2]
			: hreg;
	}

	// Grouping. Only replace relevant bits...
	if (outputs[0] == outputs[1]) outputs[0] = "";
	if (outputs[2] == outputs[3]) outputs[2] = "";

	// Assemble
	bool group = false;
	std::string result = "";

	for (int ch = 0; ch < 4; ++ch)
	{
		if (outputs[ch].empty())
		{
			// This lane is emitted together with the next one
			group = true;
			continue;
		}

		if (!result.empty())
		{
			result += ", ";
		}

		if (group)
		{
			ensure(ch > 0);
			group = false;
			result += outputs[ch];

			// unpackHalf2x16() is emitted without a swizzle; the half register needs a two-lane one
			if (outputs[ch] == hreg)
			{
				result += std::string(".") + swz_mask[ch - 1] + swz_mask[ch];
			}
			continue;
		}

		const int subch = outputs[ch] == hreg ? ch : (ch % 2); // Avoid .xyxy.z and other such ugly swizzles
		result += outputs[ch] + "." + swz_mask[subch];
	}

	return "(" + result + ")";
}
|
|
||||||
}
|
|
||||||
@ -1,111 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <util/types.hpp>
|
|
||||||
|
|
||||||
namespace rsx
|
|
||||||
{
|
|
||||||
class MixedPrecisionRegister
|
|
||||||
{
|
|
||||||
enum data_type_bits
|
|
||||||
{
|
|
||||||
undefined = 0,
|
|
||||||
f16 = 1,
|
|
||||||
f32 = 2
|
|
||||||
};
|
|
||||||
|
|
||||||
std::array<data_type_bits, 8> content_mask; // Content details for each half-word
|
|
||||||
u32 file_index = umax;
|
|
||||||
|
|
||||||
void tag_h0(bool x, bool y, bool z, bool w);
|
|
||||||
|
|
||||||
void tag_h1(bool x, bool y, bool z, bool w);
|
|
||||||
|
|
||||||
void tag_r(bool x, bool y, bool z, bool w);
|
|
||||||
|
|
||||||
std::string fetch_halfreg(u32 word_index) const;
|
|
||||||
|
|
||||||
public:
|
|
||||||
MixedPrecisionRegister();
|
|
||||||
|
|
||||||
void tag(u32 index, bool is_fp16, bool x, bool y, bool z, bool w);
|
|
||||||
|
|
||||||
std::string gather_r() const;
|
|
||||||
|
|
||||||
std::string split_h0() const
|
|
||||||
{
|
|
||||||
return fetch_halfreg(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string split_h1() const
|
|
||||||
{
|
|
||||||
return fetch_halfreg(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Getters
|
|
||||||
|
|
||||||
// Return true if all values are unwritten to (undefined)
|
|
||||||
bool floating() const
|
|
||||||
{
|
|
||||||
return file_index == umax;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return true if the first half register is all undefined
|
|
||||||
bool floating_h0() const
|
|
||||||
{
|
|
||||||
return content_mask[0] == content_mask[1] &&
|
|
||||||
content_mask[1] == content_mask[2] &&
|
|
||||||
content_mask[2] == content_mask[3] &&
|
|
||||||
content_mask[3] == data_type_bits::undefined;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return true if the second half register is all undefined
|
|
||||||
bool floating_h1() const
|
|
||||||
{
|
|
||||||
return content_mask[4] == content_mask[5] &&
|
|
||||||
content_mask[5] == content_mask[6] &&
|
|
||||||
content_mask[6] == content_mask[7] &&
|
|
||||||
content_mask[7] == data_type_bits::undefined;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return true if any of the half-words are 16-bit
|
|
||||||
bool requires_gather(u8 channel) const
|
|
||||||
{
|
|
||||||
// Data fetched from the single precision register requires merging of the two half registers
|
|
||||||
const auto channel_offset = channel * 2;
|
|
||||||
ensure(channel_offset <= 6);
|
|
||||||
|
|
||||||
return (content_mask[channel_offset] == data_type_bits::f16 || content_mask[channel_offset + 1] == data_type_bits::f16);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return true if the entire 128-bit register is filled with 2xfp16x4 data words
|
|
||||||
bool requires_gather128() const
|
|
||||||
{
|
|
||||||
// Full 128-bit check
|
|
||||||
for (const auto& ch : content_mask)
|
|
||||||
{
|
|
||||||
if (ch == data_type_bits::f16)
|
|
||||||
{
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return true if the half-register is polluted with fp32 data
|
|
||||||
bool requires_split(u32 word_index) const
|
|
||||||
{
|
|
||||||
const u32 content_offset = word_index * 4;
|
|
||||||
for (u32 i = 0; i < 4; ++i)
|
|
||||||
{
|
|
||||||
if (content_mask[content_offset + i] == data_type_bits::f32)
|
|
||||||
{
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
@ -161,7 +161,6 @@
|
|||||||
<ClCompile Include="Emu\RSX\Program\Assembler\FPToCFG.cpp" />
|
<ClCompile Include="Emu\RSX\Program\Assembler\FPToCFG.cpp" />
|
||||||
<ClCompile Include="Emu\RSX\Program\Assembler\Passes\FP\RegisterAnnotationPass.cpp" />
|
<ClCompile Include="Emu\RSX\Program\Assembler\Passes\FP\RegisterAnnotationPass.cpp" />
|
||||||
<ClCompile Include="Emu\RSX\Program\Assembler\Passes\FP\RegisterDependencyPass.cpp" />
|
<ClCompile Include="Emu\RSX\Program\Assembler\Passes\FP\RegisterDependencyPass.cpp" />
|
||||||
<ClCompile Include="Emu\RSX\Program\FragmentProgramRegister.cpp" />
|
|
||||||
<ClCompile Include="Emu\RSX\Program\ProgramStateCache.cpp" />
|
<ClCompile Include="Emu\RSX\Program\ProgramStateCache.cpp" />
|
||||||
<ClCompile Include="Emu\RSX\Program\program_util.cpp" />
|
<ClCompile Include="Emu\RSX\Program\program_util.cpp" />
|
||||||
<ClCompile Include="Emu\RSX\Program\SPIRVCommon.cpp" />
|
<ClCompile Include="Emu\RSX\Program\SPIRVCommon.cpp" />
|
||||||
@ -710,7 +709,6 @@
|
|||||||
<ClInclude Include="Emu\RSX\Program\Assembler\IR.h" />
|
<ClInclude Include="Emu\RSX\Program\Assembler\IR.h" />
|
||||||
<ClInclude Include="Emu\RSX\Program\Assembler\Passes\FP\RegisterAnnotationPass.h" />
|
<ClInclude Include="Emu\RSX\Program\Assembler\Passes\FP\RegisterAnnotationPass.h" />
|
||||||
<ClInclude Include="Emu\RSX\Program\Assembler\Passes\FP\RegisterDependencyPass.h" />
|
<ClInclude Include="Emu\RSX\Program\Assembler\Passes\FP\RegisterDependencyPass.h" />
|
||||||
<ClInclude Include="Emu\RSX\Program\FragmentProgramRegister.h" />
|
|
||||||
<ClInclude Include="Emu\RSX\Program\GLSLTypes.h" />
|
<ClInclude Include="Emu\RSX\Program\GLSLTypes.h" />
|
||||||
<ClInclude Include="Emu\RSX\Program\ProgramStateCache.h" />
|
<ClInclude Include="Emu\RSX\Program\ProgramStateCache.h" />
|
||||||
<ClInclude Include="Emu\RSX\Program\program_util.h" />
|
<ClInclude Include="Emu\RSX\Program\program_util.h" />
|
||||||
|
|||||||
@ -1360,9 +1360,6 @@
|
|||||||
<ClCompile Include="Emu\Cell\ErrorCodes.cpp">
|
<ClCompile Include="Emu\Cell\ErrorCodes.cpp">
|
||||||
<Filter>Emu\Cell</Filter>
|
<Filter>Emu\Cell</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
<ClCompile Include="Emu\RSX\Program\FragmentProgramRegister.cpp">
|
|
||||||
<Filter>Emu\GPU\RSX\Program</Filter>
|
|
||||||
</ClCompile>
|
|
||||||
<ClCompile Include="util\emu_utils.cpp">
|
<ClCompile Include="util\emu_utils.cpp">
|
||||||
<Filter>Utilities</Filter>
|
<Filter>Utilities</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
@ -2764,9 +2761,6 @@
|
|||||||
<ClInclude Include="Emu\Audio\audio_utils.h">
|
<ClInclude Include="Emu\Audio\audio_utils.h">
|
||||||
<Filter>Emu\Audio</Filter>
|
<Filter>Emu\Audio</Filter>
|
||||||
</ClInclude>
|
</ClInclude>
|
||||||
<ClInclude Include="Emu\RSX\Program\FragmentProgramRegister.h">
|
|
||||||
<Filter>Emu\GPU\RSX\Program</Filter>
|
|
||||||
</ClInclude>
|
|
||||||
<ClInclude Include="util\video_source.h">
|
<ClInclude Include="util\video_source.h">
|
||||||
<Filter>Utilities</Filter>
|
<Filter>Utilities</Filter>
|
||||||
</ClInclude>
|
</ClInclude>
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user