amdgpu: Split liverpool registers and cleanup (#3707)

TheTurtle committed 2025-10-05 23:42:40 +03:00 (committed by GitHub)
parent d17a4fb8cc
commit 8f37cfb739
75 changed files with 2505 additions and 2641 deletions


@ -851,10 +851,10 @@ if (ARCHITECTURE STREQUAL "x86_64")
src/core/cpu_patches.h) src/core/cpu_patches.h)
endif() endif()
set(SHADER_RECOMPILER src/shader_recompiler/exception.h set(SHADER_RECOMPILER src/shader_recompiler/profile.h
src/shader_recompiler/profile.h
src/shader_recompiler/recompiler.cpp src/shader_recompiler/recompiler.cpp
src/shader_recompiler/recompiler.h src/shader_recompiler/recompiler.h
src/shader_recompiler/resource.h
src/shader_recompiler/info.h src/shader_recompiler/info.h
src/shader_recompiler/params.h src/shader_recompiler/params.h
src/shader_recompiler/runtime_info.h src/shader_recompiler/runtime_info.h
@ -952,17 +952,24 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/ir/value.h src/shader_recompiler/ir/value.h
) )
set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp set(VIDEO_CORE src/video_core/amdgpu/cb_db_extent.h
src/video_core/amdgpu/liverpool.cpp
src/video_core/amdgpu/liverpool.h src/video_core/amdgpu/liverpool.h
src/video_core/amdgpu/pixel_format.cpp src/video_core/amdgpu/pixel_format.cpp
src/video_core/amdgpu/pixel_format.h src/video_core/amdgpu/pixel_format.h
src/video_core/amdgpu/pm4_cmds.h src/video_core/amdgpu/pm4_cmds.h
src/video_core/amdgpu/pm4_opcodes.h src/video_core/amdgpu/pm4_opcodes.h
src/video_core/amdgpu/regs_color.h
src/video_core/amdgpu/regs_depth.h
src/video_core/amdgpu/regs.cpp
src/video_core/amdgpu/regs.h
src/video_core/amdgpu/regs_primitive.h
src/video_core/amdgpu/regs_shader.h
src/video_core/amdgpu/regs_texture.h
src/video_core/amdgpu/regs_vertex.h
src/video_core/amdgpu/resource.h src/video_core/amdgpu/resource.h
src/video_core/amdgpu/tiling.cpp src/video_core/amdgpu/tiling.cpp
src/video_core/amdgpu/tiling.h src/video_core/amdgpu/tiling.h
src/video_core/amdgpu/types.h
src/video_core/amdgpu/default_context.cpp
src/video_core/buffer_cache/buffer.cpp src/video_core/buffer_cache/buffer.cpp
src/video_core/buffer_cache/buffer.h src/video_core/buffer_cache/buffer.h
src/video_core/buffer_cache/buffer_cache.cpp src/video_core/buffer_cache/buffer_cache.cpp


@ -1,20 +1,14 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include <array>
#include <half.hpp> #include <half.hpp>
#include "common/number_utils.h" #include "common/number_utils.h"
#include "video_core/amdgpu/pixel_format.h"
#include "video_core/amdgpu/types.h"
#define UF11_EXPONENT_SHIFT 6 constexpr u32 UF11_EXPONENT_SHIFT = 6;
#define UF10_EXPONENT_SHIFT 5 constexpr u32 UF10_EXPONENT_SHIFT = 5;
constexpr u32 RGB9E5_MANTISSA_BITS = 9;
#define RGB9E5_MANTISSA_BITS 9 constexpr u32 RGB9E5_EXP_BIAS = 1;
#define RGB9E5_EXP_BIAS 1 constexpr u32 F32_INFINITY = 0x7f800000;
#define F32_INFINITY 0x7f800000
namespace NumberUtils { namespace NumberUtils {
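For reference, the number_utils hunk above is a plain macro-to-constexpr conversion. A minimal standalone sketch of the new form (constant names taken from the diff, the rest of the translation unit assumed):

#include <cstdint>

using u32 = std::uint32_t;

// Typed, scoped constants replace the old #define lines; unlike macros they
// obey namespaces and are visible to the debugger.
constexpr u32 UF11_EXPONENT_SHIFT = 6;
constexpr u32 UF10_EXPONENT_SHIFT = 5;
constexpr u32 RGB9E5_MANTISSA_BITS = 9;
constexpr u32 RGB9E5_EXP_BIAS = 1;
constexpr u32 F32_INFINITY = 0x7f800000;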


@ -157,7 +157,7 @@ std::optional<RegDump*> DebugStateImpl::GetRegDump(uintptr_t base_addr, uintptr_
} }
void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr,
const AmdGpu::Liverpool::Regs& regs) { const AmdGpu::Regs& regs) {
std::scoped_lock lock{frame_dump_list_mutex}; std::scoped_lock lock{frame_dump_list_mutex};
auto dump = GetRegDump(base_addr, header_addr); auto dump = GetRegDump(base_addr, header_addr);
@ -170,15 +170,14 @@ void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr,
for (int i = 0; i < RegDump::MaxShaderStages; i++) { for (int i = 0; i < RegDump::MaxShaderStages; i++) {
if ((*dump)->regs.stage_enable.IsStageEnabled(i)) { if ((*dump)->regs.stage_enable.IsStageEnabled(i)) {
auto stage = (*dump)->regs.ProgramForStage(i); auto stage = (*dump)->regs.ProgramForStage(i);
if (stage->address_lo != 0) { if (stage->address) {
const auto& info = AmdGpu::Liverpool::SearchBinaryInfo(stage->Address<u32*>()); const auto params = AmdGpu::GetParams(*stage);
auto code = stage->Code();
(*dump)->stages[i] = PipelineShaderProgramDump{ (*dump)->stages[i] = PipelineShaderProgramDump{
.name = Vulkan::PipelineCache::GetShaderName(Shader::StageFromIndex(i), .name = Vulkan::PipelineCache::GetShaderName(Shader::StageFromIndex(i),
info.shader_hash), params.hash),
.hash = info.shader_hash, .hash = params.hash,
.user_data = *stage, .user_data = *stage,
.code = std::vector<u32>{code.begin(), code.end()}, .code = std::vector<u32>{params.code.begin(), params.code.end()},
}; };
} }
} }
@ -198,12 +197,12 @@ void DebugStateImpl::PushRegsDumpCompute(uintptr_t base_addr, uintptr_t header_a
auto& cs = (*dump)->regs.cs_program; auto& cs = (*dump)->regs.cs_program;
cs = cs_state; cs = cs_state;
const auto& info = AmdGpu::Liverpool::SearchBinaryInfo(cs.Address<u32*>()); const auto params = AmdGpu::GetParams(cs);
(*dump)->cs_data = PipelineComputerProgramDump{ (*dump)->cs_data = PipelineComputerProgramDump{
.name = Vulkan::PipelineCache::GetShaderName(Shader::Stage::Compute, info.shader_hash), .name = Vulkan::PipelineCache::GetShaderName(Shader::Stage::Compute, params.hash),
.hash = info.shader_hash, .hash = params.hash,
.cs_program = cs, .cs_program = cs,
.code = std::vector<u32>{cs.Code().begin(), cs.Code().end()}, .code = std::vector<u32>{params.code.begin(), params.code.end()},
}; };
} }
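The debug-state hunks above replace the SearchBinaryInfo(...)/Code() pair with a single AmdGpu::GetParams(...) call that yields both the shader hash and its code. The real return type lives in the new register headers; the sketch below assumes a small hash-plus-span struct (ShaderParams is a stand-in name) just to show the call-site shape:

#include <cstdint>
#include <span>
#include <vector>

using u32 = std::uint32_t;
using u64 = std::uint64_t;

// Assumed shape of what GetParams() returns: a hash plus a view of the code.
struct ShaderParams {
    u64 hash;
    std::span<const u32> code;
};

struct PipelineComputerProgramDump {
    u64 hash;
    std::vector<u32> code;
};

// Call-site pattern from the hunk above: one lookup feeds both fields.
PipelineComputerProgramDump MakeDump(const ShaderParams& params) {
    return PipelineComputerProgramDump{
        .hash = params.hash,
        .code = std::vector<u32>{params.code.begin(), params.code.end()},
    };
}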


@ -11,7 +11,9 @@
#include <queue> #include <queue>
#include "common/types.h" #include "common/types.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/regs.h"
#include "video_core/renderer_vulkan/vk_common.h"
#ifdef _WIN32 #ifdef _WIN32
#ifndef WIN32_LEAN_AND_MEAN #ifndef WIN32_LEAN_AND_MEAN
@ -54,21 +56,21 @@ struct QueueDump {
struct PipelineShaderProgramDump { struct PipelineShaderProgramDump {
std::string name; std::string name;
u64 hash; u64 hash;
Vulkan::Liverpool::ShaderProgram user_data{}; AmdGpu::ShaderProgram user_data{};
std::vector<u32> code{}; std::vector<u32> code{};
}; };
struct PipelineComputerProgramDump { struct PipelineComputerProgramDump {
std::string name; std::string name;
u64 hash; u64 hash;
Vulkan::Liverpool::ComputeProgram cs_program{}; AmdGpu::ComputeProgram cs_program{};
std::vector<u32> code{}; std::vector<u32> code{};
}; };
struct RegDump { struct RegDump {
bool is_compute{false}; bool is_compute{false};
static constexpr size_t MaxShaderStages = 5; static constexpr size_t MaxShaderStages = 5;
Vulkan::Liverpool::Regs regs{}; AmdGpu::Regs regs;
std::array<PipelineShaderProgramDump, MaxShaderStages> stages{}; std::array<PipelineShaderProgramDump, MaxShaderStages> stages{};
PipelineComputerProgramDump cs_data{}; PipelineComputerProgramDump cs_data{};
}; };
@ -219,9 +221,8 @@ public:
void PushQueueDump(QueueDump dump); void PushQueueDump(QueueDump dump);
void PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, void PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, const AmdGpu::Regs& regs);
const AmdGpu::Liverpool::Regs& regs); using CsState = AmdGpu::ComputeProgram;
using CsState = AmdGpu::Liverpool::ComputeProgram;
void PushRegsDumpCompute(uintptr_t base_addr, uintptr_t header_addr, const CsState& cs_state); void PushRegsDumpCompute(uintptr_t base_addr, uintptr_t header_addr, const CsState& cs_state);
void CollectShader(const std::string& name, Shader::LogicalStage l_stage, void CollectShader(const std::string& name, Shader::LogicalStage l_stage,


@ -65,7 +65,7 @@ static HdrType GetNext(HdrType this_pm4, uint32_t n) {
} }
void ParsePolygonControl(u32 value, bool begin_table) { void ParsePolygonControl(u32 value, bool begin_table) {
auto const reg = reinterpret_cast<AmdGpu::Liverpool::PolygonControl const&>(value); auto const reg = reinterpret_cast<AmdGpu::PolygonControl const&>(value);
if (!begin_table || if (!begin_table ||
BeginTable("PA_SU_SC_MODE_CNTL", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) { BeginTable("PA_SU_SC_MODE_CNTL", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) {
@ -73,80 +73,80 @@ void ParsePolygonControl(u32 value, bool begin_table) {
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("CULL_FRONT"); Text("CULL_FRONT");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.cull_front.Value()); Text("%X", reg.cull_front);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("CULL_BACK"); Text("CULL_BACK");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.cull_back.Value()); Text("%X", reg.cull_back);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("FACE"); Text("FACE");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%s", enum_name(reg.front_face.Value()).data()); Text("%s", enum_name(reg.front_face).data());
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("POLY_MODE"); Text("POLY_MODE");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.enable_polygon_mode.Value()); Text("%X", reg.enable_polygon_mode);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("POLYMODE_FRONT_PTYPE"); Text("POLYMODE_FRONT_PTYPE");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%s", enum_name(reg.polygon_mode_front.Value()).data()); Text("%s", enum_name(reg.polygon_mode_front).data());
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("POLYMODE_BACK_PTYPE"); Text("POLYMODE_BACK_PTYPE");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%s", enum_name(reg.polygon_mode_back.Value()).data()); Text("%s", enum_name(reg.polygon_mode_back).data());
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("POLY_OFFSET_FRONT_ENABLE"); Text("POLY_OFFSET_FRONT_ENABLE");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.enable_polygon_offset_front.Value()); Text("%X", reg.enable_polygon_offset_front);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("POLY_OFFSET_BACK_ENABLE"); Text("POLY_OFFSET_BACK_ENABLE");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.enable_polygon_offset_back.Value()); Text("%X", reg.enable_polygon_offset_back);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("POLY_OFFSET_PARA_ENABLE"); Text("POLY_OFFSET_PARA_ENABLE");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.enable_polygon_offset_para.Value()); Text("%X", reg.enable_polygon_offset_para);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("VTX_WINDOW_OFFSET_ENABLE"); Text("VTX_WINDOW_OFFSET_ENABLE");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.enable_window_offset.Value()); Text("%X", reg.enable_window_offset);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("PROVOKING_VTX_LAST"); Text("PROVOKING_VTX_LAST");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X (%s)", (u32)reg.provoking_vtx_last.Value(), Text("%X (%s)", static_cast<u32>(reg.provoking_vtx_last),
enum_name(reg.provoking_vtx_last.Value()).data()); enum_name(reg.provoking_vtx_last).data());
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("PERSP_CORR_DIS"); Text("PERSP_CORR_DIS");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.persp_corr_dis.Value()); Text("%X", reg.persp_corr_dis);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("MULTI_PRIM_IB_ENA"); Text("MULTI_PRIM_IB_ENA");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.multi_prim_ib_ena.Value()); Text("%X", reg.multi_prim_ib_ena);
if (begin_table) { if (begin_table) {
EndTable(); EndTable();
@ -155,7 +155,7 @@ void ParsePolygonControl(u32 value, bool begin_table) {
} }
void ParseAaConfig(u32 value, bool begin_table) { void ParseAaConfig(u32 value, bool begin_table) {
auto const reg = reinterpret_cast<Liverpool::AaConfig const&>(value); auto const reg = reinterpret_cast<AmdGpu::AaConfig const&>(value);
if (!begin_table || if (!begin_table ||
BeginTable("PA_SC_AA_CONFIG", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) { BeginTable("PA_SC_AA_CONFIG", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) {
@ -163,31 +163,31 @@ void ParseAaConfig(u32 value, bool begin_table) {
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("MSAA_NUM_SAMPLES"); Text("MSAA_NUM_SAMPLES");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.msaa_num_samples.Value()); Text("%X", reg.msaa_num_samples);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("AA_MASK_CENTROID_DTMN"); Text("AA_MASK_CENTROID_DTMN");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.aa_mask_centroid_dtmn.Value()); Text("%X", reg.aa_mask_centroid_dtmn);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("MAX_SAMPLE_DIST"); Text("MAX_SAMPLE_DIST");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.max_sample_dst.Value()); Text("%X", reg.max_sample_dst);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("MSAA_EXPOSED_SAMPLES"); Text("MSAA_EXPOSED_SAMPLES");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.msaa_exposed_samples.Value()); Text("%X", reg.msaa_exposed_samples);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("DETAIL_TO_EXPOSED_MODE"); Text("DETAIL_TO_EXPOSED_MODE");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.detail_to_exposed_mode.Value()); Text("%X", reg.detail_to_exposed_mode);
if (begin_table) { if (begin_table) {
EndTable(); EndTable();
@ -196,7 +196,7 @@ void ParseAaConfig(u32 value, bool begin_table) {
} }
void ParseViewportControl(u32 value, bool begin_table) { void ParseViewportControl(u32 value, bool begin_table) {
auto const reg = reinterpret_cast<Liverpool::ViewportControl const&>(value); auto const reg = reinterpret_cast<AmdGpu::ViewportControl const&>(value);
if (!begin_table || if (!begin_table ||
BeginTable("PA_CL_VTE_CNTL", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) { BeginTable("PA_CL_VTE_CNTL", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) {
@ -204,61 +204,61 @@ void ParseViewportControl(u32 value, bool begin_table) {
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("VPORT_X_SCALE_ENA"); Text("VPORT_X_SCALE_ENA");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.xscale_enable.Value()); Text("%X", reg.xscale_enable);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("VPORT_X_OFFSET_ENA"); Text("VPORT_X_OFFSET_ENA");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.yoffset_enable.Value()); Text("%X", reg.yoffset_enable);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("VPORT_Y_SCALE_ENA"); Text("VPORT_Y_SCALE_ENA");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.yscale_enable.Value()); Text("%X", reg.yscale_enable);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("VPORT_Y_OFFSET_ENA"); Text("VPORT_Y_OFFSET_ENA");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.yoffset_enable.Value()); Text("%X", reg.yoffset_enable);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("VPORT_Z_SCALE_ENA"); Text("VPORT_Z_SCALE_ENA");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.zscale_enable.Value()); Text("%X", reg.zscale_enable);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("VPORT_Z_OFFSET_ENA"); Text("VPORT_Z_OFFSET_ENA");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.zoffset_enable.Value()); Text("%X", reg.zoffset_enable);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("VTX_XY_FMT"); Text("VTX_XY_FMT");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.xy_transformed.Value()); Text("%X", reg.xy_transformed);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("VTX_Z_FMT"); Text("VTX_Z_FMT");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.z_transformed.Value()); Text("%X", reg.z_transformed);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("VTX_W0_FMT"); Text("VTX_W0_FMT");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.w_transformed.Value()); Text("%X", reg.w_transformed);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("PERFCOUNTER_REF"); Text("PERFCOUNTER_REF");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.perfcounter_ref.Value()); Text("%X", reg.perfcounter_ref);
if (begin_table) { if (begin_table) {
EndTable(); EndTable();
@ -267,7 +267,7 @@ void ParseViewportControl(u32 value, bool begin_table) {
} }
void ParseColorControl(u32 value, bool begin_table) { void ParseColorControl(u32 value, bool begin_table) {
auto const reg = reinterpret_cast<Liverpool::ColorControl const&>(value); auto const reg = reinterpret_cast<AmdGpu::ColorControl const&>(value);
if (!begin_table || if (!begin_table ||
BeginTable("CB_COLOR_CONTROL", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) { BeginTable("CB_COLOR_CONTROL", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) {
@ -275,25 +275,25 @@ void ParseColorControl(u32 value, bool begin_table) {
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("DISABLE_DUAL_QUAD__VI"); Text("DISABLE_DUAL_QUAD__VI");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.disable_dual_quad.Value()); Text("%X", reg.disable_dual_quad);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("DEGAMMA_ENABLE"); Text("DEGAMMA_ENABLE");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.degamma_enable.Value()); Text("%X", reg.degamma_enable);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("MODE"); Text("MODE");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X (%s)", (u32)reg.mode.Value(), enum_name(reg.mode.Value()).data()); Text("%X (%s)", static_cast<u32>(reg.mode), enum_name(reg.mode).data());
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("ROP3"); Text("ROP3");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", static_cast<u32>(reg.rop3.Value())); Text("%X", static_cast<u32>(reg.rop3));
if (begin_table) { if (begin_table) {
EndTable(); EndTable();
@ -302,7 +302,7 @@ void ParseColorControl(u32 value, bool begin_table) {
} }
void ParseColor0Info(u32 value, bool begin_table) { void ParseColor0Info(u32 value, bool begin_table) {
auto const reg = reinterpret_cast<Liverpool::ColorBuffer::Color0Info const&>(value); auto const reg = reinterpret_cast<AmdGpu::ColorBuffer::Color0Info const&>(value);
if (!begin_table || if (!begin_table ||
BeginTable("CB_COLOR_INFO", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) { BeginTable("CB_COLOR_INFO", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) {
@ -310,109 +310,109 @@ void ParseColor0Info(u32 value, bool begin_table) {
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("ENDIAN"); Text("ENDIAN");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%s", enum_name(reg.endian.Value()).data()); Text("%s", enum_name(reg.endian).data());
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("FORMAT"); Text("FORMAT");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%s", enum_name(reg.format.Value()).data()); Text("%s", enum_name(AmdGpu::DataFormat(reg.format)).data());
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("LINEAR_GENERAL"); Text("LINEAR_GENERAL");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.linear_general.Value()); Text("%X", reg.linear_general);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("NUMBER_TYPE"); Text("NUMBER_TYPE");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%s", enum_name(reg.number_type.Value()).data()); Text("%s", enum_name(AmdGpu::NumberFormat(reg.number_type)).data());
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("COMP_SWAP"); Text("COMP_SWAP");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%s", enum_name(reg.comp_swap.Value()).data()); Text("%s", enum_name(reg.comp_swap).data());
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("FAST_CLEAR"); Text("FAST_CLEAR");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.fast_clear.Value()); Text("%X", reg.fast_clear);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("COMPRESSION"); Text("COMPRESSION");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.compression.Value()); Text("%X", reg.compression);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("BLEND_CLAMP"); Text("BLEND_CLAMP");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.blend_clamp.Value()); Text("%X", reg.blend_clamp);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("BLEND_BYPASS"); Text("BLEND_BYPASS");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.blend_bypass.Value()); Text("%X", reg.blend_bypass);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("SIMPLE_FLOAT"); Text("SIMPLE_FLOAT");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.simple_float.Value()); Text("%X", reg.simple_float);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("ROUND_MODE"); Text("ROUND_MODE");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X (%s)", (u32)reg.round_mode.Value(), enum_name(reg.round_mode.Value()).data()); Text("%X (%s)", static_cast<u32>(reg.round_mode), enum_name(reg.round_mode).data());
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("CMASK_IS_LINEAR"); Text("CMASK_IS_LINEAR");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.cmask_is_linear.Value()); Text("%X", reg.cmask_is_linear);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("BLEND_OPT_DONT_RD_DST"); Text("BLEND_OPT_DONT_RD_DST");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.blend_opt_dont_rd_dst.Value()); Text("%X", reg.blend_opt_dont_rd_dst);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("BLEND_OPT_DISCARD_PIXEL"); Text("BLEND_OPT_DISCARD_PIXEL");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.blend_opt_discard_pixel.Value()); Text("%X", reg.blend_opt_discard_pixel);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("FMASK_COMPRESSION_DISABLE__CI__VI"); Text("FMASK_COMPRESSION_DISABLE__CI__VI");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.fmask_compression_disable_ci.Value()); Text("%X", reg.fmask_compression_disable_ci);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("FMASK_COMPRESS_1FRAG_ONLY__VI"); Text("FMASK_COMPRESS_1FRAG_ONLY__VI");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.fmask_compress_1frag_only.Value()); Text("%X", reg.fmask_compress_1frag_only);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("DCC_ENABLE__VI"); Text("DCC_ENABLE__VI");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.dcc_enable.Value()); Text("%X", reg.dcc_enable);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("CMASK_ADDR_TYPE__VI"); Text("CMASK_ADDR_TYPE__VI");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.cmask_addr_type.Value()); Text("%X", reg.cmask_addr_type);
if (begin_table) { if (begin_table) {
EndTable(); EndTable();
@ -421,7 +421,7 @@ void ParseColor0Info(u32 value, bool begin_table) {
} }
void ParseColor0Attrib(u32 value, bool begin_table) { void ParseColor0Attrib(u32 value, bool begin_table) {
auto const reg = reinterpret_cast<Liverpool::ColorBuffer::Color0Attrib const&>(value); auto const reg = reinterpret_cast<AmdGpu::ColorBuffer::Color0Attrib const&>(value);
if (!begin_table || if (!begin_table ||
BeginTable("CB_COLOR_ATTRIB", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) { BeginTable("CB_COLOR_ATTRIB", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) {
@ -429,37 +429,37 @@ void ParseColor0Attrib(u32 value, bool begin_table) {
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("TILE_MODE_INDEX"); Text("TILE_MODE_INDEX");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%s", enum_name(reg.tile_mode_index.Value()).data()); Text("%s", enum_name(reg.tile_mode_index).data());
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("FMASK_TILE_MODE_INDEX"); Text("FMASK_TILE_MODE_INDEX");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.fmask_tile_mode_index.Value()); Text("%X", reg.fmask_tile_mode_index);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("FMASK_BANK_HEIGHT"); Text("FMASK_BANK_HEIGHT");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.fmask_bank_height.Value()); Text("%X", reg.fmask_bank_height);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("NUM_SAMPLES"); Text("NUM_SAMPLES");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.num_samples_log2.Value()); Text("%X", reg.num_samples_log2);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("NUM_FRAGMENTS"); Text("NUM_FRAGMENTS");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.num_fragments_log2.Value()); Text("%X", reg.num_fragments_log2);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("FORCE_DST_ALPHA_1"); Text("FORCE_DST_ALPHA_1");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.force_dst_alpha_1.Value()); Text("%X", reg.force_dst_alpha_1);
if (begin_table) { if (begin_table) {
EndTable(); EndTable();
@ -468,7 +468,7 @@ void ParseColor0Attrib(u32 value, bool begin_table) {
} }
void ParseBlendControl(u32 value, bool begin_table) { void ParseBlendControl(u32 value, bool begin_table) {
auto const reg = reinterpret_cast<Liverpool::BlendControl const&>(value); auto const reg = reinterpret_cast<AmdGpu::BlendControl const&>(value);
if (!begin_table || if (!begin_table ||
BeginTable("CB_BLEND_CONTROL", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) { BeginTable("CB_BLEND_CONTROL", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) {
@ -476,59 +476,59 @@ void ParseBlendControl(u32 value, bool begin_table) {
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("COLOR_SRCBLEND"); Text("COLOR_SRCBLEND");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X (%s)", (u32)reg.color_src_factor.Value(), Text("%X (%s)", static_cast<u32>(reg.color_src_factor),
enum_name(reg.color_src_factor.Value()).data()); enum_name(reg.color_src_factor).data());
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("COLOR_COMB_FCN"); Text("COLOR_COMB_FCN");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X (%s)", (u32)reg.color_func.Value(), enum_name(reg.color_func.Value()).data()); Text("%X (%s)", static_cast<u32>(reg.color_func), enum_name(reg.color_func).data());
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("COLOR_DESTBLEND"); Text("COLOR_DESTBLEND");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X (%s)", (u32)reg.color_dst_factor.Value(), Text("%X (%s)", static_cast<u32>(reg.color_dst_factor),
enum_name(reg.color_dst_factor.Value()).data()); enum_name(reg.color_dst_factor).data());
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("ALPHA_SRCBLEND"); Text("ALPHA_SRCBLEND");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X (%s)", (u32)reg.alpha_src_factor.Value(), Text("%X (%s)", static_cast<u32>(reg.alpha_src_factor),
enum_name(reg.alpha_src_factor.Value()).data()); enum_name(reg.alpha_src_factor).data());
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("ALPHA_COMB_FCN"); Text("ALPHA_COMB_FCN");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X (%s)", (u32)reg.alpha_func.Value(), enum_name(reg.alpha_func.Value()).data()); Text("%X (%s)", static_cast<u32>(reg.alpha_func), enum_name(reg.alpha_func).data());
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("ALPHA_DESTBLEND"); Text("ALPHA_DESTBLEND");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X (%s)", (u32)reg.alpha_dst_factor.Value(), Text("%X (%s)", static_cast<u32>(reg.alpha_dst_factor),
enum_name(reg.alpha_dst_factor.Value()).data()); enum_name(reg.alpha_dst_factor).data());
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("SEPARATE_ALPHA_BLEND"); Text("SEPARATE_ALPHA_BLEND");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.separate_alpha_blend.Value()); Text("%X", reg.separate_alpha_blend);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("ENABLE"); Text("ENABLE");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.enable.Value()); Text("%X", reg.enable);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("DISABLE_ROP3"); Text("DISABLE_ROP3");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.disable_rop3.Value()); Text("%X", reg.disable_rop3);
if (begin_table) { if (begin_table) {
EndTable(); EndTable();
@ -537,7 +537,7 @@ void ParseBlendControl(u32 value, bool begin_table) {
} }
void ParseDepthRenderControl(u32 value, bool begin_table) { void ParseDepthRenderControl(u32 value, bool begin_table) {
auto const reg = reinterpret_cast<Liverpool::DepthRenderControl const&>(value); auto const reg = reinterpret_cast<AmdGpu::DepthRenderControl const&>(value);
if (!begin_table || if (!begin_table ||
BeginTable("DB_RENDER_CONTROL", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) { BeginTable("DB_RENDER_CONTROL", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) {
@ -545,61 +545,61 @@ void ParseDepthRenderControl(u32 value, bool begin_table) {
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("DEPTH_CLEAR_ENABLE"); Text("DEPTH_CLEAR_ENABLE");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.depth_clear_enable.Value()); Text("%X", reg.depth_clear_enable);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("STENCIL_CLEAR_ENABLE"); Text("STENCIL_CLEAR_ENABLE");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.stencil_clear_enable.Value()); Text("%X", reg.stencil_clear_enable);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("DEPTH_COPY"); Text("DEPTH_COPY");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.depth_clear_enable.Value()); Text("%X", reg.depth_clear_enable);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("STENCIL_COPY"); Text("STENCIL_COPY");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.stencil_copy.Value()); Text("%X", reg.stencil_copy);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("RESUMMARIZE_ENABLE"); Text("RESUMMARIZE_ENABLE");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.resummarize_enable.Value()); Text("%X", reg.resummarize_enable);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("STENCIL_COMPRESS_DISABLE"); Text("STENCIL_COMPRESS_DISABLE");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.stencil_compress_disable.Value()); Text("%X", reg.stencil_compress_disable);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("DEPTH_COMPRESS_DISABLE"); Text("DEPTH_COMPRESS_DISABLE");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.depth_compress_disable.Value()); Text("%X", reg.depth_compress_disable);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("COPY_CENTROID"); Text("COPY_CENTROID");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.copy_centroid.Value()); Text("%X", reg.copy_centroid);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("COPY_SAMPLE"); Text("COPY_SAMPLE");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.copy_sample.Value()); Text("%X", reg.copy_sample);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("DECOMPRESS_ENABLE__VI"); Text("DECOMPRESS_ENABLE__VI");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.decompress_enable.Value()); Text("%X", reg.decompress_enable);
if (begin_table) { if (begin_table) {
EndTable(); EndTable();
@ -608,7 +608,7 @@ void ParseDepthRenderControl(u32 value, bool begin_table) {
} }
void ParseDepthControl(u32 value, bool begin_table) { void ParseDepthControl(u32 value, bool begin_table) {
auto const reg = reinterpret_cast<Liverpool::DepthControl const&>(value); auto const reg = reinterpret_cast<AmdGpu::DepthControl const&>(value);
if (!begin_table || if (!begin_table ||
BeginTable("DB_DEPTH_CONTROL", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) { BeginTable("DB_DEPTH_CONTROL", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) {
@ -616,63 +616,63 @@ void ParseDepthControl(u32 value, bool begin_table) {
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("STENCIL_ENABLE"); Text("STENCIL_ENABLE");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.stencil_enable.Value()); Text("%X", reg.stencil_enable);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("Z_ENABLE"); Text("Z_ENABLE");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.depth_enable.Value()); Text("%X", reg.depth_enable);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("Z_WRITE_ENABLE"); Text("Z_WRITE_ENABLE");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.depth_write_enable.Value()); Text("%X", reg.depth_write_enable);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("DEPTH_BOUNDS_ENABLE"); Text("DEPTH_BOUNDS_ENABLE");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.depth_bounds_enable.Value()); Text("%X", reg.depth_bounds_enable);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("ZFUNC"); Text("ZFUNC");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X (%s)", (u32)reg.depth_func.Value(), enum_name(reg.depth_func.Value()).data()); Text("%X (%s)", static_cast<u32>(reg.depth_func), enum_name(reg.depth_func).data());
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("BACKFACE_ENABLE"); Text("BACKFACE_ENABLE");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.backface_enable.Value()); Text("%X", reg.backface_enable);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("STENCILFUNC"); Text("STENCILFUNC");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X (%s)", (u32)reg.stencil_ref_func.Value(), Text("%X (%s)", static_cast<u32>(reg.stencil_ref_func),
enum_name(reg.stencil_ref_func.Value()).data()); enum_name(reg.stencil_ref_func).data());
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("STENCILFUNC_BF"); Text("STENCILFUNC_BF");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X (%s)", (u32)reg.stencil_bf_func.Value(), Text("%X (%s)", static_cast<u32>(reg.stencil_bf_func),
enum_name(reg.stencil_bf_func.Value()).data()); enum_name(reg.stencil_bf_func).data());
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("ENABLE_COLOR_WRITES_ON_DEPTH_FAIL"); Text("ENABLE_COLOR_WRITES_ON_DEPTH_FAIL");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.enable_color_writes_on_depth_fail.Value()); Text("%X", reg.enable_color_writes_on_depth_fail);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("DISABLE_COLOR_WRITES_ON_DEPTH_PASS"); Text("DISABLE_COLOR_WRITES_ON_DEPTH_PASS");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.disable_color_writes_on_depth_pass.Value()); Text("%X", reg.disable_color_writes_on_depth_pass);
if (begin_table) { if (begin_table) {
EndTable(); EndTable();
@ -681,7 +681,7 @@ void ParseDepthControl(u32 value, bool begin_table) {
} }
void ParseEqaa(u32 value, bool begin_table) { void ParseEqaa(u32 value, bool begin_table) {
auto const reg = reinterpret_cast<Liverpool::Eqaa const&>(value); auto const reg = reinterpret_cast<AmdGpu::Eqaa const&>(value);
if (!begin_table || if (!begin_table ||
BeginTable("DB_DEPTH_CONTROL", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) { BeginTable("DB_DEPTH_CONTROL", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) {
@ -689,73 +689,73 @@ void ParseEqaa(u32 value, bool begin_table) {
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("MAX_ANCHOR_SAMPLES"); Text("MAX_ANCHOR_SAMPLES");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.max_anchor_samples.Value()); Text("%X", reg.max_anchor_samples);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("PS_ITER_SAMPLES"); Text("PS_ITER_SAMPLES");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.ps_iter_samples.Value()); Text("%X", reg.ps_iter_samples);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("MASK_EXPORT_NUM_SAMPLES"); Text("MASK_EXPORT_NUM_SAMPLES");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.mask_export_num_samples.Value()); Text("%X", reg.mask_export_num_samples);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("ALPHA_TO_MASK_NUM_SAMPLES"); Text("ALPHA_TO_MASK_NUM_SAMPLES");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.alpha_to_mask_num_samples.Value()); Text("%X", reg.alpha_to_mask_num_samples);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("HIGH_QUALITY_INTERSECTIONS"); Text("HIGH_QUALITY_INTERSECTIONS");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.high_quality_intersections.Value()); Text("%X", reg.high_quality_intersections);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("INCOHERENT_EQAA_READS"); Text("INCOHERENT_EQAA_READS");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.incoherent_eqaa_reads.Value()); Text("%X", reg.incoherent_eqaa_reads);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("INTERPOLATE_COMP_Z"); Text("INTERPOLATE_COMP_Z");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.interpolate_comp_z.Value()); Text("%X", reg.interpolate_comp_z);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("INTERPOLATE_SRC_Z"); Text("INTERPOLATE_SRC_Z");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.interpolate_src_z.Value()); Text("%X", reg.interpolate_src_z);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("STATIC_ANCHOR_ASSOCIATIONS"); Text("STATIC_ANCHOR_ASSOCIATIONS");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.static_anchor_associations.Value()); Text("%X", reg.static_anchor_associations);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("ALPHA_TO_MASK_EQAA_DISABLE"); Text("ALPHA_TO_MASK_EQAA_DISABLE");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.alpha_to_mask_eqaa_disable.Value()); Text("%X", reg.alpha_to_mask_eqaa_disable);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("OVERRASTERIZATION_AMOUNT"); Text("OVERRASTERIZATION_AMOUNT");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.overrasterization_amount.Value()); Text("%X", reg.overrasterization_amount);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("ENABLE_POSTZ_OVERRASTERIZATION"); Text("ENABLE_POSTZ_OVERRASTERIZATION");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.enable_postz_overrasterization.Value()); Text("%X", reg.enable_postz_overrasterization);
if (begin_table) { if (begin_table) {
EndTable(); EndTable();
@ -764,7 +764,7 @@ void ParseEqaa(u32 value, bool begin_table) {
} }
void ParseZInfo(u32 value, bool begin_table) { void ParseZInfo(u32 value, bool begin_table) {
auto const reg = reinterpret_cast<Liverpool::DepthBuffer::ZInfo const&>(value); auto const reg = reinterpret_cast<AmdGpu::DepthBuffer::ZInfo const&>(value);
if (!begin_table || if (!begin_table ||
BeginTable("DB_DEPTH_CONTROL", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) { BeginTable("DB_DEPTH_CONTROL", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) {
@ -772,61 +772,61 @@ void ParseZInfo(u32 value, bool begin_table) {
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("FORMAT"); Text("FORMAT");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X (%s)", (u32)reg.format.Value(), enum_name(reg.format.Value()).data()); Text("%X (%s)", static_cast<u32>(reg.format), enum_name(reg.format).data());
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("NUM_SAMPLES"); Text("NUM_SAMPLES");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.num_samples.Value()); Text("%X", reg.num_samples);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("TILE_SPLIT__CI__VI"); Text("TILE_SPLIT__CI__VI");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.tile_split.Value()); Text("%X", reg.tile_split);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("TILE_MODE_INDEX"); Text("TILE_MODE_INDEX");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", static_cast<u32>(reg.tile_mode_index.Value())); Text("%X", static_cast<u32>(reg.tile_mode_index));
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("DECOMPRESS_ON_N_ZPLANES__VI"); Text("DECOMPRESS_ON_N_ZPLANES__VI");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.decompress_on_n_zplanes.Value()); Text("%X", reg.decompress_on_n_zplanes);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("ALLOW_EXPCLEAR"); Text("ALLOW_EXPCLEAR");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.allow_expclear.Value()); Text("%X", reg.allow_expclear);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("READ_SIZE"); Text("READ_SIZE");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.read_size.Value()); Text("%X", reg.read_size);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("TILE_SURFACE_ENABLE"); Text("TILE_SURFACE_ENABLE");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.tile_surface_en.Value()); Text("%X", reg.tile_surface_enable);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("CLEAR_DISALLOWED__VI"); Text("CLEAR_DISALLOWED__VI");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.clear_disallowed.Value()); Text("%X", reg.clear_disallowed);
TableNextRow(); TableNextRow();
TableSetColumnIndex(0); TableSetColumnIndex(0);
Text("ZRANGE_PRECISION"); Text("ZRANGE_PRECISION");
TableSetColumnIndex(1); TableSetColumnIndex(1);
Text("%X", reg.zrange_precision.Value()); Text("%X", reg.zrange_precision);
if (begin_table) { if (begin_table) {
EndTable(); EndTable();
@ -1515,4 +1515,4 @@ void CmdListViewer::Draw(bool only_batches_view, CmdListFilter& filter) {
PopID(); PopID();
} }
} // namespace Core::Devtools::Widget } // namespace Core::Devtools::Widget
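Most of the cmd_list changes above are mechanical: the old Liverpool bitfield wrappers needed an explicit .Value() call (and a C-style cast) before printing, while the fields of the split AmdGpu register structs read directly as integers or enums. A rough before/after sketch with a stand-in register type (the real layout is in the new regs_*.h headers and may differ):

#include <cstdint>
#include <cstdio>

using u32 = std::uint32_t;

// Stand-in for the real AmdGpu::PolygonControl; plain C++ bit-fields assumed.
struct PolygonControl {
    u32 cull_front : 1;
    u32 cull_back : 1;
    u32 enable_polygon_mode : 1;
};

int main() {
    PolygonControl reg{.cull_front = 1, .cull_back = 0, .enable_polygon_mode = 1};
    // Old: Text("%X", reg.cull_front.Value());  New: the field itself is the value.
    std::printf("CULL_FRONT %X\n", static_cast<u32>(reg.cull_front));
    std::printf("CULL_BACK %X\n", static_cast<u32>(reg.cull_back));
    std::printf("POLY_MODE %X\n", static_cast<u32>(reg.enable_polygon_mode));
    return 0;
}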


@ -5,14 +5,13 @@
#pragma once #pragma once
#include <memory>
#include <vector> #include <vector>
#include <imgui.h> #include <imgui.h>
#include "common.h" #include "common.h"
#include "common/types.h" #include "common/types.h"
#include "imgui_memory_editor.h" #include "core/devtools/widget/imgui_memory_editor.h"
#include "reg_view.h" #include "core/devtools/widget/reg_view.h"
namespace AmdGpu { namespace AmdGpu {
union PM4Type3Header; union PM4Type3Header;


@ -16,7 +16,7 @@ using magic_enum::enum_name;
namespace Core::Devtools::Widget { namespace Core::Devtools::Widget {
void RegPopup::DrawColorBuffer(const AmdGpu::Liverpool::ColorBuffer& buffer) { void RegPopup::DrawColorBuffer(const AmdGpu::ColorBuffer& buffer) {
if (BeginTable("COLOR_BUFFER", 2, ImGuiTableFlags_Borders)) { if (BeginTable("COLOR_BUFFER", 2, ImGuiTableFlags_Borders)) {
TableNextRow(); TableNextRow();
@ -36,7 +36,7 @@ void RegPopup::DrawColorBuffer(const AmdGpu::Liverpool::ColorBuffer& buffer) {
if (TreeNode("Color0Info")) { if (TreeNode("Color0Info")) {
TableNextRow(); TableNextRow();
TableNextColumn(); TableNextColumn();
ParseColor0Info(buffer.info.u32all, false); ParseColor0Info(buffer.info.raw, false);
TreePop(); TreePop();
} }
@ -45,7 +45,7 @@ void RegPopup::DrawColorBuffer(const AmdGpu::Liverpool::ColorBuffer& buffer) {
if (TreeNode("Color0Attrib")) { if (TreeNode("Color0Attrib")) {
TableNextRow(); TableNextRow();
TableNextColumn(); TableNextColumn();
ParseColor0Attrib(buffer.attrib.u32all, false); ParseColor0Attrib(buffer.attrib.raw, false);
TreePop(); TreePop();
} }
@ -75,9 +75,8 @@ void RegPopup::DrawColorBuffer(const AmdGpu::Liverpool::ColorBuffer& buffer) {
} }
} }
void RegPopup::DrawDepthBuffer(const DepthBuffer& depth_data) { void RegPopup::DrawDepthBuffer(const AmdGpu::DepthBuffer& buffer,
const auto& [depth_buffer, depth_control] = depth_data; const AmdGpu::DepthControl control) {
SeparatorText("Depth buffer"); SeparatorText("Depth buffer");
if (BeginTable("DEPTH_BUFFER", 2, ImGuiTableFlags_Borders)) { if (BeginTable("DEPTH_BUFFER", 2, ImGuiTableFlags_Borders)) {
@ -85,31 +84,31 @@ void RegPopup::DrawDepthBuffer(const DepthBuffer& depth_data) {
// clang-format off // clang-format off
DrawValueRowList( DrawValueRowList(
"Z_INFO.FORMAT", depth_buffer.z_info.format, "Z_INFO.FORMAT", buffer.z_info.format,
"Z_INFO.NUM_SAMPLES", depth_buffer.z_info.num_samples, "Z_INFO.NUM_SAMPLES", buffer.z_info.num_samples,
"Z_INFO.TILE_SPLIT", depth_buffer.z_info.tile_split, "Z_INFO.TILE_SPLIT", buffer.z_info.tile_split,
"Z_INFO.TILE_MODE_INDEX", depth_buffer.z_info.tile_mode_index, "Z_INFO.TILE_MODE_INDEX", buffer.z_info.tile_mode_index,
"Z_INFO.DECOMPRESS_ON_N_ZPLANES", depth_buffer.z_info.decompress_on_n_zplanes, "Z_INFO.DECOMPRESS_ON_N_ZPLANES", buffer.z_info.decompress_on_n_zplanes,
"Z_INFO.ALLOW_EXPCLEAR", depth_buffer.z_info.allow_expclear, "Z_INFO.ALLOW_EXPCLEAR", buffer.z_info.allow_expclear,
"Z_INFO.READ_SIZE", depth_buffer.z_info.read_size, "Z_INFO.READ_SIZE", buffer.z_info.read_size,
"Z_INFO.TILE_SURFACE_EN", depth_buffer.z_info.tile_surface_en, "Z_INFO.TILE_SURFACE_ENABLE", buffer.z_info.tile_surface_enable,
"Z_INFO.CLEAR_DISALLOWED", depth_buffer.z_info.clear_disallowed, "Z_INFO.CLEAR_DISALLOWED", buffer.z_info.clear_disallowed,
"Z_INFO.ZRANGE_PRECISION", depth_buffer.z_info.zrange_precision, "Z_INFO.ZRANGE_PRECISION", buffer.z_info.zrange_precision,
"STENCIL_INFO.FORMAT", depth_buffer.stencil_info.format, "STENCIL_INFO.FORMAT", buffer.stencil_info.format,
"Z_READ_BASE", depth_buffer.z_read_base, "Z_READ_BASE", buffer.z_read_base,
"STENCIL_READ_BASE", depth_buffer.stencil_read_base, "STENCIL_READ_BASE", buffer.stencil_read_base,
"Z_WRITE_BASE", depth_buffer.z_write_base, "Z_WRITE_BASE", buffer.z_write_base,
"STENCIL_WRITE_BASE", depth_buffer.stencil_write_base, "STENCIL_WRITE_BASE", buffer.stencil_write_base,
"DEPTH_SIZE.PITCH_TILE_MAX", depth_buffer.depth_size.pitch_tile_max, "DEPTH_SIZE.PITCH_TILE_MAX", buffer.depth_size.pitch_tile_max,
"DEPTH_SIZE.HEIGHT_TILE_MAX", depth_buffer.depth_size.height_tile_max, "DEPTH_SIZE.HEIGHT_TILE_MAX", buffer.depth_size.height_tile_max,
"DEPTH_SLICE.TILE_MAX", depth_buffer.depth_slice.tile_max, "DEPTH_SLICE.TILE_MAX", buffer.depth_slice.tile_max,
"Pitch()", depth_buffer.Pitch(), "Pitch()", buffer.Pitch(),
"Height()", depth_buffer.Height(), "Height()", buffer.Height(),
"DepthAddress()", depth_buffer.DepthAddress(), "DepthAddress()", buffer.DepthAddress(),
"StencilAddress()", depth_buffer.StencilAddress(), "StencilAddress()", buffer.StencilAddress(),
"NumSamples()", depth_buffer.NumSamples(), "NumSamples()", buffer.NumSamples(),
"NumBits()", depth_buffer.NumBits(), "NumBits()", buffer.NumBits(),
"GetDepthSliceSize()", depth_buffer.GetDepthSliceSize() "GetDepthSliceSize()", buffer.GetDepthSliceSize()
); );
// clang-format on // clang-format on
@ -121,16 +120,16 @@ void RegPopup::DrawDepthBuffer(const DepthBuffer& depth_data) {
// clang-format off // clang-format off
DrawValueRowList( DrawValueRowList(
"STENCIL_ENABLE", depth_control.stencil_enable, "STENCIL_ENABLE", control.stencil_enable,
"DEPTH_ENABLE", depth_control.depth_enable, "DEPTH_ENABLE", control.depth_enable,
"DEPTH_WRITE_ENABLE", depth_control.depth_write_enable, "DEPTH_WRITE_ENABLE", control.depth_write_enable,
"DEPTH_BOUNDS_ENABLE", depth_control.depth_bounds_enable, "DEPTH_BOUNDS_ENABLE", control.depth_bounds_enable,
"DEPTH_FUNC", depth_control.depth_func, "DEPTH_FUNC", control.depth_func,
"BACKFACE_ENABLE", depth_control.backface_enable, "BACKFACE_ENABLE", control.backface_enable,
"STENCIL_FUNC", depth_control.stencil_ref_func, "STENCIL_FUNC", control.stencil_ref_func,
"STENCIL_FUNC_BF", depth_control.stencil_bf_func, "STENCIL_FUNC_BF", control.stencil_bf_func,
"ENABLE_COLOR_WRITES_ON_DEPTH_FAIL", depth_control.enable_color_writes_on_depth_fail, "ENABLE_COLOR_WRITES_ON_DEPTH_FAIL", control.enable_color_writes_on_depth_fail,
"DISABLE_COLOR_WRITES_ON_DEPTH_PASS", depth_control.disable_color_writes_on_depth_pass "DISABLE_COLOR_WRITES_ON_DEPTH_PASS", control.disable_color_writes_on_depth_pass
); );
// clang-format on // clang-format on
@ -143,15 +142,17 @@ RegPopup::RegPopup() {
id = unique_id++; id = unique_id++;
} }
void RegPopup::SetData(const std::string& base_title, AmdGpu::Liverpool::ColorBuffer color_buffer, void RegPopup::SetData(const std::string& base_title, AmdGpu::ColorBuffer color_buffer, u32 cb_id) {
u32 cb_id) { this->type = DataType::Color;
this->data = color_buffer; this->color = color_buffer;
this->title = fmt::format("{}/CB #{}", base_title, cb_id); this->title = fmt::format("{}/CB #{}", base_title, cb_id);
} }
void RegPopup::SetData(const std::string& base_title, AmdGpu::Liverpool::DepthBuffer depth_buffer, void RegPopup::SetData(const std::string& base_title, AmdGpu::DepthBuffer depth_buffer,
AmdGpu::Liverpool::DepthControl depth_control) { AmdGpu::DepthControl depth_control) {
this->data = std::make_tuple(depth_buffer, depth_control); this->type = DataType::Depth;
this->depth.buffer = depth_buffer;
this->depth.control = depth_control;
this->title = fmt::format("{}/Depth", base_title); this->title = fmt::format("{}/Depth", base_title);
} }
@ -161,10 +162,10 @@ void RegPopup::SetPos(ImVec2 pos, bool auto_resize) {
Begin(name, &open, flags); Begin(name, &open, flags);
SetWindowPos(pos); SetWindowPos(pos);
if (auto_resize) { if (auto_resize) {
if (std::holds_alternative<AmdGpu::Liverpool::ColorBuffer>(data)) { if (type == DataType::Color) {
SetWindowSize({365.0f, 520.0f}); SetWindowSize({365.0f, 520.0f});
KeepWindowInside(); KeepWindowInside();
} else if (std::holds_alternative<DepthBuffer>(data)) { } else if (type == DataType::Depth) {
SetWindowSize({404.0f, 543.0f}); SetWindowSize({404.0f, 543.0f});
KeepWindowInside(); KeepWindowInside();
} }
@ -182,10 +183,10 @@ void RegPopup::Draw() {
moved = true; moved = true;
} }
if (const auto* buffer = std::get_if<AmdGpu::Liverpool::ColorBuffer>(&data)) { if (type == DataType::Color) {
DrawColorBuffer(*buffer); DrawColorBuffer(color);
} else if (const auto* depth_data = std::get_if<DepthBuffer>(&data)) { } else if (type == DataType::Depth) {
DrawDepthBuffer(*depth_data); DrawDepthBuffer(depth.buffer, depth.control);
} }
} }
End(); End();


@ -3,12 +3,10 @@
#pragma once #pragma once
#include <variant>
#include <imgui.h> #include <imgui.h>
#include "common/types.h" #include "common/types.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/amdgpu/regs_color.h"
#include "video_core/amdgpu/regs_depth.h"
namespace Core::Devtools::Widget { namespace Core::Devtools::Widget {
@ -16,15 +14,24 @@ class RegPopup {
int id; int id;
ImGuiWindowFlags flags{ImGuiWindowFlags_NoSavedSettings}; ImGuiWindowFlags flags{ImGuiWindowFlags_NoSavedSettings};
using DepthBuffer = std::tuple<AmdGpu::Liverpool::DepthBuffer, AmdGpu::Liverpool::DepthControl>;
ImVec2 last_pos; ImVec2 last_pos;
std::variant<AmdGpu::Liverpool::ColorBuffer, DepthBuffer> data; AmdGpu::ColorBuffer color;
struct {
AmdGpu::DepthBuffer buffer;
AmdGpu::DepthControl control;
} depth;
enum class DataType {
None = 0,
Color = 1,
Depth = 2,
};
DataType type{};
std::string title{}; std::string title{};
static void DrawColorBuffer(const AmdGpu::Liverpool::ColorBuffer& buffer); static void DrawColorBuffer(const AmdGpu::ColorBuffer& buffer);
static void DrawDepthBuffer(const DepthBuffer& depth_data); static void DrawDepthBuffer(const AmdGpu::DepthBuffer& buffer,
const AmdGpu::DepthControl control);
public: public:
bool open = false; bool open = false;
@ -32,11 +39,10 @@ public:
RegPopup(); RegPopup();
void SetData(const std::string& base_title, AmdGpu::Liverpool::ColorBuffer color_buffer, void SetData(const std::string& base_title, AmdGpu::ColorBuffer color_buffer, u32 cb_id);
u32 cb_id);
void SetData(const std::string& base_title, AmdGpu::Liverpool::DepthBuffer depth_buffer, void SetData(const std::string& base_title, AmdGpu::DepthBuffer depth_buffer,
AmdGpu::Liverpool::DepthControl depth_control); AmdGpu::DepthControl depth_control);
void SetPos(ImVec2 pos, bool auto_resize = false); void SetPos(ImVec2 pos, bool auto_resize = false);
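The RegPopup rework above drops the std::variant of ColorBuffer / (DepthBuffer, DepthControl) in favour of plain members plus a DataType tag that Draw() and SetPos() branch on. A reduced sketch of that storage pattern, with empty stand-in payload types rather than the real AmdGpu structs:

// Stand-ins for AmdGpu::ColorBuffer / DepthBuffer / DepthControl.
struct ColorBuffer {};
struct DepthBuffer {};
struct DepthControl {};

class RegPopup {
    enum class DataType { None = 0, Color = 1, Depth = 2 };

    ColorBuffer color{};
    struct {
        DepthBuffer buffer{};
        DepthControl control{};
    } depth{};
    DataType type{DataType::None};

public:
    void SetData(ColorBuffer cb) {
        type = DataType::Color;
        color = cb;
    }
    void SetData(DepthBuffer db, DepthControl dc) {
        type = DataType::Depth;
        depth.buffer = db;
        depth.control = dc;
    }
    // Replaces std::holds_alternative<...>(data) checks in Draw()/SetPos().
    bool HoldsDepth() const {
        return type == DataType::Depth;
    }
};

int main() {
    RegPopup popup;
    popup.SetData(DepthBuffer{}, DepthControl{});
    return popup.HoldsDepth() ? 0 : 1;
}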


@ -29,7 +29,7 @@ namespace Core::Devtools::Widget {
void RegView::ProcessShader(int shader_id) { void RegView::ProcessShader(int shader_id) {
std::vector<u32> shader_code; std::vector<u32> shader_code;
Vulkan::Liverpool::UserData user_data; AmdGpu::UserData user_data;
if (data.is_compute) { if (data.is_compute) {
shader_code = data.cs_data.code; shader_code = data.cs_data.code;
user_data = data.cs_data.cs_program.user_data; user_data = data.cs_data.cs_program.user_data;
@ -129,7 +129,7 @@ void RegView::DrawGraphicsRegs() {
} }
}; };
for (int cb = 0; cb < AmdGpu::Liverpool::NumColorBuffers; ++cb) { for (int cb = 0; cb < AmdGpu::NUM_COLOR_BUFFERS; ++cb) {
PushID(cb); PushID(cb);
TableNextRow(); TableNextRow();
@ -246,8 +246,7 @@ void RegView::SetData(DebugStateType::RegDump _data, const std::string& base_tit
default_reg_popup.SetData(title, regs.depth_buffer, regs.depth_control); default_reg_popup.SetData(title, regs.depth_buffer, regs.depth_control);
default_reg_popup.open = true; default_reg_popup.open = true;
} }
} else if (last_selected_cb >= 0 && } else if (last_selected_cb >= 0 && last_selected_cb < AmdGpu::NUM_COLOR_BUFFERS) {
last_selected_cb < AmdGpu::Liverpool::NumColorBuffers) {
const auto& buffer = regs.color_buffers[last_selected_cb]; const auto& buffer = regs.color_buffers[last_selected_cb];
const bool has_cb = buffer && regs.color_target_mask.GetMask(last_selected_cb); const bool has_cb = buffer && regs.color_target_mask.GetMask(last_selected_cb);
if (has_cb) { if (has_cb) {
@ -348,7 +347,7 @@ void RegView::Draw() {
} else { } else {
shader->hex_view.DrawContents(shader->user_data.data(), shader->hex_view.DrawContents(shader->user_data.data(),
shader->user_data.size() * shader->user_data.size() *
sizeof(Vulkan::Liverpool::UserData::value_type)); sizeof(AmdGpu::UserData::value_type));
} }
} }
End(); End();
@ -392,4 +391,4 @@ void RegView::Draw() {
} }
} }
} // namespace Core::Devtools::Widget } // namespace Core::Devtools::Widget

View File

@ -2,17 +2,18 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#pragma once #pragma once
#include "core/debug_state.h" #include "core/debug_state.h"
#include "imgui_memory_editor.h" #include "core/devtools/widget/imgui_memory_editor.h"
#include "reg_popup.h" #include "core/devtools/widget/reg_popup.h"
#include "text_editor.h" #include "core/devtools/widget/text_editor.h"
namespace Core::Devtools::Widget { namespace Core::Devtools::Widget {
struct ShaderCache { struct ShaderCache {
MemoryEditor hex_view; MemoryEditor hex_view;
TextEditor dis_view; TextEditor dis_view;
Vulkan::Liverpool::UserData user_data; AmdGpu::UserData user_data;
}; };
class RegView { class RegView {
@ -54,4 +55,4 @@ public:
void Draw(); void Draw();
}; };
} // namespace Core::Devtools::Widget } // namespace Core::Devtools::Widget

View File

@ -10,6 +10,7 @@
#include "core/libraries/videoout/driver.h" #include "core/libraries/videoout/driver.h"
#include "core/libraries/videoout/videoout_error.h" #include "core/libraries/videoout/videoout_error.h"
#include "imgui/renderer/imgui_core.h" #include "imgui/renderer/imgui_core.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/renderer_vulkan/vk_presenter.h" #include "video_core/renderer_vulkan/vk_presenter.h"
extern std::unique_ptr<Vulkan::Presenter> presenter; extern std::unique_ptr<Vulkan::Presenter> presenter;

View File

@ -5,6 +5,8 @@
#include <type_traits> #include <type_traits>
#include <utility> #include <utility>
#include <vector> #include <vector>
#include <magic_enum/magic_enum.hpp>
#include "common/assert.h" #include "common/assert.h"
#include "common/func_traits.h" #include "common/func_traits.h"
#include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/backend/spirv/emit_spirv.h"
@ -14,7 +16,6 @@
#include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/program.h" #include "shader_recompiler/ir/program.h"
#include "shader_recompiler/runtime_info.h" #include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/types.h"
namespace Shader::Backend::SPIRV { namespace Shader::Backend::SPIRV {
namespace { namespace {
@ -136,7 +137,7 @@ Id TypeId(const EmitContext& ctx, IR::Type type) {
case IR::Type::U32: case IR::Type::U32:
return ctx.U32[1]; return ctx.U32[1];
default: default:
throw NotImplementedException("Phi node type {}", type); UNREACHABLE_MSG("Phi node type {}", type);
} }
} }
@ -224,7 +225,7 @@ spv::ExecutionMode ExecutionMode(AmdGpu::TessellationType primitive) {
case AmdGpu::TessellationType::Quad: case AmdGpu::TessellationType::Quad:
return spv::ExecutionMode::Quads; return spv::ExecutionMode::Quads;
} }
UNREACHABLE_MSG("Tessellation primitive {}", primitive); UNREACHABLE_MSG("Tessellation primitive {}", magic_enum::enum_name(primitive));
} }
spv::ExecutionMode ExecutionMode(AmdGpu::TessellationPartitioning spacing) { spv::ExecutionMode ExecutionMode(AmdGpu::TessellationPartitioning spacing) {
@ -238,7 +239,7 @@ spv::ExecutionMode ExecutionMode(AmdGpu::TessellationPartitioning spacing) {
default: default:
break; break;
} }
UNREACHABLE_MSG("Tessellation spacing {}", spacing); UNREACHABLE_MSG("Tessellation spacing {}", magic_enum::enum_name(spacing));
} }
void SetupCapabilities(const Info& info, const Profile& profile, const RuntimeInfo& runtime_info, void SetupCapabilities(const Info& info, const Profile& profile, const RuntimeInfo& runtime_info,
@ -482,14 +483,12 @@ Id EmitPhi(EmitContext& ctx, IR::Inst* inst) {
void EmitVoid(EmitContext&) {} void EmitVoid(EmitContext&) {}
Id EmitIdentity(EmitContext& ctx, const IR::Value& value) { Id EmitIdentity(EmitContext& ctx, const IR::Value& value) {
throw NotImplementedException("Forward identity declaration"); UNREACHABLE_MSG("Forward identity declaration");
} }
Id EmitConditionRef(EmitContext& ctx, const IR::Value& value) { Id EmitConditionRef(EmitContext& ctx, const IR::Value& value) {
const Id id{ctx.Def(value)}; const Id id{ctx.Def(value)};
if (!Sirit::ValidId(id)) { ASSERT_MSG(Sirit::ValidId(id), "Forward identity declaration");
throw NotImplementedException("Forward identity declaration");
}
return id; return id;
} }

View File

@ -397,13 +397,11 @@ Id EmitImageAtomicFMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords
} }
Id EmitImageAtomicInc32(EmitContext&, IR::Inst*, u32, Id, Id) { Id EmitImageAtomicInc32(EmitContext&, IR::Inst*, u32, Id, Id) {
// TODO: This is not yet implemented UNREACHABLE_MSG("SPIR-V Instruction");
throw NotImplementedException("SPIR-V Instruction");
} }
Id EmitImageAtomicDec32(EmitContext&, IR::Inst*, u32, Id, Id) { Id EmitImageAtomicDec32(EmitContext&, IR::Inst*, u32, Id, Id) {
// TODO: This is not yet implemented UNREACHABLE_MSG("SPIR-V Instruction");
throw NotImplementedException("SPIR-V Instruction");
} }
Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) { Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {

View File

@ -98,11 +98,11 @@ void EmitEmitPrimitive(EmitContext& ctx) {
} }
void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) {
throw NotImplementedException("Geometry streams"); UNREACHABLE_MSG("Geometry streams");
} }
void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) {
throw NotImplementedException("Geometry streams"); UNREACHABLE_MSG("Geometry streams");
} }
void EmitDebugPrint(EmitContext& ctx, IR::Inst* inst, Id fmt, Id arg0, Id arg1, Id arg2, Id arg3) { void EmitDebugPrint(EmitContext& ctx, IR::Inst* inst, Id fmt, Id arg0, Id arg1, Id arg2, Id arg3) {

View File

@ -6,7 +6,6 @@
#include "shader_recompiler/backend/spirv/spirv_emit_context.h" #include "shader_recompiler/backend/spirv/spirv_emit_context.h"
#include "shader_recompiler/frontend/fetch_shader.h" #include "shader_recompiler/frontend/fetch_shader.h"
#include "shader_recompiler/runtime_info.h" #include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/types.h"
#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/buffer_cache/buffer_cache.h"
#include <boost/container/static_vector.hpp> #include <boost/container/static_vector.hpp>
@ -109,7 +108,7 @@ Id EmitContext::Def(const IR::Value& value) {
case IR::Type::StringLiteral: case IR::Type::StringLiteral:
return String(value.StringLiteral()); return String(value.StringLiteral());
default: default:
throw NotImplementedException("Immediate type {}", value.Type()); UNREACHABLE_MSG("Immediate type {}", value.Type());
} }
} }
@ -786,7 +785,7 @@ EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_writte
void EmitContext::DefineBuffers() { void EmitContext::DefineBuffers() {
for (const auto& desc : info.buffers) { for (const auto& desc : info.buffers) {
const auto buf_sharp = desc.GetSharp(info); const auto buf_sharp = desc.GetSharp(info);
const bool is_storage = desc.IsStorage(buf_sharp, profile); const bool is_storage = desc.IsStorage(buf_sharp);
// Set indexes for special buffers. // Set indexes for special buffers.
if (desc.buffer_type == BufferType::Flatbuf) { if (desc.buffer_type == BufferType::Flatbuf) {
@ -921,7 +920,7 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
default: default:
break; break;
} }
throw InvalidArgument("Invalid texture type {}", type); UNREACHABLE_MSG("Invalid texture type {}", type);
} }
void EmitContext::DefineImagesAndSamplers() { void EmitContext::DefineImagesAndSamplers() {

View File

@ -1,64 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <exception>
#include <string>
#include <utility>
#include <fmt/format.h>
namespace Shader {
class Exception : public std::exception {
public:
explicit Exception(std::string message) noexcept : err_message{std::move(message)} {}
[[nodiscard]] const char* what() const noexcept override {
return err_message.c_str();
}
void Prepend(std::string_view prepend) {
err_message.insert(0, prepend);
}
void Append(std::string_view append) {
err_message += append;
}
private:
std::string err_message;
};
class LogicError : public Exception {
public:
template <typename... Args>
explicit LogicError(const char* message, Args&&... args)
: Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {}
};
class RuntimeError : public Exception {
public:
template <typename... Args>
explicit RuntimeError(const char* message, Args&&... args)
: Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {}
};
class NotImplementedException : public Exception {
public:
template <typename... Args>
explicit NotImplementedException(const char* message, Args&&... args)
: Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {
Append(" is not implemented");
}
};
class InvalidArgument : public Exception {
public:
template <typename... Args>
explicit InvalidArgument(const char* message, Args&&... args)
: Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {}
};
} // namespace Shader
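The hunks above and below replace this recoverable Shader exception hierarchy with hard asserts from common/assert.h. As a rough, hypothetical stand-in for what such a macro amounts to (the project's real UNREACHABLE_MSG may differ in detail), the replacement pattern is format-then-abort rather than unwind:

// Hypothetical sketch only; the actual UNREACHABLE_MSG lives in common/assert.h.
#include <cstdio>
#include <cstdlib>
#include <fmt/format.h>

#define UNREACHABLE_MSG_SKETCH(...)                                           \
    do {                                                                      \
        std::fputs(fmt::format(__VA_ARGS__).c_str(), stderr);                 \
        std::abort();                                                         \
    } while (0)

// e.g. what used to be `throw NotImplementedException("Phi node type {}", type);`
// becomes a non-returning assert: UNREACHABLE_MSG_SKETCH("Phi node type {}", type);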

View File

@ -191,7 +191,7 @@ std::string DumpExpr(const Statement* stmt) {
void SanitizeNoBreaks(const Tree& tree) { void SanitizeNoBreaks(const Tree& tree) {
if (std::ranges::find(tree, StatementType::Break, &Statement::type) != tree.end()) { if (std::ranges::find(tree, StatementType::Break, &Statement::type) != tree.end()) {
throw NotImplementedException("Capturing statement with break nodes"); UNREACHABLE_MSG("Capturing statement with break nodes");
} }
} }
@ -584,7 +584,7 @@ private:
case StatementType::Variable: case StatementType::Variable:
return ir.GetGotoVariable(stmt.id); return ir.GetGotoVariable(stmt.id);
default: default:
throw NotImplementedException("Statement type {}", u32(stmt.type)); UNREACHABLE_MSG("Statement type {}", u32(stmt.type));
} }
} }

View File

@ -4,22 +4,22 @@
#include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/ir/position.h" #include "shader_recompiler/ir/position.h"
#include "shader_recompiler/ir/reinterpret.h" #include "shader_recompiler/ir/reinterpret.h"
#include "shader_recompiler/profile.h"
#include "shader_recompiler/runtime_info.h" #include "shader_recompiler/runtime_info.h"
namespace Shader::Gcn { namespace Shader::Gcn {
static AmdGpu::NumberFormat NumberFormatCompressed( static AmdGpu::NumberFormat NumberFormatCompressed(AmdGpu::ShaderExportFormat export_format) {
AmdGpu::Liverpool::ShaderExportFormat export_format) {
switch (export_format) { switch (export_format) {
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_FP16: case AmdGpu::ShaderExportFormat::ABGR_FP16:
return AmdGpu::NumberFormat::Float; return AmdGpu::NumberFormat::Float;
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_UNORM16: case AmdGpu::ShaderExportFormat::ABGR_UNORM16:
return AmdGpu::NumberFormat::Unorm; return AmdGpu::NumberFormat::Unorm;
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_SNORM16: case AmdGpu::ShaderExportFormat::ABGR_SNORM16:
return AmdGpu::NumberFormat::Snorm; return AmdGpu::NumberFormat::Snorm;
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_UINT16: case AmdGpu::ShaderExportFormat::ABGR_UINT16:
return AmdGpu::NumberFormat::Uint; return AmdGpu::NumberFormat::Uint;
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_SINT16: case AmdGpu::ShaderExportFormat::ABGR_SINT16:
return AmdGpu::NumberFormat::Sint; return AmdGpu::NumberFormat::Sint;
default: default:
UNREACHABLE_MSG("Unimplemented compressed export format {}", UNREACHABLE_MSG("Unimplemented compressed export format {}",
@ -27,18 +27,18 @@ static AmdGpu::NumberFormat NumberFormatCompressed(
} }
} }
static u32 MaskFromExportFormat(u8 mask, AmdGpu::Liverpool::ShaderExportFormat export_format) { static u32 MaskFromExportFormat(u8 mask, AmdGpu::ShaderExportFormat export_format) {
switch (export_format) { switch (export_format) {
case AmdGpu::Liverpool::ShaderExportFormat::R_32: case AmdGpu::ShaderExportFormat::R_32:
// Red only // Red only
return mask & 1; return mask & 1;
case AmdGpu::Liverpool::ShaderExportFormat::GR_32: case AmdGpu::ShaderExportFormat::GR_32:
// Red and Green only // Red and Green only
return mask & 3; return mask & 3;
case AmdGpu::Liverpool::ShaderExportFormat::AR_32: case AmdGpu::ShaderExportFormat::AR_32:
// Red and Alpha only // Red and Alpha only
return mask & 9; return mask & 9;
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_32: case AmdGpu::ShaderExportFormat::ABGR_32:
// All components // All components
return mask; return mask;
default: default:
@ -59,7 +59,7 @@ void Translator::ExportRenderTarget(const GcnInst& inst) {
} }
const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx]; const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];
if (color_buffer.export_format == AmdGpu::Liverpool::ShaderExportFormat::Zero || exp.en == 0) { if (color_buffer.export_format == AmdGpu::ShaderExportFormat::Zero || exp.en == 0) {
// No export // No export
return; return;
} }

View File

@ -11,9 +11,9 @@
#include "shader_recompiler/ir/attribute.h" #include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/reg.h" #include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/ir/reinterpret.h" #include "shader_recompiler/ir/reinterpret.h"
#include "shader_recompiler/profile.h"
#include "shader_recompiler/runtime_info.h" #include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/resource.h" #include "video_core/amdgpu/resource.h"
#include "video_core/amdgpu/types.h"
#define MAGIC_ENUM_RANGE_MIN 0 #define MAGIC_ENUM_RANGE_MIN 0
#define MAGIC_ENUM_RANGE_MAX 1515 #define MAGIC_ENUM_RANGE_MAX 1515

View File

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/profile.h"
namespace Shader::Gcn { namespace Shader::Gcn {

View File

@ -5,7 +5,6 @@
#include <span> #include <span>
#include <vector> #include <vector>
#include <boost/container/small_vector.hpp>
#include <boost/container/static_vector.hpp> #include <boost/container/static_vector.hpp>
#include "common/assert.h" #include "common/assert.h"
#include "common/types.h" #include "common/types.h"
@ -17,110 +16,11 @@
#include "shader_recompiler/ir/reg.h" #include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/ir/type.h" #include "shader_recompiler/ir/type.h"
#include "shader_recompiler/params.h" #include "shader_recompiler/params.h"
#include "shader_recompiler/profile.h" #include "shader_recompiler/resource.h"
#include "shader_recompiler/runtime_info.h" #include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/resource.h"
namespace Shader { namespace Shader {
static constexpr size_t NumUserDataRegs = 16;
static constexpr size_t NumImages = 64;
static constexpr size_t NumBuffers = 40;
static constexpr size_t NumSamplers = 16;
static constexpr size_t NumFMasks = 8;
enum class BufferType : u32 {
Guest,
Flatbuf,
BdaPagetable,
FaultBuffer,
GdsBuffer,
SharedMemory,
};
struct Info;
struct BufferResource {
u32 sharp_idx;
IR::Type used_types;
AmdGpu::Buffer inline_cbuf;
BufferType buffer_type;
u8 instance_attrib{};
bool is_written{};
bool is_formatted{};
bool IsSpecial() const noexcept {
return buffer_type != BufferType::Guest;
}
bool IsStorage(const AmdGpu::Buffer& buffer, const Profile& profile) const noexcept {
// When using uniform buffers, a size is required at compilation time, so we need to
// either compile a lot of shader specializations to handle each size or just force it to
// the maximum possible size always. However, for some vendors the shader-supplied size is
// used for bounds checking uniform buffer accesses, so the latter would effectively turn
// off buffer robustness behavior. Instead, force storage buffers which are bounds checked
// using the actual buffer size. We are assuming the performance hit from this is
// acceptable.
return true; // buffer.GetSize() > profile.max_ubo_size || is_written;
}
[[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept;
};
using BufferResourceList = boost::container::small_vector<BufferResource, NumBuffers>;
struct ImageResource {
u32 sharp_idx;
bool is_depth{};
bool is_atomic{};
bool is_array{};
bool is_written{};
bool is_r128{};
[[nodiscard]] constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept;
};
using ImageResourceList = boost::container::small_vector<ImageResource, NumImages>;
struct SamplerResource {
u32 sharp_idx;
AmdGpu::Sampler inline_sampler;
u32 is_inline_sampler : 1;
u32 associated_image : 4;
u32 disable_aniso : 1;
constexpr AmdGpu::Sampler GetSharp(const Info& info) const noexcept;
};
using SamplerResourceList = boost::container::small_vector<SamplerResource, NumSamplers>;
struct FMaskResource {
u32 sharp_idx;
constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept;
};
using FMaskResourceList = boost::container::small_vector<FMaskResource, NumFMasks>;
struct PushData {
static constexpr u32 XOffsetIndex = 0;
static constexpr u32 YOffsetIndex = 1;
static constexpr u32 XScaleIndex = 2;
static constexpr u32 YScaleIndex = 3;
static constexpr u32 UdRegsIndex = 4;
static constexpr u32 BufOffsetIndex = UdRegsIndex + NumUserDataRegs / 4;
float xoffset;
float yoffset;
float xscale;
float yscale;
std::array<u32, NumUserDataRegs> ud_regs;
std::array<u8, NumBuffers> buf_offsets;
void AddOffset(u32 binding, u32 offset) {
ASSERT(offset < 256 && binding < buf_offsets.size());
buf_offsets[binding] = offset;
}
};
static_assert(sizeof(PushData) <= 128,
"PushData size is greater than minimum size guaranteed by Vulkan spec");
enum class Qualifier : u8 { enum class Qualifier : u8 {
None, None,
Smooth, Smooth,
@ -235,7 +135,7 @@ struct Info {
Dynamic = 1 << 1, Dynamic = 1 << 1,
}; };
ReadConstType readconst_types{}; ReadConstType readconst_types{};
bool uses_dma{false}; bool uses_dma{};
explicit Info(Stage stage_, LogicalStage l_stage_, ShaderParams params) explicit Info(Stage stage_, LogicalStage l_stage_, ShaderParams params)
: stage{stage_}, l_stage{l_stage_}, pgm_hash{params.hash}, pgm_base{params.Base()}, : stage{stage_}, l_stage{l_stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},
@ -262,7 +162,7 @@ struct Info {
u32 mask = ud_mask.mask; u32 mask = ud_mask.mask;
while (mask) { while (mask) {
const u32 index = std::countr_zero(mask); const u32 index = std::countr_zero(mask);
ASSERT(bnd.user_data < NumUserDataRegs && index < NumUserDataRegs); ASSERT(bnd.user_data < NUM_USER_DATA_REGS && index < NUM_USER_DATA_REGS);
mask &= ~(1U << index); mask &= ~(1U << index);
push.ud_regs[bnd.user_data++] = user_data[index]; push.ud_regs[bnd.user_data++] = user_data[index];
} }
@ -276,9 +176,8 @@ struct Info {
void RefreshFlatBuf() { void RefreshFlatBuf() {
flattened_ud_buf.resize(srt_info.flattened_bufsize_dw); flattened_ud_buf.resize(srt_info.flattened_bufsize_dw);
ASSERT(user_data.size() <= NumUserDataRegs); ASSERT(user_data.size() <= NUM_USER_DATA_REGS);
std::memcpy(flattened_ud_buf.data(), user_data.data(), user_data.size_bytes()); std::memcpy(flattened_ud_buf.data(), user_data.data(), user_data.size_bytes());
// Run the JIT program to walk the SRT and write the leaves to a flat buffer
if (srt_info.walker_func) { if (srt_info.walker_func) {
srt_info.walker_func(user_data.data(), flattened_ud_buf.data()); srt_info.walker_func(user_data.data(), flattened_ud_buf.data());
} }
@ -296,42 +195,4 @@ struct Info {
}; };
DECLARE_ENUM_FLAG_OPERATORS(Info::ReadConstType); DECLARE_ENUM_FLAG_OPERATORS(Info::ReadConstType);
constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept {
const auto buffer = inline_cbuf ? inline_cbuf : info.ReadUdSharp<AmdGpu::Buffer>(sharp_idx);
if (!buffer.Valid()) {
LOG_DEBUG(Render, "Encountered invalid buffer sharp");
return AmdGpu::Buffer::Null();
}
return buffer;
}
constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept {
AmdGpu::Image image{};
if (!is_r128) {
image = info.ReadUdSharp<AmdGpu::Image>(sharp_idx);
} else {
const auto raw = info.ReadUdSharp<u128>(sharp_idx);
std::memcpy(&image, &raw, sizeof(raw));
}
if (!image.Valid()) {
LOG_DEBUG(Render_Vulkan, "Encountered invalid image sharp");
image = AmdGpu::Image::Null(is_depth);
} else if (is_depth) {
const auto data_fmt = image.GetDataFmt();
if (data_fmt != AmdGpu::DataFormat::Format16 && data_fmt != AmdGpu::DataFormat::Format32) {
LOG_DEBUG(Render_Vulkan, "Encountered non-depth image used with depth instruction!");
image = AmdGpu::Image::Null(true);
}
}
return image;
}
constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noexcept {
return is_inline_sampler ? inline_sampler : info.ReadUdSharp<AmdGpu::Sampler>(sharp_idx);
}
constexpr AmdGpu::Image FMaskResource::GetSharp(const Info& info) const noexcept {
return info.ReadUdSharp<AmdGpu::Image>(sharp_idx);
}
} // namespace Shader } // namespace Shader

View File

@ -30,7 +30,7 @@ Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op,
const auto result_it{instructions.insert(insertion_point, *inst)}; const auto result_it{instructions.insert(insertion_point, *inst)};
if (inst->NumArgs() != args.size()) { if (inst->NumArgs() != args.size()) {
throw InvalidArgument("Invalid number of arguments {} in {}", args.size(), op); UNREACHABLE_MSG("Invalid number of arguments {} in {}", args.size(), op);
} }
std::ranges::for_each(args, [inst, index = size_t{0}](const Value& arg) mutable { std::ranges::for_each(args, [inst, index = size_t{0}](const Value& arg) mutable {
inst->SetArg(index, arg); inst->SetArg(index, arg);

View File

@ -5,8 +5,6 @@
#include <source_location> #include <source_location>
#include <boost/container/small_vector.hpp> #include <boost/container/small_vector.hpp>
#include "common/assert.h" #include "common/assert.h"
#include "ir_emitter.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/ir/debug_print.h" #include "shader_recompiler/ir/debug_print.h"
#include "shader_recompiler/ir/ir_emitter.h" #include "shader_recompiler/ir/ir_emitter.h"
#include "shader_recompiler/ir/opcodes.h" #include "shader_recompiler/ir/opcodes.h"
@ -196,7 +194,7 @@ U1 IREmitter::Condition(IR::Condition cond) {
case IR::Condition::Execnz: case IR::Condition::Execnz:
return GetExec(); return GetExec();
default: default:
throw NotImplementedException(""); UNREACHABLE_MSG("");
} }
} }
@ -1828,7 +1826,7 @@ U32U64 IREmitter::ConvertFToS(size_t bitsize, const F32F64& value) {
default: default:
break; break;
} }
throw NotImplementedException("Invalid destination bitsize {}", bitsize); UNREACHABLE_MSG("Invalid destination bitsize {}", bitsize);
} }
U32U64 IREmitter::ConvertFToU(size_t bitsize, const F32F64& value) { U32U64 IREmitter::ConvertFToU(size_t bitsize, const F32F64& value) {
@ -1929,7 +1927,7 @@ U8U16U32U64 IREmitter::UConvert(size_t result_bitsize, const U8U16U32U64& value)
default: default:
break; break;
} }
throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); UNREACHABLE_MSG("Conversion from {} to {} bits", value.Type(), result_bitsize);
} }
U8U16U32U64 IR::IREmitter::SConvert(size_t result_bitsize, const U8U16U32U64& value) { U8U16U32U64 IR::IREmitter::SConvert(size_t result_bitsize, const U8U16U32U64& value) {
@ -1946,8 +1944,7 @@ U8U16U32U64 IR::IREmitter::SConvert(size_t result_bitsize, const U8U16U32U64& va
default: default:
break; break;
} }
throw NotImplementedException("Signed Conversion from {} to {} bits", value.Type(), UNREACHABLE_MSG("Signed Conversion from {} to {} bits", value.Type(), result_bitsize);
result_bitsize);
} }
F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) { F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) {
@ -1978,7 +1975,7 @@ F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) {
default: default:
break; break;
} }
throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); UNREACHABLE_MSG("Conversion from {} to {} bits", value.Type(), result_bitsize);
} }
Value IREmitter::ImageAtomicIAdd(const Value& handle, const Value& coords, const Value& value, Value IREmitter::ImageAtomicIAdd(const Value& handle, const Value& coords, const Value& value,

View File

@ -2,10 +2,8 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm> #include <algorithm>
#include <any>
#include <memory> #include <memory>
#include "shader_recompiler/exception.h"
#include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/type.h" #include "shader_recompiler/ir/type.h"
#include "shader_recompiler/ir/value.h" #include "shader_recompiler/ir/value.h"
@ -21,9 +19,7 @@ Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} {
} }
Inst::Inst(const Inst& base) : op{base.op}, flags{base.flags} { Inst::Inst(const Inst& base) : op{base.op}, flags{base.flags} {
if (base.op == Opcode::Phi) { ASSERT_MSG(base.op != Opcode::Phi, "Copying phi node");
throw NotImplementedException("Copying phi node");
}
std::construct_at(&args); std::construct_at(&args);
const size_t num_args{base.NumArgs()}; const size_t num_args{base.NumArgs()};
for (size_t index = 0; index < num_args; ++index) { for (size_t index = 0; index < num_args; ++index) {
@ -150,7 +146,7 @@ IR::Type Inst::Type() const {
void Inst::SetArg(size_t index, Value value) { void Inst::SetArg(size_t index, Value value) {
if (index >= NumArgs()) { if (index >= NumArgs()) {
throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op); UNREACHABLE_MSG("Out of bounds argument index {} in opcode {}", index, op);
} }
const IR::Value arg{Arg(index)}; const IR::Value arg{Arg(index)};
if (!arg.IsImmediate()) { if (!arg.IsImmediate()) {
@ -171,7 +167,7 @@ Block* Inst::PhiBlock(size_t index) const {
UNREACHABLE_MSG("{} is not a Phi instruction", op); UNREACHABLE_MSG("{} is not a Phi instruction", op);
} }
if (index >= phi_args.size()) { if (index >= phi_args.size()) {
throw InvalidArgument("Out of bounds argument index {} in phi instruction"); UNREACHABLE_MSG("Out of bounds argument index {} in phi instruction");
} }
return phi_args[index].first; return phi_args[index].first;
} }

View File

@ -205,7 +205,7 @@ static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {
} }
info.srt_info.walker_func = c.getCurr<PFN_SrtWalker>(); info.srt_info.walker_func = c.getCurr<PFN_SrtWalker>();
pass_info.dst_off_dw = NumUserDataRegs; pass_info.dst_off_dw = NUM_USER_DATA_REGS;
ASSERT(pass_info.dst_off_dw == info.srt_info.flattened_bufsize_dw); ASSERT(pass_info.dst_off_dw == info.srt_info.flattened_bufsize_dw);
for (const auto& [sgpr_base, root] : pass_info.srt_roots) { for (const auto& [sgpr_base, root] : pass_info.srt_roots) {

View File

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include <unordered_map> #include <unordered_map>
#include <queue>
#include "shader_recompiler/ir/program.h" #include "shader_recompiler/ir/program.h"
namespace Shader::Optimization { namespace Shader::Optimization {

View File

@ -96,7 +96,7 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
if (info.gs_copy_data.output_vertices && if (info.gs_copy_data.output_vertices &&
info.gs_copy_data.output_vertices != output_vertices) { info.gs_copy_data.output_vertices != output_vertices) {
ASSERT_MSG(output_vertices > info.gs_copy_data.output_vertices && ASSERT_MSG(output_vertices > info.gs_copy_data.output_vertices &&
gs_info.mode == AmdGpu::Liverpool::GsMode::Mode::ScenarioG, gs_info.mode == AmdGpu::GsScenario::ScenarioG,
"Invalid geometry shader vertex configuration scenario = {}, max_vert_out = " "Invalid geometry shader vertex configuration scenario = {}, max_vert_out = "
"{}, output_vertices = {}", "{}, output_vertices = {}",
u32(gs_info.mode), output_vertices, info.gs_copy_data.output_vertices); u32(gs_info.mode), output_vertices, info.gs_copy_data.output_vertices);

View File

@ -3,6 +3,7 @@
#include "common/config.h" #include "common/config.h"
#include "shader_recompiler/ir/program.h" #include "shader_recompiler/ir/program.h"
#include "shader_recompiler/profile.h"
#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/buffer_cache/buffer_cache.h"
namespace Shader::Optimization { namespace Shader::Optimization {

View File

@ -13,7 +13,6 @@
#include <boost/intrusive/list.hpp> #include <boost/intrusive/list.hpp>
#include "common/assert.h" #include "common/assert.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/ir/attribute.h" #include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/opcodes.h" #include "shader_recompiler/ir/opcodes.h"
#include "shader_recompiler/ir/patch.h" #include "shader_recompiler/ir/patch.h"
@ -105,7 +104,7 @@ public:
explicit TypedValue(const Value& value) : Value(value) { explicit TypedValue(const Value& value) : Value(value) {
if ((value.Type() & type_) == IR::Type::Void) { if ((value.Type() & type_) == IR::Type::Void) {
throw InvalidArgument("Incompatible types {} and {}", type_, value.Type()); UNREACHABLE_MSG("Incompatible types {} and {}", type_, value.Type());
} }
} }

View File

@ -6,6 +6,7 @@
#include "shader_recompiler/frontend/structured_control_flow.h" #include "shader_recompiler/frontend/structured_control_flow.h"
#include "shader_recompiler/ir/passes/ir_passes.h" #include "shader_recompiler/ir/passes/ir_passes.h"
#include "shader_recompiler/ir/post_order.h" #include "shader_recompiler/ir/post_order.h"
#include "shader_recompiler/profile.h"
#include "shader_recompiler/recompiler.h" #include "shader_recompiler/recompiler.h"
namespace Shader { namespace Shader {

View File

@ -0,0 +1,146 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/types.h"
#include "shader_recompiler/ir/type.h"
#include "video_core/amdgpu/resource.h"
#include <boost/container/static_vector.hpp>
namespace Shader {
static constexpr u32 NUM_USER_DATA_REGS = 16;
static constexpr u32 NUM_IMAGES = 64;
static constexpr u32 NUM_BUFFERS = 40;
static constexpr u32 NUM_SAMPLERS = 16;
static constexpr u32 NUM_FMASKS = 8;
enum class BufferType : u32 {
Guest,
Flatbuf,
BdaPagetable,
FaultBuffer,
GdsBuffer,
SharedMemory,
};
struct Info;
struct BufferResource {
u32 sharp_idx;
IR::Type used_types;
AmdGpu::Buffer inline_cbuf;
BufferType buffer_type;
u8 instance_attrib{};
bool is_written{};
bool is_formatted{};
bool IsSpecial() const noexcept {
return buffer_type != BufferType::Guest;
}
bool IsStorage([[maybe_unused]] const AmdGpu::Buffer buffer) const noexcept {
// When using uniform buffers, a size is required at compilation time, so we need to
// either compile a lot of shader specializations to handle each size or just force it to
// the maximum possible size always. However, for some vendors the shader-supplied size is
// used for bounds checking uniform buffer accesses, so the latter would effectively turn
// off buffer robustness behavior. Instead, force storage buffers which are bounds checked
// using the actual buffer size. We are assuming the performance hit from this is
// acceptable.
return true; // buffer.GetSize() > profile.max_ubo_size || is_written;
}
constexpr AmdGpu::Buffer GetSharp(const auto& info) const noexcept {
const auto buffer =
inline_cbuf ? inline_cbuf : info.template ReadUdSharp<AmdGpu::Buffer>(sharp_idx);
if (!buffer.Valid()) {
LOG_DEBUG(Render, "Encountered invalid buffer sharp");
return AmdGpu::Buffer::Null();
}
return buffer;
}
};
using BufferResourceList = boost::container::static_vector<BufferResource, NUM_BUFFERS>;
struct ImageResource {
u32 sharp_idx;
bool is_depth{};
bool is_atomic{};
bool is_array{};
bool is_written{};
bool is_r128{};
constexpr AmdGpu::Image GetSharp(const auto& info) const noexcept {
AmdGpu::Image image{};
if (!is_r128) {
image = info.template ReadUdSharp<AmdGpu::Image>(sharp_idx);
} else {
const auto raw = info.template ReadUdSharp<u128>(sharp_idx);
std::memcpy(&image, &raw, sizeof(raw));
}
if (!image.Valid()) {
LOG_DEBUG(Render_Vulkan, "Encountered invalid image sharp");
image = AmdGpu::Image::Null(is_depth);
} else if (is_depth) {
const auto data_fmt = image.GetDataFmt();
if (data_fmt != AmdGpu::DataFormat::Format16 &&
data_fmt != AmdGpu::DataFormat::Format32) {
LOG_DEBUG(Render_Vulkan,
"Encountered non-depth image used with depth instruction!");
image = AmdGpu::Image::Null(true);
}
}
return image;
}
};
using ImageResourceList = boost::container::static_vector<ImageResource, NUM_IMAGES>;
struct SamplerResource {
u32 sharp_idx;
AmdGpu::Sampler inline_sampler;
u32 is_inline_sampler : 1;
u32 associated_image : 4;
u32 disable_aniso : 1;
constexpr AmdGpu::Sampler GetSharp(const auto& info) const noexcept {
return is_inline_sampler ? inline_sampler
: info.template ReadUdSharp<AmdGpu::Sampler>(sharp_idx);
}
};
using SamplerResourceList = boost::container::static_vector<SamplerResource, NUM_SAMPLERS>;
struct FMaskResource {
u32 sharp_idx;
constexpr AmdGpu::Image GetSharp(const auto& info) const noexcept {
return info.template ReadUdSharp<AmdGpu::Image>(sharp_idx);
}
};
using FMaskResourceList = boost::container::static_vector<FMaskResource, NUM_FMASKS>;
struct PushData {
static constexpr u32 XOffsetIndex = 0;
static constexpr u32 YOffsetIndex = 1;
static constexpr u32 XScaleIndex = 2;
static constexpr u32 YScaleIndex = 3;
static constexpr u32 UdRegsIndex = 4;
static constexpr u32 BufOffsetIndex = UdRegsIndex + NUM_USER_DATA_REGS / 4;
float xoffset;
float yoffset;
float xscale;
float yscale;
std::array<u32, NUM_USER_DATA_REGS> ud_regs;
std::array<u8, NUM_BUFFERS> buf_offsets;
void AddOffset(u32 binding, u32 offset) {
ASSERT(offset < 256 && binding < buf_offsets.size());
buf_offsets[binding] = offset;
}
};
static_assert(sizeof(PushData) <= 128,
"PushData size is greater than minimum size guaranteed by Vulkan spec");
} // namespace Shader
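Worth noting about the new resource.h above: GetSharp is now defined inline as a template on the info argument (`const auto& info`), presumably because Shader::Info is only forward-declared here while info.h itself includes resource.h, so the concrete type cannot be named without an include cycle. A self-contained sketch of that duck-typed shape, with every Mini* name being hypothetical:

// Standalone illustration of the `const auto& info` accessor pattern used by GetSharp.
#include <array>
#include <cstdint>
#include <cstring>

struct MiniSharp {
    std::uint32_t raw[4]{};
    bool Valid() const { return raw[0] != 0; }
};

struct MiniResource {
    std::uint32_t sharp_idx{};
    // Only a templated ReadUdSharp<T>() member is required of the info type, so this
    // header never needs the full definition of the struct that embeds the resource lists.
    template <typename InfoLike>
    MiniSharp GetSharp(const InfoLike& info) const {
        return info.template ReadUdSharp<MiniSharp>(sharp_idx);
    }
};

struct MiniInfo {
    std::array<std::uint32_t, 64> user_data{};
    template <typename T>
    T ReadUdSharp(std::uint32_t idx) const {
        T out{};
        std::memcpy(&out, &user_data[idx], sizeof(out)); // reinterpret raw user-data words
        return out;
    }
};

inline MiniSharp Example() {
    MiniInfo info{};
    info.user_data[4] = 1u; // make the sharp at index 4 report Valid()
    return MiniResource{.sharp_idx = 4}.GetSharp(info);
}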

View File

@ -3,13 +3,12 @@
#pragma once #pragma once
#include <algorithm>
#include <span> #include <span>
#include <boost/container/static_vector.hpp>
#include "common/types.h" #include "common/types.h"
#include "shader_recompiler/frontend/tessellation.h" #include "shader_recompiler/frontend/tessellation.h"
#include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/pixel_format.h"
#include "video_core/amdgpu/types.h" #include "video_core/amdgpu/regs_shader.h"
#include "video_core/amdgpu/regs_vertex.h"
namespace Shader { namespace Shader {
@ -36,7 +35,7 @@ enum class LogicalStage : u32 {
constexpr u32 MaxStageTypes = static_cast<u32>(LogicalStage::NumLogicalStages); constexpr u32 MaxStageTypes = static_cast<u32>(LogicalStage::NumLogicalStages);
[[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept { constexpr Stage StageFromIndex(size_t index) noexcept {
return static_cast<Stage>(index); return static_cast<Stage>(index);
} }
@ -87,7 +86,6 @@ struct VertexRuntimeInfo {
bool clip_disable{}; bool clip_disable{};
u32 step_rate_0; u32 step_rate_0;
u32 step_rate_1; u32 step_rate_1;
// Domain
AmdGpu::TessellationType tess_type; AmdGpu::TessellationType tess_type;
AmdGpu::TessellationTopology tess_topology; AmdGpu::TessellationTopology tess_topology;
AmdGpu::TessellationPartitioning tess_partitioning; AmdGpu::TessellationPartitioning tess_partitioning;
@ -110,22 +108,24 @@ struct VertexRuntimeInfo {
}; };
struct HullRuntimeInfo { struct HullRuntimeInfo {
// from registers
u32 num_input_control_points; u32 num_input_control_points;
u32 num_threads; u32 num_threads;
AmdGpu::TessellationType tess_type; AmdGpu::TessellationType tess_type;
bool offchip_lds_enable; bool offchip_lds_enable;
// from tess constants buffer
u32 ls_stride; u32 ls_stride;
u32 hs_output_cp_stride; u32 hs_output_cp_stride;
u32 hs_output_base; u32 hs_output_base;
auto operator<=>(const HullRuntimeInfo&) const noexcept = default; void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) {
ls_stride = tess_constants.ls_stride;
hs_output_cp_stride = tess_constants.hs_cp_stride;
hs_output_base = tess_constants.hs_output_base;
}
// It might be possible for a non-passthrough TCS to have these conditions, in some bool operator==(const HullRuntimeInfo&) const = default;
// dumb situation.
// In that case, it should be fine to assume passthrough and declare some extra // It might be possible for a non-passthrough TCS to have these conditions, in some dumb
// situation. In that case, it should be fine to assume passthrough and declare some extra
// output control points and attributes that shouldn't be read by the TES anyway // output control points and attributes that shouldn't be read by the TES anyway
bool IsPassthrough() const { bool IsPassthrough() const {
return hs_output_base == 0 && ls_stride == hs_output_cp_stride && num_threads == 1; return hs_output_base == 0 && ls_stride == hs_output_cp_stride && num_threads == 1;
@ -138,12 +138,6 @@ struct HullRuntimeInfo {
u32 NumOutputControlPoints() const { u32 NumOutputControlPoints() const {
return IsPassthrough() ? num_input_control_points : num_threads; return IsPassthrough() ? num_input_control_points : num_threads;
} }
void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) {
ls_stride = tess_constants.ls_stride;
hs_output_cp_stride = tess_constants.hs_cp_stride;
hs_output_base = tess_constants.hs_output_base;
}
}; };
static constexpr auto GsMaxOutputStreams = 4u; static constexpr auto GsMaxOutputStreams = 4u;
@ -157,11 +151,11 @@ struct GeometryRuntimeInfo {
u32 out_vertex_data_size{}; u32 out_vertex_data_size{};
AmdGpu::PrimitiveType in_primitive; AmdGpu::PrimitiveType in_primitive;
GsOutputPrimTypes out_primitive; GsOutputPrimTypes out_primitive;
AmdGpu::Liverpool::GsMode::Mode mode; AmdGpu::GsScenario mode;
std::span<const u32> vs_copy; std::span<const u32> vs_copy;
u64 vs_copy_hash; u64 vs_copy_hash;
bool operator==(const GeometryRuntimeInfo& other) const noexcept { bool operator==(const GeometryRuntimeInfo& other) const {
return num_outputs == other.num_outputs && outputs == other.outputs && num_invocations && return num_outputs == other.num_outputs && outputs == other.outputs && num_invocations &&
other.num_invocations && output_vertices == other.output_vertices && other.num_invocations && output_vertices == other.output_vertices &&
in_primitive == other.in_primitive && in_primitive == other.in_primitive &&
@ -181,10 +175,10 @@ struct PsColorBuffer {
AmdGpu::DataFormat data_format : 6; AmdGpu::DataFormat data_format : 6;
AmdGpu::NumberFormat num_format : 4; AmdGpu::NumberFormat num_format : 4;
AmdGpu::NumberConversion num_conversion : 3; AmdGpu::NumberConversion num_conversion : 3;
AmdGpu::Liverpool::ShaderExportFormat export_format : 4; AmdGpu::ShaderExportFormat export_format : 4;
AmdGpu::CompMapping swizzle; AmdGpu::CompMapping swizzle;
bool operator==(const PsColorBuffer& other) const noexcept = default; bool operator==(const PsColorBuffer& other) const = default;
}; };
struct FragmentRuntimeInfo { struct FragmentRuntimeInfo {
@ -200,18 +194,18 @@ struct FragmentRuntimeInfo {
bool operator==(const PsInput&) const noexcept = default; bool operator==(const PsInput&) const noexcept = default;
}; };
AmdGpu::Liverpool::PsInput en_flags; AmdGpu::PsInput en_flags;
AmdGpu::Liverpool::PsInput addr_flags; AmdGpu::PsInput addr_flags;
u32 num_inputs; u32 num_inputs;
std::array<PsInput, 32> inputs; std::array<PsInput, 32> inputs;
std::array<PsColorBuffer, MaxColorBuffers> color_buffers; std::array<PsColorBuffer, MaxColorBuffers> color_buffers;
AmdGpu::Liverpool::ShaderExportFormat z_export_format; AmdGpu::ShaderExportFormat z_export_format;
u8 mrtz_mask; u8 mrtz_mask;
bool dual_source_blending; bool dual_source_blending;
bool operator==(const FragmentRuntimeInfo& other) const noexcept { bool operator==(const FragmentRuntimeInfo& other) const noexcept {
return std::ranges::equal(color_buffers, other.color_buffers) && return std::ranges::equal(color_buffers, other.color_buffers) &&
en_flags.raw == other.en_flags.raw && addr_flags.raw == other.addr_flags.raw && en_flags == other.en_flags && addr_flags == other.addr_flags &&
num_inputs == other.num_inputs && z_export_format == other.z_export_format && num_inputs == other.num_inputs && z_export_format == other.z_export_format &&
mrtz_mask == other.mrtz_mask && dual_source_blending == other.dual_source_blending && mrtz_mask == other.mrtz_mask && dual_source_blending == other.dual_source_blending &&
std::ranges::equal(inputs.begin(), inputs.begin() + num_inputs, other.inputs.begin(), std::ranges::equal(inputs.begin(), inputs.begin() + num_inputs, other.inputs.begin(),

View File

@ -9,6 +9,7 @@
#include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/backend/bindings.h"
#include "shader_recompiler/frontend/fetch_shader.h" #include "shader_recompiler/frontend/fetch_shader.h"
#include "shader_recompiler/info.h" #include "shader_recompiler/info.h"
#include "shader_recompiler/profile.h"
namespace Shader { namespace Shader {
@ -114,9 +115,9 @@ struct StageSpecialization {
} }
u32 binding{}; u32 binding{};
ForEachSharp(binding, buffers, info->buffers, ForEachSharp(binding, buffers, info->buffers,
[profile_](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
spec.stride = sharp.GetStride(); spec.stride = sharp.GetStride();
spec.is_storage = desc.IsStorage(sharp, profile_); spec.is_storage = desc.IsStorage(sharp);
spec.is_formatted = desc.is_formatted; spec.is_formatted = desc.is_formatted;
spec.swizzle_enable = sharp.swizzle_enable; spec.swizzle_enable = sharp.swizzle_enable;
if (spec.is_formatted) { if (spec.is_formatted) {

View File

@ -0,0 +1,22 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/types.h"
namespace AmdGpu {
union CbDbExtent {
struct {
u16 width;
u16 height;
};
u32 raw;
bool Valid() const {
return raw != 0;
}
};
} // namespace AmdGpu
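A small usage sketch for the new CbDbExtent helper (the register value below is made up, and the width-in-low-half / height-in-high-half split assumes the little-endian targets the emulator builds for):

#include "common/types.h"
#include "video_core/amdgpu/cb_db_extent.h"

inline u32 ExtentPixels() {
    AmdGpu::CbDbExtent extent{};
    extent.raw = (720u << 16) | 1280u; // hypothetical 1280x720 target
    // Valid() only reports whether the packed register was ever written.
    return extent.Valid() ? u32(extent.width) * u32(extent.height) : 0u; // 1280 * 720
}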

View File

@ -1,55 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/types.h"
#include "video_core/amdgpu/liverpool.h"
#include <array>
namespace AmdGpu {
// The following values are taken from fpPS4:
// https://github.com/red-prig/fpPS4/blob/436b43064be4c78229500f3d3c054fc76639247d/chip/pm4_pfp.pas#L410
//
static constexpr std::array reg_array_default{
0x00000000u, 0x80000000u, 0x40004000u, 0xdeadbeefu, 0x00000000u, 0x40004000u, 0x00000000u,
0x40004000u, 0x00000000u, 0x40004000u, 0x00000000u, 0x40004000u, 0xaa99aaaau, 0x00000000u,
0xdeadbeefu, 0xdeadbeefu, 0x80000000u, 0x40004000u, 0x00000000u, 0x00000000u, 0x80000000u,
0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u,
0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u,
0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u,
0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u,
0x40004000u, 0x80000000u, 0x40004000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u,
0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u,
0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u,
0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u,
0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u,
0x2a00161au,
};
void Liverpool::Regs::SetDefaults() {
std::memset(reg_array.data(), 0, reg_array.size() * sizeof(u32));
std::memcpy(&reg_array[ContextRegWordOffset + 0x80], reg_array_default.data(),
reg_array_default.size() * sizeof(u32));
// Individual context regs values
reg_array[ContextRegWordOffset + 0x000d] = 0x40004000u;
reg_array[ContextRegWordOffset + 0x01b6] = 0x00000002u;
reg_array[ContextRegWordOffset + 0x0204] = 0x00090000u;
reg_array[ContextRegWordOffset + 0x0205] = 0x00000004u;
reg_array[ContextRegWordOffset + 0x0295] = 0x00000100u;
reg_array[ContextRegWordOffset + 0x0296] = 0x00000080u;
reg_array[ContextRegWordOffset + 0x0297] = 0x00000002u;
reg_array[ContextRegWordOffset + 0x02aa] = 0x00001000u;
reg_array[ContextRegWordOffset + 0x02f7] = 0x00001000u;
reg_array[ContextRegWordOffset + 0x02f9] = 0x00000005u;
reg_array[ContextRegWordOffset + 0x02fa] = 0x3f800000u;
reg_array[ContextRegWordOffset + 0x02fb] = 0x3f800000u;
reg_array[ContextRegWordOffset + 0x02fc] = 0x3f800000u;
reg_array[ContextRegWordOffset + 0x02fd] = 0x3f800000u;
reg_array[ContextRegWordOffset + 0x0316] = 0x0000000eu;
reg_array[ContextRegWordOffset + 0x0317] = 0x00000010u;
}
} // namespace AmdGpu

View File

@ -12,6 +12,7 @@
#include "core/libraries/kernel/process.h" #include "core/libraries/kernel/process.h"
#include "core/libraries/videoout/driver.h" #include "core/libraries/videoout/driver.h"
#include "core/memory.h" #include "core/memory.h"
#include "core/platform.h"
#include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/liverpool.h"
#include "video_core/amdgpu/pm4_cmds.h" #include "video_core/amdgpu/pm4_cmds.h"
#include "video_core/renderdoc.h" #include "video_core/renderdoc.h"
@ -305,14 +306,14 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
} }
case PM4ItOpcode::SetConfigReg: { case PM4ItOpcode::SetConfigReg: {
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header); const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
const auto reg_addr = ConfigRegWordOffset + set_data->reg_offset; const auto reg_addr = Regs::ConfigRegWordOffset + set_data->reg_offset;
const auto* payload = reinterpret_cast<const u32*>(header + 2); const auto* payload = reinterpret_cast<const u32*>(header + 2);
std::memcpy(&regs.reg_array[reg_addr], payload, (count - 1) * sizeof(u32)); std::memcpy(&regs.reg_array[reg_addr], payload, (count - 1) * sizeof(u32));
break; break;
} }
case PM4ItOpcode::SetContextReg: { case PM4ItOpcode::SetContextReg: {
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header); const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
const auto reg_addr = ContextRegWordOffset + set_data->reg_offset; const auto reg_addr = Regs::ContextRegWordOffset + set_data->reg_offset;
const auto* payload = reinterpret_cast<const u32*>(header + 2); const auto* payload = reinterpret_cast<const u32*>(header + 2);
std::memcpy(&regs.reg_array[reg_addr], payload, (count - 1) * sizeof(u32)); std::memcpy(&regs.reg_array[reg_addr], payload, (count - 1) * sizeof(u32));
@ -335,7 +336,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
case ContextRegs::CbColor7Base: { case ContextRegs::CbColor7Base: {
const auto col_buf_id = (reg_addr - ContextRegs::CbColor0Base) / const auto col_buf_id = (reg_addr - ContextRegs::CbColor0Base) /
(ContextRegs::CbColor1Base - ContextRegs::CbColor0Base); (ContextRegs::CbColor1Base - ContextRegs::CbColor0Base);
ASSERT(col_buf_id < NumColorBuffers); ASSERT(col_buf_id < NUM_COLOR_BUFFERS);
const auto nop_offset = header->type3.count; const auto nop_offset = header->type3.count;
if (nop_offset == 0x0e || nop_offset == 0x0d || nop_offset == 0x0b) { if (nop_offset == 0x0e || nop_offset == 0x0d || nop_offset == 0x0b) {
@ -358,7 +359,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
const auto col_buf_id = const auto col_buf_id =
(reg_addr - ContextRegs::CbColor0Cmask) / (reg_addr - ContextRegs::CbColor0Cmask) /
(ContextRegs::CbColor1Cmask - ContextRegs::CbColor0Cmask); (ContextRegs::CbColor1Cmask - ContextRegs::CbColor0Cmask);
ASSERT(col_buf_id < NumColorBuffers); ASSERT(col_buf_id < NUM_COLOR_BUFFERS);
const auto nop_offset = header->type3.count; const auto nop_offset = header->type3.count;
if (nop_offset == 0x04) { if (nop_offset == 0x04) {
@ -394,14 +395,14 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
(set_data->reg_offset - 0x200); (set_data->reg_offset - 0x200);
std::memcpy(addr, header + 2, set_size); std::memcpy(addr, header + 2, set_size);
} else { } else {
std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2, std::memcpy(&regs.reg_array[Regs::ShRegWordOffset + set_data->reg_offset],
set_size); header + 2, set_size);
} }
break; break;
} }
case PM4ItOpcode::SetUconfigReg: { case PM4ItOpcode::SetUconfigReg: {
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header); const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
std::memcpy(&regs.reg_array[UconfigRegWordOffset + set_data->reg_offset], std::memcpy(&regs.reg_array[Regs::UconfigRegWordOffset + set_data->reg_offset],
header + 2, (count - 1) * sizeof(u32)); header + 2, (count - 1) * sizeof(u32));
break; break;
} }
@ -418,7 +419,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
const auto* draw_index = reinterpret_cast<const PM4CmdDrawIndex2*>(header); const auto* draw_index = reinterpret_cast<const PM4CmdDrawIndex2*>(header);
regs.max_index_size = draw_index->max_size; regs.max_index_size = draw_index->max_size;
regs.index_base_address.base_addr_lo = draw_index->index_base_lo; regs.index_base_address.base_addr_lo = draw_index->index_base_lo;
regs.index_base_address.base_addr_hi.Assign(draw_index->index_base_hi); regs.index_base_address.base_addr_hi = draw_index->index_base_hi;
regs.num_indices = draw_index->index_count; regs.num_indices = draw_index->index_count;
regs.draw_initiator = draw_index->draw_initiator; regs.draw_initiator = draw_index->draw_initiator;
if (DebugState.DumpingCurrentReg()) { if (DebugState.DumpingCurrentReg()) {
@ -582,7 +583,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
case PM4ItOpcode::IndexBase: { case PM4ItOpcode::IndexBase: {
const auto* index_base = reinterpret_cast<const PM4CmdDrawIndexBase*>(header); const auto* index_base = reinterpret_cast<const PM4CmdDrawIndexBase*>(header);
regs.index_base_address.base_addr_lo = index_base->addr_lo; regs.index_base_address.base_addr_lo = index_base->addr_lo;
regs.index_base_address.base_addr_hi.Assign(index_base->addr_hi); regs.index_base_address.base_addr_hi = index_base->addr_hi;
break; break;
} }
case PM4ItOpcode::IndexBufferSize: { case PM4ItOpcode::IndexBufferSize: {
@ -638,12 +639,14 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
} }
case PM4ItOpcode::EventWriteEop: { case PM4ItOpcode::EventWriteEop: {
const auto* event_eop = reinterpret_cast<const PM4CmdEventWriteEop*>(header); const auto* event_eop = reinterpret_cast<const PM4CmdEventWriteEop*>(header);
event_eop->SignalFence([](void* address, u64 data, u32 num_bytes) { event_eop->SignalFence(
auto* memory = Core::Memory::Instance(); [](void* address, u64 data, u32 num_bytes) {
if (!memory->TryWriteBacking(address, &data, num_bytes)) { auto* memory = Core::Memory::Instance();
memcpy(address, &data, num_bytes); if (!memory->TryWriteBacking(address, &data, num_bytes)) {
} memcpy(address, &data, num_bytes);
}); }
},
[] { Platform::IrqC::Instance()->Signal(Platform::InterruptId::GfxEop); });
break; break;
} }
case PM4ItOpcode::DmaData: { case PM4ItOpcode::DmaData: {
@ -947,8 +950,8 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
(set_data->reg_offset - 0x200); (set_data->reg_offset - 0x200);
std::memcpy(addr, header + 2, set_size); std::memcpy(addr, header + 2, set_size);
} else { } else {
std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2, std::memcpy(&regs.reg_array[Regs::ShRegWordOffset + set_data->reg_offset],
set_size); header + 2, set_size);
} }
break; break;
} }
@ -1030,7 +1033,9 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
} }
case PM4ItOpcode::ReleaseMem: { case PM4ItOpcode::ReleaseMem: {
const auto* release_mem = reinterpret_cast<const PM4CmdReleaseMem*>(header); const auto* release_mem = reinterpret_cast<const PM4CmdReleaseMem*>(header);
release_mem->SignalFence(static_cast<Platform::InterruptId>(queue.pipe_id)); release_mem->SignalFence([pipe_id = queue.pipe_id] {
Platform::IrqC::Instance()->Signal(static_cast<Platform::InterruptId>(pipe_id));
});
break; break;
} }
case PM4ItOpcode::EventWrite: { case PM4ItOpcode::EventWrite: {
@ -1053,11 +1058,8 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
FIBER_EXIT; FIBER_EXIT;
} }
std::pair<std::span<const u32>, std::span<const u32>> Liverpool::CopyCmdBuffers( Liverpool::CmdBuffer Liverpool::CopyCmdBuffers(std::span<const u32> dcb, std::span<const u32> ccb) {
std::span<const u32> dcb, std::span<const u32> ccb) {
auto& queue = mapped_queues[GfxQueueId]; auto& queue = mapped_queues[GfxQueueId];
// std::vector resize can invalidate spans for commands in flight
ASSERT_MSG(queue.dcb_buffer.capacity() >= queue.dcb_buffer_offset + dcb.size(), ASSERT_MSG(queue.dcb_buffer.capacity() >= queue.dcb_buffer_offset + dcb.size(),
"dcb copy buffer out of reserved space"); "dcb copy buffer out of reserved space");
ASSERT_MSG(queue.ccb_buffer.capacity() >= queue.ccb_buffer_offset + ccb.size(), ASSERT_MSG(queue.ccb_buffer.capacity() >= queue.ccb_buffer_offset + ccb.size(),
@ -1068,8 +1070,8 @@ std::pair<std::span<const u32>, std::span<const u32>> Liverpool::CopyCmdBuffers(
queue.ccb_buffer.resize( queue.ccb_buffer.resize(
std::max(queue.ccb_buffer.size(), queue.ccb_buffer_offset + ccb.size())); std::max(queue.ccb_buffer.size(), queue.ccb_buffer_offset + ccb.size()));
u32 prev_dcb_buffer_offset = queue.dcb_buffer_offset; const u32 prev_dcb_buffer_offset = queue.dcb_buffer_offset;
u32 prev_ccb_buffer_offset = queue.ccb_buffer_offset; const u32 prev_ccb_buffer_offset = queue.ccb_buffer_offset;
if (!dcb.empty()) { if (!dcb.empty()) {
std::memcpy(queue.dcb_buffer.data() + queue.dcb_buffer_offset, dcb.data(), std::memcpy(queue.dcb_buffer.data() + queue.dcb_buffer_offset, dcb.data(),
dcb.size_bytes()); dcb.size_bytes());

File diff suppressed because it is too large

View File

@ -4,26 +4,24 @@
#pragma once #pragma once
#include <cstring> #include <cstring>
#include "common/assert.h"
#include "common/bit_field.h" #include "common/bit_field.h"
#include "common/types.h" #include "common/types.h"
#include "common/uint128.h" #include "common/uint128.h"
#include "core/libraries/gnmdriver/gnmdriver.h" #include "core/libraries/gnmdriver/gnmdriver.h"
#include "core/libraries/kernel/time.h" #include "core/libraries/kernel/time.h"
#include "core/platform.h"
#include "video_core/amdgpu/pm4_opcodes.h" #include "video_core/amdgpu/pm4_opcodes.h"
namespace AmdGpu { namespace AmdGpu {
/// This enum defines the Shader types supported in PM4 type 3 header
enum class PM4ShaderType : u32 { enum class PM4ShaderType : u32 {
ShaderGraphics = 0, ///< Graphics shader ShaderGraphics = 0,
ShaderCompute = 1 ///< Compute shader ShaderCompute = 1,
}; };
/// This enum defines the predicate value supported in PM4 type 3 header
enum class PM4Predicate : u32 { enum class PM4Predicate : u32 {
PredDisable = 0, ///< Predicate disabled PredDisable = 0,
PredEnable = 1 ///< Predicate enabled PredEnable = 1,
}; };
union PM4Type0Header { union PM4Type0Header {
@ -466,7 +464,7 @@ struct PM4CmdEventWriteEop {
return data_lo | u64(data_hi) << 32; return data_lo | u64(data_hi) << 32;
} }
void SignalFence(auto&& write_mem) const { void SignalFence(auto&& write_mem, auto&& signal_irq) const {
u32* address = Address<u32>(); u32* address = Address<u32>();
switch (data_sel.Value()) { switch (data_sel.Value()) {
case DataSelect::None: { case DataSelect::None: {
@ -502,7 +500,7 @@ struct PM4CmdEventWriteEop {
ASSERT(data_sel == DataSelect::None); ASSERT(data_sel == DataSelect::None);
[[fallthrough]]; [[fallthrough]];
case InterruptSelect::IrqWhenWriteConfirm: { case InterruptSelect::IrqWhenWriteConfirm: {
Platform::IrqC::Instance()->Signal(Platform::InterruptId::GfxEop); signal_irq();
break; break;
} }
default: { default: {
@ -682,7 +680,7 @@ struct PM4CmdWaitRegMem {
return reg.Value(); return reg.Value();
} }
bool Test(const std::array<u32, Liverpool::NumRegs>& regs) const { bool Test(std::span<const u32> regs) const {
u32 value = mem_space.Value() == MemSpace::Memory ? *Address() : regs[Reg()]; u32 value = mem_space.Value() == MemSpace::Memory ? *Address() : regs[Reg()];
switch (function.Value()) { switch (function.Value()) {
case Function::Always: { case Function::Always: {
@ -934,7 +932,7 @@ struct PM4CmdReleaseMem {
return data_lo | u64(data_hi) << 32; return data_lo | u64(data_hi) << 32;
} }
void SignalFence(Platform::InterruptId irq_id) const { void SignalFence(auto&& signal_irq) const {
switch (data_sel.Value()) { switch (data_sel.Value()) {
case DataSelect::Data32Low: { case DataSelect::Data32Low: {
*Address<u32>() = DataDWord(); *Address<u32>() = DataDWord();
@ -965,7 +963,7 @@ struct PM4CmdReleaseMem {
case InterruptSelect::IrqUndocumented: case InterruptSelect::IrqUndocumented:
[[fallthrough]]; [[fallthrough]];
case InterruptSelect::IrqWhenWriteConfirm: { case InterruptSelect::IrqWhenWriteConfirm: {
Platform::IrqC::Instance()->Signal(irq_id); signal_irq();
break; break;
} }
default: { default: {

View File

@ -0,0 +1,128 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "video_core/amdgpu/regs.h"
namespace AmdGpu {
// The following values are taken from fpPS4:
// https://github.com/red-prig/fpPS4/blob/436b43064be4c78229500f3d3c054fc76639247d/chip/pm4_pfp.pas#L410
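// They cover context words 0xA080 onward: the 0x80000000/0x40004000 pairs are scissor defaults
// (window-offset-disable TL, 16384x16384 BR), the 0x00000000/0x3f800000 pairs are viewport
// zmin/zmax (0.0f/1.0f), and the remaining words are kept exactly as fpPS4 records them.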
static constexpr std::array REG_ARRAY_DEFAULT = {
0x00000000u, 0x80000000u, 0x40004000u, 0xdeadbeefu, 0x00000000u, 0x40004000u, 0x00000000u,
0x40004000u, 0x00000000u, 0x40004000u, 0x00000000u, 0x40004000u, 0xaa99aaaau, 0x00000000u,
0xdeadbeefu, 0xdeadbeefu, 0x80000000u, 0x40004000u, 0x00000000u, 0x00000000u, 0x80000000u,
0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u,
0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u,
0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u,
0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u,
0x40004000u, 0x80000000u, 0x40004000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u,
0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u,
0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u,
0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u,
0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u,
0x2a00161au,
};
void Regs::SetDefaults() {
std::memset(reg_array.data(), 0, reg_array.size() * sizeof(u32));
std::memcpy(&reg_array[ContextRegWordOffset + 0x80], REG_ARRAY_DEFAULT.data(),
REG_ARRAY_DEFAULT.size() * sizeof(u32));
// Individual context regs values
reg_array[ContextRegWordOffset + 0x000d] = 0x40004000u;
reg_array[ContextRegWordOffset + 0x01b6] = 0x00000002u;
reg_array[ContextRegWordOffset + 0x0204] = 0x00090000u;
reg_array[ContextRegWordOffset + 0x0205] = 0x00000004u;
reg_array[ContextRegWordOffset + 0x0295] = 0x00000100u;
reg_array[ContextRegWordOffset + 0x0296] = 0x00000080u;
reg_array[ContextRegWordOffset + 0x0297] = 0x00000002u;
reg_array[ContextRegWordOffset + 0x02aa] = 0x00001000u;
reg_array[ContextRegWordOffset + 0x02f7] = 0x00001000u;
reg_array[ContextRegWordOffset + 0x02f9] = 0x00000005u;
reg_array[ContextRegWordOffset + 0x02fa] = 0x3f800000u;
reg_array[ContextRegWordOffset + 0x02fb] = 0x3f800000u;
reg_array[ContextRegWordOffset + 0x02fc] = 0x3f800000u;
reg_array[ContextRegWordOffset + 0x02fd] = 0x3f800000u;
reg_array[ContextRegWordOffset + 0x0316] = 0x0000000eu;
reg_array[ContextRegWordOffset + 0x0317] = 0x00000010u;
}
#define GFX6_3D_REG_INDEX(field_name) (offsetof(AmdGpu::Regs, field_name) / sizeof(u32))
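// offsetof() / sizeof(u32) turns a Regs field into its register word index: ps_program sits eight
// words past the SH block (ShRegWordOffset == 0x2C00), hence the 0x2C08 below. These asserts pin
// the union layout in regs.h against the GFX6 register map.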
static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);
static_assert(GFX6_3D_REG_INDEX(vs_program) == 0x2C48);
static_assert(GFX6_3D_REG_INDEX(vs_program.user_data) == 0x2C4C);
static_assert(GFX6_3D_REG_INDEX(gs_program) == 0x2C88);
static_assert(GFX6_3D_REG_INDEX(es_program) == 0x2CC8);
static_assert(GFX6_3D_REG_INDEX(hs_program) == 0x2D08);
static_assert(GFX6_3D_REG_INDEX(ls_program) == 0x2D48);
static_assert(GFX6_3D_REG_INDEX(cs_program) == 0x2E00);
static_assert(GFX6_3D_REG_INDEX(cs_program.dim_z) == 0x2E03);
static_assert(GFX6_3D_REG_INDEX(cs_program.user_data) == 0x2E40);
static_assert(GFX6_3D_REG_INDEX(depth_render_control) == 0xA000);
static_assert(GFX6_3D_REG_INDEX(depth_view) == 0xA002);
static_assert(GFX6_3D_REG_INDEX(depth_htile_data_base) == 0xA005);
static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C);
static_assert(GFX6_3D_REG_INDEX(depth_buffer.z_info) == 0xA010);
static_assert(GFX6_3D_REG_INDEX(depth_buffer.depth_slice) == 0xA017);
static_assert(GFX6_3D_REG_INDEX(ta_bc_base) == 0xA020);
static_assert(GFX6_3D_REG_INDEX(window_offset) == 0xA080);
static_assert(GFX6_3D_REG_INDEX(window_scissor) == 0xA081);
static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E);
static_assert(GFX6_3D_REG_INDEX(color_shader_mask) == 0xA08F);
static_assert(GFX6_3D_REG_INDEX(generic_scissor) == 0xA090);
static_assert(GFX6_3D_REG_INDEX(viewport_scissors) == 0xA094);
static_assert(GFX6_3D_REG_INDEX(index_offset) == 0xA102);
static_assert(GFX6_3D_REG_INDEX(primitive_restart_index) == 0xA103);
static_assert(GFX6_3D_REG_INDEX(stencil_control) == 0xA10B);
static_assert(GFX6_3D_REG_INDEX(viewports) == 0xA10F);
static_assert(GFX6_3D_REG_INDEX(clip_user_data) == 0xA16F);
static_assert(GFX6_3D_REG_INDEX(ps_inputs) == 0xA191);
static_assert(GFX6_3D_REG_INDEX(vs_output_config) == 0xA1B1);
static_assert(GFX6_3D_REG_INDEX(ps_input_ena) == 0xA1B3);
static_assert(GFX6_3D_REG_INDEX(ps_input_addr) == 0xA1B4);
static_assert(GFX6_3D_REG_INDEX(shader_pos_format) == 0xA1C3);
static_assert(GFX6_3D_REG_INDEX(z_export_format) == 0xA1C4);
static_assert(GFX6_3D_REG_INDEX(color_export_format) == 0xA1C5);
static_assert(GFX6_3D_REG_INDEX(blend_control) == 0xA1E0);
static_assert(GFX6_3D_REG_INDEX(index_base_address) == 0xA1F9);
static_assert(GFX6_3D_REG_INDEX(draw_initiator) == 0xA1FC);
static_assert(GFX6_3D_REG_INDEX(depth_control) == 0xA200);
static_assert(GFX6_3D_REG_INDEX(color_control) == 0xA202);
static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204);
static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206);
static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207);
static_assert(GFX6_3D_REG_INDEX(line_control) == 0xA282);
static_assert(GFX6_3D_REG_INDEX(hs_clamp) == 0xA287);
static_assert(GFX6_3D_REG_INDEX(vgt_gs_mode) == 0xA290);
static_assert(GFX6_3D_REG_INDEX(mode_control) == 0xA292);
static_assert(GFX6_3D_REG_INDEX(vgt_gs_out_prim_type) == 0xA29B);
static_assert(GFX6_3D_REG_INDEX(index_size) == 0xA29D);
static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F);
static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1);
static_assert(GFX6_3D_REG_INDEX(enable_primitive_restart) == 0xA2A5);
static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_0) == 0xA2A8);
static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_1) == 0xA2A9);
static_assert(GFX6_3D_REG_INDEX(vgt_esgs_ring_itemsize) == 0xA2AB);
static_assert(GFX6_3D_REG_INDEX(vgt_gsvs_ring_itemsize) == 0xA2AC);
static_assert(GFX6_3D_REG_INDEX(stage_enable) == 0xA2D5);
static_assert(GFX6_3D_REG_INDEX(vgt_gs_vert_itemsize[0]) == 0xA2D7);
static_assert(GFX6_3D_REG_INDEX(tess_config) == 0xA2DB);
static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF);
static_assert(GFX6_3D_REG_INDEX(vgt_gs_instance_cnt) == 0xA2E4);
static_assert(GFX6_3D_REG_INDEX(vgt_strmout_config) == 0xA2E5);
static_assert(GFX6_3D_REG_INDEX(vgt_strmout_buffer_config) == 0xA2E6);
static_assert(GFX6_3D_REG_INDEX(aa_config) == 0xA2F8);
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].base_address) == 0xA318);
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].pitch) == 0xA319);
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].slice) == 0xA31A);
static_assert(GFX6_3D_REG_INDEX(color_buffers[7].base_address) == 0xA381);
static_assert(GFX6_3D_REG_INDEX(cp_strmout_cntl) == 0xC03F);
static_assert(GFX6_3D_REG_INDEX(primitive_type) == 0xC242);
static_assert(GFX6_3D_REG_INDEX(num_instances) == 0xC24D);
static_assert(GFX6_3D_REG_INDEX(vgt_tf_memory_base) == 0xc250);
#undef GFX6_3D_REG_INDEX
} // namespace AmdGpu

View File

@ -0,0 +1,189 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "video_core/amdgpu/regs_color.h"
#include "video_core/amdgpu/regs_depth.h"
#include "video_core/amdgpu/regs_primitive.h"
#include "video_core/amdgpu/regs_shader.h"
#include "video_core/amdgpu/regs_texture.h"
#include "video_core/amdgpu/regs_vertex.h"
namespace AmdGpu {
#define DO_CONCAT2(x, y) x##y
#define CONCAT2(x, y) DO_CONCAT2(x, y)
#define INSERT_PADDING_WORDS(num_words) \
[[maybe_unused]] std::array<u32, num_words> CONCAT2(pad, __LINE__)
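// Each INSERT_PADDING_WORDS(n) emits a uniquely named, unused u32 array so that the following field
// lands on its hardware word offset; regs.cpp static_asserts the resulting layout.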
union Regs {
static constexpr u32 NumRegs = 0xD000;
static constexpr u32 UconfigRegWordOffset = 0xC000;
static constexpr u32 ContextRegWordOffset = 0xA000;
static constexpr u32 ConfigRegWordOffset = 0x2000;
static constexpr u32 ShRegWordOffset = 0x2C00;
struct {
INSERT_PADDING_WORDS(11272);
ShaderProgram ps_program;
INSERT_PADDING_WORDS(44);
ShaderProgram vs_program;
INSERT_PADDING_WORDS(44);
ShaderProgram gs_program;
INSERT_PADDING_WORDS(44);
ShaderProgram es_program;
INSERT_PADDING_WORDS(44);
ShaderProgram hs_program;
INSERT_PADDING_WORDS(44);
ShaderProgram ls_program;
INSERT_PADDING_WORDS(164);
ComputeProgram cs_program;
INSERT_PADDING_WORDS(29104);
DepthRenderControl depth_render_control;
INSERT_PADDING_WORDS(1);
DepthView depth_view;
DepthRenderOverride depth_render_override;
INSERT_PADDING_WORDS(1);
Address depth_htile_data_base;
INSERT_PADDING_WORDS(2);
float depth_bounds_min;
float depth_bounds_max;
u32 stencil_clear;
float depth_clear;
Scissor screen_scissor;
INSERT_PADDING_WORDS(2);
DepthBuffer depth_buffer;
INSERT_PADDING_WORDS(8);
BorderColorBuffer ta_bc_base;
INSERT_PADDING_WORDS(94);
WindowOffset window_offset;
ViewportScissor window_scissor;
INSERT_PADDING_WORDS(11);
ColorBufferMask color_target_mask;
ColorBufferMask color_shader_mask;
ViewportScissor generic_scissor;
INSERT_PADDING_WORDS(2);
std::array<ViewportScissor, NUM_VIEWPORTS> viewport_scissors;
std::array<ViewportDepth, NUM_VIEWPORTS> viewport_depths;
INSERT_PADDING_WORDS(46);
u32 index_offset;
u32 primitive_restart_index;
INSERT_PADDING_WORDS(1);
BlendConstants blend_constants;
INSERT_PADDING_WORDS(2);
StencilControl stencil_control;
StencilRefMask stencil_ref_front;
StencilRefMask stencil_ref_back;
INSERT_PADDING_WORDS(1);
std::array<ViewportBounds, NUM_VIEWPORTS> viewports;
std::array<ClipUserData, NUM_CLIP_PLANES> clip_user_data;
INSERT_PADDING_WORDS(10);
std::array<PsInputControl, 32> ps_inputs;
VsOutputConfig vs_output_config;
INSERT_PADDING_WORDS(1);
PsInput ps_input_ena;
PsInput ps_input_addr;
INSERT_PADDING_WORDS(1);
u32 num_interp : 6;
INSERT_PADDING_WORDS(12);
ShaderPosFormat shader_pos_format;
ShaderExportFormat z_export_format;
ColorExportFormat color_export_format;
INSERT_PADDING_WORDS(26);
std::array<BlendControl, NUM_COLOR_BUFFERS> blend_control;
INSERT_PADDING_WORDS(17);
IndexBufferBase index_base_address;
INSERT_PADDING_WORDS(1);
u32 draw_initiator;
INSERT_PADDING_WORDS(3);
DepthControl depth_control;
INSERT_PADDING_WORDS(1);
ColorControl color_control;
DepthShaderControl depth_shader_control;
ClipperControl clipper_control;
PolygonControl polygon_control;
ViewportControl viewport_control;
VsOutputControl vs_output_control;
INSERT_PADDING_WORDS(122);
LineControl line_control;
INSERT_PADDING_WORDS(4);
TessFactorClamp hs_clamp;
INSERT_PADDING_WORDS(7);
GsMode vgt_gs_mode;
INSERT_PADDING_WORDS(1);
ModeControl mode_control;
INSERT_PADDING_WORDS(8);
GsOutPrimitiveType vgt_gs_out_prim_type;
INSERT_PADDING_WORDS(1);
u32 index_size;
u32 max_index_size;
IndexBufferType index_buffer_type;
INSERT_PADDING_WORDS(1);
u32 enable_primitive_id;
INSERT_PADDING_WORDS(3);
u32 enable_primitive_restart;
INSERT_PADDING_WORDS(2);
u32 vgt_instance_step_rate_0;
u32 vgt_instance_step_rate_1;
INSERT_PADDING_WORDS(1);
u32 vgt_esgs_ring_itemsize;
u32 vgt_gsvs_ring_itemsize;
INSERT_PADDING_WORDS(33);
u32 vgt_gs_max_vert_out : 11;
INSERT_PADDING_WORDS(6);
ShaderStageEnable stage_enable;
LsHsConfig ls_hs_config;
u32 vgt_gs_vert_itemsize[4];
TessellationConfig tess_config;
INSERT_PADDING_WORDS(3);
PolygonOffset poly_offset;
GsInstances vgt_gs_instance_cnt;
StreamOutConfig vgt_strmout_config;
StreamOutBufferConfig vgt_strmout_buffer_config;
INSERT_PADDING_WORDS(17);
AaConfig aa_config;
INSERT_PADDING_WORDS(31);
ColorBuffer color_buffers[NUM_COLOR_BUFFERS];
INSERT_PADDING_WORDS(7343);
StreamOutControl cp_strmout_cntl;
INSERT_PADDING_WORDS(514);
PrimitiveType primitive_type;
INSERT_PADDING_WORDS(9);
u32 num_indices;
VgtNumInstances num_instances;
INSERT_PADDING_WORDS(2);
TessFactorMemoryBase vgt_tf_memory_base;
};
std::array<u32, NumRegs> reg_array;
const ShaderProgram* ProgramForStage(u32 index) const {
switch (index) {
case 0:
return &ps_program;
case 1:
return &vs_program;
case 2:
return &gs_program;
case 3:
return &es_program;
case 4:
return &hs_program;
case 5:
return &ls_program;
}
return nullptr;
}
bool IsClipDisabled() const {
return clipper_control.clip_disable || primitive_type == PrimitiveType::RectList;
}
void SetDefaults();
};
#undef DO_CONCAT2
#undef CONCAT2
#undef INSERT_PADDING_WORDS
} // namespace AmdGpu

View File

@ -0,0 +1,307 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "video_core/amdgpu/pixel_format.h"
#include "video_core/amdgpu/tiling.h"
namespace AmdGpu {
static constexpr u32 NUM_COLOR_BUFFERS = 8;
using BlendConstants = std::array<float, 4>;
struct BlendControl {
enum class BlendFactor : u32 {
Zero = 0,
One = 1,
SrcColor = 2,
OneMinusSrcColor = 3,
SrcAlpha = 4,
OneMinusSrcAlpha = 5,
DstAlpha = 6,
OneMinusDstAlpha = 7,
DstColor = 8,
OneMinusDstColor = 9,
SrcAlphaSaturate = 10,
ConstantColor = 13,
OneMinusConstantColor = 14,
Src1Color = 15,
InvSrc1Color = 16,
Src1Alpha = 17,
InvSrc1Alpha = 18,
ConstantAlpha = 19,
OneMinusConstantAlpha = 20,
};
enum class BlendFunc : u32 {
Add = 0,
Subtract = 1,
Min = 2,
Max = 3,
ReverseSubtract = 4,
};
BlendFactor color_src_factor : 5;
BlendFunc color_func : 3;
BlendFactor color_dst_factor : 5;
u32 : 3;
BlendFactor alpha_src_factor : 5;
BlendFunc alpha_func : 3;
BlendFactor alpha_dst_factor : 5;
u32 separate_alpha_blend : 1;
u32 enable : 1;
u32 disable_rop3 : 1;
bool operator==(const BlendControl& other) const = default;
};
struct ColorControl {
enum class OperationMode : u32 {
Disable = 0u,
Normal = 1u,
EliminateFastClear = 2u,
Resolve = 3u,
Err = 4u,
FmaskDecompress = 5u,
};
enum class LogicOp : u32 {
Clear = 0x00,
Nor = 0x11,
AndInverted = 0x22,
CopyInverted = 0x33,
AndReverse = 0x44,
Invert = 0x55,
Xor = 0x66,
Nand = 0x77,
And = 0x88,
Equiv = 0x99,
Noop = 0xAA,
OrInverted = 0xBB,
Copy = 0xCC,
OrReverse = 0xDD,
Or = 0xEE,
Set = 0xFF,
};
u32 disable_dual_quad : 1;
u32 : 2;
u32 degamma_enable : 1;
OperationMode mode : 3;
u32 : 9;
LogicOp rop3 : 8;
};
struct ColorBufferMask {
enum ColorComponent : u32 {
ComponentR = (1u << 0),
ComponentG = (1u << 1),
ComponentB = (1u << 2),
ComponentA = (1u << 3),
};
u32 raw;
u32 GetMask(u32 buf_id) const {
return (raw >> (buf_id * 4)) & 0xfu;
}
void SetMask(u32 buf_id, u32 mask) {
raw &= ~(0xfu << (buf_id * 4));
raw |= (mask << (buf_id * 4));
}
};
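// Illustration (not part of the original header): SetMask(1, ComponentR | ComponentG) enables
// red/green writes for color target 1, and GetMask(1) then returns 0x3.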
struct ColorBuffer {
enum class EndianSwap : u32 {
None = 0,
Swap8In16 = 1,
Swap8In32 = 2,
Swap8In64 = 3,
};
enum class SwapMode : u32 {
Standard = 0,
Alternate = 1,
StandardReverse = 2,
AlternateReverse = 3,
};
enum class RoundMode : u32 {
ByHalf = 0,
Truncate = 1,
};
u32 base_address;
struct {
u32 tile_max : 11;
u32 : 9;
u32 fmask_tile_max : 11;
} pitch;
struct {
u32 tile_max : 22;
} slice;
struct {
u32 slice_start : 11;
u32 : 2;
u32 slice_max : 11;
} view;
union Color0Info {
u32 raw;
struct {
EndianSwap endian : 2;
u32 format : 5;
u32 linear_general : 1;
u32 number_type : 3;
SwapMode comp_swap : 2;
u32 fast_clear : 1;
u32 compression : 1;
u32 blend_clamp : 1;
u32 blend_bypass : 1;
u32 simple_float : 1;
RoundMode round_mode : 1;
u32 cmask_is_linear : 1;
u32 blend_opt_dont_rd_dst : 3;
u32 blend_opt_discard_pixel : 3;
u32 fmask_compression_disable_ci : 1;
u32 fmask_compress_1frag_only : 1;
u32 dcc_enable : 1;
u32 cmask_addr_type : 2;
u32 alt_tile_mode : 1;
};
} info;
union Color0Attrib {
u32 raw;
struct {
TileMode tile_mode_index : 5;
u32 fmask_tile_mode_index : 5;
u32 fmask_bank_height : 2;
u32 num_samples_log2 : 3;
u32 num_fragments_log2 : 2;
u32 force_dst_alpha_1 : 1;
};
} attrib;
u32 pad0;
u32 cmask_base_address;
struct {
u32 tile_max : 14;
} cmask_slice;
u32 fmask_base_address;
struct {
u32 tile_max : 14;
} fmask_slice;
u32 clear_word0;
u32 clear_word1;
std::array<u32, 2> pad1;
operator bool() const {
return base_address && info.format;
}
u32 Pitch() const {
return (pitch.tile_max + 1) << 3;
}
u32 Height() const {
return (slice.tile_max + 1) * 64 / Pitch();
}
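// Worked example: pitch.tile_max = 159 gives Pitch() = 1280; with slice.tile_max = 14399 the slice
// holds 14400 * 64 = 921600 pixels, so Height() = 921600 / 1280 = 720.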
u64 Address() const {
return u64(base_address) << 8 | (info.linear_general ? (view.slice_start & 0xff) : 0);
}
VAddr CmaskAddress() const {
return VAddr(cmask_base_address) << 8;
}
VAddr FmaskAddress() const {
return VAddr(fmask_base_address) << 8;
}
u32 NumSamples() const {
return 1 << attrib.num_fragments_log2;
}
u32 BaseSlice() const {
return info.linear_general ? 0 : view.slice_start;
}
u32 NumSlices() const {
return view.slice_max + 1;
}
u32 GetColorSliceSize() const {
const auto num_bytes_per_element = NumBitsPerBlock(DataFormat(info.format)) / 8u;
const auto slice_size = num_bytes_per_element * (slice.tile_max + 1) * 64u * NumSamples();
return slice_size;
}
TileMode GetTileMode() const {
return info.linear_general ? TileMode::DisplayLinearGeneral : attrib.tile_mode_index;
}
bool IsTiled() const {
return GetTileMode() != TileMode::DisplayLinearAligned &&
GetTileMode() != TileMode::DisplayLinearGeneral;
}
DataFormat GetDataFmt() const {
return RemapDataFormat(DataFormat(info.format));
}
NumberFormat GetNumberFmt() const {
return RemapNumberFormat(GetFixedNumberFormat(), DataFormat(info.format));
}
NumberConversion GetNumberConversion() const {
return MapNumberConversion(GetFixedNumberFormat(), DataFormat(info.format));
}
CompMapping Swizzle() const {
// clang-format off
static constexpr std::array<std::array<CompMapping, 4>, 4> mrt_swizzles{{
// Standard
std::array<CompMapping, 4>{{
{.r = CompSwizzle::Red, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero},
{.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero},
{.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Blue, .a = CompSwizzle::Zero},
{.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Blue, .a = CompSwizzle::Alpha},
}},
// Alternate
std::array<CompMapping, 4>{{
{.r = CompSwizzle::Green, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero},
{.r = CompSwizzle::Red, .g = CompSwizzle::Alpha, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero},
{.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Alpha, .a = CompSwizzle::Zero},
{.r = CompSwizzle::Blue, .g = CompSwizzle::Green, .b = CompSwizzle::Red, .a = CompSwizzle::Alpha},
}},
// StandardReverse
std::array<CompMapping, 4>{{
{.r = CompSwizzle::Blue, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero},
{.r = CompSwizzle::Green, .g = CompSwizzle::Red, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero},
{.r = CompSwizzle::Blue, .g = CompSwizzle::Green, .b = CompSwizzle::Red, .a = CompSwizzle::Zero},
{.r = CompSwizzle::Alpha, .g = CompSwizzle::Blue, .b = CompSwizzle::Green, .a = CompSwizzle::Red},
}},
// AlternateReverse
std::array<CompMapping, 4>{{
{.r = CompSwizzle::Alpha, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero},
{.r = CompSwizzle::Alpha, .g = CompSwizzle::Red, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero},
{.r = CompSwizzle::Alpha, .g = CompSwizzle::Green, .b = CompSwizzle::Red, .a = CompSwizzle::Zero},
{.r = CompSwizzle::Alpha, .g = CompSwizzle::Red, .b = CompSwizzle::Green, .a = CompSwizzle::Blue},
}},
}};
// clang-format on
const auto swap_idx = static_cast<u32>(info.comp_swap);
const auto components_idx = NumComponents(DataFormat(info.format)) - 1;
const auto mrt_swizzle = mrt_swizzles[swap_idx][components_idx];
return RemapSwizzle(DataFormat(info.format), mrt_swizzle);
}
NumberFormat GetFixedNumberFormat() const {
// CB number types differ slightly from T#: the CB encoding reuses the value that the shared T#
// enum calls SnormNz for Srgb, so remap it here.
const auto number_fmt = NumberFormat(info.number_type);
return number_fmt == NumberFormat::SnormNz ? NumberFormat::Srgb : number_fmt;
}
};
} // namespace AmdGpu

View File

@ -0,0 +1,291 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/assert.h"
#include "common/types.h"
#include "video_core/amdgpu/tiling.h"
namespace AmdGpu {
enum class ZOrder : u32 {
LateZ = 0,
EarlyZLateZ = 1,
ReZ = 2,
EarlyZReZ = 3,
};
enum class ConservativeDepth : u32 {
Any = 0,
LessThanZ = 1,
GreaterThanZ = 2,
};
struct DepthShaderControl {
u32 z_export_enable : 1;
u32 stencil_test_val_export_enable : 1;
u32 stencil_op_val_export_enable : 1;
u32 : 1;
ZOrder z_order : 2;
u32 kill_enable : 1;
u32 coverage_to_mask_enable : 1;
u32 mask_export_enable : 1;
u32 exec_on_hier_fail : 1;
u32 exec_on_noop : 1;
u32 alpha_to_mask_disable : 1;
u32 depth_before_shader : 1;
ConservativeDepth conservative_z_export : 2;
};
enum class CompareFunc : u32 {
Never = 0,
Less = 1,
Equal = 2,
LessEqual = 3,
Greater = 4,
NotEqual = 5,
GreaterEqual = 6,
Always = 7,
};
struct DepthControl {
u32 stencil_enable : 1;
u32 depth_enable : 1;
u32 depth_write_enable : 1;
u32 depth_bounds_enable : 1;
CompareFunc depth_func : 3;
u32 backface_enable : 1;
CompareFunc stencil_ref_func : 3;
u32 : 9;
CompareFunc stencil_bf_func : 3;
u32 : 7;
u32 enable_color_writes_on_depth_fail : 1;
u32 disable_color_writes_on_depth_pass : 1;
};
enum class StencilFunc : u32 {
Keep = 0,
Zero = 1,
Ones = 2,
ReplaceTest = 3,
ReplaceOp = 4,
AddClamp = 5,
SubClamp = 6,
Invert = 7,
AddWrap = 8,
SubWrap = 9,
And = 10,
Or = 11,
Xor = 12,
Nand = 13,
Nor = 14,
Xnor = 15,
};
struct StencilControl {
StencilFunc stencil_fail_front : 4;
StencilFunc stencil_zpass_front : 4;
StencilFunc stencil_zfail_front : 4;
StencilFunc stencil_fail_back : 4;
StencilFunc stencil_zpass_back : 4;
StencilFunc stencil_zfail_back : 4;
};
struct StencilRefMask {
u8 stencil_test_val;
u8 stencil_mask;
u8 stencil_write_mask;
u8 stencil_op_val;
};
struct DepthRenderControl {
u32 depth_clear_enable : 1;
u32 stencil_clear_enable : 1;
u32 depth_copy : 1;
u32 stencil_copy : 1;
u32 resummarize_enable : 1;
u32 stencil_compress_disable : 1;
u32 depth_compress_disable : 1;
u32 copy_centroid : 1;
u32 copy_sample : 1;
u32 decompress_enable : 1;
};
struct DepthView {
u32 slice_start : 11;
u32 : 2;
u32 slice_max : 11;
u32 z_read_only : 1;
u32 stencil_read_only : 1;
u32 NumSlices() const {
return slice_max + 1u;
}
};
enum class ForceEnable : u32 {
Off = 0,
Enable = 1,
Disable = 2,
};
enum class ForceSumm : u32 {
Off = 0,
MinZ = 1,
MaxZ = 2,
Both = 3,
};
struct DepthRenderOverride {
ForceEnable force_hiz_enable : 2;
ForceEnable force_his_enable0 : 2;
ForceEnable force_his_enable1 : 2;
u32 force_shader_z_order : 1;
u32 fast_z_disable : 1;
u32 fast_stencil_disable : 1;
u32 noop_cull_disable : 1;
u32 force_color_kill : 1;
u32 force_z_read : 1;
u32 force_stencil_read : 1;
ForceEnable force_full_z_range : 2;
u32 force_qc_smask_conflict : 1;
u32 disable_viewport_clamp : 1;
u32 ignore_sc_zrange : 1;
u32 disable_fully_covered : 1;
ForceSumm force_z_limit_summ : 2;
u32 max_tiles_in_dtt : 5;
u32 disable_tile_rate_tiles : 1;
u32 force_z_dirty : 1;
u32 force_stencil_dirty : 1;
u32 force_z_valid : 1;
u32 force_stencil_valid : 1;
u32 preserve_compression : 1;
};
struct Eqaa {
u32 max_anchor_samples : 1;
u32 : 3;
u32 ps_iter_samples : 3;
u32 : 1;
u32 mask_export_num_samples : 3;
u32 : 1;
u32 alpha_to_mask_num_samples : 3;
u32 : 1;
u32 high_quality_intersections : 1;
u32 incoherent_eqaa_reads : 1;
u32 interpolate_comp_z : 1;
u32 interpolate_src_z : 1;
u32 static_anchor_associations : 1;
u32 alpha_to_mask_eqaa_disable : 1;
u32 : 2;
u32 overrasterization_amount : 3;
u32 enable_postz_overrasterization : 1;
};
struct DepthBuffer {
enum class ZFormat : u32 {
Invalid = 0,
Z16 = 1,
Z32Float = 3,
};
enum class StencilFormat : u32 {
Invalid = 0,
Stencil8 = 1,
};
struct ZInfo {
ZFormat format : 2;
u32 num_samples : 2;
u32 : 9;
u32 tile_split : 3;
u32 : 4;
u32 tile_mode_index : 3;
u32 decompress_on_n_zplanes : 4;
u32 allow_expclear : 1;
u32 read_size : 1;
u32 tile_surface_enable : 1;
u32 clear_disallowed : 1;
u32 zrange_precision : 1;
} z_info;
struct {
StencilFormat format : 1;
} stencil_info;
u32 z_read_base;
u32 stencil_read_base;
u32 z_write_base;
u32 stencil_write_base;
struct {
u32 pitch_tile_max : 11;
u32 height_tile_max : 11;
} depth_size;
struct {
u32 tile_max : 22;
} depth_slice;
bool DepthValid() const {
return DepthAddress() != 0 && z_info.format != ZFormat::Invalid;
}
bool StencilValid() const {
return StencilAddress() != 0 && stencil_info.format != StencilFormat::Invalid;
}
bool DepthWriteValid() const {
return DepthWriteAddress() != 0 && z_info.format != ZFormat::Invalid;
}
bool StencilWriteValid() const {
return StencilWriteAddress() != 0 && stencil_info.format != StencilFormat::Invalid;
}
u32 Pitch() const {
return (depth_size.pitch_tile_max + 1) << 3;
}
u32 Height() const {
return (depth_size.height_tile_max + 1) << 3;
}
u64 DepthAddress() const {
return u64(z_read_base) << 8;
}
u64 StencilAddress() const {
return u64(stencil_read_base) << 8;
}
u64 DepthWriteAddress() const {
return u64(z_write_base) << 8;
}
u64 StencilWriteAddress() const {
return u64(stencil_write_base) << 8;
}
u32 NumSamples() const {
return 1u << z_info.num_samples; // treated as log2(count), even though the spec doesn't state it
}
u32 NumBits() const {
return z_info.format == ZFormat::Z32Float ? 32 : 16;
}
u32 GetDepthSliceSize() const {
ASSERT(z_info.format != ZFormat::Invalid);
const auto bpe = NumBits() >> 3; // in bytes
return (depth_slice.tile_max + 1) * 64 * bpe * NumSamples();
}
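// Worked example: a 1280x720 Z32Float, single-sample target has depth_slice.tile_max = 14399,
// so GetDepthSliceSize() = 14400 * 64 * 4 = 3686400 bytes.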
TileMode GetTileMode() const {
return static_cast<TileMode>(z_info.tile_mode_index);
}
bool IsTiled() const {
return GetTileMode() != TileMode::DisplayLinearAligned &&
GetTileMode() != TileMode::DisplayLinearGeneral;
}
};
} // namespace AmdGpu

View File

@ -0,0 +1,237 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/types.h"
namespace AmdGpu {
static constexpr u32 NUM_VIEWPORTS = 16;
static constexpr u32 NUM_CLIP_PLANES = 6;
enum class ClipSpace : u32 {
MinusWToW = 0,
ZeroToW = 1,
};
enum class PrimKillCond : u32 {
AllVtx = 0,
AnyVtx = 1,
};
struct ClipperControl {
u32 user_clip_plane_enable : 6;
u32 : 10;
u32 clip_disable : 1;
u32 : 2;
ClipSpace clip_space : 1;
u32 : 1;
PrimKillCond vtx_kill_or : 1;
u32 dx_rasterization_kill : 1;
u32 : 1;
u32 dx_linear_attr_clip_enable : 1;
u32 : 1;
u32 zclip_near_disable : 1;
u32 zclip_far_disable : 1;
bool ZclipEnable() const {
if (zclip_near_disable != zclip_far_disable) {
return false;
}
return !zclip_near_disable;
}
};
enum class PolygonMode : u32 {
Point = 0,
Line = 1,
Fill = 2,
};
enum class ProvokingVtxLast : u32 {
First = 0,
Last = 1,
};
enum class CullMode : u32 {
None = 0,
Front = 1,
Back = 2,
FrontAndBack = 3,
};
enum class FrontFace : u32 {
CounterClockwise = 0,
Clockwise = 1,
};
struct PolygonControl {
u32 cull_front : 1;
u32 cull_back : 1;
FrontFace front_face : 1;
u32 enable_polygon_mode : 2;
PolygonMode polygon_mode_front : 3;
PolygonMode polygon_mode_back : 3;
u32 enable_polygon_offset_front : 1;
u32 enable_polygon_offset_back : 1;
u32 enable_polygon_offset_para : 1;
u32 : 2;
u32 enable_window_offset : 1;
u32 : 2;
ProvokingVtxLast provoking_vtx_last : 1;
u32 persp_corr_dis : 1;
u32 multi_prim_ib_ena : 1;
PolygonMode PolyMode() const {
return enable_polygon_mode ? polygon_mode_front : PolygonMode::Fill;
}
CullMode CullingMode() const {
return static_cast<CullMode>(cull_front | cull_back << 1);
}
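// e.g. cull_front = cull_back = 1 maps to CullMode::FrontAndBack (3); both clear gives None (0).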
bool NeedsBias() const {
return enable_polygon_offset_back || enable_polygon_offset_front ||
enable_polygon_offset_para;
}
};
struct VsOutputControl {
u32 clip_distance_enable : 8;
u32 cull_distance_enable : 8;
u32 use_vtx_point_size : 1;
u32 use_vtx_edge_flag : 1;
u32 use_vtx_render_target_idx : 1;
u32 use_vtx_viewport_idx : 1;
u32 use_vtx_kill_flag : 1;
u32 vs_out_misc_enable : 1;
u32 vs_out_ccdist0_enable : 1;
u32 vs_out_ccdist1_enable : 1;
u32 vs_out_misc_side_bus_ena : 1;
u32 use_vtx_gs_cut_flag : 1;
bool IsClipDistEnabled(u32 index) const {
return (clip_distance_enable >> index) & 1;
}
bool IsCullDistEnabled(u32 index) const {
return (cull_distance_enable >> index) & 1;
}
};
struct LineControl {
u32 width_fixed_point;
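// Stored in 1/8-pixel units: 8 -> 1.0f, 12 -> 1.5f.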
float Width() const {
return static_cast<float>(width_fixed_point) / 8.0f;
}
};
struct ModeControl {
u32 msaa_enable : 1;
u32 vport_scissor_enable : 1;
u32 line_stipple_enable : 1;
u32 send_unlit_stiles_to_pkr : 1;
};
struct Scissor {
struct {
s16 top_left_x;
s16 top_left_y;
};
struct {
s16 bottom_right_x;
s16 bottom_right_y;
};
static u16 Clamp(s16 value) {
return std::max(s16(0), value);
}
u32 GetWidth() const {
return static_cast<u32>(Clamp(bottom_right_x) - Clamp(top_left_x));
}
u32 GetHeight() const {
return static_cast<u32>(Clamp(bottom_right_y) - Clamp(top_left_y));
}
};
struct WindowOffset {
s32 window_x_offset : 16;
s32 window_y_offset : 16;
};
struct ViewportScissor {
struct {
u16 top_left_x : 15;
u16 top_left_y : 15;
u16 window_offset_disable : 1;
};
struct {
u16 bottom_right_x : 15;
u16 bottom_right_y : 15;
};
u32 GetWidth() const {
return bottom_right_x - top_left_x;
}
u32 GetHeight() const {
return bottom_right_y - top_left_y;
}
};
struct ViewportDepth {
float zmin;
float zmax;
};
struct ViewportBounds {
float xscale;
float xoffset;
float yscale;
float yoffset;
float zscale;
float zoffset;
};
struct ViewportControl {
u32 xscale_enable : 1;
u32 xoffset_enable : 1;
u32 yscale_enable : 1;
u32 yoffset_enable : 1;
u32 zscale_enable : 1;
u32 zoffset_enable : 1;
u32 : 2;
u32 xy_transformed : 1;
u32 z_transformed : 1;
u32 w_transformed : 1;
u32 perfcounter_ref : 1;
};
struct ClipUserData {
u32 data_x;
u32 data_y;
u32 data_z;
u32 data_w;
};
struct AaConfig {
u32 msaa_num_samples : 3;
u32 : 1;
u32 aa_mask_centroid_dtmn : 1;
u32 : 8;
u32 max_sample_dst : 4;
u32 : 3;
u32 msaa_exposed_samples : 3;
u32 : 1;
u32 detail_to_exposed_mode : 2;
u32 NumSamples() const {
return 1 << msaa_num_samples;
}
};
} // namespace AmdGpu

View File

@ -0,0 +1,241 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/assert.h"
#include "common/types.h"
#include "shader_recompiler/params.h"
namespace AmdGpu {
static constexpr u32 NUM_USER_DATA = 16;
using UserData = std::array<u32, NUM_USER_DATA>;
struct BinaryInfo {
static constexpr std::array<u8, 7> signature_ref = {0x4f, 0x72, 0x62, 0x53,
0x68, 0x64, 0x72}; // OrbShdr
std::array<u8, sizeof(signature_ref)> signature;
u8 version;
u32 pssl_or_cg : 1;
u32 cached : 1;
u32 type : 4;
u32 source_type : 2;
u32 length : 24;
u8 chunk_usage_base_offset_in_dw;
u8 num_input_usage_slots;
u8 is_srt : 1;
u8 is_srt_used_info_valid : 1;
u8 is_extended_usage_info : 1;
u8 reserved2 : 5;
u8 reserved3;
u64 shader_hash;
u32 crc32;
bool Valid() const {
return signature == signature_ref;
}
};
enum class FpRoundMode : u32 {
NearestEven = 0,
PlusInf = 1,
MinInf = 2,
ToZero = 3,
};
enum class FpDenormMode : u32 {
InOutFlush = 0,
InAllowOutFlush = 1,
InFlushOutAllow = 2,
InOutAllow = 3,
};
struct ShaderProgram {
u64 address : 40;
struct {
u32 num_vgprs : 6;
u32 num_sgprs : 4;
u32 priority : 2;
FpRoundMode fp_round_mode32 : 2;
FpRoundMode fp_round_mode64 : 2;
FpDenormMode fp_denorm_mode32 : 2;
FpDenormMode fp_denorm_mode64 : 2;
u32 : 4;
u32 vgpr_comp_cnt : 2;
u32 : 6;
u32 scratch_en : 1;
u32 num_user_regs : 5;
u32 : 1;
u32 oc_lds_en : 1;
} settings;
UserData user_data;
template <typename T = u8*>
const T Address() const {
return std::bit_cast<T>(address << 8);
}
[[nodiscard]] u32 NumVgprs() const {
// Each increment allocates 4 registers, where 0 = 4 registers.
return (settings.num_vgprs + 1) * 4;
}
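// e.g. settings.num_vgprs == 0 allocates 4 VGPRs; the field maximum of 63 allocates 256.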
};
struct VsOutputConfig {
u32 : 1;
u32 export_count_min_one : 5;
u32 half_pack : 1;
u32 NumExports() const {
return export_count_min_one + 1;
}
};
struct PsInputControl {
u32 input_offset : 5;
u32 use_default : 1;
u32 : 2;
u32 default_value : 2;
u32 flat_shade : 1;
};
struct PsInput {
u32 persp_sample_ena : 1;
u32 persp_center_ena : 1;
u32 persp_centroid_ena : 1;
u32 persp_pull_model_ena : 1;
u32 linear_sample_ena : 1;
u32 linear_center_ena : 1;
u32 linear_centroid_ena : 1;
u32 line_stipple_tex_ena : 1;
u32 pos_x_float_ena : 1;
u32 pos_y_float_ena : 1;
u32 pos_z_float_ena : 1;
u32 pos_w_float_ena : 1;
u32 front_face_ena : 1;
u32 ancillary_ena : 1;
u32 sample_coverage_ena : 1;
u32 pos_fixed_pt_ena : 1;
bool operator==(const PsInput&) const = default;
};
enum class ShaderExportComp : u32 {
None = 0,
OneComp = 1,
TwoComp = 2,
FourCompCompressed = 3,
FourComp = 4,
};
struct ShaderPosFormat {
ShaderExportComp pos0 : 4;
ShaderExportComp pos1 : 4;
ShaderExportComp pos2 : 4;
ShaderExportComp pos3 : 4;
};
enum class ShaderExportFormat : u32 {
Zero = 0,
R_32 = 1,
GR_32 = 2,
AR_32 = 3,
ABGR_FP16 = 4,
ABGR_UNORM16 = 5,
ABGR_SNORM16 = 6,
ABGR_UINT16 = 7,
ABGR_SINT16 = 8,
ABGR_32 = 9,
};
struct ColorExportFormat {
u32 raw;
[[nodiscard]] ShaderExportFormat GetFormat(const u32 buf_idx) const {
return static_cast<ShaderExportFormat>((raw >> (buf_idx * 4)) & 0xfu);
}
};
struct ComputeProgram {
u32 dispatch_initiator;
u32 dim_x;
u32 dim_y;
u32 dim_z;
u32 start_x;
u32 start_y;
u32 start_z;
struct {
u16 full;
u16 partial;
} num_thread_x, num_thread_y, num_thread_z;
u32 pad0;
u32 max_wave_id : 12;
u64 address : 40;
std::array<u32, 4> pad1;
struct {
u64 num_vgprs : 6;
u64 num_sgprs : 4;
u64 : 23;
u64 num_user_regs : 5;
u64 : 1;
u64 tgid_enable : 3;
u64 : 5;
u64 lds_dwords : 9;
} settings;
u32 pad2;
u32 resource_limits;
std::array<u32, 42> pad3;
UserData user_data;
template <typename T = u8*>
const T Address() const {
return std::bit_cast<T>(address << 8);
}
u32 SharedMemSize() const noexcept {
// lds_dwords is in units of 128 dwords. We return bytes.
return settings.lds_dwords * 128 * 4;
}
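// e.g. settings.lds_dwords == 64 allocates 64 * 128 dwords = 32 KiB of LDS.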
u32 NumWorkgroups() const noexcept {
return dim_x * dim_y * dim_z;
}
bool IsTgidEnabled(u32 i) const noexcept {
return (settings.tgid_enable >> i) & 1;
}
};
static constexpr const BinaryInfo& SearchBinaryInfo(const u32* code) {
constexpr u32 token_mov_vcchi = 0xBEEB03FF;
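// 0xBEEB03FF encodes `s_mov_b32 vcc_hi, <literal>`; when it opens the shader, the literal stored in
// code[1] is the distance, in dword pairs, from the code start to the trailing BinaryInfo blob.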
if (code[0] == token_mov_vcchi) {
const auto* info = std::bit_cast<const BinaryInfo*>(code + (code[1] + 1) * 2);
if (info->Valid()) {
return *info;
}
}
constexpr u32 signature_size = sizeof(BinaryInfo::signature_ref) / sizeof(u8);
constexpr u32 search_limit = 0x4000;
const u32* end = code + search_limit;
for (const u32* it = code; it < end; ++it) {
if (const BinaryInfo* info = std::bit_cast<const BinaryInfo*>(it); info->Valid()) {
return *info;
}
}
UNREACHABLE_MSG("Shader binary info not found.");
}
static constexpr Shader::ShaderParams GetParams(const auto& sh) {
const auto* code = sh.template Address<u32*>();
const auto& bininfo = SearchBinaryInfo(code);
return {
.user_data = sh.user_data,
.code = std::span{code, bininfo.length / sizeof(u32)},
.hash = bininfo.shader_hash,
};
}
} // namespace AmdGpu

View File

@ -0,0 +1,20 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <bit>
#include "common/types.h"
namespace AmdGpu {
struct BorderColorBuffer {
u64 base_addr : 40;
template <typename T = VAddr>
const T Address() const {
return std::bit_cast<T>(base_addr << 8);
}
};
} // namespace AmdGpu

View File

@ -0,0 +1,257 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/assert.h"
#include "common/types.h"
namespace AmdGpu {
enum class PrimitiveType : u32 {
None = 0,
PointList = 1,
LineList = 2,
LineStrip = 3,
TriangleList = 4,
TriangleFan = 5,
TriangleStrip = 6,
PatchPrimitive = 9,
AdjLineList = 10,
AdjLineStrip = 11,
AdjTriangleList = 12,
AdjTriangleStrip = 13,
RectList = 17,
LineLoop = 18,
QuadList = 19,
QuadStrip = 20,
Polygon = 21,
};
struct IndexBufferBase {
u32 base_addr_hi : 8;
u32 base_addr_lo;
template <typename T = VAddr>
T Address() const {
return std::bit_cast<T>((base_addr_lo & ~1U) | u64(base_addr_hi) << 32);
}
};
enum class IndexType : u32 {
Index16 = 0,
Index32 = 1,
};
enum class IndexSwapMode : u32 {
None = 0,
Swap16 = 1,
Swap32 = 2,
SwapWord = 3,
};
union IndexBufferType {
u32 raw;
struct {
IndexType index_type : 2;
IndexSwapMode swap_mode : 2;
};
};
struct VgtNumInstances {
u32 num_instances;
u32 NumInstances() const {
return num_instances == 0 ? 1 : num_instances;
}
};
struct PolygonOffset {
float depth_bias;
float front_scale;
float front_offset;
float back_scale;
float back_offset;
};
struct Address {
u32 address;
VAddr GetAddress() const {
return u64(address) << 8;
}
};
union ShaderStageEnable {
enum VgtStages : u32 {
Vs = 0u, // always enabled
EsGs = 0xB0u,
LsHs = 0x45u,
};
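// e.g. EsGs (0xB0) decodes to es_en = 2, gs_en = 1, vs_en = 2; LsHs (0x45) to ls_en = 1,
// hs_en = 1, vs_en = 1.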
VgtStages raw;
struct {
u32 ls_en : 2;
u32 hs_en : 1;
u32 es_en : 2;
u32 gs_en : 1;
u32 vs_en : 2;
u32 dynamic_hs : 1;
};
bool IsStageEnabled(u32 stage) const {
switch (stage) {
case 0:
case 1:
return true;
case 2:
return gs_en;
case 3:
return es_en;
case 4:
return hs_en;
case 5:
return ls_en;
default:
UNREACHABLE();
}
}
};
union GsInstances {
u32 raw;
struct {
u32 enable : 2;
u32 count : 6;
};
bool IsEnabled() const {
return enable && count > 0;
}
};
enum class GsOutputPrimitiveType : u32 {
PointList = 0,
LineStrip = 1,
TriangleStrip = 2,
};
union GsOutPrimitiveType {
u32 raw;
struct {
GsOutputPrimitiveType outprim_type : 6;
GsOutputPrimitiveType outprim_type1 : 6;
GsOutputPrimitiveType outprim_type2 : 6;
GsOutputPrimitiveType outprim_type3 : 6;
u32 reserved : 3;
u32 unique_type_per_stream : 1;
};
GsOutputPrimitiveType GetPrimitiveType(u32 stream) const {
if (unique_type_per_stream == 0) {
return outprim_type;
}
switch (stream) {
case 0:
return outprim_type;
case 1:
return outprim_type1;
case 2:
return outprim_type2;
case 3:
return outprim_type3;
default:
UNREACHABLE();
}
}
};
enum class GsScenario : u32 {
Off = 0,
ScenarioA = 1,
ScenarioB = 2,
ScenarioG = 3,
ScenarioC = 4,
};
struct GsMode {
GsScenario mode : 3;
u32 cut_mode : 2;
u32 : 17;
u32 onchip : 2;
};
struct StreamOutControl {
u32 offset_update_done : 1;
u32 : 31;
};
union StreamOutConfig {
u32 raw;
struct {
u32 streamout_0_en : 1;
u32 streamout_1_en : 1;
u32 streamout_2_en : 1;
u32 streamout_3_en : 1;
u32 rast_stream : 3;
u32 : 1;
u32 rast_stream_mask : 4;
u32 : 19;
u32 use_rast_stream_mask : 1;
};
};
struct StreamOutBufferConfig {
u32 stream_0_buf_en : 4;
u32 stream_1_buf_en : 4;
u32 stream_2_buf_en : 4;
u32 stream_3_buf_en : 4;
};
struct LsHsConfig {
u32 num_patches : 8;
u32 hs_input_control_points : 6;
u32 hs_output_control_points : 6;
};
enum class TessellationType : u32 {
Isoline = 0,
Triangle = 1,
Quad = 2,
};
enum class TessellationPartitioning : u32 {
Integer = 0,
Pow2 = 1,
FracOdd = 2,
FracEven = 3,
};
enum class TessellationTopology : u32 {
Point = 0,
Line = 1,
TriangleCw = 2,
TriangleCcw = 3,
};
struct TessellationConfig {
TessellationType type : 2;
TessellationPartitioning partitioning : 3;
TessellationTopology topology : 3;
};
struct TessFactorMemoryBase {
u32 base;
u64 MemoryBase() const {
return static_cast<u64>(base) << 8;
}
};
struct TessFactorClamp {
float hs_max_tess;
float hs_min_tess;
};
} // namespace AmdGpu

View File

@ -1,146 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <string_view>
#include <fmt/format.h>
#include "common/types.h"
namespace AmdGpu {
enum class FpRoundMode : u32 {
NearestEven = 0,
PlusInf = 1,
MinInf = 2,
ToZero = 3,
};
enum class FpDenormMode : u32 {
InOutFlush = 0,
InAllowOutFlush = 1,
InFlushOutAllow = 2,
InOutAllow = 3,
};
enum class TessellationType : u32 {
Isoline = 0,
Triangle = 1,
Quad = 2,
};
constexpr std::string_view NameOf(TessellationType type) {
switch (type) {
case TessellationType::Isoline:
return "Isoline";
case TessellationType::Triangle:
return "Triangle";
case TessellationType::Quad:
return "Quad";
default:
return "Unknown";
}
}
enum class TessellationPartitioning : u32 {
Integer = 0,
Pow2 = 1,
FracOdd = 2,
FracEven = 3,
};
constexpr std::string_view NameOf(TessellationPartitioning partitioning) {
switch (partitioning) {
case TessellationPartitioning::Integer:
return "Integer";
case TessellationPartitioning::Pow2:
return "Pow2";
case TessellationPartitioning::FracOdd:
return "FracOdd";
case TessellationPartitioning::FracEven:
return "FracEven";
default:
return "Unknown";
}
}
enum class TessellationTopology : u32 {
Point = 0,
Line = 1,
TriangleCw = 2,
TriangleCcw = 3,
};
constexpr std::string_view NameOf(TessellationTopology topology) {
switch (topology) {
case TessellationTopology::Point:
return "Point";
case TessellationTopology::Line:
return "Line";
case TessellationTopology::TriangleCw:
return "TriangleCw";
case TessellationTopology::TriangleCcw:
return "TriangleCcw";
default:
return "Unknown";
}
}
// See `VGT_PRIMITIVE_TYPE` description in [Radeon Sea Islands 3D/Compute Register Reference Guide]
enum class PrimitiveType : u32 {
None = 0,
PointList = 1,
LineList = 2,
LineStrip = 3,
TriangleList = 4,
TriangleFan = 5,
TriangleStrip = 6,
PatchPrimitive = 9,
AdjLineList = 10,
AdjLineStrip = 11,
AdjTriangleList = 12,
AdjTriangleStrip = 13,
RectList = 17,
LineLoop = 18,
QuadList = 19,
QuadStrip = 20,
Polygon = 21,
};
enum class GsOutputPrimitiveType : u32 {
PointList = 0,
LineStrip = 1,
TriangleStrip = 2,
};
} // namespace AmdGpu
template <>
struct fmt::formatter<AmdGpu::TessellationType> {
constexpr auto parse(format_parse_context& ctx) {
return ctx.begin();
}
auto format(AmdGpu::TessellationType type, format_context& ctx) const {
return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type));
}
};
template <>
struct fmt::formatter<AmdGpu::TessellationPartitioning> {
constexpr auto parse(format_parse_context& ctx) {
return ctx.begin();
}
auto format(AmdGpu::TessellationPartitioning type, format_context& ctx) const {
return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type));
}
};
template <>
struct fmt::formatter<AmdGpu::TessellationTopology> {
constexpr auto parse(format_parse_context& ctx) {
return ctx.begin();
}
auto format(AmdGpu::TessellationTopology type, format_context& ctx) const {
return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type));
}
};

View File

@ -23,7 +23,7 @@ namespace VideoCore {
static constexpr size_t DataShareBufferSize = 64_KB; static constexpr size_t DataShareBufferSize = 64_KB;
static constexpr size_t StagingBufferSize = 512_MB; static constexpr size_t StagingBufferSize = 512_MB;
static constexpr size_t UboStreamBufferSize = 128_MB; static constexpr size_t UboStreamBufferSize = 64_MB;
static constexpr size_t DownloadBufferSize = 128_MB; static constexpr size_t DownloadBufferSize = 128_MB;
static constexpr size_t DeviceBufferSize = 128_MB; static constexpr size_t DeviceBufferSize = 128_MB;
static constexpr size_t MaxPageFaults = 1024; static constexpr size_t MaxPageFaults = 1024;
@ -329,8 +329,7 @@ void BufferCache::BindIndexBuffer(u32 index_offset) {
const auto& regs = liverpool->regs; const auto& regs = liverpool->regs;
// Figure out index type and size. // Figure out index type and size.
const bool is_index16 = const bool is_index16 = regs.index_buffer_type.index_type == AmdGpu::IndexType::Index16;
regs.index_buffer_type.index_type == AmdGpu::Liverpool::IndexType::Index16;
const vk::IndexType index_type = is_index16 ? vk::IndexType::eUint16 : vk::IndexType::eUint32; const vk::IndexType index_type = is_index16 ? vk::IndexType::eUint16 : vk::IndexType::eUint32;
const u32 index_size = is_index16 ? sizeof(u16) : sizeof(u32); const u32 index_size = is_index16 ? sizeof(u16) : sizeof(u32);
const VAddr index_address = const VAddr index_address =

View File

@ -13,27 +13,27 @@
namespace Vulkan::LiverpoolToVK { namespace Vulkan::LiverpoolToVK {
using DepthBuffer = Liverpool::DepthBuffer; using DepthBuffer = AmdGpu::DepthBuffer;
vk::StencilOp StencilOp(Liverpool::StencilFunc op) { vk::StencilOp StencilOp(AmdGpu::StencilFunc op) {
switch (op) { switch (op) {
case Liverpool::StencilFunc::Keep: case AmdGpu::StencilFunc::Keep:
return vk::StencilOp::eKeep; return vk::StencilOp::eKeep;
case Liverpool::StencilFunc::Zero: case AmdGpu::StencilFunc::Zero:
return vk::StencilOp::eZero; return vk::StencilOp::eZero;
case Liverpool::StencilFunc::ReplaceTest: case AmdGpu::StencilFunc::ReplaceTest:
return vk::StencilOp::eReplace; return vk::StencilOp::eReplace;
case Liverpool::StencilFunc::AddClamp: case AmdGpu::StencilFunc::AddClamp:
return vk::StencilOp::eIncrementAndClamp; return vk::StencilOp::eIncrementAndClamp;
case Liverpool::StencilFunc::SubClamp: case AmdGpu::StencilFunc::SubClamp:
return vk::StencilOp::eDecrementAndClamp; return vk::StencilOp::eDecrementAndClamp;
case Liverpool::StencilFunc::Invert: case AmdGpu::StencilFunc::Invert:
return vk::StencilOp::eInvert; return vk::StencilOp::eInvert;
case Liverpool::StencilFunc::AddWrap: case AmdGpu::StencilFunc::AddWrap:
return vk::StencilOp::eIncrementAndWrap; return vk::StencilOp::eIncrementAndWrap;
case Liverpool::StencilFunc::SubWrap: case AmdGpu::StencilFunc::SubWrap:
return vk::StencilOp::eDecrementAndWrap; return vk::StencilOp::eDecrementAndWrap;
case Liverpool::StencilFunc::ReplaceOp: case AmdGpu::StencilFunc::ReplaceOp:
return vk::StencilOp::eReplace; return vk::StencilOp::eReplace;
default: default:
UNREACHABLE(); UNREACHABLE();
@ -41,23 +41,23 @@ vk::StencilOp StencilOp(Liverpool::StencilFunc op) {
} }
} }
vk::CompareOp CompareOp(Liverpool::CompareFunc func) { vk::CompareOp CompareOp(AmdGpu::CompareFunc func) {
switch (func) { switch (func) {
case Liverpool::CompareFunc::Always: case AmdGpu::CompareFunc::Always:
return vk::CompareOp::eAlways; return vk::CompareOp::eAlways;
case Liverpool::CompareFunc::Equal: case AmdGpu::CompareFunc::Equal:
return vk::CompareOp::eEqual; return vk::CompareOp::eEqual;
case Liverpool::CompareFunc::GreaterEqual: case AmdGpu::CompareFunc::GreaterEqual:
return vk::CompareOp::eGreaterOrEqual; return vk::CompareOp::eGreaterOrEqual;
case Liverpool::CompareFunc::Greater: case AmdGpu::CompareFunc::Greater:
return vk::CompareOp::eGreater; return vk::CompareOp::eGreater;
case Liverpool::CompareFunc::LessEqual: case AmdGpu::CompareFunc::LessEqual:
return vk::CompareOp::eLessOrEqual; return vk::CompareOp::eLessOrEqual;
case Liverpool::CompareFunc::Less: case AmdGpu::CompareFunc::Less:
return vk::CompareOp::eLess; return vk::CompareOp::eLess;
case Liverpool::CompareFunc::NotEqual: case AmdGpu::CompareFunc::NotEqual:
return vk::CompareOp::eNotEqual; return vk::CompareOp::eNotEqual;
case Liverpool::CompareFunc::Never: case AmdGpu::CompareFunc::Never:
return vk::CompareOp::eNever; return vk::CompareOp::eNever;
default: default:
UNREACHABLE(); UNREACHABLE();
@ -126,13 +126,13 @@ vk::PrimitiveTopology PrimitiveType(AmdGpu::PrimitiveType type) {
} }
} }
vk::PolygonMode PolygonMode(Liverpool::PolygonMode mode) { vk::PolygonMode PolygonMode(AmdGpu::PolygonMode mode) {
switch (mode) { switch (mode) {
case Liverpool::PolygonMode::Point: case AmdGpu::PolygonMode::Point:
return vk::PolygonMode::ePoint; return vk::PolygonMode::ePoint;
case Liverpool::PolygonMode::Line: case AmdGpu::PolygonMode::Line:
return vk::PolygonMode::eLine; return vk::PolygonMode::eLine;
case Liverpool::PolygonMode::Fill: case AmdGpu::PolygonMode::Fill:
return vk::PolygonMode::eFill; return vk::PolygonMode::eFill;
default: default:
UNREACHABLE(); UNREACHABLE();
@ -140,15 +140,15 @@ vk::PolygonMode PolygonMode(Liverpool::PolygonMode mode) {
} }
} }
vk::CullModeFlags CullMode(Liverpool::CullMode mode) { vk::CullModeFlags CullMode(AmdGpu::CullMode mode) {
switch (mode) { switch (mode) {
case Liverpool::CullMode::None: case AmdGpu::CullMode::None:
return vk::CullModeFlagBits::eNone; return vk::CullModeFlagBits::eNone;
case Liverpool::CullMode::Front: case AmdGpu::CullMode::Front:
return vk::CullModeFlagBits::eFront; return vk::CullModeFlagBits::eFront;
case Liverpool::CullMode::Back: case AmdGpu::CullMode::Back:
return vk::CullModeFlagBits::eBack; return vk::CullModeFlagBits::eBack;
case Liverpool::CullMode::FrontAndBack: case AmdGpu::CullMode::FrontAndBack:
return vk::CullModeFlagBits::eFrontAndBack; return vk::CullModeFlagBits::eFrontAndBack;
default: default:
UNREACHABLE(); UNREACHABLE();
@ -156,11 +156,11 @@ vk::CullModeFlags CullMode(Liverpool::CullMode mode) {
} }
} }
vk::FrontFace FrontFace(Liverpool::FrontFace face) { vk::FrontFace FrontFace(AmdGpu::FrontFace face) {
switch (face) { switch (face) {
case Liverpool::FrontFace::Clockwise: case AmdGpu::FrontFace::Clockwise:
return vk::FrontFace::eClockwise; return vk::FrontFace::eClockwise;
case Liverpool::FrontFace::CounterClockwise: case AmdGpu::FrontFace::CounterClockwise:
return vk::FrontFace::eCounterClockwise; return vk::FrontFace::eCounterClockwise;
default: default:
UNREACHABLE(); UNREACHABLE();
@ -168,8 +168,8 @@ vk::FrontFace FrontFace(Liverpool::FrontFace face) {
} }
} }
vk::BlendFactor BlendFactor(Liverpool::BlendControl::BlendFactor factor) { vk::BlendFactor BlendFactor(AmdGpu::BlendControl::BlendFactor factor) {
using BlendFactor = Liverpool::BlendControl::BlendFactor; using BlendFactor = AmdGpu::BlendControl::BlendFactor;
switch (factor) { switch (factor) {
case BlendFactor::Zero: case BlendFactor::Zero:
return vk::BlendFactor::eZero; return vk::BlendFactor::eZero;
@ -214,8 +214,8 @@ vk::BlendFactor BlendFactor(Liverpool::BlendControl::BlendFactor factor) {
} }
} }
bool IsDualSourceBlendFactor(Liverpool::BlendControl::BlendFactor factor) { bool IsDualSourceBlendFactor(AmdGpu::BlendControl::BlendFactor factor) {
using BlendFactor = Liverpool::BlendControl::BlendFactor; using BlendFactor = AmdGpu::BlendControl::BlendFactor;
switch (factor) { switch (factor) {
case BlendFactor::Src1Color: case BlendFactor::Src1Color:
case BlendFactor::Src1Alpha: case BlendFactor::Src1Alpha:
@ -227,8 +227,8 @@ bool IsDualSourceBlendFactor(Liverpool::BlendControl::BlendFactor factor) {
} }
} }
vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func) { vk::BlendOp BlendOp(AmdGpu::BlendControl::BlendFunc func) {
using BlendFunc = Liverpool::BlendControl::BlendFunc; using BlendFunc = AmdGpu::BlendControl::BlendFunc;
switch (func) { switch (func) {
case BlendFunc::Add: case BlendFunc::Add:
return vk::BlendOp::eAdd; return vk::BlendOp::eAdd;
@ -245,8 +245,8 @@ vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func) {
} }
} }
vk::LogicOp LogicOp(Liverpool::ColorControl::LogicOp logic_op) { vk::LogicOp LogicOp(AmdGpu::ColorControl::LogicOp logic_op) {
using LogicOp = Liverpool::ColorControl::LogicOp; using LogicOp = AmdGpu::ColorControl::LogicOp;
switch (logic_op) { switch (logic_op) {
case LogicOp::Clear: case LogicOp::Clear:
return vk::LogicOp::eClear; return vk::LogicOp::eClear;
@ -805,9 +805,9 @@ vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat
return format->vk_format; return format->vk_format;
} }
vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color_buffer) { vk::ClearValue ColorBufferClearValue(const AmdGpu::ColorBuffer& color_buffer) {
const auto comp_swizzle = color_buffer.Swizzle(); const auto comp_swizzle = color_buffer.Swizzle();
const auto format = color_buffer.info.format.Value(); const auto format = AmdGpu::DataFormat(color_buffer.info.format);
const auto number_type = color_buffer.GetFixedNumberFormat(); const auto number_type = color_buffer.GetFixedNumberFormat();
const auto& c0 = color_buffer.clear_word0; const auto& c0 = color_buffer.clear_word0;

View File

@ -5,36 +5,37 @@
#include <span> #include <span>
#include "common/assert.h" #include "common/assert.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/amdgpu/pixel_format.h" #include "video_core/amdgpu/pixel_format.h"
#include "video_core/amdgpu/regs_color.h"
#include "video_core/amdgpu/regs_depth.h"
#include "video_core/amdgpu/regs_primitive.h"
#include "video_core/amdgpu/regs_vertex.h"
#include "video_core/amdgpu/resource.h" #include "video_core/amdgpu/resource.h"
#include "video_core/renderer_vulkan/vk_common.h" #include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan::LiverpoolToVK { namespace Vulkan::LiverpoolToVK {
using Liverpool = AmdGpu::Liverpool; vk::StencilOp StencilOp(AmdGpu::StencilFunc op);
vk::StencilOp StencilOp(Liverpool::StencilFunc op); vk::CompareOp CompareOp(AmdGpu::CompareFunc func);
vk::CompareOp CompareOp(Liverpool::CompareFunc func);
bool IsPrimitiveCulled(AmdGpu::PrimitiveType type); bool IsPrimitiveCulled(AmdGpu::PrimitiveType type);
vk::PrimitiveTopology PrimitiveType(AmdGpu::PrimitiveType type); vk::PrimitiveTopology PrimitiveType(AmdGpu::PrimitiveType type);
vk::PolygonMode PolygonMode(Liverpool::PolygonMode mode); vk::PolygonMode PolygonMode(AmdGpu::PolygonMode mode);
vk::CullModeFlags CullMode(Liverpool::CullMode mode); vk::CullModeFlags CullMode(AmdGpu::CullMode mode);
vk::FrontFace FrontFace(Liverpool::FrontFace mode); vk::FrontFace FrontFace(AmdGpu::FrontFace mode);
vk::BlendFactor BlendFactor(Liverpool::BlendControl::BlendFactor factor); vk::BlendFactor BlendFactor(AmdGpu::BlendControl::BlendFactor factor);
bool IsDualSourceBlendFactor(Liverpool::BlendControl::BlendFactor factor); bool IsDualSourceBlendFactor(AmdGpu::BlendControl::BlendFactor factor);
vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func); vk::BlendOp BlendOp(AmdGpu::BlendControl::BlendFunc func);
vk::LogicOp LogicOp(Liverpool::ColorControl::LogicOp logic_op); vk::LogicOp LogicOp(AmdGpu::ColorControl::LogicOp logic_op);
vk::SamplerAddressMode ClampMode(AmdGpu::ClampMode mode); vk::SamplerAddressMode ClampMode(AmdGpu::ClampMode mode);
@ -63,17 +64,17 @@ std::span<const SurfaceFormatInfo> SurfaceFormats();
vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format); vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format);
struct DepthFormatInfo { struct DepthFormatInfo {
Liverpool::DepthBuffer::ZFormat z_format; AmdGpu::DepthBuffer::ZFormat z_format;
Liverpool::DepthBuffer::StencilFormat stencil_format; AmdGpu::DepthBuffer::StencilFormat stencil_format;
vk::Format vk_format; vk::Format vk_format;
vk::FormatFeatureFlags2 flags; vk::FormatFeatureFlags2 flags;
}; };
std::span<const DepthFormatInfo> DepthFormats(); std::span<const DepthFormatInfo> DepthFormats();
vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format, vk::Format DepthFormat(AmdGpu::DepthBuffer::ZFormat z_format,
Liverpool::DepthBuffer::StencilFormat stencil_format); AmdGpu::DepthBuffer::StencilFormat stencil_format);
vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color_buffer); vk::ClearValue ColorBufferClearValue(const AmdGpu::ColorBuffer& color_buffer);
vk::SampleCountFlagBits NumSamples(u32 num_samples, vk::SampleCountFlags supported_flags); vk::SampleCountFlagBits NumSamples(u32 num_samples, vk::SampleCountFlags supported_flags);

View File

@ -3,6 +3,7 @@
#include <boost/container/small_vector.hpp> #include <boost/container/small_vector.hpp>
#include "shader_recompiler/info.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_scheduler.h"
@ -31,8 +32,8 @@ ComputePipeline::ComputePipeline(const Instance& instance, Scheduler& scheduler,
const auto sharp = buffer.GetSharp(*info); const auto sharp = buffer.GetSharp(*info);
bindings.push_back({ bindings.push_back({
.binding = binding++, .binding = binding++,
.descriptorType = buffer.IsStorage(sharp, profile) ? vk::DescriptorType::eStorageBuffer .descriptorType = buffer.IsStorage(sharp) ? vk::DescriptorType::eStorageBuffer
: vk::DescriptorType::eUniformBuffer, : vk::DescriptorType::eUniformBuffer,
.descriptorCount = 1, .descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eCompute, .stageFlags = vk::ShaderStageFlagBits::eCompute,
}); });
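Both pipeline types now pick the descriptor type from IsStorage(sharp) alone, without the profile argument. A small sketch of that selection loop, with a plain bool span standing in for the per-buffer IsStorage() result (names here are illustrative, not the emulator's API):

#include <cstdint>
#include <span>
#include <vector>
#include <vulkan/vulkan.hpp>

// One layout binding per shader buffer; the descriptor type follows a per-buffer
// "is storage" predicate, mirroring the IsStorage(sharp) check above.
std::vector<vk::DescriptorSetLayoutBinding> MakeBufferBindings(std::span<const bool> is_storage,
                                                               vk::ShaderStageFlags stages) {
    std::vector<vk::DescriptorSetLayoutBinding> bindings;
    std::uint32_t binding = 0;
    for (const bool storage : is_storage) {
        bindings.emplace_back(binding++,
                              storage ? vk::DescriptorType::eStorageBuffer
                                      : vk::DescriptorType::eUniformBuffer,
                              1U, stages);
    }
    return bindings;
}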

View File

@ -4,12 +4,10 @@
#include <algorithm> #include <algorithm>
#include <utility> #include <utility>
#include <boost/container/small_vector.hpp> #include <boost/container/small_vector.hpp>
#include <boost/container/static_vector.hpp>
#include "common/assert.h" #include "common/assert.h"
#include "shader_recompiler/backend/spirv/emit_spirv_quad_rect.h" #include "shader_recompiler/backend/spirv/emit_spirv_quad_rect.h"
#include "shader_recompiler/frontend/fetch_shader.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_scheduler.h"
@ -118,7 +116,7 @@ GraphicsPipeline::GraphicsPipeline(
.lineWidth = 1.0f, .lineWidth = 1.0f,
}, },
vk::PipelineRasterizationProvokingVertexStateCreateInfoEXT{ vk::PipelineRasterizationProvokingVertexStateCreateInfoEXT{
.provokingVertexMode = key.provoking_vtx_last == Liverpool::ProvokingVtxLast::First .provokingVertexMode = key.provoking_vtx_last == AmdGpu::ProvokingVtxLast::First
? vk::ProvokingVertexModeEXT::eFirstVertex ? vk::ProvokingVertexModeEXT::eFirstVertex
: vk::ProvokingVertexModeEXT::eLastVertex, : vk::ProvokingVertexModeEXT::eLastVertex,
}, },
@ -142,7 +140,7 @@ GraphicsPipeline::GraphicsPipeline(
}; };
const vk::PipelineViewportDepthClipControlCreateInfoEXT clip_control = { const vk::PipelineViewportDepthClipControlCreateInfoEXT clip_control = {
.negativeOneToOne = key.clip_space == Liverpool::ClipSpace::MinusWToW, .negativeOneToOne = key.clip_space == AmdGpu::ClipSpace::MinusWToW,
}; };
const vk::PipelineViewportStateCreateInfo viewport_info = { const vk::PipelineViewportStateCreateInfo viewport_info = {
@ -259,7 +257,7 @@ GraphicsPipeline::GraphicsPipeline(
color_formats[i] = color_format; color_formats[i] = color_format;
} }
std::array<vk::SampleCountFlagBits, Liverpool::NumColorBuffers> color_samples; std::array<vk::SampleCountFlagBits, AmdGpu::NUM_COLOR_BUFFERS> color_samples;
std::ranges::transform(key.color_samples, color_samples.begin(), [&instance](u8 num_samples) { std::ranges::transform(key.color_samples, color_samples.begin(), [&instance](u8 num_samples) {
return num_samples ? LiverpoolToVK::NumSamples(num_samples, instance.GetColorSampleCounts()) return num_samples ? LiverpoolToVK::NumSamples(num_samples, instance.GetColorSampleCounts())
: vk::SampleCountFlagBits::e1; : vk::SampleCountFlagBits::e1;
@ -275,16 +273,15 @@ GraphicsPipeline::GraphicsPipeline(
.pNext = instance.IsMixedDepthSamplesSupported() ? &mixed_samples : nullptr, .pNext = instance.IsMixedDepthSamplesSupported() ? &mixed_samples : nullptr,
.colorAttachmentCount = key.num_color_attachments, .colorAttachmentCount = key.num_color_attachments,
.pColorAttachmentFormats = color_formats.data(), .pColorAttachmentFormats = color_formats.data(),
.depthAttachmentFormat = key.z_format != Liverpool::DepthBuffer::ZFormat::Invalid .depthAttachmentFormat = key.z_format != AmdGpu::DepthBuffer::ZFormat::Invalid
? depth_format ? depth_format
: vk::Format::eUndefined, : vk::Format::eUndefined,
.stencilAttachmentFormat = .stencilAttachmentFormat = key.stencil_format != AmdGpu::DepthBuffer::StencilFormat::Invalid
key.stencil_format != Liverpool::DepthBuffer::StencilFormat::Invalid ? depth_format
? depth_format : vk::Format::eUndefined,
: vk::Format::eUndefined,
}; };
std::array<vk::PipelineColorBlendAttachmentState, Liverpool::NumColorBuffers> attachments; std::array<vk::PipelineColorBlendAttachmentState, AmdGpu::NUM_COLOR_BUFFERS> attachments;
for (u32 i = 0; i < key.num_color_attachments; i++) { for (u32 i = 0; i < key.num_color_attachments; i++) {
const auto& control = key.blend_controls[i]; const auto& control = key.blend_controls[i];
@ -335,7 +332,7 @@ GraphicsPipeline::GraphicsPipeline(
// Unfortunatelly, Vulkan doesn't provide any control on blend inputs, so below we detecting // Unfortunatelly, Vulkan doesn't provide any control on blend inputs, so below we detecting
// such cases and override alpha value in order to emulate HW behaviour. // such cases and override alpha value in order to emulate HW behaviour.
const auto has_alpha_masked_out = const auto has_alpha_masked_out =
(key.cb_shader_mask.GetMask(i) & Liverpool::ColorBufferMask::ComponentA) == 0; (key.cb_shader_mask.GetMask(i) & AmdGpu::ColorBufferMask::ComponentA) == 0;
const auto has_src_alpha_in_src_blend = src_color == vk::BlendFactor::eSrcAlpha || const auto has_src_alpha_in_src_blend = src_color == vk::BlendFactor::eSrcAlpha ||
src_color == vk::BlendFactor::eOneMinusSrcAlpha; src_color == vk::BlendFactor::eOneMinusSrcAlpha;
const auto has_src_alpha_in_dst_blend = dst_color == vk::BlendFactor::eSrcAlpha || const auto has_src_alpha_in_dst_blend = dst_color == vk::BlendFactor::eSrcAlpha ||
@ -354,7 +351,7 @@ GraphicsPipeline::GraphicsPipeline(
const vk::PipelineColorBlendStateCreateInfo color_blending = { const vk::PipelineColorBlendStateCreateInfo color_blending = {
.logicOpEnable = .logicOpEnable =
instance.IsLogicOpSupported() && key.logic_op != Liverpool::ColorControl::LogicOp::Copy, instance.IsLogicOpSupported() && key.logic_op != AmdGpu::ColorControl::LogicOp::Copy,
.logicOp = LiverpoolToVK::LogicOp(key.logic_op), .logicOp = LiverpoolToVK::LogicOp(key.logic_op),
.attachmentCount = key.num_color_attachments, .attachmentCount = key.num_color_attachments,
.pAttachments = attachments.data(), .pAttachments = attachments.data(),
@ -451,9 +448,8 @@ void GraphicsPipeline::BuildDescSetLayout() {
const auto sharp = buffer.GetSharp(*stage); const auto sharp = buffer.GetSharp(*stage);
bindings.push_back({ bindings.push_back({
.binding = binding++, .binding = binding++,
.descriptorType = buffer.IsStorage(sharp, profile) .descriptorType = buffer.IsStorage(sharp) ? vk::DescriptorType::eStorageBuffer
? vk::DescriptorType::eStorageBuffer : vk::DescriptorType::eUniformBuffer,
: vk::DescriptorType::eUniformBuffer,
.descriptorCount = 1, .descriptorCount = 1,
.stageFlags = stage_bit, .stageFlags = stage_bit,
}); });

View File

@ -6,10 +6,10 @@
#include <boost/container/static_vector.hpp> #include <boost/container/static_vector.hpp>
#include <xxhash.h> #include <xxhash.h>
#include "common/types.h"
#include "shader_recompiler/frontend/fetch_shader.h" #include "shader_recompiler/frontend/fetch_shader.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/amdgpu/regs_color.h"
#include "video_core/renderer_vulkan/vk_common.h" #include "video_core/amdgpu/regs_depth.h"
#include "video_core/amdgpu/regs_primitive.h"
#include "video_core/renderer_vulkan/vk_pipeline_common.h" #include "video_core/renderer_vulkan/vk_pipeline_common.h"
namespace VideoCore { namespace VideoCore {
@ -26,8 +26,6 @@ class Instance;
class Scheduler; class Scheduler;
class DescriptorHeap; class DescriptorHeap;
using Liverpool = AmdGpu::Liverpool;
template <typename T> template <typename T>
using VertexInputs = boost::container::static_vector<T, MaxVertexBufferCount>; using VertexInputs = boost::container::static_vector<T, MaxVertexBufferCount>;
@ -36,25 +34,25 @@ struct GraphicsPipelineKey {
std::array<vk::Format, MaxVertexBufferCount> vertex_buffer_formats; std::array<vk::Format, MaxVertexBufferCount> vertex_buffer_formats;
u32 patch_control_points; u32 patch_control_points;
u32 num_color_attachments; u32 num_color_attachments;
std::array<Shader::PsColorBuffer, Liverpool::NumColorBuffers> color_buffers; std::array<Shader::PsColorBuffer, AmdGpu::NUM_COLOR_BUFFERS> color_buffers;
std::array<Liverpool::BlendControl, Liverpool::NumColorBuffers> blend_controls; std::array<AmdGpu::BlendControl, AmdGpu::NUM_COLOR_BUFFERS> blend_controls;
std::array<vk::ColorComponentFlags, Liverpool::NumColorBuffers> write_masks; std::array<vk::ColorComponentFlags, AmdGpu::NUM_COLOR_BUFFERS> write_masks;
Liverpool::ColorBufferMask cb_shader_mask; AmdGpu::ColorBufferMask cb_shader_mask;
Liverpool::ColorControl::LogicOp logic_op; AmdGpu::ColorControl::LogicOp logic_op;
u8 num_samples; u8 num_samples;
u8 depth_samples; u8 depth_samples;
std::array<u8, Liverpool::NumColorBuffers> color_samples; std::array<u8, AmdGpu::NUM_COLOR_BUFFERS> color_samples;
u32 mrt_mask; u32 mrt_mask;
struct { struct {
Liverpool::DepthBuffer::ZFormat z_format : 2; AmdGpu::DepthBuffer::ZFormat z_format : 2;
Liverpool::DepthBuffer::StencilFormat stencil_format : 1; AmdGpu::DepthBuffer::StencilFormat stencil_format : 1;
u32 depth_clamp_enable : 1; u32 depth_clamp_enable : 1;
}; };
struct { struct {
AmdGpu::PrimitiveType prim_type : 5; AmdGpu::PrimitiveType prim_type : 5;
Liverpool::PolygonMode polygon_mode : 2; AmdGpu::PolygonMode polygon_mode : 2;
Liverpool::ClipSpace clip_space : 1; AmdGpu::ClipSpace clip_space : 1;
Liverpool::ProvokingVtxLast provoking_vtx_last : 1; AmdGpu::ProvokingVtxLast provoking_vtx_last : 1;
u32 depth_clip_enable : 1; u32 depth_clip_enable : 1;
}; };
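GraphicsPipelineKey packs the renamed AmdGpu enums into bit-fields so the whole key stays small and trivially hashable; the header's <xxhash.h> include suggests the key bytes are hashed directly. A toy sketch of that packing-plus-hash idea (field layout and the Hash() helper are illustrative, not the real key):

#include <cstdint>
#include <xxhash.h>

// Toy bit-packed key: several small enums share one u32, and padding keeps the bytes
// fully defined so a raw byte-wise hash is deterministic.
struct TinyPipelineKey {
    std::uint32_t prim_type : 5;
    std::uint32_t polygon_mode : 2;
    std::uint32_t clip_space : 1;
    std::uint32_t provoking_vtx_last : 1;
    std::uint32_t depth_clip_enable : 1;
    std::uint32_t unused : 22;

    std::uint64_t Hash() const {
        return XXH3_64bits(this, sizeof(*this)); // requires a trivially copyable, fully initialized key
    }
};

static_assert(sizeof(TinyPipelineKey) == sizeof(std::uint32_t));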

View File

@ -12,14 +12,13 @@
#include "shader_recompiler/info.h" #include "shader_recompiler/info.h"
#include "shader_recompiler/recompiler.h" #include "shader_recompiler/recompiler.h"
#include "shader_recompiler/runtime_info.h" #include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_presenter.h"
#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/renderer_vulkan/vk_shader_util.h"
extern std::unique_ptr<Vulkan::Presenter> presenter;
namespace Vulkan { namespace Vulkan {
using Shader::LogicalStage; using Shader::LogicalStage;
@ -36,8 +35,7 @@ constexpr static std::array DescriptorHeapSizes = {
vk::DescriptorPoolSize{vk::DescriptorType::eSampler, 1024}, vk::DescriptorPoolSize{vk::DescriptorType::eSampler, 1024},
}; };
static u32 MapOutputs(std::span<Shader::OutputMap, 3> outputs, static u32 MapOutputs(std::span<Shader::OutputMap, 3> outputs, const AmdGpu::VsOutputControl& ctl) {
const AmdGpu::Liverpool::VsOutputControl& ctl) {
u32 num_outputs = 0; u32 num_outputs = 0;
if (ctl.vs_out_misc_enable) { if (ctl.vs_out_misc_enable) {
@ -110,10 +108,10 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
} }
case Stage::Hull: { case Stage::Hull: {
BuildCommon(regs.hs_program); BuildCommon(regs.hs_program);
info.hs_info.num_input_control_points = regs.ls_hs_config.hs_input_control_points.Value(); info.hs_info.num_input_control_points = regs.ls_hs_config.hs_input_control_points;
info.hs_info.num_threads = regs.ls_hs_config.hs_output_control_points.Value(); info.hs_info.num_threads = regs.ls_hs_config.hs_output_control_points;
info.hs_info.tess_type = regs.tess_config.type; info.hs_info.tess_type = regs.tess_config.type;
info.hs_info.offchip_lds_enable = regs.hs_program.settings.rsrc2_hs.oc_lds_en.Value(); info.hs_info.offchip_lds_enable = regs.hs_program.settings.oc_lds_en;
// We need to initialize most hs_info fields after finding the V# with tess constants // We need to initialize most hs_info fields after finding the V# with tess constants
break; break;
@ -130,7 +128,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
info.vs_info.num_outputs = MapOutputs(info.vs_info.outputs, regs.vs_output_control); info.vs_info.num_outputs = MapOutputs(info.vs_info.outputs, regs.vs_output_control);
info.vs_info.emulate_depth_negative_one_to_one = info.vs_info.emulate_depth_negative_one_to_one =
!instance.IsDepthClipControlSupported() && !instance.IsDepthClipControlSupported() &&
regs.clipper_control.clip_space == Liverpool::ClipSpace::MinusWToW; regs.clipper_control.clip_space == AmdGpu::ClipSpace::MinusWToW;
info.vs_info.tess_emulated_primitive = info.vs_info.tess_emulated_primitive =
regs.primitive_type == AmdGpu::PrimitiveType::RectList || regs.primitive_type == AmdGpu::PrimitiveType::RectList ||
regs.primitive_type == AmdGpu::PrimitiveType::QuadList; regs.primitive_type == AmdGpu::PrimitiveType::QuadList;
@ -157,7 +155,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
gs_info.in_vertex_data_size = regs.vgt_esgs_ring_itemsize; gs_info.in_vertex_data_size = regs.vgt_esgs_ring_itemsize;
gs_info.out_vertex_data_size = regs.vgt_gs_vert_itemsize[0]; gs_info.out_vertex_data_size = regs.vgt_gs_vert_itemsize[0];
gs_info.mode = regs.vgt_gs_mode.mode; gs_info.mode = regs.vgt_gs_mode.mode;
const auto params_vc = Liverpool::GetParams(regs.vs_program); const auto params_vc = AmdGpu::GetParams(regs.vs_program);
gs_info.vs_copy = params_vc.code; gs_info.vs_copy = params_vc.code;
gs_info.vs_copy_hash = params_vc.hash; gs_info.vs_copy_hash = params_vc.hash;
DumpShader(gs_info.vs_copy, gs_info.vs_copy_hash, Shader::Stage::Vertex, 0, "copy.bin"); DumpShader(gs_info.vs_copy, gs_info.vs_copy_hash, Shader::Stage::Vertex, 0, "copy.bin");
@ -191,7 +189,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
const auto& ps_inputs = regs.ps_inputs; const auto& ps_inputs = regs.ps_inputs;
for (u32 i = 0; i < regs.num_interp; i++) { for (u32 i = 0; i < regs.num_interp; i++) {
info.fs_info.inputs[i] = { info.fs_info.inputs[i] = {
.param_index = u8(ps_inputs[i].input_offset.Value()), .param_index = u8(ps_inputs[i].input_offset),
.is_default = bool(ps_inputs[i].use_default), .is_default = bool(ps_inputs[i].use_default),
.is_flat = bool(ps_inputs[i].flat_shade), .is_flat = bool(ps_inputs[i].flat_shade),
.default_value = u8(ps_inputs[i].default_value), .default_value = u8(ps_inputs[i].default_value),
@ -327,11 +325,11 @@ bool PipelineCache::RefreshGraphicsKey() {
const bool db_enabled = regs.depth_buffer.DepthValid() || regs.depth_buffer.StencilValid(); const bool db_enabled = regs.depth_buffer.DepthValid() || regs.depth_buffer.StencilValid();
key.z_format = regs.depth_buffer.DepthValid() ? regs.depth_buffer.z_info.format.Value() key.z_format = regs.depth_buffer.DepthValid() ? regs.depth_buffer.z_info.format
: Liverpool::DepthBuffer::ZFormat::Invalid; : AmdGpu::DepthBuffer::ZFormat::Invalid;
key.stencil_format = regs.depth_buffer.StencilValid() key.stencil_format = regs.depth_buffer.StencilValid()
? regs.depth_buffer.stencil_info.format.Value() ? regs.depth_buffer.stencil_info.format
: Liverpool::DepthBuffer::StencilFormat::Invalid; : AmdGpu::DepthBuffer::StencilFormat::Invalid;
key.depth_clamp_enable = !regs.depth_render_override.disable_viewport_clamp; key.depth_clamp_enable = !regs.depth_render_override.disable_viewport_clamp;
key.depth_clip_enable = regs.clipper_control.ZclipEnable(); key.depth_clip_enable = regs.clipper_control.ZclipEnable();
key.clip_space = regs.clipper_control.clip_space; key.clip_space = regs.clipper_control.clip_space;
@ -339,17 +337,17 @@ bool PipelineCache::RefreshGraphicsKey() {
key.prim_type = regs.primitive_type; key.prim_type = regs.primitive_type;
key.polygon_mode = regs.polygon_control.PolyMode(); key.polygon_mode = regs.polygon_control.PolyMode();
key.patch_control_points = key.patch_control_points =
regs.stage_enable.hs_en ? regs.ls_hs_config.hs_input_control_points.Value() : 0; regs.stage_enable.hs_en ? regs.ls_hs_config.hs_input_control_points : 0;
key.logic_op = regs.color_control.rop3; key.logic_op = regs.color_control.rop3;
key.depth_samples = db_enabled ? regs.depth_buffer.NumSamples() : 1; key.depth_samples = db_enabled ? regs.depth_buffer.NumSamples() : 1;
key.num_samples = key.depth_samples; key.num_samples = key.depth_samples;
key.cb_shader_mask = regs.color_shader_mask; key.cb_shader_mask = regs.color_shader_mask;
const bool skip_cb_binding = const bool skip_cb_binding =
regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable; regs.color_control.mode == AmdGpu::ColorControl::OperationMode::Disable;
// First pass to fill render target information needed by shader recompiler // First pass to fill render target information needed by shader recompiler
for (s32 cb = 0; cb < Liverpool::NumColorBuffers && !skip_cb_binding; ++cb) { for (s32 cb = 0; cb < AmdGpu::NUM_COLOR_BUFFERS && !skip_cb_binding; ++cb) {
const auto& col_buf = regs.color_buffers[cb]; const auto& col_buf = regs.color_buffers[cb];
if (!col_buf || !regs.color_target_mask.GetMask(cb)) { if (!col_buf || !regs.color_target_mask.GetMask(cb)) {
// No attachment bound or writing to it is disabled. // No attachment bound or writing to it is disabled.
@ -436,15 +434,7 @@ bool PipelineCache::RefreshGraphicsStages() {
return false; return false;
} }
const auto& bininfo = Liverpool::GetBinaryInfo(*pgm); const auto params = AmdGpu::GetParams(*pgm);
if (!bininfo.Valid()) {
LOG_WARNING(Render_Vulkan, "Invalid binary info structure!");
key.stage_hashes[stage_out_idx] = 0;
infos[stage_out_idx] = nullptr;
return false;
}
auto params = Liverpool::GetParams(*pgm);
std::optional<Shader::Gcn::FetchShaderData> fetch_shader_; std::optional<Shader::Gcn::FetchShaderData> fetch_shader_;
std::tie(infos[stage_out_idx], modules[stage_out_idx], fetch_shader_, std::tie(infos[stage_out_idx], modules[stage_out_idx], fetch_shader_,
key.stage_hashes[stage_out_idx]) = key.stage_hashes[stage_out_idx]) =
@ -463,7 +453,7 @@ bool PipelineCache::RefreshGraphicsStages() {
key.num_color_attachments = std::bit_width(key.mrt_mask); key.num_color_attachments = std::bit_width(key.mrt_mask);
switch (regs.stage_enable.raw) { switch (regs.stage_enable.raw) {
case Liverpool::ShaderStageEnable::VgtStages::EsGs: case AmdGpu::ShaderStageEnable::VgtStages::EsGs:
if (!instance.IsGeometryStageSupported()) { if (!instance.IsGeometryStageSupported()) {
LOG_WARNING(Render_Vulkan, "Geometry shader stage unsupported, skipping"); LOG_WARNING(Render_Vulkan, "Geometry shader stage unsupported, skipping");
return false; return false;
@ -479,7 +469,7 @@ bool PipelineCache::RefreshGraphicsStages() {
return false; return false;
} }
break; break;
case Liverpool::ShaderStageEnable::VgtStages::LsHs: case AmdGpu::ShaderStageEnable::VgtStages::LsHs:
if (!instance.IsTessellationSupported() || if (!instance.IsTessellationSupported() ||
(regs.tess_config.type == AmdGpu::TessellationType::Isoline && (regs.tess_config.type == AmdGpu::TessellationType::Isoline &&
!instance.IsTessellationIsolinesSupported())) { !instance.IsTessellationIsolinesSupported())) {
@ -519,7 +509,7 @@ bool PipelineCache::RefreshGraphicsStages() {
bool PipelineCache::RefreshComputeKey() { bool PipelineCache::RefreshComputeKey() {
Shader::Backend::Bindings binding{}; Shader::Backend::Bindings binding{};
const auto& cs_pgm = liverpool->GetCsRegs(); const auto& cs_pgm = liverpool->GetCsRegs();
const auto cs_params = Liverpool::GetParams(cs_pgm); const auto cs_params = AmdGpu::GetParams(cs_pgm);
std::tie(infos[0], modules[0], fetch_shader, compute_key.value) = std::tie(infos[0], modules[0], fetch_shader, compute_key.value) =
GetProgram(Shader::Stage::Compute, LogicalStage::Compute, cs_params, binding); GetProgram(Shader::Stage::Compute, LogicalStage::Compute, cs_params, binding);
return true; return true;
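With GetParams moved into the AmdGpu namespace, the cache flow itself is unchanged: build a key from the registers, then fetch or create the pipeline for it. A generic sketch of that key-to-pipeline map (the types and the Hash()/builder interface are placeholders, not the emulator's classes):

#include <cstdint>
#include <memory>
#include <unordered_map>
#include <utility>

// Generic keyed pipeline cache: look the hashed key up, build the pipeline on first miss.
// `Builder` is any callable returning std::unique_ptr<Pipeline> for a given key.
template <typename Key, typename Pipeline, typename Builder>
class SimplePipelineCache {
public:
    explicit SimplePipelineCache(Builder builder) : builder_{std::move(builder)} {}

    Pipeline& Get(const Key& key) {
        auto [it, inserted] = cache_.try_emplace(key.Hash(), nullptr);
        if (inserted) {
            it->second = builder_(key); // expensive path: recompile shaders, create the pipeline
        }
        return *it->second;
    }

private:
    Builder builder_;
    std::unordered_map<std::uint64_t, std::unique_ptr<Pipeline>> cache_;
};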

View File

@ -19,6 +19,10 @@ struct std::hash<vk::ShaderModule> {
} }
}; };
namespace AmdGpu {
class Liverpool;
}
namespace Shader { namespace Shader {
struct Info; struct Info;
} }

View File

@ -3,13 +3,11 @@
#include <boost/container/static_vector.hpp> #include <boost/container/static_vector.hpp>
#include "shader_recompiler/info.h" #include "shader_recompiler/resource.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_pipeline_common.h" #include "video_core/renderer_vulkan/vk_pipeline_common.h"
#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/texture_cache/texture_cache.h"
namespace Vulkan { namespace Vulkan {

View File

@ -3,15 +3,16 @@
#pragma once #pragma once
#include "shader_recompiler/backend/bindings.h"
#include "shader_recompiler/info.h"
#include "shader_recompiler/profile.h" #include "shader_recompiler/profile.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/renderer_vulkan/vk_common.h" #include "video_core/renderer_vulkan/vk_common.h"
#include "video_core/texture_cache/texture_cache.h"
namespace VideoCore { #include <boost/container/small_vector.hpp>
class BufferCache;
} // namespace VideoCore namespace Shader {
struct Info;
struct PushData;
} // namespace Shader
namespace Vulkan { namespace Vulkan {
@ -74,7 +75,7 @@ protected:
vk::UniqueDescriptorSetLayout desc_layout; vk::UniqueDescriptorSetLayout desc_layout;
std::array<const Shader::Info*, Shader::MaxStageTypes> stages{}; std::array<const Shader::Info*, Shader::MaxStageTypes> stages{};
bool uses_push_descriptors{}; bool uses_push_descriptors{};
const bool is_compute; bool is_compute;
}; };
} // namespace Vulkan } // namespace Vulkan
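The pipeline-common header now forward-declares Shader::Info and Shader::PushData instead of pulling in the recompiler and texture-cache headers. A tiny, deliberately generic sketch of that pattern: declare incomplete types in the header where only references or pointers are needed, and include the full definitions only in the .cpp (header and class names here are hypothetical):

// pipeline_like.h -- sketch: only incomplete types are needed at declaration time.
#pragma once

namespace Shader {
struct Info;     // full definition lives in shader_recompiler/info.h
struct PushData; // full definition assumed to live alongside Info
} // namespace Shader

class PipelineLike {
public:
    // References to incomplete types are fine in declarations; the .cpp that
    // implements Bind() includes the real headers.
    void Bind(const Shader::Info& info, const Shader::PushData& push_data);
};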

View File

@ -15,6 +15,7 @@
#include <vector> #include <vector>
#include <fmt/ranges.h> #include <fmt/ranges.h>
#include "common/assert.h" #include "common/assert.h"
#include "common/config.h" #include "common/config.h"
#include "common/logging/log.h" #include "common/logging/log.h"
@ -459,4 +460,4 @@ vk::UniqueDebugUtilsMessengerEXT CreateDebugCallback(vk::Instance instance) {
return std::move(messenger); return std::move(messenger);
} }
} // namespace Vulkan } // namespace Vulkan

View File

@ -146,6 +146,10 @@ Presenter::~Presenter() {
ImGui::Core::Shutdown(device); ImGui::Core::Shutdown(device);
} }
bool Presenter::IsVideoOutSurface(const AmdGpu::ColorBuffer& color_buffer) const {
return std::ranges::find(vo_buffers_addr, color_buffer.Address()) != vo_buffers_addr.cend();
}
void Presenter::RecreateFrame(Frame* frame, u32 width, u32 height) { void Presenter::RecreateFrame(Frame* frame, u32 width, u32 height) {
const vk::Device device = instance.GetDevice(); const vk::Device device = instance.GetDevice();
if (frame->imgui_texture) { if (frame->imgui_texture) {
@ -288,7 +292,7 @@ static vk::Format GetFrameViewFormat(const Libraries::VideoOut::PixelFormat form
Frame* Presenter::PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute, Frame* Presenter::PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute,
VAddr cpu_address) { VAddr cpu_address) {
auto desc = VideoCore::TextureCache::VideoOutDesc{attribute, cpu_address}; auto desc = VideoCore::TextureCache::ImageDesc{attribute, cpu_address};
const auto image_id = texture_cache.FindImage(desc); const auto image_id = texture_cache.FindImage(desc);
texture_cache.UpdateImage(image_id); texture_cache.UpdateImage(image_id);

View File

@ -6,9 +6,7 @@
#include <condition_variable> #include <condition_variable>
#include "core/libraries/videoout/buffer.h" #include "core/libraries/videoout/buffer.h"
#include "imgui/imgui_config.h"
#include "imgui/imgui_texture.h" #include "imgui/imgui_texture.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/renderer_vulkan/host_passes/fsr_pass.h" #include "video_core/renderer_vulkan/host_passes/fsr_pass.h"
#include "video_core/renderer_vulkan/host_passes/pp_pass.h" #include "video_core/renderer_vulkan/host_passes/pp_pass.h"
#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_instance.h"
@ -82,20 +80,18 @@ public:
pp_settings.hdr = enable ? 1 : 0; pp_settings.hdr = enable ? 1 : 0;
} }
bool IsVideoOutSurface(const AmdGpu::Liverpool::ColorBuffer& color_buffer) const {
return std::ranges::find(vo_buffers_addr, color_buffer.Address()) != vo_buffers_addr.cend();
}
VideoCore::Image& RegisterVideoOutSurface( VideoCore::Image& RegisterVideoOutSurface(
const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address) { const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address) {
vo_buffers_addr.emplace_back(cpu_address); vo_buffers_addr.emplace_back(cpu_address);
auto desc = VideoCore::TextureCache::VideoOutDesc{attribute, cpu_address}; auto desc = VideoCore::TextureCache::ImageDesc{attribute, cpu_address};
const auto image_id = texture_cache.FindImage(desc); const auto image_id = texture_cache.FindImage(desc);
auto& image = texture_cache.GetImage(image_id); auto& image = texture_cache.GetImage(image_id);
image.usage.vo_surface = 1u; image.usage.vo_surface = 1u;
return image; return image;
} }
bool IsVideoOutSurface(const AmdGpu::ColorBuffer& color_buffer) const;
Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute, Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute,
VAddr cpu_address); VAddr cpu_address);

View File

@ -6,6 +6,7 @@
#include "core/memory.h" #include "core/memory.h"
#include "shader_recompiler/runtime_info.h" #include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/liverpool.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_scheduler.h"
@ -19,7 +20,7 @@
namespace Vulkan { namespace Vulkan {
static Shader::PushData MakeUserData(const AmdGpu::Liverpool::Regs& regs) { static Shader::PushData MakeUserData(const AmdGpu::Regs& regs) {
// TODO(roamic): Add support for multiple viewports and geometry shaders when ViewportIndex // TODO(roamic): Add support for multiple viewports and geometry shaders when ViewportIndex
// is encountered and implemented in the recompiler. // is encountered and implemented in the recompiler.
Shader::PushData push_data{}; Shader::PushData push_data{};
@ -60,20 +61,18 @@ void Rasterizer::CpSync() {
bool Rasterizer::FilterDraw() { bool Rasterizer::FilterDraw() {
const auto& regs = liverpool->regs; const auto& regs = liverpool->regs;
// There are several cases (e.g. FCE, FMask/HTile decompression) where we don't need to do an if (regs.color_control.mode == AmdGpu::ColorControl::OperationMode::EliminateFastClear) {
// actual draw hence can skip pipeline creation.
if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::EliminateFastClear) {
// Clears the render target if FCE is launched before any draws // Clears the render target if FCE is launched before any draws
EliminateFastClear(); EliminateFastClear();
return false; return false;
} }
if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::FmaskDecompress) { if (regs.color_control.mode == AmdGpu::ColorControl::OperationMode::FmaskDecompress) {
// TODO: check for a valid MRT1 to promote the draw to the resolve pass. // TODO: check for a valid MRT1 to promote the draw to the resolve pass.
LOG_TRACE(Render_Vulkan, "FMask decompression pass skipped"); LOG_TRACE(Render_Vulkan, "FMask decompression pass skipped");
ScopedMarkerInsert("FmaskDecompress"); ScopedMarkerInsert("FmaskDecompress");
return false; return false;
} }
if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::Resolve) { if (regs.color_control.mode == AmdGpu::ColorControl::OperationMode::Resolve) {
LOG_TRACE(Render_Vulkan, "Resolve pass"); LOG_TRACE(Render_Vulkan, "Resolve pass");
Resolve(); Resolve();
return false; return false;
@ -85,7 +84,7 @@ bool Rasterizer::FilterDraw() {
} }
const bool cb_disabled = const bool cb_disabled =
regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable; regs.color_control.mode == AmdGpu::ColorControl::OperationMode::Disable;
const auto depth_copy = const auto depth_copy =
regs.depth_render_override.force_z_dirty && regs.depth_render_override.force_z_valid && regs.depth_render_override.force_z_dirty && regs.depth_render_override.force_z_valid &&
regs.depth_buffer.DepthValid() && regs.depth_buffer.DepthWriteValid() && regs.depth_buffer.DepthValid() && regs.depth_buffer.DepthWriteValid() &&
@ -116,7 +115,7 @@ void Rasterizer::PrepareRenderState(const GraphicsPipeline* pipeline) {
} }
const bool skip_cb_binding = const bool skip_cb_binding =
regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable; regs.color_control.mode == AmdGpu::ColorControl::OperationMode::Disable;
for (s32 cb = 0; cb < std::bit_width(key.mrt_mask); ++cb) { for (s32 cb = 0; cb < std::bit_width(key.mrt_mask); ++cb) {
auto& [image_id, desc] = cb_descs[cb]; auto& [image_id, desc] = cb_descs[cb];
const auto& col_buf = regs.color_buffers[cb]; const auto& col_buf = regs.color_buffers[cb];
@ -147,8 +146,8 @@ void Rasterizer::PrepareRenderState(const GraphicsPipeline* pipeline) {
} }
} }
[[nodiscard]] std::pair<u32, u32> GetDrawOffsets( static std::pair<u32, u32> GetDrawOffsets(
const AmdGpu::Liverpool::Regs& regs, const Shader::Info& info, const AmdGpu::Regs& regs, const Shader::Info& info,
const std::optional<Shader::Gcn::FetchShaderData>& fetch_shader) { const std::optional<Shader::Gcn::FetchShaderData>& fetch_shader) {
u32 vertex_offset = regs.index_offset; u32 vertex_offset = regs.index_offset;
u32 instance_offset = 0; u32 instance_offset = 0;
@ -168,7 +167,7 @@ void Rasterizer::EliminateFastClear() {
if (!col_buf || !col_buf.info.fast_clear) { if (!col_buf || !col_buf.info.fast_clear) {
return; return;
} }
VideoCore::TextureCache::RenderTargetDesc desc(col_buf, liverpool->last_cb_extent[0]); VideoCore::TextureCache::ImageDesc desc(col_buf, liverpool->last_cb_extent[0]);
const auto image_id = texture_cache.FindImage(desc); const auto image_id = texture_cache.FindImage(desc);
const auto& image_view = texture_cache.FindRenderTarget(image_id, desc); const auto& image_view = texture_cache.FindRenderTarget(image_id, desc);
if (!texture_cache.IsMetaCleared(col_buf.CmaskAddress(), col_buf.view.slice_start)) { if (!texture_cache.IsMetaCleared(col_buf.CmaskAddress(), col_buf.view.slice_start)) {
@ -540,7 +539,7 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
for (u32 i = 0; i < buffer_bindings.size(); i++) { for (u32 i = 0; i < buffer_bindings.size(); i++) {
const auto& [buffer_id, vsharp, size] = buffer_bindings[i]; const auto& [buffer_id, vsharp, size] = buffer_bindings[i];
const auto& desc = stage.buffers[i]; const auto& desc = stage.buffers[i];
const bool is_storage = desc.IsStorage(vsharp, pipeline_cache.GetProfile()); const bool is_storage = desc.IsStorage(vsharp);
const u32 alignment = const u32 alignment =
is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment(); is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment();
// Buffer is not from the cache, either a special buffer or unbound. // Buffer is not from the cache, either a special buffer or unbound.
@ -846,37 +845,27 @@ RenderState Rasterizer::BeginRendering(const GraphicsPipeline* pipeline) {
} }
void Rasterizer::Resolve() { void Rasterizer::Resolve() {
// Read from MRT0, average all samples, and write to MRT1, which is one-sample
const auto& mrt0_hint = liverpool->last_cb_extent[0]; const auto& mrt0_hint = liverpool->last_cb_extent[0];
const auto& mrt1_hint = liverpool->last_cb_extent[1]; const auto& mrt1_hint = liverpool->last_cb_extent[1];
VideoCore::TextureCache::RenderTargetDesc mrt0_desc{liverpool->regs.color_buffers[0], VideoCore::TextureCache::ImageDesc mrt0_desc{liverpool->regs.color_buffers[0], mrt0_hint};
mrt0_hint}; VideoCore::TextureCache::ImageDesc mrt1_desc{liverpool->regs.color_buffers[1], mrt1_hint};
VideoCore::TextureCache::RenderTargetDesc mrt1_desc{liverpool->regs.color_buffers[1],
mrt1_hint};
auto& mrt0_image = texture_cache.GetImage(texture_cache.FindImage(mrt0_desc, true)); auto& mrt0_image = texture_cache.GetImage(texture_cache.FindImage(mrt0_desc, true));
auto& mrt1_image = texture_cache.GetImage(texture_cache.FindImage(mrt1_desc, true)); auto& mrt1_image = texture_cache.GetImage(texture_cache.FindImage(mrt1_desc, true));
VideoCore::SubresourceRange mrt0_range;
mrt0_range.base.layer = liverpool->regs.color_buffers[0].view.slice_start;
mrt0_range.extent.layers = liverpool->regs.color_buffers[0].NumSlices() - mrt0_range.base.layer;
VideoCore::SubresourceRange mrt1_range;
mrt1_range.base.layer = liverpool->regs.color_buffers[1].view.slice_start;
mrt1_range.extent.layers = liverpool->regs.color_buffers[1].NumSlices() - mrt1_range.base.layer;
ScopeMarkerBegin(fmt::format("Resolve:MRT0={:#x}:MRT1={:#x}", ScopeMarkerBegin(fmt::format("Resolve:MRT0={:#x}:MRT1={:#x}",
liverpool->regs.color_buffers[0].Address(), liverpool->regs.color_buffers[0].Address(),
liverpool->regs.color_buffers[1].Address())); liverpool->regs.color_buffers[1].Address()));
mrt1_image.Resolve(mrt0_image, mrt0_range, mrt1_range); mrt1_image.Resolve(mrt0_image, mrt0_desc.view_info.range, mrt1_desc.view_info.range);
ScopeMarkerEnd(); ScopeMarkerEnd();
} }
void Rasterizer::DepthStencilCopy(bool is_depth, bool is_stencil) { void Rasterizer::DepthStencilCopy(bool is_depth, bool is_stencil) {
auto& regs = liverpool->regs; auto& regs = liverpool->regs;
auto read_desc = VideoCore::TextureCache::DepthTargetDesc( auto read_desc = VideoCore::TextureCache::ImageDesc(
regs.depth_buffer, regs.depth_view, regs.depth_control, regs.depth_buffer, regs.depth_view, regs.depth_control,
regs.depth_htile_data_base.GetAddress(), liverpool->last_db_extent, false); regs.depth_htile_data_base.GetAddress(), liverpool->last_db_extent, false);
auto write_desc = VideoCore::TextureCache::DepthTargetDesc( auto write_desc = VideoCore::TextureCache::ImageDesc(
regs.depth_buffer, regs.depth_view, regs.depth_control, regs.depth_buffer, regs.depth_view, regs.depth_control,
regs.depth_htile_data_base.GetAddress(), liverpool->last_db_extent, true); regs.depth_htile_data_base.GetAddress(), liverpool->last_db_extent, true);
@ -904,6 +893,7 @@ void Rasterizer::DepthStencilCopy(bool is_depth, bool is_stencil) {
if (is_stencil) { if (is_stencil) {
aspect_mask |= vk::ImageAspectFlagBits::eStencil; aspect_mask |= vk::ImageAspectFlagBits::eStencil;
} }
vk::ImageCopy region = { vk::ImageCopy region = {
.srcSubresource = .srcSubresource =
{ {
@ -1013,16 +1003,16 @@ void Rasterizer::UpdateViewportScissorState() const {
const auto combined_scissor_value_br = [](s16 scr, s16 win, s16 gen, s16 win_offset) { const auto combined_scissor_value_br = [](s16 scr, s16 win, s16 gen, s16 win_offset) {
return std::min({scr, s16(win + win_offset), s16(gen + win_offset)}); return std::min({scr, s16(win + win_offset), s16(gen + win_offset)});
}; };
const bool enable_offset = !regs.window_scissor.window_offset_disable.Value(); const bool enable_offset = !regs.window_scissor.window_offset_disable;
Liverpool::Scissor scsr{}; AmdGpu::Scissor scsr{};
scsr.top_left_x = combined_scissor_value_tl( scsr.top_left_x = combined_scissor_value_tl(
regs.screen_scissor.top_left_x, s16(regs.window_scissor.top_left_x.Value()), regs.screen_scissor.top_left_x, s16(regs.window_scissor.top_left_x),
s16(regs.generic_scissor.top_left_x.Value()), s16(regs.generic_scissor.top_left_x),
enable_offset ? regs.window_offset.window_x_offset : 0); enable_offset ? regs.window_offset.window_x_offset : 0);
scsr.top_left_y = combined_scissor_value_tl( scsr.top_left_y = combined_scissor_value_tl(
regs.screen_scissor.top_left_y, s16(regs.window_scissor.top_left_y.Value()), regs.screen_scissor.top_left_y, s16(regs.window_scissor.top_left_y),
s16(regs.generic_scissor.top_left_y.Value()), s16(regs.generic_scissor.top_left_y),
enable_offset ? regs.window_offset.window_y_offset : 0); enable_offset ? regs.window_offset.window_y_offset : 0);
scsr.bottom_right_x = combined_scissor_value_br( scsr.bottom_right_x = combined_scissor_value_br(
regs.screen_scissor.bottom_right_x, regs.window_scissor.bottom_right_x, regs.screen_scissor.bottom_right_x, regs.window_scissor.bottom_right_x,
@ -1033,8 +1023,8 @@ void Rasterizer::UpdateViewportScissorState() const {
regs.generic_scissor.bottom_right_y, regs.generic_scissor.bottom_right_y,
enable_offset ? regs.window_offset.window_y_offset : 0); enable_offset ? regs.window_offset.window_y_offset : 0);
boost::container::static_vector<vk::Viewport, Liverpool::NumViewports> viewports; boost::container::static_vector<vk::Viewport, AmdGpu::NUM_VIEWPORTS> viewports;
boost::container::static_vector<vk::Rect2D, Liverpool::NumViewports> scissors; boost::container::static_vector<vk::Rect2D, AmdGpu::NUM_VIEWPORTS> scissors;
if (regs.polygon_control.enable_window_offset && if (regs.polygon_control.enable_window_offset &&
(regs.window_offset.window_x_offset != 0 || regs.window_offset.window_y_offset != 0)) { (regs.window_offset.window_x_offset != 0 || regs.window_offset.window_y_offset != 0)) {
@ -1043,7 +1033,7 @@ void Rasterizer::UpdateViewportScissorState() const {
} }
const auto& vp_ctl = regs.viewport_control; const auto& vp_ctl = regs.viewport_control;
for (u32 i = 0; i < Liverpool::NumViewports; i++) { for (u32 i = 0; i < AmdGpu::NUM_VIEWPORTS; i++) {
const auto& vp = regs.viewports[i]; const auto& vp = regs.viewports[i];
const auto& vp_d = regs.viewport_depths[i]; const auto& vp_d = regs.viewport_depths[i];
if (vp.xscale == 0) { if (vp.xscale == 0) {
@ -1059,7 +1049,7 @@ void Rasterizer::UpdateViewportScissorState() const {
// https://gitlab.freedesktop.org/mesa/mesa/-/blob/209a0ed/src/amd/vulkan/radv_cmd_buffer.c#L3103-3109 // https://gitlab.freedesktop.org/mesa/mesa/-/blob/209a0ed/src/amd/vulkan/radv_cmd_buffer.c#L3103-3109
// When the clip space is ranged [-1...1], the zoffset is centered. // When the clip space is ranged [-1...1], the zoffset is centered.
// By reversing the above viewport calculations, we get the following: // By reversing the above viewport calculations, we get the following:
if (regs.clipper_control.clip_space == AmdGpu::Liverpool::ClipSpace::MinusWToW) { if (regs.clipper_control.clip_space == AmdGpu::ClipSpace::MinusWToW) {
viewport.minDepth = zoffset - zscale; viewport.minDepth = zoffset - zscale;
viewport.maxDepth = zoffset + zscale; viewport.maxDepth = zoffset + zscale;
} else { } else {
@ -1098,13 +1088,13 @@ void Rasterizer::UpdateViewportScissorState() const {
auto vp_scsr = scsr; auto vp_scsr = scsr;
if (regs.mode_control.vport_scissor_enable) { if (regs.mode_control.vport_scissor_enable) {
vp_scsr.top_left_x = vp_scsr.top_left_x =
std::max(vp_scsr.top_left_x, s16(regs.viewport_scissors[i].top_left_x.Value())); std::max(vp_scsr.top_left_x, s16(regs.viewport_scissors[i].top_left_x));
vp_scsr.top_left_y = vp_scsr.top_left_y =
std::max(vp_scsr.top_left_y, s16(regs.viewport_scissors[i].top_left_y.Value())); std::max(vp_scsr.top_left_y, s16(regs.viewport_scissors[i].top_left_y));
vp_scsr.bottom_right_x = vp_scsr.bottom_right_x = std::min(AmdGpu::Scissor::Clamp(vp_scsr.bottom_right_x),
std::min(vp_scsr.bottom_right_x, regs.viewport_scissors[i].bottom_right_x); regs.viewport_scissors[i].bottom_right_x);
vp_scsr.bottom_right_y = vp_scsr.bottom_right_y = std::min(AmdGpu::Scissor::Clamp(vp_scsr.bottom_right_y),
std::min(vp_scsr.bottom_right_y, regs.viewport_scissors[i].bottom_right_y); regs.viewport_scissors[i].bottom_right_y);
} }
scissors.push_back({ scissors.push_back({
.offset = {vp_scsr.top_left_x, vp_scsr.top_left_y}, .offset = {vp_scsr.top_left_x, vp_scsr.top_left_y},
@ -1187,8 +1177,8 @@ void Rasterizer::UpdateDepthStencilState() const {
const auto back = const auto back =
regs.depth_control.backface_enable ? regs.stencil_ref_back : regs.stencil_ref_front; regs.depth_control.backface_enable ? regs.stencil_ref_back : regs.stencil_ref_front;
dynamic_state.SetStencilReferences(front.stencil_test_val, back.stencil_test_val); dynamic_state.SetStencilReferences(front.stencil_test_val, back.stencil_test_val);
dynamic_state.SetStencilWriteMasks(!stencil_clear ? front.stencil_write_mask.Value() : 0U, dynamic_state.SetStencilWriteMasks(!stencil_clear ? front.stencil_write_mask : 0U,
!stencil_clear ? back.stencil_write_mask.Value() : 0U); !stencil_clear ? back.stencil_write_mask : 0U);
dynamic_state.SetStencilCompareMasks(front.stencil_mask, back.stencil_mask); dynamic_state.SetStencilCompareMasks(front.stencil_mask, back.stencil_mask);
} }
} }
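The scissor setup above intersects the screen, window, and generic scissors, adding the window offset to the latter two only when window_offset_disable is clear. A compact worked sketch of that combination (the bottom-right lambda shown takes the min; the top-left presumably takes the max):

#include <algorithm>
#include <cstdint>

using s16 = std::int16_t; // local stand-in for the project's common/types.h alias

struct ScissorRect {
    s16 top_left_x, top_left_y, bottom_right_x, bottom_right_y;
};

// Intersects the three scissor rectangles; `apply_offset` corresponds to
// !window_scissor.window_offset_disable in the code above.
ScissorRect CombineScissors(ScissorRect scr, ScissorRect win, ScissorRect gen, s16 x_off,
                            s16 y_off, bool apply_offset) {
    const s16 ox = apply_offset ? x_off : s16(0);
    const s16 oy = apply_offset ? y_off : s16(0);
    ScissorRect out{};
    out.top_left_x = std::max({scr.top_left_x, s16(win.top_left_x + ox), s16(gen.top_left_x + ox)});
    out.top_left_y = std::max({scr.top_left_y, s16(win.top_left_y + oy), s16(gen.top_left_y + oy)});
    out.bottom_right_x =
        std::min({scr.bottom_right_x, s16(win.bottom_right_x + ox), s16(gen.bottom_right_x + ox)});
    out.bottom_right_y =
        std::min({scr.bottom_right_y, s16(win.bottom_right_y + oy), s16(gen.bottom_right_y + oy)});
    return out;
}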

View File

@ -127,22 +127,21 @@ private:
Common::SharedFirstMutex mapped_ranges_mutex; Common::SharedFirstMutex mapped_ranges_mutex;
PipelineCache pipeline_cache; PipelineCache pipeline_cache;
using RenderTargetInfo = using RenderTargetInfo = std::pair<VideoCore::ImageId, VideoCore::TextureCache::ImageDesc>;
std::pair<VideoCore::ImageId, VideoCore::TextureCache::RenderTargetDesc>; std::array<RenderTargetInfo, AmdGpu::NUM_COLOR_BUFFERS> cb_descs;
std::array<RenderTargetInfo, Liverpool::NumColorBuffers> cb_descs; std::pair<VideoCore::ImageId, VideoCore::TextureCache::ImageDesc> db_desc;
std::pair<VideoCore::ImageId, VideoCore::TextureCache::DepthTargetDesc> db_desc; boost::container::static_vector<vk::DescriptorImageInfo, Shader::NUM_IMAGES> image_infos;
boost::container::static_vector<vk::DescriptorImageInfo, Shader::NumImages> image_infos; boost::container::static_vector<vk::DescriptorBufferInfo, Shader::NUM_BUFFERS> buffer_infos;
boost::container::static_vector<vk::DescriptorBufferInfo, Shader::NumBuffers> buffer_infos; boost::container::static_vector<VideoCore::ImageId, Shader::NUM_IMAGES> bound_images;
boost::container::static_vector<VideoCore::ImageId, Shader::NumImages> bound_images;
Pipeline::DescriptorWrites set_writes; Pipeline::DescriptorWrites set_writes;
Pipeline::BufferBarriers buffer_barriers; Pipeline::BufferBarriers buffer_barriers;
Shader::PushData push_data; Shader::PushData push_data;
using BufferBindingInfo = std::tuple<VideoCore::BufferId, AmdGpu::Buffer, u64>; using BufferBindingInfo = std::tuple<VideoCore::BufferId, AmdGpu::Buffer, u64>;
boost::container::static_vector<BufferBindingInfo, Shader::NumBuffers> buffer_bindings; boost::container::static_vector<BufferBindingInfo, Shader::NUM_BUFFERS> buffer_bindings;
using ImageBindingInfo = std::pair<VideoCore::ImageId, VideoCore::TextureCache::TextureDesc>; using ImageBindingInfo = std::pair<VideoCore::ImageId, VideoCore::TextureCache::ImageDesc>;
boost::container::static_vector<ImageBindingInfo, Shader::NumImages> image_bindings; boost::container::static_vector<ImageBindingInfo, Shader::NUM_IMAGES> image_bindings;
bool fault_process_pending{}; bool fault_process_pending{};
bool attachment_feedback_loop{}; bool attachment_feedback_loop{};
}; };

View File

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h" #include "common/assert.h"
@ -152,20 +152,20 @@ void Scheduler::SubmitExecution(SubmitInfo& info) {
}; };
const vk::TimelineSemaphoreSubmitInfo timeline_si = { const vk::TimelineSemaphoreSubmitInfo timeline_si = {
.waitSemaphoreValueCount = static_cast<u32>(info.wait_ticks.size()), .waitSemaphoreValueCount = info.num_wait_semas,
.pWaitSemaphoreValues = info.wait_ticks.data(), .pWaitSemaphoreValues = info.wait_ticks.data(),
.signalSemaphoreValueCount = static_cast<u32>(info.signal_ticks.size()), .signalSemaphoreValueCount = info.num_signal_semas,
.pSignalSemaphoreValues = info.signal_ticks.data(), .pSignalSemaphoreValues = info.signal_ticks.data(),
}; };
const vk::SubmitInfo submit_info = { const vk::SubmitInfo submit_info = {
.pNext = &timeline_si, .pNext = &timeline_si,
.waitSemaphoreCount = static_cast<u32>(info.wait_semas.size()), .waitSemaphoreCount = info.num_wait_semas,
.pWaitSemaphores = info.wait_semas.data(), .pWaitSemaphores = info.wait_semas.data(),
.pWaitDstStageMask = wait_stage_masks.data(), .pWaitDstStageMask = wait_stage_masks.data(),
.commandBufferCount = 1U, .commandBufferCount = 1U,
.pCommandBuffers = &current_cmdbuf, .pCommandBuffers = &current_cmdbuf,
.signalSemaphoreCount = static_cast<u32>(info.signal_semas.size()), .signalSemaphoreCount = info.num_signal_semas,
.pSignalSemaphores = info.signal_semas.data(), .pSignalSemaphores = info.signal_semas.data(),
}; };

View File

@ -1,14 +1,15 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#pragma once #pragma once
#include <condition_variable> #include <condition_variable>
#include <boost/container/static_vector.hpp> #include <mutex>
#include <queue>
#include "common/types.h"
#include "common/unique_function.h" #include "common/unique_function.h"
#include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/regs_color.h"
#include "video_core/amdgpu/regs_primitive.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_resource_pool.h" #include "video_core/renderer_vulkan/vk_resource_pool.h"
@ -45,20 +46,22 @@ struct RenderState {
}; };
struct SubmitInfo { struct SubmitInfo {
boost::container::static_vector<vk::Semaphore, 3> wait_semas; std::array<vk::Semaphore, 3> wait_semas;
boost::container::static_vector<u64, 3> wait_ticks; std::array<u64, 3> wait_ticks;
boost::container::static_vector<vk::Semaphore, 3> signal_semas; std::array<vk::Semaphore, 3> signal_semas;
boost::container::static_vector<u64, 3> signal_ticks; std::array<u64, 3> signal_ticks;
vk::Fence fence; vk::Fence fence;
u32 num_wait_semas;
u32 num_signal_semas;
void AddWait(vk::Semaphore semaphore, u64 tick = 1) { void AddWait(vk::Semaphore semaphore, u64 tick = 1) {
wait_semas.emplace_back(semaphore); wait_semas[num_wait_semas] = semaphore;
wait_ticks.emplace_back(tick); wait_ticks[num_wait_semas++] = tick;
} }
void AddSignal(vk::Semaphore semaphore, u64 tick = 1) { void AddSignal(vk::Semaphore semaphore, u64 tick = 1) {
signal_semas.emplace_back(semaphore); signal_semas[num_signal_semas] = semaphore;
signal_ticks.emplace_back(tick); signal_ticks[num_signal_semas++] = tick;
} }
void AddSignal(vk::Fence fence) { void AddSignal(vk::Fence fence) {
@ -66,9 +69,9 @@ struct SubmitInfo {
} }
}; };
using Viewports = boost::container::static_vector<vk::Viewport, AmdGpu::Liverpool::NumViewports>; using Viewports = boost::container::static_vector<vk::Viewport, AmdGpu::NUM_VIEWPORTS>;
using Scissors = boost::container::static_vector<vk::Rect2D, AmdGpu::Liverpool::NumViewports>; using Scissors = boost::container::static_vector<vk::Rect2D, AmdGpu::NUM_VIEWPORTS>;
using ColorWriteMasks = std::array<vk::ColorComponentFlags, AmdGpu::Liverpool::NumColorBuffers>; using ColorWriteMasks = std::array<vk::ColorComponentFlags, AmdGpu::NUM_COLOR_BUFFERS>;
struct StencilOps { struct StencilOps {
vk::StencilOp fail_op{}; vk::StencilOp fail_op{};
vk::StencilOp pass_op{}; vk::StencilOp pass_op{};
@ -413,6 +416,7 @@ private:
const Instance& instance; const Instance& instance;
MasterSemaphore master_semaphore; MasterSemaphore master_semaphore;
CommandPool command_pool; CommandPool command_pool;
DynamicState dynamic_state;
vk::CommandBuffer current_cmdbuf; vk::CommandBuffer current_cmdbuf;
std::condition_variable_any event_cv; std::condition_variable_any event_cv;
struct PendingOp { struct PendingOp {
@ -421,7 +425,6 @@ private:
}; };
std::queue<PendingOp> pending_ops; std::queue<PendingOp> pending_ops;
RenderState render_state; RenderState render_state;
DynamicState dynamic_state;
bool is_rendering = false; bool is_rendering = false;
tracy::VkCtxScope* profiler_scope{}; tracy::VkCtxScope* profiler_scope{};
}; };
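SubmitInfo now uses fixed std::arrays with explicit num_wait_semas/num_signal_semas counters, and SubmitExecution passes those counters straight to Vulkan instead of calling .size(). A minimal usage sketch of the same pattern, assuming the counters start at zero (as value-initialization would give):

#include <array>
#include <cstdint>
#include <vulkan/vulkan.hpp>

// Stand-in mirroring the reworked SubmitInfo: fixed-capacity arrays plus a running count.
struct SketchSubmitInfo {
    std::array<vk::Semaphore, 3> wait_semas{};
    std::array<std::uint64_t, 3> wait_ticks{};
    std::uint32_t num_wait_semas{}; // starts at zero; doubles as array cursor and submit count

    void AddWait(vk::Semaphore semaphore, std::uint64_t tick = 1) {
        wait_semas[num_wait_semas] = semaphore;
        wait_ticks[num_wait_semas++] = tick;
    }
};

// The counter feeds waitSemaphoreCount directly; the pointed-to data must stay alive until submit.
vk::SubmitInfo BuildSubmit(const SketchSubmitInfo& info, const vk::CommandBuffer& cmdbuf,
                           const vk::PipelineStageFlags* wait_stages) {
    return vk::SubmitInfo{}
        .setWaitSemaphoreCount(info.num_wait_semas)
        .setPWaitSemaphores(info.wait_semas.data())
        .setPWaitDstStageMask(wait_stages)
        .setCommandBufferCount(1U)
        .setPCommandBuffers(&cmdbuf);
}

Compared with static_vector, the fixed array plus counter keeps the struct trivially copyable and drops the boost dependency from the header, at the cost of the caller respecting the fixed capacity of three.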

View File

@ -12,8 +12,7 @@ namespace Vulkan {
static constexpr u64 COPY_SHADER_HASH = 0xfefebf9f; static constexpr u64 COPY_SHADER_HASH = 0xfefebf9f;
static bool ExecuteCopyShaderHLE(const Shader::Info& info, static bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::ComputeProgram& cs_program,
const AmdGpu::Liverpool::ComputeProgram& cs_program,
Rasterizer& rasterizer) { Rasterizer& rasterizer) {
auto& scheduler = rasterizer.GetScheduler(); auto& scheduler = rasterizer.GetScheduler();
auto& buffer_cache = rasterizer.GetBufferCache(); auto& buffer_cache = rasterizer.GetBufferCache();
@ -121,8 +120,8 @@ static bool ExecuteCopyShaderHLE(const Shader::Info& info,
return true; return true;
} }
bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs, bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Regs& regs,
const AmdGpu::Liverpool::ComputeProgram& cs_program, Rasterizer& rasterizer) { const AmdGpu::ComputeProgram& cs_program, Rasterizer& rasterizer) {
switch (info.pgm_hash) { switch (info.pgm_hash) {
case COPY_SHADER_HASH: case COPY_SHADER_HASH:
return ExecuteCopyShaderHLE(info, cs_program, rasterizer); return ExecuteCopyShaderHLE(info, cs_program, rasterizer);

View File

@ -3,7 +3,10 @@
#pragma once #pragma once
#include "video_core/amdgpu/liverpool.h" namespace AmdGpu {
struct ComputeProgram;
union Regs;
} // namespace AmdGpu
namespace Shader { namespace Shader {
struct Info; struct Info;
@ -14,7 +17,7 @@ namespace Vulkan {
class Rasterizer; class Rasterizer;
/// Attempts to execute a shader using HLE if possible. /// Attempts to execute a shader using HLE if possible.
bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs, bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Regs& regs,
const AmdGpu::Liverpool::ComputeProgram& cs_program, Rasterizer& rasterizer); const AmdGpu::ComputeProgram& cs_program, Rasterizer& rasterizer);
} // namespace Vulkan } // namespace Vulkan

View File

@ -11,6 +11,8 @@
#include <deque> #include <deque>
#include <optional> #include <optional>
#include <boost/container/small_vector.hpp>
#include <boost/container/static_vector.hpp>
namespace Vulkan { namespace Vulkan {
class Instance; class Instance;

View File

@ -4,7 +4,7 @@
#include "common/assert.h" #include "common/assert.h"
#include "core/libraries/kernel/process.h" #include "core/libraries/kernel/process.h"
#include "core/libraries/videoout/buffer.h" #include "core/libraries/videoout/buffer.h"
#include "shader_recompiler/info.h" #include "shader_recompiler/resource.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/texture_cache/image_info.h" #include "video_core/texture_cache/image_info.h"
#include "video_core/texture_cache/tile.h" #include "video_core/texture_cache/tile.h"
@ -54,8 +54,7 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group,
UpdateSize(); UpdateSize();
} }
ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, ImageInfo::ImageInfo(const AmdGpu::ColorBuffer& buffer, AmdGpu::CbDbExtent hint) noexcept {
const AmdGpu::Liverpool::CbDbExtent& hint /*= {}*/) noexcept {
props.is_tiled = buffer.IsTiled(); props.is_tiled = buffer.IsTiled();
tile_mode = buffer.GetTileMode(); tile_mode = buffer.GetTileMode();
array_mode = AmdGpu::GetArrayMode(tile_mode); array_mode = AmdGpu::GetArrayMode(tile_mode);
@ -74,27 +73,25 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
guest_address = buffer.Address(); guest_address = buffer.Address();
if (props.is_tiled) { if (props.is_tiled) {
guest_size = buffer.GetColorSliceSize() * resources.layers; guest_size = buffer.GetColorSliceSize() * resources.layers;
mips_layout.emplace_back(guest_size, pitch, buffer.Height(), 0); mips_layout[0] = MipInfo(guest_size, pitch, buffer.Height(), 0);
} else { } else {
std::tie(std::ignore, std::ignore, guest_size) = std::tie(std::ignore, std::ignore, guest_size) =
ImageSizeLinearAligned(pitch, size.height, num_bits, num_samples); ImageSizeLinearAligned(pitch, size.height, num_bits, num_samples);
guest_size *= resources.layers; guest_size *= resources.layers;
mips_layout.emplace_back(guest_size, pitch, size.height, 0); mips_layout[0] = MipInfo(guest_size, pitch, size.height, 0);
} }
alt_tile = Libraries::Kernel::sceKernelIsNeoMode() && buffer.info.alt_tile_mode; alt_tile = Libraries::Kernel::sceKernelIsNeoMode() && buffer.info.alt_tile_mode;
} }
ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices, ImageInfo::ImageInfo(const AmdGpu::DepthBuffer& buffer, u32 num_slices, VAddr htile_address,
VAddr htile_address, const AmdGpu::Liverpool::CbDbExtent& hint, AmdGpu::CbDbExtent hint, bool write_buffer) noexcept {
bool write_buffer) noexcept {
tile_mode = buffer.GetTileMode(); tile_mode = buffer.GetTileMode();
array_mode = AmdGpu::GetArrayMode(tile_mode); array_mode = AmdGpu::GetArrayMode(tile_mode);
pixel_format = LiverpoolToVK::DepthFormat(buffer.z_info.format, buffer.stencil_info.format); pixel_format = LiverpoolToVK::DepthFormat(buffer.z_info.format, buffer.stencil_info.format);
type = AmdGpu::ImageType::Color2D; type = AmdGpu::ImageType::Color2D;
props.is_tiled = buffer.IsTiled(); props.is_tiled = buffer.IsTiled();
props.is_depth = true; props.is_depth = true;
props.has_stencil = props.has_stencil = buffer.stencil_info.format != AmdGpu::DepthBuffer::StencilFormat::Invalid;
buffer.stencil_info.format != AmdGpu::Liverpool::DepthBuffer::StencilFormat::Invalid;
num_samples = buffer.NumSamples(); num_samples = buffer.NumSamples();
num_bits = buffer.NumBits(); num_bits = buffer.NumBits();
size.width = hint.Valid() ? hint.width : buffer.Pitch(); size.width = hint.Valid() ? hint.width : buffer.Pitch();
@ -102,7 +99,7 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice
size.depth = 1; size.depth = 1;
pitch = buffer.Pitch(); pitch = buffer.Pitch();
resources.layers = num_slices; resources.layers = num_slices;
meta_info.htile_addr = buffer.z_info.tile_surface_en ? htile_address : 0; meta_info.htile_addr = buffer.z_info.tile_surface_enable ? htile_address : 0;
stencil_addr = write_buffer ? buffer.StencilWriteAddress() : buffer.StencilAddress(); stencil_addr = write_buffer ? buffer.StencilWriteAddress() : buffer.StencilAddress();
stencil_size = pitch * size.height * sizeof(u8); stencil_size = pitch * size.height * sizeof(u8);
@ -110,12 +107,12 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice
guest_address = write_buffer ? buffer.DepthWriteAddress() : buffer.DepthAddress(); guest_address = write_buffer ? buffer.DepthWriteAddress() : buffer.DepthAddress();
if (props.is_tiled) { if (props.is_tiled) {
guest_size = buffer.GetDepthSliceSize() * resources.layers; guest_size = buffer.GetDepthSliceSize() * resources.layers;
mips_layout.emplace_back(guest_size, pitch, buffer.Height(), 0); mips_layout[0] = MipInfo(guest_size, pitch, buffer.Height(), 0);
} else { } else {
std::tie(std::ignore, std::ignore, guest_size) = std::tie(std::ignore, std::ignore, guest_size) =
ImageSizeLinearAligned(pitch, size.height, num_bits, num_samples); ImageSizeLinearAligned(pitch, size.height, num_bits, num_samples);
guest_size *= resources.layers; guest_size *= resources.layers;
mips_layout.emplace_back(guest_size, pitch, size.height, 0); mips_layout[0] = MipInfo(guest_size, pitch, size.height, 0);
} }
} }
@ -154,8 +151,6 @@ bool ImageInfo::IsCompatible(const ImageInfo& info) const {
} }
void ImageInfo::UpdateSize() { void ImageInfo::UpdateSize() {
mips_layout.clear();
MipInfo mip_info{};
guest_size = 0; guest_size = 0;
for (s32 mip = 0; mip < resources.levels; ++mip) { for (s32 mip = 0; mip < resources.levels; ++mip) {
u32 mip_w = pitch >> mip; u32 mip_w = pitch >> mip;
@ -175,6 +170,7 @@ void ImageInfo::UpdateSize() {
mip_d = std::bit_ceil(mip_d); mip_d = std::bit_ceil(mip_d);
} }
auto& mip_info = mips_layout[mip];
switch (array_mode) { switch (array_mode) {
case AmdGpu::ArrayMode::ArrayLinearAligned: { case AmdGpu::ArrayMode::ArrayLinearAligned: {
std::tie(mip_info.pitch, mip_info.height, mip_info.size) = std::tie(mip_info.pitch, mip_info.height, mip_info.size) =
@ -210,7 +206,6 @@ void ImageInfo::UpdateSize() {
} }
mip_info.size *= mip_d * resources.layers; mip_info.size *= mip_d * resources.layers;
mip_info.offset = guest_size; mip_info.offset = guest_size;
mips_layout.emplace_back(mip_info);
guest_size += mip_info.size; guest_size += mip_info.size;
} }
} }
@ -229,13 +224,9 @@ s32 ImageInfo::MipOf(const ImageInfo& info) const {
return -1; return -1;
} }
if (info.mips_layout.empty()) {
UNREACHABLE();
}
// Find mip // Find mip
auto mip = -1; auto mip = -1;
for (auto m = 0; m < info.mips_layout.size(); ++m) { for (auto m = 0; m < info.resources.levels; ++m) {
const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = info.mips_layout[m]; const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = info.mips_layout[m];
const VAddr mip_base = info.guest_address + mip_ofs; const VAddr mip_base = info.guest_address + mip_ofs;
const VAddr mip_end = mip_base + mip_size; const VAddr mip_end = mip_base + mip_size;
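
The depth-buffer path above writes mips_layout[0] in place, and MipOf iterates resources.levels rather than querying a container size. A minimal sketch of the fixed-capacity layout this relies on; the MipInfo fields and the 16-entry cap mirror the header further down, while the helper type and function names are purely illustrative:

#include <array>
#include <cstdint>

// Illustrative stand-in for the per-mip record kept by ImageInfo.
struct MipInfo {
    uint32_t size;
    uint32_t pitch;
    uint32_t height;
    uint32_t offset;
};

// Fixed-capacity layout: a plain std::array indexed by mip level replaces a
// growable vector, and the populated range is bounded by a level count.
struct MipsLayoutSketch {
    std::array<MipInfo, 16> mips{};
    int32_t levels = 1;
};

// Sum the guest footprint by iterating levels, not the array capacity, so any
// unused trailing entries are never observed.
inline uint32_t TotalGuestSize(const MipsLayoutSketch& layout) {
    uint32_t total = 0;
    for (int32_t m = 0; m < layout.levels; ++m) {
        total += layout.mips[m].size;
    }
    return total;
}

Bounding every loop by the level count is what lets UpdateSize drop the clear()/emplace_back pair: entries past resources.levels are simply never read.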

View File

@ -3,16 +3,18 @@
#pragma once #pragma once
#include <boost/container/static_vector.hpp>
#include "common/types.h" #include "common/types.h"
#include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/cb_db_extent.h"
#include "video_core/amdgpu/tiling.h"
#include "video_core/renderer_vulkan/vk_common.h" #include "video_core/renderer_vulkan/vk_common.h"
#include "video_core/texture_cache/types.h" #include "video_core/texture_cache/types.h"
namespace AmdGpu { namespace AmdGpu {
struct ColorBuffer;
struct DepthBuffer;
struct Image;
enum class ImageType : u64; enum class ImageType : u64;
} } // namespace AmdGpu
namespace Libraries::VideoOut { namespace Libraries::VideoOut {
struct BufferAttributeGroup; struct BufferAttributeGroup;
@ -36,10 +38,9 @@ struct ImageProperties {
struct ImageInfo { struct ImageInfo {
ImageInfo() = default; ImageInfo() = default;
ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group, VAddr cpu_address) noexcept; ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group, VAddr cpu_address) noexcept;
ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept; ImageInfo(const AmdGpu::ColorBuffer& buffer, AmdGpu::CbDbExtent hint) noexcept;
ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices, VAddr htile_address, const AmdGpu::Liverpool::CbDbExtent& hint = {}, bool write_buffer = false) noexcept; ImageInfo(const AmdGpu::DepthBuffer& buffer, u32 num_slices, VAddr htile_address, AmdGpu::CbDbExtent hint, bool write_buffer = false) noexcept;
ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept; ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept;
bool IsTiled() const { bool IsTiled() const {
@ -60,7 +61,7 @@ struct ImageInfo {
VAddr cmask_addr; VAddr cmask_addr;
VAddr fmask_addr; VAddr fmask_addr;
VAddr htile_addr; VAddr htile_addr;
u32 htile_clear_mask = u32(-1); s32 htile_clear_mask = -1;
} meta_info{}; } meta_info{};
ImageProperties props{}; ImageProperties props{};
@ -79,7 +80,7 @@ struct ImageInfo {
u32 height; u32 height;
u32 offset; u32 offset;
}; };
boost::container::static_vector<MipInfo, 16> mips_layout; std::array<MipInfo, 16> mips_layout;
VAddr guest_address{}; VAddr guest_address{};
u32 guest_size{}; u32 guest_size{};
u8 bank_swizzle{}; u8 bank_swizzle{};
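
The header above swaps the monolithic liverpool.h include for cb_db_extent.h plus forward declarations of the register structs it only names in signatures. A hedged sketch of that decoupling pattern, with made-up file and type names standing in for the real ones:

// sketch_info.h: references to AmdGpu::RegBlock appear only in declarations,
// so a forward declaration keeps the heavy register headers out of this file.
namespace AmdGpu {
struct RegBlock;
}

struct SketchInfo {
    explicit SketchInfo(const AmdGpu::RegBlock& regs) noexcept; // defined in the .cpp
    int width = 0;
    int height = 0;
};

// sketch_info.cpp would then include the full register header, e.g.
//   #include "video_core/amdgpu/regs_color.h"
// and implement the constructor, so only that one translation unit pays for it.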

View File

@ -2,8 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "common/logging/log.h" #include "common/logging/log.h"
#include "shader_recompiler/info.h" #include "shader_recompiler/resource.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/texture_cache/image.h" #include "video_core/texture_cache/image.h"
@ -71,17 +70,16 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageReso
} }
} }
ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer) noexcept { ImageViewInfo::ImageViewInfo(const AmdGpu::ColorBuffer& col_buffer) noexcept {
range.base.layer = col_buffer.view.slice_start; range.base.layer = col_buffer.BaseSlice();
range.extent.layers = col_buffer.NumSlices() - range.base.layer; range.extent.layers = col_buffer.NumSlices() - range.base.layer;
type = range.extent.layers > 1 ? AmdGpu::ImageType::Color2DArray : AmdGpu::ImageType::Color2D; type = range.extent.layers > 1 ? AmdGpu::ImageType::Color2DArray : AmdGpu::ImageType::Color2D;
format = format =
Vulkan::LiverpoolToVK::SurfaceFormat(col_buffer.GetDataFmt(), col_buffer.GetNumberFmt()); Vulkan::LiverpoolToVK::SurfaceFormat(col_buffer.GetDataFmt(), col_buffer.GetNumberFmt());
} }
ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer, AmdGpu::Liverpool::DepthView view, AmdGpu::Liverpool::DepthControl ctl) { ImageViewInfo::ImageViewInfo(const AmdGpu::DepthBuffer& depth_buffer, AmdGpu::DepthView view, AmdGpu::DepthControl ctl) {
format = Vulkan::LiverpoolToVK::DepthFormat(depth_buffer.z_info.format, format = Vulkan::LiverpoolToVK::DepthFormat(depth_buffer.z_info.format,
depth_buffer.stencil_info.format); depth_buffer.stencil_info.format);
is_storage = ctl.depth_write_enable; is_storage = ctl.depth_write_enable;
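
Above, the color-buffer view is read through BaseSlice() instead of the raw view.slice_start bitfield, keeping the packed register layout private to the register header. A small sketch of that accessor pattern; the bit positions below are invented for illustration and do not match the real CB view register encoding:

#include <cstdint>

// Hypothetical packed view register; field offsets are illustrative only.
struct CbViewRegSketch {
    uint32_t raw = 0;

    uint32_t SliceStart() const { return raw & 0x7FFu; }        // bits [10:0]
    uint32_t SliceMax() const { return (raw >> 13) & 0x7FFu; }  // bits [23:13]
};

struct ColorBufferSketch {
    CbViewRegSketch view;

    // Callers see stable accessors, so the register definition can move between
    // headers (as in this commit) without touching texture-cache code.
    uint32_t BaseSlice() const { return view.SliceStart(); }
    uint32_t NumSlices() const { return view.SliceMax() + 1; }
};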

View File

@ -3,12 +3,19 @@
#pragma once #pragma once
#include "shader_recompiler/info.h" #include "video_core/amdgpu/regs_depth.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/amdgpu/resource.h" #include "video_core/amdgpu/resource.h"
#include "video_core/renderer_vulkan/vk_common.h" #include "video_core/renderer_vulkan/vk_common.h"
#include "video_core/texture_cache/types.h" #include "video_core/texture_cache/types.h"
namespace AmdGpu {
struct ColorBuffer;
}
namespace Shader {
struct ImageResource;
}
namespace Vulkan { namespace Vulkan {
class Instance; class Instance;
class Scheduler; class Scheduler;
@ -19,9 +26,9 @@ namespace VideoCore {
struct ImageViewInfo { struct ImageViewInfo {
ImageViewInfo() = default; ImageViewInfo() = default;
ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept; ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept;
ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer) noexcept; ImageViewInfo(const AmdGpu::ColorBuffer& col_buffer) noexcept;
ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer, AmdGpu::Liverpool::DepthView view, AmdGpu::Liverpool::DepthControl ctl); ImageViewInfo(const AmdGpu::DepthBuffer& depth_buffer, AmdGpu::DepthView view, AmdGpu::DepthControl ctl);
AmdGpu::ImageType type = AmdGpu::ImageType::Color2D; AmdGpu::ImageType type = AmdGpu::ImageType::Color2D;
vk::Format format = vk::Format::eR8G8B8A8Unorm; vk::Format format = vk::Format::eR8G8B8A8Unorm;

View File

@ -2,7 +2,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm> #include <algorithm>
#include "video_core/amdgpu/resource.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/texture_cache/sampler.h" #include "video_core/texture_cache/sampler.h"
@ -10,7 +9,7 @@
namespace VideoCore { namespace VideoCore {
Sampler::Sampler(const Vulkan::Instance& instance, const AmdGpu::Sampler& sampler, Sampler::Sampler(const Vulkan::Instance& instance, const AmdGpu::Sampler& sampler,
const AmdGpu::Liverpool::BorderColorBufferBase& border_color_base) { const AmdGpu::BorderColorBuffer border_color_base) {
using namespace Vulkan; using namespace Vulkan;
const bool anisotropy_enable = instance.IsAnisotropicFilteringSupported() && const bool anisotropy_enable = instance.IsAnisotropicFilteringSupported() &&
(AmdGpu::IsAnisoFilter(sampler.xy_mag_filter) || (AmdGpu::IsAnisoFilter(sampler.xy_mag_filter) ||

View File

@ -3,6 +3,7 @@
#pragma once #pragma once
#include "video_core/amdgpu/regs_texture.h"
#include "video_core/amdgpu/resource.h" #include "video_core/amdgpu/resource.h"
#include "video_core/renderer_vulkan/vk_common.h" #include "video_core/renderer_vulkan/vk_common.h"
@ -15,7 +16,7 @@ namespace VideoCore {
class Sampler { class Sampler {
public: public:
explicit Sampler(const Vulkan::Instance& instance, const AmdGpu::Sampler& sampler, explicit Sampler(const Vulkan::Instance& instance, const AmdGpu::Sampler& sampler,
const AmdGpu::Liverpool::BorderColorBufferBase& border_color_base); const AmdGpu::BorderColorBuffer border_color_base);
~Sampler(); ~Sampler();
Sampler(const Sampler&) = delete; Sampler(const Sampler&) = delete;
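
Both the Sampler constructor and, further below, TextureCache::GetSampler now receive the border-color base register by value instead of by const reference. A short sketch of why that is reasonable for a small, trivially copyable register block; the struct below is an assumption, not the emulator's BorderColorBuffer definition:

#include <cstdint>
#include <type_traits>

// Hypothetical register block holding a border-color table base address.
struct BorderColorBufferSketch {
    uint64_t base_addr;
};

static_assert(std::is_trivially_copyable_v<BorderColorBufferSketch>);
static_assert(sizeof(BorderColorBufferSketch) <= 2 * sizeof(void*),
              "small enough that pass-by-value is no costlier than a reference");

// Passing by value copies one machine word, avoids an extra indirection, and
// removes any question of the referenced register outliving the call.
void ConfigureSampler(BorderColorBufferSketch border_color_base) {
    (void)border_color_base;
}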

View File

@ -6,7 +6,6 @@
#include "common/assert.h" #include "common/assert.h"
#include "common/config.h" #include "common/config.h"
#include "common/debug.h" #include "common/debug.h"
#include "common/polyfill_thread.h"
#include "common/scope_exit.h" #include "common/scope_exit.h"
#include "core/memory.h" #include "core/memory.h"
#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/buffer_cache/buffer_cache.h"
@ -140,8 +139,8 @@ void TextureCache::DownloadedImagesThread(const std::stop_token& token) {
DownloadedImage image; DownloadedImage image;
{ {
std::unique_lock lock{downloaded_images_mutex}; std::unique_lock lock{downloaded_images_mutex};
Common::CondvarWait(downloaded_images_cv, lock, token, [this] { return !downloaded_images_queue.empty(); }); downloaded_images_cv.wait(lock, token, [this] { return !downloaded_images_queue.empty(); });
if (token.stop_requested()) { if (token.stop_requested()) {
break; break;
} }
@ -212,7 +211,7 @@ void TextureCache::InvalidateMemoryFromGPU(VAddr address, size_t max_size) {
void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) { void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) {
std::scoped_lock lk{mutex}; std::scoped_lock lk{mutex};
boost::container::small_vector<ImageId, 16> deleted_images; ImageIds deleted_images;
ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); });
for (const ImageId id : deleted_images) { for (const ImageId id : deleted_images) {
// TODO: Download image data back to host. // TODO: Download image data back to host.
@ -440,7 +439,7 @@ ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) {
return new_image_id; return new_image_id;
} }
ImageId TextureCache::FindImage(BaseDesc& desc, bool exact_fmt) { ImageId TextureCache::FindImage(ImageDesc& desc, bool exact_fmt) {
const auto& info = desc.info; const auto& info = desc.info;
if (info.guest_address == 0) [[unlikely]] { if (info.guest_address == 0) [[unlikely]] {
@ -448,7 +447,7 @@ ImageId TextureCache::FindImage(BaseDesc& desc, bool exact_fmt) {
} }
std::scoped_lock lock{mutex}; std::scoped_lock lock{mutex};
boost::container::small_vector<ImageId, 8> image_ids; ImageIds image_ids;
ForEachImageInRegion(info.guest_address, info.guest_size, ForEachImageInRegion(info.guest_address, info.guest_size,
[&](ImageId image_id, Image& image) { image_ids.push_back(image_id); }); [&](ImageId image_id, Image& image) { image_ids.push_back(image_id); });
@ -529,13 +528,12 @@ ImageId TextureCache::FindImage(BaseDesc& desc, bool exact_fmt) {
} }
ImageId TextureCache::FindImageFromRange(VAddr address, size_t size, bool ensure_valid) { ImageId TextureCache::FindImageFromRange(VAddr address, size_t size, bool ensure_valid) {
boost::container::small_vector<ImageId, 4> image_ids; ImageIds image_ids;
ForEachImageInRegion(address, size, [&](ImageId image_id, Image& image) { ForEachImageInRegion(address, size, [&](ImageId image_id, Image& image) {
if (image.info.guest_address != address) { if (image.info.guest_address != address) {
return; return;
} }
if (ensure_valid && (False(image.flags & ImageFlagBits::GpuModified) || True(image.flags & ImageFlagBits::Dirty))) { if (ensure_valid && !image.SafeToDownload()) {
return; return;
} }
image_ids.push_back(image_id); image_ids.push_back(image_id);
@ -559,7 +557,7 @@ ImageId TextureCache::FindImageFromRange(VAddr address, size_t size, bool ensure
return {}; return {};
} }
ImageView& TextureCache::FindTexture(ImageId image_id, const BaseDesc& desc) { ImageView& TextureCache::FindTexture(ImageId image_id, const ImageDesc& desc) {
Image& image = slot_images[image_id]; Image& image = slot_images[image_id];
if (desc.type == BindingType::Storage) { if (desc.type == BindingType::Storage) {
image.flags |= ImageFlagBits::GpuModified; image.flags |= ImageFlagBits::GpuModified;
@ -572,7 +570,7 @@ ImageView& TextureCache::FindTexture(ImageId image_id, const BaseDesc& desc) {
return image.FindView(desc.view_info); return image.FindView(desc.view_info);
} }
ImageView& TextureCache::FindRenderTarget(ImageId image_id, const BaseDesc& desc) { ImageView& TextureCache::FindRenderTarget(ImageId image_id, const ImageDesc& desc) {
Image& image = slot_images[image_id]; Image& image = slot_images[image_id];
image.flags |= ImageFlagBits::GpuModified; image.flags |= ImageFlagBits::GpuModified;
if (Config::readbackLinearImages() && !image.info.props.is_tiled) { if (Config::readbackLinearImages() && !image.info.props.is_tiled) {
@ -597,7 +595,7 @@ ImageView& TextureCache::FindRenderTarget(ImageId image_id, const BaseDesc& desc
return image.FindView(desc.view_info, false); return image.FindView(desc.view_info, false);
} }
ImageView& TextureCache::FindDepthTarget(ImageId image_id, const BaseDesc& desc) { ImageView& TextureCache::FindDepthTarget(ImageId image_id, const ImageDesc& desc) {
Image& image = slot_images[image_id]; Image& image = slot_images[image_id];
image.flags |= ImageFlagBits::GpuModified; image.flags |= ImageFlagBits::GpuModified;
image.usage.depth_target = 1u; image.usage.depth_target = 1u;
@ -662,10 +660,8 @@ void TextureCache::RefreshImage(Image& image) {
image.hash = hash; image.hash = hash;
} }
const auto& num_layers = image.info.resources.layers; const u32 num_layers = image.info.resources.layers;
const auto& num_mips = image.info.resources.levels; const u32 num_mips = image.info.resources.levels;
ASSERT(num_mips == image.info.mips_layout.size());
const bool is_gpu_modified = True(image.flags & ImageFlagBits::GpuModified); const bool is_gpu_modified = True(image.flags & ImageFlagBits::GpuModified);
const bool is_gpu_dirty = True(image.flags & ImageFlagBits::GpuDirty); const bool is_gpu_dirty = True(image.flags & ImageFlagBits::GpuDirty);
@ -731,9 +727,8 @@ void TextureCache::RefreshImage(Image& image) {
image.Upload(image_copies, buffer, offset); image.Upload(image_copies, buffer, offset);
} }
vk::Sampler TextureCache::GetSampler(const AmdGpu::Sampler& sampler, const AmdGpu::Liverpool::BorderColorBufferBase& border_color_base) { vk::Sampler TextureCache::GetSampler(const AmdGpu::Sampler& sampler, AmdGpu::BorderColorBuffer border_color_base) {
const u64 hash = XXH3_64bits(&sampler, sizeof(sampler)); const u64 hash = XXH3_64bits(&sampler, sizeof(sampler));
const auto [it, new_sampler] = samplers.try_emplace(hash, instance, sampler, border_color_base); const auto [it, new_sampler] = samplers.try_emplace(hash, instance, sampler, border_color_base);
return it->second.Handle(); return it->second.Handle();
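
The download worker above replaces the Common::CondvarWait polyfill with the C++20 stop_token-aware overload of std::condition_variable_any::wait, which a std::jthread's stop request can interrupt directly. A self-contained sketch of that idiom; the queue and names are illustrative, not the texture cache's:

#include <condition_variable>
#include <mutex>
#include <queue>
#include <stop_token>
#include <thread>

std::mutex queue_mutex;
std::condition_variable_any queue_cv;
std::queue<int> work_queue;

void Worker(std::stop_token token) {
    while (!token.stop_requested()) {
        int item;
        {
            std::unique_lock lock{queue_mutex};
            // Returns false if a stop was requested before the predicate held,
            // so the loop can exit without a separate polyfill helper.
            if (!queue_cv.wait(lock, token, [] { return !work_queue.empty(); })) {
                break;
            }
            item = work_queue.front();
            work_queue.pop();
        }
        // process item ...
        (void)item;
    }
}

// A std::jthread passes its stop_token as the first argument and requests stop
// on destruction, which also wakes the wait above:
//   std::jthread worker{Worker};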

View File

@ -3,13 +3,17 @@
#pragma once #pragma once
#include <condition_variable>
#include <mutex>
#include <thread>
#include <unordered_set> #include <unordered_set>
#include <boost/container/small_vector.hpp> #include <boost/container/small_vector.hpp>
#include <queue>
#include <tsl/robin_map.h> #include <tsl/robin_map.h>
#include "common/lru_cache.h" #include "common/lru_cache.h"
#include "common/slot_vector.h" #include "common/slot_vector.h"
#include "video_core/amdgpu/resource.h" #include "shader_recompiler/resource.h"
#include "video_core/multi_level_page_table.h" #include "video_core/multi_level_page_table.h"
#include "video_core/texture_cache/blit_helper.h" #include "video_core/texture_cache/blit_helper.h"
#include "video_core/texture_cache/image.h" #include "video_core/texture_cache/image.h"
@ -32,8 +36,10 @@ class TextureCache {
static constexpr s64 DEFAULT_CRITICAL_GC_MEMORY = 3_GB; static constexpr s64 DEFAULT_CRITICAL_GC_MEMORY = 3_GB;
static constexpr s64 TARGET_GC_THRESHOLD = 8_GB; static constexpr s64 TARGET_GC_THRESHOLD = 8_GB;
using ImageIds = boost::container::small_vector<ImageId, 16>;
struct Traits { struct Traits {
using Entry = boost::container::small_vector<ImageId, 16>; using Entry = ImageIds;
static constexpr size_t AddressSpaceBits = 40; static constexpr size_t AddressSpaceBits = 40;
static constexpr size_t FirstLevelBits = 10; static constexpr size_t FirstLevelBits = 10;
static constexpr size_t PageBits = 20; static constexpr size_t PageBits = 20;
@ -49,44 +55,24 @@ public:
VideoOut, VideoOut,
}; };
struct BaseDesc {
    ImageInfo info;
    ImageViewInfo view_info;
    BindingType type{BindingType::Texture};

    BaseDesc() = default;
    BaseDesc(BindingType type_, ImageInfo info_, ImageViewInfo view_info_) noexcept
        : info{std::move(info_)}, view_info{std::move(view_info_)}, type{type_} {}
};

struct TextureDesc : public BaseDesc {
    TextureDesc() = default;
    TextureDesc(const AmdGpu::Image& image, const Shader::ImageResource& desc)
        : BaseDesc{desc.is_written ? BindingType::Storage : BindingType::Texture,
                   ImageInfo{image, desc}, ImageViewInfo{image, desc}} {}
};

struct RenderTargetDesc : public BaseDesc {
    RenderTargetDesc() = default;
    RenderTargetDesc(const AmdGpu::Liverpool::ColorBuffer& buffer,
                     const AmdGpu::Liverpool::CbDbExtent& hint = {})
        : BaseDesc{BindingType::RenderTarget, ImageInfo{buffer, hint}, ImageViewInfo{buffer}} {}
};

struct DepthTargetDesc : public BaseDesc {
    DepthTargetDesc() = default;
    DepthTargetDesc(const AmdGpu::Liverpool::DepthBuffer& buffer,
                    const AmdGpu::Liverpool::DepthView& view,
                    const AmdGpu::Liverpool::DepthControl& ctl, VAddr htile_address,
                    const AmdGpu::Liverpool::CbDbExtent& hint = {}, bool write_buffer = false)
        : BaseDesc{BindingType::DepthTarget,
                   ImageInfo{buffer, view.NumSlices(), htile_address, hint, write_buffer},
                   ImageViewInfo{buffer, view, ctl}} {}
};

struct VideoOutDesc : public BaseDesc {
    VideoOutDesc(const Libraries::VideoOut::BufferAttributeGroup& group, VAddr cpu_address)
        : BaseDesc{BindingType::VideoOut, ImageInfo{group, cpu_address}, ImageViewInfo{}} {}
};

struct ImageDesc {
    ImageInfo info;
    ImageViewInfo view_info;
    BindingType type{BindingType::Texture};

    ImageDesc() = default;
    ImageDesc(const AmdGpu::Image& image, const Shader::ImageResource& desc)
        : info{image, desc}, view_info{image, desc},
          type{desc.is_written ? BindingType::Storage : BindingType::Texture} {}
    ImageDesc(const AmdGpu::ColorBuffer& buffer, AmdGpu::CbDbExtent hint)
        : info{buffer, hint}, view_info{buffer}, type{BindingType::RenderTarget} {}
    ImageDesc(const AmdGpu::DepthBuffer& buffer, AmdGpu::DepthView view,
              AmdGpu::DepthControl ctl, VAddr htile_address, AmdGpu::CbDbExtent hint,
              bool write_buffer = false)
        : info{buffer, view.NumSlices(), htile_address, hint, write_buffer},
          view_info{buffer, view, ctl}, type{BindingType::DepthTarget} {}
    ImageDesc(const Libraries::VideoOut::BufferAttributeGroup& group, VAddr cpu_address)
        : info{group, cpu_address}, type{BindingType::VideoOut} {}
};
public: public:
@ -111,19 +97,19 @@ public:
void ProcessDownloadImages(); void ProcessDownloadImages();
/// Retrieves the image handle of the image with the provided attributes. /// Retrieves the image handle of the image with the provided attributes.
[[nodiscard]] ImageId FindImage(BaseDesc& desc, bool exact_fmt = false); [[nodiscard]] ImageId FindImage(ImageDesc& desc, bool exact_fmt = false);
/// Retrieves image whose address matches provided /// Retrieves image whose address matches provided
[[nodiscard]] ImageId FindImageFromRange(VAddr address, size_t size, bool ensure_valid = true); [[nodiscard]] ImageId FindImageFromRange(VAddr address, size_t size, bool ensure_valid = true);
/// Retrieves an image view with the properties of the specified image id. /// Retrieves an image view with the properties of the specified image id.
[[nodiscard]] ImageView& FindTexture(ImageId image_id, const BaseDesc& desc); [[nodiscard]] ImageView& FindTexture(ImageId image_id, const ImageDesc& desc);
/// Retrieves the render target with specified properties /// Retrieves the render target with specified properties
[[nodiscard]] ImageView& FindRenderTarget(ImageId image_id, const BaseDesc& desc); [[nodiscard]] ImageView& FindRenderTarget(ImageId image_id, const ImageDesc& desc);
/// Retrieves the depth target with specified properties /// Retrieves the depth target with specified properties
[[nodiscard]] ImageView& FindDepthTarget(ImageId image_id, const BaseDesc& desc); [[nodiscard]] ImageView& FindDepthTarget(ImageId image_id, const ImageDesc& desc);
/// Updates image contents if it was modified by CPU. /// Updates image contents if it was modified by CPU.
void UpdateImage(ImageId image_id) { void UpdateImage(ImageId image_id) {
@ -151,9 +137,8 @@ public:
void RefreshImage(Image& image); void RefreshImage(Image& image);
/// Retrieves the sampler that matches the provided S# descriptor. /// Retrieves the sampler that matches the provided S# descriptor.
[[nodiscard]] vk::Sampler GetSampler(const AmdGpu::Sampler& sampler, const AmdGpu::Liverpool::BorderColorBufferBase& border_color_base); [[nodiscard]] vk::Sampler GetSampler(const AmdGpu::Sampler& sampler, AmdGpu::BorderColorBuffer border_color_base);
/// Retrieves the image with the specified id. /// Retrieves the image with the specified id.
[[nodiscard]] Image& GetImage(ImageId id) { [[nodiscard]] Image& GetImage(ImageId id) {
@ -212,7 +197,7 @@ public:
void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) { void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) {
using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
boost::container::small_vector<ImageId, 32> images; ImageIds images;
ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) { ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) {
const auto it = page_table.find(page); const auto it = page_table.find(page);
if (it == nullptr) { if (it == nullptr) {
@ -329,7 +314,6 @@ private:
Common::LeastRecentlyUsedCache<ImageId, u64> lru_cache; Common::LeastRecentlyUsedCache<ImageId, u64> lru_cache;
PageTable page_table; PageTable page_table;
std::mutex mutex; std::mutex mutex;
struct DownloadedImage { struct DownloadedImage {
u64 tick; u64 tick;
VAddr device_addr; VAddr device_addr;
@ -340,7 +324,6 @@ private:
std::mutex downloaded_images_mutex; std::mutex downloaded_images_mutex;
std::condition_variable_any downloaded_images_cv; std::condition_variable_any downloaded_images_cv;
std::jthread downloaded_images_thread; std::jthread downloaded_images_thread;
struct MetaDataInfo { struct MetaDataInfo {
enum class Type { enum class Type {
CMask, CMask,
@ -348,7 +331,7 @@ private:
HTile, HTile,
}; };
Type type; Type type;
u32 clear_mask{u32(-1)}; s32 clear_mask = -1;
}; };
tsl::robin_map<VAddr, MetaDataInfo> surface_metas; tsl::robin_map<VAddr, MetaDataInfo> surface_metas;
}; };