amdgpu: Split liverpool registers and cleanup (#3707)

TheTurtle 2025-10-05 23:42:40 +03:00 committed by GitHub
parent d17a4fb8cc
commit 8f37cfb739
75 changed files with 2505 additions and 2641 deletions

View File

@ -851,10 +851,10 @@ if (ARCHITECTURE STREQUAL "x86_64")
src/core/cpu_patches.h)
endif()
set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/profile.h
set(SHADER_RECOMPILER src/shader_recompiler/profile.h
src/shader_recompiler/recompiler.cpp
src/shader_recompiler/recompiler.h
src/shader_recompiler/resource.h
src/shader_recompiler/info.h
src/shader_recompiler/params.h
src/shader_recompiler/runtime_info.h
@ -952,17 +952,24 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/ir/value.h
)
set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
set(VIDEO_CORE src/video_core/amdgpu/cb_db_extent.h
src/video_core/amdgpu/liverpool.cpp
src/video_core/amdgpu/liverpool.h
src/video_core/amdgpu/pixel_format.cpp
src/video_core/amdgpu/pixel_format.h
src/video_core/amdgpu/pm4_cmds.h
src/video_core/amdgpu/pm4_opcodes.h
src/video_core/amdgpu/regs_color.h
src/video_core/amdgpu/regs_depth.h
src/video_core/amdgpu/regs.cpp
src/video_core/amdgpu/regs.h
src/video_core/amdgpu/regs_primitive.h
src/video_core/amdgpu/regs_shader.h
src/video_core/amdgpu/regs_texture.h
src/video_core/amdgpu/regs_vertex.h
src/video_core/amdgpu/resource.h
src/video_core/amdgpu/tiling.cpp
src/video_core/amdgpu/tiling.h
src/video_core/amdgpu/types.h
src/video_core/amdgpu/default_context.cpp
src/video_core/buffer_cache/buffer.cpp
src/video_core/buffer_cache/buffer.h
src/video_core/buffer_cache/buffer_cache.cpp
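
The file list above shows the former monolithic liverpool.h register definitions being split into per-category headers (regs_color.h, regs_depth.h, regs_primitive.h, regs_shader.h, regs_texture.h, regs_vertex.h) gathered behind a new regs.h/regs.cpp pair. The contents of regs.h are not part of this diff; the sketch below is only a guess at how the aggregate header might tie the pieces together, based on the includes used elsewhere in this commit (devtools includes video_core/amdgpu/regs.h for AmdGpu::Regs, while reg_popup.h pulls in regs_color.h and regs_depth.h directly).

```cpp
// Hypothetical shape of video_core/amdgpu/regs.h -- an assumption, not the
// file from this commit.
#pragma once

#include "video_core/amdgpu/regs_color.h"     // ColorBuffer, ColorControl, BlendControl, ...
#include "video_core/amdgpu/regs_depth.h"     // DepthBuffer, DepthControl, DepthRenderControl, ...
#include "video_core/amdgpu/regs_primitive.h" // PolygonControl and other primitive/raster state
#include "video_core/amdgpu/regs_shader.h"    // ShaderProgram, ComputeProgram, UserData (assumed)
#include "video_core/amdgpu/regs_texture.h"   // texture-related register state
#include "video_core/amdgpu/regs_vertex.h"    // vertex/index fetch state

namespace AmdGpu {

// Aggregate register file replacing the old Liverpool::Regs nested type; the
// concrete layout lives in the real header.
struct Regs;

} // namespace AmdGpu
```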

View File

@ -1,20 +1,14 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <array>
#include <half.hpp>
#include "common/number_utils.h"
#include "video_core/amdgpu/pixel_format.h"
#include "video_core/amdgpu/types.h"
#define UF11_EXPONENT_SHIFT 6
#define UF10_EXPONENT_SHIFT 5
#define RGB9E5_MANTISSA_BITS 9
#define RGB9E5_EXP_BIAS 1
#define F32_INFINITY 0x7f800000
constexpr u32 UF11_EXPONENT_SHIFT = 6;
constexpr u32 UF10_EXPONENT_SHIFT = 5;
constexpr u32 RGB9E5_MANTISSA_BITS = 9;
constexpr u32 RGB9E5_EXP_BIAS = 1;
constexpr u32 F32_INFINITY = 0x7f800000;
namespace NumberUtils {
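
Replacing the #define constants with typed constexpr values keeps them scoped and usable in constant expressions. A small, self-contained illustration of how the UF11 constants partition an unsigned 11-bit float; the helper functions are for illustration only and are not part of number_utils.

```cpp
#include <cstdint>

constexpr std::uint32_t UF11_EXPONENT_SHIFT = 6; // 6 mantissa bits sit below the exponent
constexpr std::uint32_t UF11_MANTISSA_MASK = (1u << UF11_EXPONENT_SHIFT) - 1;

// Split a packed unsigned 11-bit float into its 5-bit exponent and 6-bit mantissa.
constexpr std::uint32_t Uf11Exponent(std::uint32_t v) {
    return (v >> UF11_EXPONENT_SHIFT) & 0x1F;
}
constexpr std::uint32_t Uf11Mantissa(std::uint32_t v) {
    return v & UF11_MANTISSA_MASK;
}

// Because the constants are constexpr, the decomposition can be checked at compile time.
static_assert(Uf11Exponent(0x7BF) == 30); // largest finite value: exponent 30,
static_assert(Uf11Mantissa(0x7BF) == 63); // mantissa all ones
```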

View File

@ -157,7 +157,7 @@ std::optional<RegDump*> DebugStateImpl::GetRegDump(uintptr_t base_addr, uintptr_
}
void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr,
const AmdGpu::Liverpool::Regs& regs) {
const AmdGpu::Regs& regs) {
std::scoped_lock lock{frame_dump_list_mutex};
auto dump = GetRegDump(base_addr, header_addr);
@ -170,15 +170,14 @@ void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr,
for (int i = 0; i < RegDump::MaxShaderStages; i++) {
if ((*dump)->regs.stage_enable.IsStageEnabled(i)) {
auto stage = (*dump)->regs.ProgramForStage(i);
if (stage->address_lo != 0) {
const auto& info = AmdGpu::Liverpool::SearchBinaryInfo(stage->Address<u32*>());
auto code = stage->Code();
if (stage->address) {
const auto params = AmdGpu::GetParams(*stage);
(*dump)->stages[i] = PipelineShaderProgramDump{
.name = Vulkan::PipelineCache::GetShaderName(Shader::StageFromIndex(i),
info.shader_hash),
.hash = info.shader_hash,
params.hash),
.hash = params.hash,
.user_data = *stage,
.code = std::vector<u32>{code.begin(), code.end()},
.code = std::vector<u32>{params.code.begin(), params.code.end()},
};
}
}
@ -198,12 +197,12 @@ void DebugStateImpl::PushRegsDumpCompute(uintptr_t base_addr, uintptr_t header_a
auto& cs = (*dump)->regs.cs_program;
cs = cs_state;
const auto& info = AmdGpu::Liverpool::SearchBinaryInfo(cs.Address<u32*>());
const auto params = AmdGpu::GetParams(cs);
(*dump)->cs_data = PipelineComputerProgramDump{
.name = Vulkan::PipelineCache::GetShaderName(Shader::Stage::Compute, info.shader_hash),
.hash = info.shader_hash,
.name = Vulkan::PipelineCache::GetShaderName(Shader::Stage::Compute, params.hash),
.hash = params.hash,
.cs_program = cs,
.code = std::vector<u32>{cs.Code().begin(), cs.Code().end()},
.code = std::vector<u32>{params.code.begin(), params.code.end()},
};
}
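
The hunks above replace Liverpool::SearchBinaryInfo plus a separate Code() call with a single AmdGpu::GetParams helper. Its declaration is not shown in this diff; judging only from the call sites here (params.hash, params.code.begin()/end(), overloads for both the graphics stage and the compute program), it plausibly looks like the sketch below. The return type may well be the existing Shader::ShaderParams from shader_recompiler/params.h; the names and namespace used here are assumptions.

```cpp
// Inferred shape of AmdGpu::GetParams -- an assumption based on usage, not the
// actual declaration from the new regs headers.
#include <cstdint>
#include <span>

namespace AmdGpu {

struct ShaderProgram;  // graphics stage register block (regs_shader.h, assumed)
struct ComputeProgram; // compute dispatch register block

struct ShaderParams {
    std::uint64_t hash;                  // shader binary hash fed to the pipeline cache
    std::span<const std::uint32_t> code; // raw GCN code words
};

ShaderParams GetParams(const ShaderProgram& pgm);
ShaderParams GetParams(const ComputeProgram& pgm);

} // namespace AmdGpu
```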

View File

@ -11,7 +11,9 @@
#include <queue>
#include "common/types.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/regs.h"
#include "video_core/renderer_vulkan/vk_common.h"
#ifdef _WIN32
#ifndef WIN32_LEAN_AND_MEAN
@ -54,21 +56,21 @@ struct QueueDump {
struct PipelineShaderProgramDump {
std::string name;
u64 hash;
Vulkan::Liverpool::ShaderProgram user_data{};
AmdGpu::ShaderProgram user_data{};
std::vector<u32> code{};
};
struct PipelineComputerProgramDump {
std::string name;
u64 hash;
Vulkan::Liverpool::ComputeProgram cs_program{};
AmdGpu::ComputeProgram cs_program{};
std::vector<u32> code{};
};
struct RegDump {
bool is_compute{false};
static constexpr size_t MaxShaderStages = 5;
Vulkan::Liverpool::Regs regs{};
AmdGpu::Regs regs;
std::array<PipelineShaderProgramDump, MaxShaderStages> stages{};
PipelineComputerProgramDump cs_data{};
};
@ -219,9 +221,8 @@ public:
void PushQueueDump(QueueDump dump);
void PushRegsDump(uintptr_t base_addr, uintptr_t header_addr,
const AmdGpu::Liverpool::Regs& regs);
using CsState = AmdGpu::Liverpool::ComputeProgram;
void PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, const AmdGpu::Regs& regs);
using CsState = AmdGpu::ComputeProgram;
void PushRegsDumpCompute(uintptr_t base_addr, uintptr_t header_addr, const CsState& cs_state);
void CollectShader(const std::string& name, Shader::LogicalStage l_stage,

View File

@ -65,7 +65,7 @@ static HdrType GetNext(HdrType this_pm4, uint32_t n) {
}
void ParsePolygonControl(u32 value, bool begin_table) {
auto const reg = reinterpret_cast<AmdGpu::Liverpool::PolygonControl const&>(value);
auto const reg = reinterpret_cast<AmdGpu::PolygonControl const&>(value);
if (!begin_table ||
BeginTable("PA_SU_SC_MODE_CNTL", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) {
@ -73,80 +73,80 @@ void ParsePolygonControl(u32 value, bool begin_table) {
TableSetColumnIndex(0);
Text("CULL_FRONT");
TableSetColumnIndex(1);
Text("%X", reg.cull_front.Value());
Text("%X", reg.cull_front);
TableNextRow();
TableSetColumnIndex(0);
Text("CULL_BACK");
TableSetColumnIndex(1);
Text("%X", reg.cull_back.Value());
Text("%X", reg.cull_back);
TableNextRow();
TableSetColumnIndex(0);
Text("FACE");
TableSetColumnIndex(1);
Text("%s", enum_name(reg.front_face.Value()).data());
Text("%s", enum_name(reg.front_face).data());
TableNextRow();
TableSetColumnIndex(0);
Text("POLY_MODE");
TableSetColumnIndex(1);
Text("%X", reg.enable_polygon_mode.Value());
Text("%X", reg.enable_polygon_mode);
TableNextRow();
TableSetColumnIndex(0);
Text("POLYMODE_FRONT_PTYPE");
TableSetColumnIndex(1);
Text("%s", enum_name(reg.polygon_mode_front.Value()).data());
Text("%s", enum_name(reg.polygon_mode_front).data());
TableNextRow();
TableSetColumnIndex(0);
Text("POLYMODE_BACK_PTYPE");
TableSetColumnIndex(1);
Text("%s", enum_name(reg.polygon_mode_back.Value()).data());
Text("%s", enum_name(reg.polygon_mode_back).data());
TableNextRow();
TableSetColumnIndex(0);
Text("POLY_OFFSET_FRONT_ENABLE");
TableSetColumnIndex(1);
Text("%X", reg.enable_polygon_offset_front.Value());
Text("%X", reg.enable_polygon_offset_front);
TableNextRow();
TableSetColumnIndex(0);
Text("POLY_OFFSET_BACK_ENABLE");
TableSetColumnIndex(1);
Text("%X", reg.enable_polygon_offset_back.Value());
Text("%X", reg.enable_polygon_offset_back);
TableNextRow();
TableSetColumnIndex(0);
Text("POLY_OFFSET_PARA_ENABLE");
TableSetColumnIndex(1);
Text("%X", reg.enable_polygon_offset_para.Value());
Text("%X", reg.enable_polygon_offset_para);
TableNextRow();
TableSetColumnIndex(0);
Text("VTX_WINDOW_OFFSET_ENABLE");
TableSetColumnIndex(1);
Text("%X", reg.enable_window_offset.Value());
Text("%X", reg.enable_window_offset);
TableNextRow();
TableSetColumnIndex(0);
Text("PROVOKING_VTX_LAST");
TableSetColumnIndex(1);
Text("%X (%s)", (u32)reg.provoking_vtx_last.Value(),
enum_name(reg.provoking_vtx_last.Value()).data());
Text("%X (%s)", static_cast<u32>(reg.provoking_vtx_last),
enum_name(reg.provoking_vtx_last).data());
TableNextRow();
TableSetColumnIndex(0);
Text("PERSP_CORR_DIS");
TableSetColumnIndex(1);
Text("%X", reg.persp_corr_dis.Value());
Text("%X", reg.persp_corr_dis);
TableNextRow();
TableSetColumnIndex(0);
Text("MULTI_PRIM_IB_ENA");
TableSetColumnIndex(1);
Text("%X", reg.multi_prim_ib_ena.Value());
Text("%X", reg.multi_prim_ib_ena);
if (begin_table) {
EndTable();
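
Throughout this file the .Value() accessors disappear and fields are passed straight to printf-style Text("%X", ...) or to magic_enum::enum_name, which suggests the split register headers declare these registers with plain (possibly enum-typed) C++ bit-fields rather than BitField wrappers. A guess at what that looks like for PA_SU_SC_MODE_CNTL; field names come from the diff, while widths and enumerator values are assumptions.

```cpp
// Illustrative only: how PolygonControl might be declared with plain bit-fields
// so the devtools code can print members directly. Not the real header.
#include <cstdint>

namespace AmdGpu {

enum class FrontFace : std::uint32_t { CounterClockwise = 0, Clockwise = 1 }; // values assumed

struct PolygonControl {
    std::uint32_t cull_front : 1;
    std::uint32_t cull_back : 1;
    FrontFace front_face : 1;
    std::uint32_t enable_polygon_mode : 2; // width is a guess
    // ... remaining PA_SU_SC_MODE_CNTL fields elided
};

} // namespace AmdGpu
```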
@ -155,7 +155,7 @@ void ParsePolygonControl(u32 value, bool begin_table) {
}
void ParseAaConfig(u32 value, bool begin_table) {
auto const reg = reinterpret_cast<Liverpool::AaConfig const&>(value);
auto const reg = reinterpret_cast<AmdGpu::AaConfig const&>(value);
if (!begin_table ||
BeginTable("PA_SC_AA_CONFIG", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) {
@ -163,31 +163,31 @@ void ParseAaConfig(u32 value, bool begin_table) {
TableSetColumnIndex(0);
Text("MSAA_NUM_SAMPLES");
TableSetColumnIndex(1);
Text("%X", reg.msaa_num_samples.Value());
Text("%X", reg.msaa_num_samples);
TableNextRow();
TableSetColumnIndex(0);
Text("AA_MASK_CENTROID_DTMN");
TableSetColumnIndex(1);
Text("%X", reg.aa_mask_centroid_dtmn.Value());
Text("%X", reg.aa_mask_centroid_dtmn);
TableNextRow();
TableSetColumnIndex(0);
Text("MAX_SAMPLE_DIST");
TableSetColumnIndex(1);
Text("%X", reg.max_sample_dst.Value());
Text("%X", reg.max_sample_dst);
TableNextRow();
TableSetColumnIndex(0);
Text("MSAA_EXPOSED_SAMPLES");
TableSetColumnIndex(1);
Text("%X", reg.msaa_exposed_samples.Value());
Text("%X", reg.msaa_exposed_samples);
TableNextRow();
TableSetColumnIndex(0);
Text("DETAIL_TO_EXPOSED_MODE");
TableSetColumnIndex(1);
Text("%X", reg.detail_to_exposed_mode.Value());
Text("%X", reg.detail_to_exposed_mode);
if (begin_table) {
EndTable();
@ -196,7 +196,7 @@ void ParseAaConfig(u32 value, bool begin_table) {
}
void ParseViewportControl(u32 value, bool begin_table) {
auto const reg = reinterpret_cast<Liverpool::ViewportControl const&>(value);
auto const reg = reinterpret_cast<AmdGpu::ViewportControl const&>(value);
if (!begin_table ||
BeginTable("PA_CL_VTE_CNTL", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) {
@ -204,61 +204,61 @@ void ParseViewportControl(u32 value, bool begin_table) {
TableSetColumnIndex(0);
Text("VPORT_X_SCALE_ENA");
TableSetColumnIndex(1);
Text("%X", reg.xscale_enable.Value());
Text("%X", reg.xscale_enable);
TableNextRow();
TableSetColumnIndex(0);
Text("VPORT_X_OFFSET_ENA");
TableSetColumnIndex(1);
Text("%X", reg.yoffset_enable.Value());
Text("%X", reg.yoffset_enable);
TableNextRow();
TableSetColumnIndex(0);
Text("VPORT_Y_SCALE_ENA");
TableSetColumnIndex(1);
Text("%X", reg.yscale_enable.Value());
Text("%X", reg.yscale_enable);
TableNextRow();
TableSetColumnIndex(0);
Text("VPORT_Y_OFFSET_ENA");
TableSetColumnIndex(1);
Text("%X", reg.yoffset_enable.Value());
Text("%X", reg.yoffset_enable);
TableNextRow();
TableSetColumnIndex(0);
Text("VPORT_Z_SCALE_ENA");
TableSetColumnIndex(1);
Text("%X", reg.zscale_enable.Value());
Text("%X", reg.zscale_enable);
TableNextRow();
TableSetColumnIndex(0);
Text("VPORT_Z_OFFSET_ENA");
TableSetColumnIndex(1);
Text("%X", reg.zoffset_enable.Value());
Text("%X", reg.zoffset_enable);
TableNextRow();
TableSetColumnIndex(0);
Text("VTX_XY_FMT");
TableSetColumnIndex(1);
Text("%X", reg.xy_transformed.Value());
Text("%X", reg.xy_transformed);
TableNextRow();
TableSetColumnIndex(0);
Text("VTX_Z_FMT");
TableSetColumnIndex(1);
Text("%X", reg.z_transformed.Value());
Text("%X", reg.z_transformed);
TableNextRow();
TableSetColumnIndex(0);
Text("VTX_W0_FMT");
TableSetColumnIndex(1);
Text("%X", reg.w_transformed.Value());
Text("%X", reg.w_transformed);
TableNextRow();
TableSetColumnIndex(0);
Text("PERFCOUNTER_REF");
TableSetColumnIndex(1);
Text("%X", reg.perfcounter_ref.Value());
Text("%X", reg.perfcounter_ref);
if (begin_table) {
EndTable();
@ -267,7 +267,7 @@ void ParseViewportControl(u32 value, bool begin_table) {
}
void ParseColorControl(u32 value, bool begin_table) {
auto const reg = reinterpret_cast<Liverpool::ColorControl const&>(value);
auto const reg = reinterpret_cast<AmdGpu::ColorControl const&>(value);
if (!begin_table ||
BeginTable("CB_COLOR_CONTROL", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) {
@ -275,25 +275,25 @@ void ParseColorControl(u32 value, bool begin_table) {
TableSetColumnIndex(0);
Text("DISABLE_DUAL_QUAD__VI");
TableSetColumnIndex(1);
Text("%X", reg.disable_dual_quad.Value());
Text("%X", reg.disable_dual_quad);
TableNextRow();
TableSetColumnIndex(0);
Text("DEGAMMA_ENABLE");
TableSetColumnIndex(1);
Text("%X", reg.degamma_enable.Value());
Text("%X", reg.degamma_enable);
TableNextRow();
TableSetColumnIndex(0);
Text("MODE");
TableSetColumnIndex(1);
Text("%X (%s)", (u32)reg.mode.Value(), enum_name(reg.mode.Value()).data());
Text("%X (%s)", static_cast<u32>(reg.mode), enum_name(reg.mode).data());
TableNextRow();
TableSetColumnIndex(0);
Text("ROP3");
TableSetColumnIndex(1);
Text("%X", static_cast<u32>(reg.rop3.Value()));
Text("%X", static_cast<u32>(reg.rop3));
if (begin_table) {
EndTable();
@ -302,7 +302,7 @@ void ParseColorControl(u32 value, bool begin_table) {
}
void ParseColor0Info(u32 value, bool begin_table) {
auto const reg = reinterpret_cast<Liverpool::ColorBuffer::Color0Info const&>(value);
auto const reg = reinterpret_cast<AmdGpu::ColorBuffer::Color0Info const&>(value);
if (!begin_table ||
BeginTable("CB_COLOR_INFO", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) {
@ -310,109 +310,109 @@ void ParseColor0Info(u32 value, bool begin_table) {
TableSetColumnIndex(0);
Text("ENDIAN");
TableSetColumnIndex(1);
Text("%s", enum_name(reg.endian.Value()).data());
Text("%s", enum_name(reg.endian).data());
TableNextRow();
TableSetColumnIndex(0);
Text("FORMAT");
TableSetColumnIndex(1);
Text("%s", enum_name(reg.format.Value()).data());
Text("%s", enum_name(AmdGpu::DataFormat(reg.format)).data());
TableNextRow();
TableSetColumnIndex(0);
Text("LINEAR_GENERAL");
TableSetColumnIndex(1);
Text("%X", reg.linear_general.Value());
Text("%X", reg.linear_general);
TableNextRow();
TableSetColumnIndex(0);
Text("NUMBER_TYPE");
TableSetColumnIndex(1);
Text("%s", enum_name(reg.number_type.Value()).data());
Text("%s", enum_name(AmdGpu::NumberFormat(reg.number_type)).data());
TableNextRow();
TableSetColumnIndex(0);
Text("COMP_SWAP");
TableSetColumnIndex(1);
Text("%s", enum_name(reg.comp_swap.Value()).data());
Text("%s", enum_name(reg.comp_swap).data());
TableNextRow();
TableSetColumnIndex(0);
Text("FAST_CLEAR");
TableSetColumnIndex(1);
Text("%X", reg.fast_clear.Value());
Text("%X", reg.fast_clear);
TableNextRow();
TableSetColumnIndex(0);
Text("COMPRESSION");
TableSetColumnIndex(1);
Text("%X", reg.compression.Value());
Text("%X", reg.compression);
TableNextRow();
TableSetColumnIndex(0);
Text("BLEND_CLAMP");
TableSetColumnIndex(1);
Text("%X", reg.blend_clamp.Value());
Text("%X", reg.blend_clamp);
TableNextRow();
TableSetColumnIndex(0);
Text("BLEND_BYPASS");
TableSetColumnIndex(1);
Text("%X", reg.blend_bypass.Value());
Text("%X", reg.blend_bypass);
TableNextRow();
TableSetColumnIndex(0);
Text("SIMPLE_FLOAT");
TableSetColumnIndex(1);
Text("%X", reg.simple_float.Value());
Text("%X", reg.simple_float);
TableNextRow();
TableSetColumnIndex(0);
Text("ROUND_MODE");
TableSetColumnIndex(1);
Text("%X (%s)", (u32)reg.round_mode.Value(), enum_name(reg.round_mode.Value()).data());
Text("%X (%s)", static_cast<u32>(reg.round_mode), enum_name(reg.round_mode).data());
TableNextRow();
TableSetColumnIndex(0);
Text("CMASK_IS_LINEAR");
TableSetColumnIndex(1);
Text("%X", reg.cmask_is_linear.Value());
Text("%X", reg.cmask_is_linear);
TableNextRow();
TableSetColumnIndex(0);
Text("BLEND_OPT_DONT_RD_DST");
TableSetColumnIndex(1);
Text("%X", reg.blend_opt_dont_rd_dst.Value());
Text("%X", reg.blend_opt_dont_rd_dst);
TableNextRow();
TableSetColumnIndex(0);
Text("BLEND_OPT_DISCARD_PIXEL");
TableSetColumnIndex(1);
Text("%X", reg.blend_opt_discard_pixel.Value());
Text("%X", reg.blend_opt_discard_pixel);
TableNextRow();
TableSetColumnIndex(0);
Text("FMASK_COMPRESSION_DISABLE__CI__VI");
TableSetColumnIndex(1);
Text("%X", reg.fmask_compression_disable_ci.Value());
Text("%X", reg.fmask_compression_disable_ci);
TableNextRow();
TableSetColumnIndex(0);
Text("FMASK_COMPRESS_1FRAG_ONLY__VI");
TableSetColumnIndex(1);
Text("%X", reg.fmask_compress_1frag_only.Value());
Text("%X", reg.fmask_compress_1frag_only);
TableNextRow();
TableSetColumnIndex(0);
Text("DCC_ENABLE__VI");
TableSetColumnIndex(1);
Text("%X", reg.dcc_enable.Value());
Text("%X", reg.dcc_enable);
TableNextRow();
TableSetColumnIndex(0);
Text("CMASK_ADDR_TYPE__VI");
TableSetColumnIndex(1);
Text("%X", reg.cmask_addr_type.Value());
Text("%X", reg.cmask_addr_type);
if (begin_table) {
EndTable();
@ -421,7 +421,7 @@ void ParseColor0Info(u32 value, bool begin_table) {
}
void ParseColor0Attrib(u32 value, bool begin_table) {
auto const reg = reinterpret_cast<Liverpool::ColorBuffer::Color0Attrib const&>(value);
auto const reg = reinterpret_cast<AmdGpu::ColorBuffer::Color0Attrib const&>(value);
if (!begin_table ||
BeginTable("CB_COLOR_ATTRIB", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) {
@ -429,37 +429,37 @@ void ParseColor0Attrib(u32 value, bool begin_table) {
TableSetColumnIndex(0);
Text("TILE_MODE_INDEX");
TableSetColumnIndex(1);
Text("%s", enum_name(reg.tile_mode_index.Value()).data());
Text("%s", enum_name(reg.tile_mode_index).data());
TableNextRow();
TableSetColumnIndex(0);
Text("FMASK_TILE_MODE_INDEX");
TableSetColumnIndex(1);
Text("%X", reg.fmask_tile_mode_index.Value());
Text("%X", reg.fmask_tile_mode_index);
TableNextRow();
TableSetColumnIndex(0);
Text("FMASK_BANK_HEIGHT");
TableSetColumnIndex(1);
Text("%X", reg.fmask_bank_height.Value());
Text("%X", reg.fmask_bank_height);
TableNextRow();
TableSetColumnIndex(0);
Text("NUM_SAMPLES");
TableSetColumnIndex(1);
Text("%X", reg.num_samples_log2.Value());
Text("%X", reg.num_samples_log2);
TableNextRow();
TableSetColumnIndex(0);
Text("NUM_FRAGMENTS");
TableSetColumnIndex(1);
Text("%X", reg.num_fragments_log2.Value());
Text("%X", reg.num_fragments_log2);
TableNextRow();
TableSetColumnIndex(0);
Text("FORCE_DST_ALPHA_1");
TableSetColumnIndex(1);
Text("%X", reg.force_dst_alpha_1.Value());
Text("%X", reg.force_dst_alpha_1);
if (begin_table) {
EndTable();
@ -468,7 +468,7 @@ void ParseColor0Attrib(u32 value, bool begin_table) {
}
void ParseBlendControl(u32 value, bool begin_table) {
auto const reg = reinterpret_cast<Liverpool::BlendControl const&>(value);
auto const reg = reinterpret_cast<AmdGpu::BlendControl const&>(value);
if (!begin_table ||
BeginTable("CB_BLEND_CONTROL", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) {
@ -476,59 +476,59 @@ void ParseBlendControl(u32 value, bool begin_table) {
TableSetColumnIndex(0);
Text("COLOR_SRCBLEND");
TableSetColumnIndex(1);
Text("%X (%s)", (u32)reg.color_src_factor.Value(),
enum_name(reg.color_src_factor.Value()).data());
Text("%X (%s)", static_cast<u32>(reg.color_src_factor),
enum_name(reg.color_src_factor).data());
TableNextRow();
TableSetColumnIndex(0);
Text("COLOR_COMB_FCN");
TableSetColumnIndex(1);
Text("%X (%s)", (u32)reg.color_func.Value(), enum_name(reg.color_func.Value()).data());
Text("%X (%s)", static_cast<u32>(reg.color_func), enum_name(reg.color_func).data());
TableNextRow();
TableSetColumnIndex(0);
Text("COLOR_DESTBLEND");
TableSetColumnIndex(1);
Text("%X (%s)", (u32)reg.color_dst_factor.Value(),
enum_name(reg.color_dst_factor.Value()).data());
Text("%X (%s)", static_cast<u32>(reg.color_dst_factor),
enum_name(reg.color_dst_factor).data());
TableNextRow();
TableSetColumnIndex(0);
Text("ALPHA_SRCBLEND");
TableSetColumnIndex(1);
Text("%X (%s)", (u32)reg.alpha_src_factor.Value(),
enum_name(reg.alpha_src_factor.Value()).data());
Text("%X (%s)", static_cast<u32>(reg.alpha_src_factor),
enum_name(reg.alpha_src_factor).data());
TableNextRow();
TableSetColumnIndex(0);
Text("ALPHA_COMB_FCN");
TableSetColumnIndex(1);
Text("%X (%s)", (u32)reg.alpha_func.Value(), enum_name(reg.alpha_func.Value()).data());
Text("%X (%s)", static_cast<u32>(reg.alpha_func), enum_name(reg.alpha_func).data());
TableNextRow();
TableSetColumnIndex(0);
Text("ALPHA_DESTBLEND");
TableSetColumnIndex(1);
Text("%X (%s)", (u32)reg.alpha_dst_factor.Value(),
enum_name(reg.alpha_dst_factor.Value()).data());
Text("%X (%s)", static_cast<u32>(reg.alpha_dst_factor),
enum_name(reg.alpha_dst_factor).data());
TableNextRow();
TableSetColumnIndex(0);
Text("SEPARATE_ALPHA_BLEND");
TableSetColumnIndex(1);
Text("%X", reg.separate_alpha_blend.Value());
Text("%X", reg.separate_alpha_blend);
TableNextRow();
TableSetColumnIndex(0);
Text("ENABLE");
TableSetColumnIndex(1);
Text("%X", reg.enable.Value());
Text("%X", reg.enable);
TableNextRow();
TableSetColumnIndex(0);
Text("DISABLE_ROP3");
TableSetColumnIndex(1);
Text("%X", reg.disable_rop3.Value());
Text("%X", reg.disable_rop3);
if (begin_table) {
EndTable();
@ -537,7 +537,7 @@ void ParseBlendControl(u32 value, bool begin_table) {
}
void ParseDepthRenderControl(u32 value, bool begin_table) {
auto const reg = reinterpret_cast<Liverpool::DepthRenderControl const&>(value);
auto const reg = reinterpret_cast<AmdGpu::DepthRenderControl const&>(value);
if (!begin_table ||
BeginTable("DB_RENDER_CONTROL", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) {
@ -545,61 +545,61 @@ void ParseDepthRenderControl(u32 value, bool begin_table) {
TableSetColumnIndex(0);
Text("DEPTH_CLEAR_ENABLE");
TableSetColumnIndex(1);
Text("%X", reg.depth_clear_enable.Value());
Text("%X", reg.depth_clear_enable);
TableNextRow();
TableSetColumnIndex(0);
Text("STENCIL_CLEAR_ENABLE");
TableSetColumnIndex(1);
Text("%X", reg.stencil_clear_enable.Value());
Text("%X", reg.stencil_clear_enable);
TableNextRow();
TableSetColumnIndex(0);
Text("DEPTH_COPY");
TableSetColumnIndex(1);
Text("%X", reg.depth_clear_enable.Value());
Text("%X", reg.depth_clear_enable);
TableNextRow();
TableSetColumnIndex(0);
Text("STENCIL_COPY");
TableSetColumnIndex(1);
Text("%X", reg.stencil_copy.Value());
Text("%X", reg.stencil_copy);
TableNextRow();
TableSetColumnIndex(0);
Text("RESUMMARIZE_ENABLE");
TableSetColumnIndex(1);
Text("%X", reg.resummarize_enable.Value());
Text("%X", reg.resummarize_enable);
TableNextRow();
TableSetColumnIndex(0);
Text("STENCIL_COMPRESS_DISABLE");
TableSetColumnIndex(1);
Text("%X", reg.stencil_compress_disable.Value());
Text("%X", reg.stencil_compress_disable);
TableNextRow();
TableSetColumnIndex(0);
Text("DEPTH_COMPRESS_DISABLE");
TableSetColumnIndex(1);
Text("%X", reg.depth_compress_disable.Value());
Text("%X", reg.depth_compress_disable);
TableNextRow();
TableSetColumnIndex(0);
Text("COPY_CENTROID");
TableSetColumnIndex(1);
Text("%X", reg.copy_centroid.Value());
Text("%X", reg.copy_centroid);
TableNextRow();
TableSetColumnIndex(0);
Text("COPY_SAMPLE");
TableSetColumnIndex(1);
Text("%X", reg.copy_sample.Value());
Text("%X", reg.copy_sample);
TableNextRow();
TableSetColumnIndex(0);
Text("DECOMPRESS_ENABLE__VI");
TableSetColumnIndex(1);
Text("%X", reg.decompress_enable.Value());
Text("%X", reg.decompress_enable);
if (begin_table) {
EndTable();
@ -608,7 +608,7 @@ void ParseDepthRenderControl(u32 value, bool begin_table) {
}
void ParseDepthControl(u32 value, bool begin_table) {
auto const reg = reinterpret_cast<Liverpool::DepthControl const&>(value);
auto const reg = reinterpret_cast<AmdGpu::DepthControl const&>(value);
if (!begin_table ||
BeginTable("DB_DEPTH_CONTROL", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) {
@ -616,63 +616,63 @@ void ParseDepthControl(u32 value, bool begin_table) {
TableSetColumnIndex(0);
Text("STENCIL_ENABLE");
TableSetColumnIndex(1);
Text("%X", reg.stencil_enable.Value());
Text("%X", reg.stencil_enable);
TableNextRow();
TableSetColumnIndex(0);
Text("Z_ENABLE");
TableSetColumnIndex(1);
Text("%X", reg.depth_enable.Value());
Text("%X", reg.depth_enable);
TableNextRow();
TableSetColumnIndex(0);
Text("Z_WRITE_ENABLE");
TableSetColumnIndex(1);
Text("%X", reg.depth_write_enable.Value());
Text("%X", reg.depth_write_enable);
TableNextRow();
TableSetColumnIndex(0);
Text("DEPTH_BOUNDS_ENABLE");
TableSetColumnIndex(1);
Text("%X", reg.depth_bounds_enable.Value());
Text("%X", reg.depth_bounds_enable);
TableNextRow();
TableSetColumnIndex(0);
Text("ZFUNC");
TableSetColumnIndex(1);
Text("%X (%s)", (u32)reg.depth_func.Value(), enum_name(reg.depth_func.Value()).data());
Text("%X (%s)", static_cast<u32>(reg.depth_func), enum_name(reg.depth_func).data());
TableNextRow();
TableSetColumnIndex(0);
Text("BACKFACE_ENABLE");
TableSetColumnIndex(1);
Text("%X", reg.backface_enable.Value());
Text("%X", reg.backface_enable);
TableNextRow();
TableSetColumnIndex(0);
Text("STENCILFUNC");
TableSetColumnIndex(1);
Text("%X (%s)", (u32)reg.stencil_ref_func.Value(),
enum_name(reg.stencil_ref_func.Value()).data());
Text("%X (%s)", static_cast<u32>(reg.stencil_ref_func),
enum_name(reg.stencil_ref_func).data());
TableNextRow();
TableSetColumnIndex(0);
Text("STENCILFUNC_BF");
TableSetColumnIndex(1);
Text("%X (%s)", (u32)reg.stencil_bf_func.Value(),
enum_name(reg.stencil_bf_func.Value()).data());
Text("%X (%s)", static_cast<u32>(reg.stencil_bf_func),
enum_name(reg.stencil_bf_func).data());
TableNextRow();
TableSetColumnIndex(0);
Text("ENABLE_COLOR_WRITES_ON_DEPTH_FAIL");
TableSetColumnIndex(1);
Text("%X", reg.enable_color_writes_on_depth_fail.Value());
Text("%X", reg.enable_color_writes_on_depth_fail);
TableNextRow();
TableSetColumnIndex(0);
Text("DISABLE_COLOR_WRITES_ON_DEPTH_PASS");
TableSetColumnIndex(1);
Text("%X", reg.disable_color_writes_on_depth_pass.Value());
Text("%X", reg.disable_color_writes_on_depth_pass);
if (begin_table) {
EndTable();
@ -681,7 +681,7 @@ void ParseDepthControl(u32 value, bool begin_table) {
}
void ParseEqaa(u32 value, bool begin_table) {
auto const reg = reinterpret_cast<Liverpool::Eqaa const&>(value);
auto const reg = reinterpret_cast<AmdGpu::Eqaa const&>(value);
if (!begin_table ||
BeginTable("DB_DEPTH_CONTROL", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) {
@ -689,73 +689,73 @@ void ParseEqaa(u32 value, bool begin_table) {
TableSetColumnIndex(0);
Text("MAX_ANCHOR_SAMPLES");
TableSetColumnIndex(1);
Text("%X", reg.max_anchor_samples.Value());
Text("%X", reg.max_anchor_samples);
TableNextRow();
TableSetColumnIndex(0);
Text("PS_ITER_SAMPLES");
TableSetColumnIndex(1);
Text("%X", reg.ps_iter_samples.Value());
Text("%X", reg.ps_iter_samples);
TableNextRow();
TableSetColumnIndex(0);
Text("MASK_EXPORT_NUM_SAMPLES");
TableSetColumnIndex(1);
Text("%X", reg.mask_export_num_samples.Value());
Text("%X", reg.mask_export_num_samples);
TableNextRow();
TableSetColumnIndex(0);
Text("ALPHA_TO_MASK_NUM_SAMPLES");
TableSetColumnIndex(1);
Text("%X", reg.alpha_to_mask_num_samples.Value());
Text("%X", reg.alpha_to_mask_num_samples);
TableNextRow();
TableSetColumnIndex(0);
Text("HIGH_QUALITY_INTERSECTIONS");
TableSetColumnIndex(1);
Text("%X", reg.high_quality_intersections.Value());
Text("%X", reg.high_quality_intersections);
TableNextRow();
TableSetColumnIndex(0);
Text("INCOHERENT_EQAA_READS");
TableSetColumnIndex(1);
Text("%X", reg.incoherent_eqaa_reads.Value());
Text("%X", reg.incoherent_eqaa_reads);
TableNextRow();
TableSetColumnIndex(0);
Text("INTERPOLATE_COMP_Z");
TableSetColumnIndex(1);
Text("%X", reg.interpolate_comp_z.Value());
Text("%X", reg.interpolate_comp_z);
TableNextRow();
TableSetColumnIndex(0);
Text("INTERPOLATE_SRC_Z");
TableSetColumnIndex(1);
Text("%X", reg.interpolate_src_z.Value());
Text("%X", reg.interpolate_src_z);
TableNextRow();
TableSetColumnIndex(0);
Text("STATIC_ANCHOR_ASSOCIATIONS");
TableSetColumnIndex(1);
Text("%X", reg.static_anchor_associations.Value());
Text("%X", reg.static_anchor_associations);
TableNextRow();
TableSetColumnIndex(0);
Text("ALPHA_TO_MASK_EQAA_DISABLE");
TableSetColumnIndex(1);
Text("%X", reg.alpha_to_mask_eqaa_disable.Value());
Text("%X", reg.alpha_to_mask_eqaa_disable);
TableNextRow();
TableSetColumnIndex(0);
Text("OVERRASTERIZATION_AMOUNT");
TableSetColumnIndex(1);
Text("%X", reg.overrasterization_amount.Value());
Text("%X", reg.overrasterization_amount);
TableNextRow();
TableSetColumnIndex(0);
Text("ENABLE_POSTZ_OVERRASTERIZATION");
TableSetColumnIndex(1);
Text("%X", reg.enable_postz_overrasterization.Value());
Text("%X", reg.enable_postz_overrasterization);
if (begin_table) {
EndTable();
@ -764,7 +764,7 @@ void ParseEqaa(u32 value, bool begin_table) {
}
void ParseZInfo(u32 value, bool begin_table) {
auto const reg = reinterpret_cast<Liverpool::DepthBuffer::ZInfo const&>(value);
auto const reg = reinterpret_cast<AmdGpu::DepthBuffer::ZInfo const&>(value);
if (!begin_table ||
BeginTable("DB_DEPTH_CONTROL", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) {
@ -772,61 +772,61 @@ void ParseZInfo(u32 value, bool begin_table) {
TableSetColumnIndex(0);
Text("FORMAT");
TableSetColumnIndex(1);
Text("%X (%s)", (u32)reg.format.Value(), enum_name(reg.format.Value()).data());
Text("%X (%s)", static_cast<u32>(reg.format), enum_name(reg.format).data());
TableNextRow();
TableSetColumnIndex(0);
Text("NUM_SAMPLES");
TableSetColumnIndex(1);
Text("%X", reg.num_samples.Value());
Text("%X", reg.num_samples);
TableNextRow();
TableSetColumnIndex(0);
Text("TILE_SPLIT__CI__VI");
TableSetColumnIndex(1);
Text("%X", reg.tile_split.Value());
Text("%X", reg.tile_split);
TableNextRow();
TableSetColumnIndex(0);
Text("TILE_MODE_INDEX");
TableSetColumnIndex(1);
Text("%X", static_cast<u32>(reg.tile_mode_index.Value()));
Text("%X", static_cast<u32>(reg.tile_mode_index));
TableNextRow();
TableSetColumnIndex(0);
Text("DECOMPRESS_ON_N_ZPLANES__VI");
TableSetColumnIndex(1);
Text("%X", reg.decompress_on_n_zplanes.Value());
Text("%X", reg.decompress_on_n_zplanes);
TableNextRow();
TableSetColumnIndex(0);
Text("ALLOW_EXPCLEAR");
TableSetColumnIndex(1);
Text("%X", reg.allow_expclear.Value());
Text("%X", reg.allow_expclear);
TableNextRow();
TableSetColumnIndex(0);
Text("READ_SIZE");
TableSetColumnIndex(1);
Text("%X", reg.read_size.Value());
Text("%X", reg.read_size);
TableNextRow();
TableSetColumnIndex(0);
Text("TILE_SURFACE_ENABLE");
TableSetColumnIndex(1);
Text("%X", reg.tile_surface_en.Value());
Text("%X", reg.tile_surface_enable);
TableNextRow();
TableSetColumnIndex(0);
Text("CLEAR_DISALLOWED__VI");
TableSetColumnIndex(1);
Text("%X", reg.clear_disallowed.Value());
Text("%X", reg.clear_disallowed);
TableNextRow();
TableSetColumnIndex(0);
Text("ZRANGE_PRECISION");
TableSetColumnIndex(1);
Text("%X", reg.zrange_precision.Value());
Text("%X", reg.zrange_precision);
if (begin_table) {
EndTable();
@ -1515,4 +1515,4 @@ void CmdListViewer::Draw(bool only_batches_view, CmdListFilter& filter) {
PopID();
}
} // namespace Core::Devtools::Widget
} // namespace Core::Devtools::Widget

View File

@ -5,14 +5,13 @@
#pragma once
#include <memory>
#include <vector>
#include <imgui.h>
#include "common.h"
#include "common/types.h"
#include "imgui_memory_editor.h"
#include "reg_view.h"
#include "core/devtools/widget/imgui_memory_editor.h"
#include "core/devtools/widget/reg_view.h"
namespace AmdGpu {
union PM4Type3Header;

View File

@ -16,7 +16,7 @@ using magic_enum::enum_name;
namespace Core::Devtools::Widget {
void RegPopup::DrawColorBuffer(const AmdGpu::Liverpool::ColorBuffer& buffer) {
void RegPopup::DrawColorBuffer(const AmdGpu::ColorBuffer& buffer) {
if (BeginTable("COLOR_BUFFER", 2, ImGuiTableFlags_Borders)) {
TableNextRow();
@ -36,7 +36,7 @@ void RegPopup::DrawColorBuffer(const AmdGpu::Liverpool::ColorBuffer& buffer) {
if (TreeNode("Color0Info")) {
TableNextRow();
TableNextColumn();
ParseColor0Info(buffer.info.u32all, false);
ParseColor0Info(buffer.info.raw, false);
TreePop();
}
@ -45,7 +45,7 @@ void RegPopup::DrawColorBuffer(const AmdGpu::Liverpool::ColorBuffer& buffer) {
if (TreeNode("Color0Attrib")) {
TableNextRow();
TableNextColumn();
ParseColor0Attrib(buffer.attrib.u32all, false);
ParseColor0Attrib(buffer.attrib.raw, false);
TreePop();
}
@ -75,9 +75,8 @@ void RegPopup::DrawColorBuffer(const AmdGpu::Liverpool::ColorBuffer& buffer) {
}
}
void RegPopup::DrawDepthBuffer(const DepthBuffer& depth_data) {
const auto& [depth_buffer, depth_control] = depth_data;
void RegPopup::DrawDepthBuffer(const AmdGpu::DepthBuffer& buffer,
const AmdGpu::DepthControl control) {
SeparatorText("Depth buffer");
if (BeginTable("DEPTH_BUFFER", 2, ImGuiTableFlags_Borders)) {
@ -85,31 +84,31 @@ void RegPopup::DrawDepthBuffer(const DepthBuffer& depth_data) {
// clang-format off
DrawValueRowList(
"Z_INFO.FORMAT", depth_buffer.z_info.format,
"Z_INFO.NUM_SAMPLES", depth_buffer.z_info.num_samples,
"Z_INFO.TILE_SPLIT", depth_buffer.z_info.tile_split,
"Z_INFO.TILE_MODE_INDEX", depth_buffer.z_info.tile_mode_index,
"Z_INFO.DECOMPRESS_ON_N_ZPLANES", depth_buffer.z_info.decompress_on_n_zplanes,
"Z_INFO.ALLOW_EXPCLEAR", depth_buffer.z_info.allow_expclear,
"Z_INFO.READ_SIZE", depth_buffer.z_info.read_size,
"Z_INFO.TILE_SURFACE_EN", depth_buffer.z_info.tile_surface_en,
"Z_INFO.CLEAR_DISALLOWED", depth_buffer.z_info.clear_disallowed,
"Z_INFO.ZRANGE_PRECISION", depth_buffer.z_info.zrange_precision,
"STENCIL_INFO.FORMAT", depth_buffer.stencil_info.format,
"Z_READ_BASE", depth_buffer.z_read_base,
"STENCIL_READ_BASE", depth_buffer.stencil_read_base,
"Z_WRITE_BASE", depth_buffer.z_write_base,
"STENCIL_WRITE_BASE", depth_buffer.stencil_write_base,
"DEPTH_SIZE.PITCH_TILE_MAX", depth_buffer.depth_size.pitch_tile_max,
"DEPTH_SIZE.HEIGHT_TILE_MAX", depth_buffer.depth_size.height_tile_max,
"DEPTH_SLICE.TILE_MAX", depth_buffer.depth_slice.tile_max,
"Pitch()", depth_buffer.Pitch(),
"Height()", depth_buffer.Height(),
"DepthAddress()", depth_buffer.DepthAddress(),
"StencilAddress()", depth_buffer.StencilAddress(),
"NumSamples()", depth_buffer.NumSamples(),
"NumBits()", depth_buffer.NumBits(),
"GetDepthSliceSize()", depth_buffer.GetDepthSliceSize()
"Z_INFO.FORMAT", buffer.z_info.format,
"Z_INFO.NUM_SAMPLES", buffer.z_info.num_samples,
"Z_INFO.TILE_SPLIT", buffer.z_info.tile_split,
"Z_INFO.TILE_MODE_INDEX", buffer.z_info.tile_mode_index,
"Z_INFO.DECOMPRESS_ON_N_ZPLANES", buffer.z_info.decompress_on_n_zplanes,
"Z_INFO.ALLOW_EXPCLEAR", buffer.z_info.allow_expclear,
"Z_INFO.READ_SIZE", buffer.z_info.read_size,
"Z_INFO.TILE_SURFACE_ENABLE", buffer.z_info.tile_surface_enable,
"Z_INFO.CLEAR_DISALLOWED", buffer.z_info.clear_disallowed,
"Z_INFO.ZRANGE_PRECISION", buffer.z_info.zrange_precision,
"STENCIL_INFO.FORMAT", buffer.stencil_info.format,
"Z_READ_BASE", buffer.z_read_base,
"STENCIL_READ_BASE", buffer.stencil_read_base,
"Z_WRITE_BASE", buffer.z_write_base,
"STENCIL_WRITE_BASE", buffer.stencil_write_base,
"DEPTH_SIZE.PITCH_TILE_MAX", buffer.depth_size.pitch_tile_max,
"DEPTH_SIZE.HEIGHT_TILE_MAX", buffer.depth_size.height_tile_max,
"DEPTH_SLICE.TILE_MAX", buffer.depth_slice.tile_max,
"Pitch()", buffer.Pitch(),
"Height()", buffer.Height(),
"DepthAddress()", buffer.DepthAddress(),
"StencilAddress()", buffer.StencilAddress(),
"NumSamples()", buffer.NumSamples(),
"NumBits()", buffer.NumBits(),
"GetDepthSliceSize()", buffer.GetDepthSliceSize()
);
// clang-format on
@ -121,16 +120,16 @@ void RegPopup::DrawDepthBuffer(const DepthBuffer& depth_data) {
// clang-format off
DrawValueRowList(
"STENCIL_ENABLE", depth_control.stencil_enable,
"DEPTH_ENABLE", depth_control.depth_enable,
"DEPTH_WRITE_ENABLE", depth_control.depth_write_enable,
"DEPTH_BOUNDS_ENABLE", depth_control.depth_bounds_enable,
"DEPTH_FUNC", depth_control.depth_func,
"BACKFACE_ENABLE", depth_control.backface_enable,
"STENCIL_FUNC", depth_control.stencil_ref_func,
"STENCIL_FUNC_BF", depth_control.stencil_bf_func,
"ENABLE_COLOR_WRITES_ON_DEPTH_FAIL", depth_control.enable_color_writes_on_depth_fail,
"DISABLE_COLOR_WRITES_ON_DEPTH_PASS", depth_control.disable_color_writes_on_depth_pass
"STENCIL_ENABLE", control.stencil_enable,
"DEPTH_ENABLE", control.depth_enable,
"DEPTH_WRITE_ENABLE", control.depth_write_enable,
"DEPTH_BOUNDS_ENABLE", control.depth_bounds_enable,
"DEPTH_FUNC", control.depth_func,
"BACKFACE_ENABLE", control.backface_enable,
"STENCIL_FUNC", control.stencil_ref_func,
"STENCIL_FUNC_BF", control.stencil_bf_func,
"ENABLE_COLOR_WRITES_ON_DEPTH_FAIL", control.enable_color_writes_on_depth_fail,
"DISABLE_COLOR_WRITES_ON_DEPTH_PASS", control.disable_color_writes_on_depth_pass
);
// clang-format on
@ -143,15 +142,17 @@ RegPopup::RegPopup() {
id = unique_id++;
}
void RegPopup::SetData(const std::string& base_title, AmdGpu::Liverpool::ColorBuffer color_buffer,
u32 cb_id) {
this->data = color_buffer;
void RegPopup::SetData(const std::string& base_title, AmdGpu::ColorBuffer color_buffer, u32 cb_id) {
this->type = DataType::Color;
this->color = color_buffer;
this->title = fmt::format("{}/CB #{}", base_title, cb_id);
}
void RegPopup::SetData(const std::string& base_title, AmdGpu::Liverpool::DepthBuffer depth_buffer,
AmdGpu::Liverpool::DepthControl depth_control) {
this->data = std::make_tuple(depth_buffer, depth_control);
void RegPopup::SetData(const std::string& base_title, AmdGpu::DepthBuffer depth_buffer,
AmdGpu::DepthControl depth_control) {
this->type = DataType::Depth;
this->depth.buffer = depth_buffer;
this->depth.control = depth_control;
this->title = fmt::format("{}/Depth", base_title);
}
@ -161,10 +162,10 @@ void RegPopup::SetPos(ImVec2 pos, bool auto_resize) {
Begin(name, &open, flags);
SetWindowPos(pos);
if (auto_resize) {
if (std::holds_alternative<AmdGpu::Liverpool::ColorBuffer>(data)) {
if (type == DataType::Color) {
SetWindowSize({365.0f, 520.0f});
KeepWindowInside();
} else if (std::holds_alternative<DepthBuffer>(data)) {
} else if (type == DataType::Depth) {
SetWindowSize({404.0f, 543.0f});
KeepWindowInside();
}
@ -182,10 +183,10 @@ void RegPopup::Draw() {
moved = true;
}
if (const auto* buffer = std::get_if<AmdGpu::Liverpool::ColorBuffer>(&data)) {
DrawColorBuffer(*buffer);
} else if (const auto* depth_data = std::get_if<DepthBuffer>(&data)) {
DrawDepthBuffer(*depth_data);
if (type == DataType::Color) {
DrawColorBuffer(color);
} else if (type == DataType::Depth) {
DrawDepthBuffer(depth.buffer, depth.control);
}
}
End();

View File

@ -3,12 +3,10 @@
#pragma once
#include <variant>
#include <imgui.h>
#include "common/types.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/amdgpu/regs_color.h"
#include "video_core/amdgpu/regs_depth.h"
namespace Core::Devtools::Widget {
@ -16,15 +14,24 @@ class RegPopup {
int id;
ImGuiWindowFlags flags{ImGuiWindowFlags_NoSavedSettings};
using DepthBuffer = std::tuple<AmdGpu::Liverpool::DepthBuffer, AmdGpu::Liverpool::DepthControl>;
ImVec2 last_pos;
std::variant<AmdGpu::Liverpool::ColorBuffer, DepthBuffer> data;
AmdGpu::ColorBuffer color;
struct {
AmdGpu::DepthBuffer buffer;
AmdGpu::DepthControl control;
} depth;
enum class DataType {
None = 0,
Color = 1,
Depth = 2,
};
DataType type{};
std::string title{};
static void DrawColorBuffer(const AmdGpu::Liverpool::ColorBuffer& buffer);
static void DrawColorBuffer(const AmdGpu::ColorBuffer& buffer);
static void DrawDepthBuffer(const DepthBuffer& depth_data);
static void DrawDepthBuffer(const AmdGpu::DepthBuffer& buffer,
const AmdGpu::DepthControl control);
public:
bool open = false;
@ -32,11 +39,10 @@ public:
RegPopup();
void SetData(const std::string& base_title, AmdGpu::Liverpool::ColorBuffer color_buffer,
u32 cb_id);
void SetData(const std::string& base_title, AmdGpu::ColorBuffer color_buffer, u32 cb_id);
void SetData(const std::string& base_title, AmdGpu::Liverpool::DepthBuffer depth_buffer,
AmdGpu::Liverpool::DepthControl depth_control);
void SetData(const std::string& base_title, AmdGpu::DepthBuffer depth_buffer,
AmdGpu::DepthControl depth_control);
void SetPos(ImVec2 pos, bool auto_resize = false);

View File

@ -29,7 +29,7 @@ namespace Core::Devtools::Widget {
void RegView::ProcessShader(int shader_id) {
std::vector<u32> shader_code;
Vulkan::Liverpool::UserData user_data;
AmdGpu::UserData user_data;
if (data.is_compute) {
shader_code = data.cs_data.code;
user_data = data.cs_data.cs_program.user_data;
@ -129,7 +129,7 @@ void RegView::DrawGraphicsRegs() {
}
};
for (int cb = 0; cb < AmdGpu::Liverpool::NumColorBuffers; ++cb) {
for (int cb = 0; cb < AmdGpu::NUM_COLOR_BUFFERS; ++cb) {
PushID(cb);
TableNextRow();
@ -246,8 +246,7 @@ void RegView::SetData(DebugStateType::RegDump _data, const std::string& base_tit
default_reg_popup.SetData(title, regs.depth_buffer, regs.depth_control);
default_reg_popup.open = true;
}
} else if (last_selected_cb >= 0 &&
last_selected_cb < AmdGpu::Liverpool::NumColorBuffers) {
} else if (last_selected_cb >= 0 && last_selected_cb < AmdGpu::NUM_COLOR_BUFFERS) {
const auto& buffer = regs.color_buffers[last_selected_cb];
const bool has_cb = buffer && regs.color_target_mask.GetMask(last_selected_cb);
if (has_cb) {
@ -348,7 +347,7 @@ void RegView::Draw() {
} else {
shader->hex_view.DrawContents(shader->user_data.data(),
shader->user_data.size() *
sizeof(Vulkan::Liverpool::UserData::value_type));
sizeof(AmdGpu::UserData::value_type));
}
}
End();
@ -392,4 +391,4 @@ void RegView::Draw() {
}
}
} // namespace Core::Devtools::Widget
} // namespace Core::Devtools::Widget

View File

@ -2,17 +2,18 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "core/debug_state.h"
#include "imgui_memory_editor.h"
#include "reg_popup.h"
#include "text_editor.h"
#include "core/devtools/widget/imgui_memory_editor.h"
#include "core/devtools/widget/reg_popup.h"
#include "core/devtools/widget/text_editor.h"
namespace Core::Devtools::Widget {
struct ShaderCache {
MemoryEditor hex_view;
TextEditor dis_view;
Vulkan::Liverpool::UserData user_data;
AmdGpu::UserData user_data;
};
class RegView {
@ -54,4 +55,4 @@ public:
void Draw();
};
} // namespace Core::Devtools::Widget
} // namespace Core::Devtools::Widget

View File

@ -10,6 +10,7 @@
#include "core/libraries/videoout/driver.h"
#include "core/libraries/videoout/videoout_error.h"
#include "imgui/renderer/imgui_core.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/renderer_vulkan/vk_presenter.h"
extern std::unique_ptr<Vulkan::Presenter> presenter;

View File

@ -5,6 +5,8 @@
#include <type_traits>
#include <utility>
#include <vector>
#include <magic_enum/magic_enum.hpp>
#include "common/assert.h"
#include "common/func_traits.h"
#include "shader_recompiler/backend/spirv/emit_spirv.h"
@ -14,7 +16,6 @@
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/types.h"
namespace Shader::Backend::SPIRV {
namespace {
@ -136,7 +137,7 @@ Id TypeId(const EmitContext& ctx, IR::Type type) {
case IR::Type::U32:
return ctx.U32[1];
default:
throw NotImplementedException("Phi node type {}", type);
UNREACHABLE_MSG("Phi node type {}", type);
}
}
@ -224,7 +225,7 @@ spv::ExecutionMode ExecutionMode(AmdGpu::TessellationType primitive) {
case AmdGpu::TessellationType::Quad:
return spv::ExecutionMode::Quads;
}
UNREACHABLE_MSG("Tessellation primitive {}", primitive);
UNREACHABLE_MSG("Tessellation primitive {}", magic_enum::enum_name(primitive));
}
spv::ExecutionMode ExecutionMode(AmdGpu::TessellationPartitioning spacing) {
@ -238,7 +239,7 @@ spv::ExecutionMode ExecutionMode(AmdGpu::TessellationPartitioning spacing) {
default:
break;
}
UNREACHABLE_MSG("Tessellation spacing {}", spacing);
UNREACHABLE_MSG("Tessellation spacing {}", magic_enum::enum_name(spacing));
}
void SetupCapabilities(const Info& info, const Profile& profile, const RuntimeInfo& runtime_info,
@ -482,14 +483,12 @@ Id EmitPhi(EmitContext& ctx, IR::Inst* inst) {
void EmitVoid(EmitContext&) {}
Id EmitIdentity(EmitContext& ctx, const IR::Value& value) {
throw NotImplementedException("Forward identity declaration");
UNREACHABLE_MSG("Forward identity declaration");
}
Id EmitConditionRef(EmitContext& ctx, const IR::Value& value) {
const Id id{ctx.Def(value)};
if (!Sirit::ValidId(id)) {
throw NotImplementedException("Forward identity declaration");
}
ASSERT_MSG(Sirit::ValidId(id), "Forward identity declaration");
return id;
}
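
The ExecutionMode hunks above switch from passing the enum values straight into UNREACHABLE_MSG to passing magic_enum::enum_name(...), so the message carries the enumerator's name without needing a custom fmt formatter for the enum. A minimal stand-alone example of that pattern; the enum here is a stand-in, not the project's AmdGpu::TessellationType.

```cpp
#include <fmt/format.h>
#include <magic_enum/magic_enum.hpp>

enum class TessellationType { Isoline, Triangle, Quad }; // illustrative enumerators

int main() {
    // enum_name returns a std::string_view holding the enumerator's identifier,
    // which fmt can format directly.
    fmt::print("Tessellation primitive {}\n",
               magic_enum::enum_name(TessellationType::Quad)); // prints "Quad"
}
```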

View File

@ -397,13 +397,11 @@ Id EmitImageAtomicFMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords
}
Id EmitImageAtomicInc32(EmitContext&, IR::Inst*, u32, Id, Id) {
// TODO: This is not yet implemented
throw NotImplementedException("SPIR-V Instruction");
UNREACHABLE_MSG("SPIR-V Instruction");
}
Id EmitImageAtomicDec32(EmitContext&, IR::Inst*, u32, Id, Id) {
// TODO: This is not yet implemented
throw NotImplementedException("SPIR-V Instruction");
UNREACHABLE_MSG("SPIR-V Instruction");
}
Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {

View File

@ -98,11 +98,11 @@ void EmitEmitPrimitive(EmitContext& ctx) {
}
void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) {
throw NotImplementedException("Geometry streams");
UNREACHABLE_MSG("Geometry streams");
}
void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) {
throw NotImplementedException("Geometry streams");
UNREACHABLE_MSG("Geometry streams");
}
void EmitDebugPrint(EmitContext& ctx, IR::Inst* inst, Id fmt, Id arg0, Id arg1, Id arg2, Id arg3) {

View File

@ -6,7 +6,6 @@
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
#include "shader_recompiler/frontend/fetch_shader.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/types.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include <boost/container/static_vector.hpp>
@ -109,7 +108,7 @@ Id EmitContext::Def(const IR::Value& value) {
case IR::Type::StringLiteral:
return String(value.StringLiteral());
default:
throw NotImplementedException("Immediate type {}", value.Type());
UNREACHABLE_MSG("Immediate type {}", value.Type());
}
}
@ -786,7 +785,7 @@ EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_writte
void EmitContext::DefineBuffers() {
for (const auto& desc : info.buffers) {
const auto buf_sharp = desc.GetSharp(info);
const bool is_storage = desc.IsStorage(buf_sharp, profile);
const bool is_storage = desc.IsStorage(buf_sharp);
// Set indexes for special buffers.
if (desc.buffer_type == BufferType::Flatbuf) {
@ -921,7 +920,7 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
default:
break;
}
throw InvalidArgument("Invalid texture type {}", type);
UNREACHABLE_MSG("Invalid texture type {}", type);
}
void EmitContext::DefineImagesAndSamplers() {

View File

@ -1,64 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <exception>
#include <string>
#include <utility>
#include <fmt/format.h>
namespace Shader {
class Exception : public std::exception {
public:
explicit Exception(std::string message) noexcept : err_message{std::move(message)} {}
[[nodiscard]] const char* what() const noexcept override {
return err_message.c_str();
}
void Prepend(std::string_view prepend) {
err_message.insert(0, prepend);
}
void Append(std::string_view append) {
err_message += append;
}
private:
std::string err_message;
};
class LogicError : public Exception {
public:
template <typename... Args>
explicit LogicError(const char* message, Args&&... args)
: Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {}
};
class RuntimeError : public Exception {
public:
template <typename... Args>
explicit RuntimeError(const char* message, Args&&... args)
: Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {}
};
class NotImplementedException : public Exception {
public:
template <typename... Args>
explicit NotImplementedException(const char* message, Args&&... args)
: Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {
Append(" is not implemented");
}
};
class InvalidArgument : public Exception {
public:
template <typename... Args>
explicit InvalidArgument(const char* message, Args&&... args)
: Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {}
};
} // namespace Shader
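
This header is deleted outright: the commit replaces the shader recompiler's exception hierarchy (NotImplementedException, InvalidArgument, ...) with the assertion macros already used elsewhere in the project (UNREACHABLE_MSG, ASSERT_MSG from common/assert.h), as the surrounding hunks show. The project's real macros are not reproduced in this diff; the stand-in below only illustrates the semantics the call sites rely on: format a message, then treat the path as fatal instead of throwing.

```cpp
#include <cstdio>
#include <cstdlib>
#include <fmt/format.h>

// Simplified stand-in for UNREACHABLE_MSG -- the actual macro in common/assert.h
// is assumed to do proper logging and debugger integration.
#define UNREACHABLE_MSG(...)                                                    \
    do {                                                                        \
        fmt::print(stderr, "Unreachable code: {}\n", fmt::format(__VA_ARGS__)); \
        std::abort();                                                           \
    } while (0)
```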

View File

@ -191,7 +191,7 @@ std::string DumpExpr(const Statement* stmt) {
void SanitizeNoBreaks(const Tree& tree) {
if (std::ranges::find(tree, StatementType::Break, &Statement::type) != tree.end()) {
throw NotImplementedException("Capturing statement with break nodes");
UNREACHABLE_MSG("Capturing statement with break nodes");
}
}
@ -584,7 +584,7 @@ private:
case StatementType::Variable:
return ir.GetGotoVariable(stmt.id);
default:
throw NotImplementedException("Statement type {}", u32(stmt.type));
UNREACHABLE_MSG("Statement type {}", u32(stmt.type));
}
}

View File

@ -4,22 +4,22 @@
#include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/ir/position.h"
#include "shader_recompiler/ir/reinterpret.h"
#include "shader_recompiler/profile.h"
#include "shader_recompiler/runtime_info.h"
namespace Shader::Gcn {
static AmdGpu::NumberFormat NumberFormatCompressed(
AmdGpu::Liverpool::ShaderExportFormat export_format) {
static AmdGpu::NumberFormat NumberFormatCompressed(AmdGpu::ShaderExportFormat export_format) {
switch (export_format) {
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_FP16:
case AmdGpu::ShaderExportFormat::ABGR_FP16:
return AmdGpu::NumberFormat::Float;
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_UNORM16:
case AmdGpu::ShaderExportFormat::ABGR_UNORM16:
return AmdGpu::NumberFormat::Unorm;
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_SNORM16:
case AmdGpu::ShaderExportFormat::ABGR_SNORM16:
return AmdGpu::NumberFormat::Snorm;
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_UINT16:
case AmdGpu::ShaderExportFormat::ABGR_UINT16:
return AmdGpu::NumberFormat::Uint;
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_SINT16:
case AmdGpu::ShaderExportFormat::ABGR_SINT16:
return AmdGpu::NumberFormat::Sint;
default:
UNREACHABLE_MSG("Unimplemented compressed export format {}",
@ -27,18 +27,18 @@ static AmdGpu::NumberFormat NumberFormatCompressed(
}
}
static u32 MaskFromExportFormat(u8 mask, AmdGpu::Liverpool::ShaderExportFormat export_format) {
static u32 MaskFromExportFormat(u8 mask, AmdGpu::ShaderExportFormat export_format) {
switch (export_format) {
case AmdGpu::Liverpool::ShaderExportFormat::R_32:
case AmdGpu::ShaderExportFormat::R_32:
// Red only
return mask & 1;
case AmdGpu::Liverpool::ShaderExportFormat::GR_32:
case AmdGpu::ShaderExportFormat::GR_32:
// Red and Green only
return mask & 3;
case AmdGpu::Liverpool::ShaderExportFormat::AR_32:
case AmdGpu::ShaderExportFormat::AR_32:
// Red and Alpha only
return mask & 9;
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_32:
case AmdGpu::ShaderExportFormat::ABGR_32:
// All components
return mask;
default:
@ -59,7 +59,7 @@ void Translator::ExportRenderTarget(const GcnInst& inst) {
}
const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];
if (color_buffer.export_format == AmdGpu::Liverpool::ShaderExportFormat::Zero || exp.en == 0) {
if (color_buffer.export_format == AmdGpu::ShaderExportFormat::Zero || exp.en == 0) {
// No export
return;
}

View File

@ -11,9 +11,9 @@
#include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/ir/reinterpret.h"
#include "shader_recompiler/profile.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/amdgpu/types.h"
#define MAGIC_ENUM_RANGE_MIN 0
#define MAGIC_ENUM_RANGE_MAX 1515

View File

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/profile.h"
namespace Shader::Gcn {

View File

@ -5,7 +5,6 @@
#include <span>
#include <vector>
#include <boost/container/small_vector.hpp>
#include <boost/container/static_vector.hpp>
#include "common/assert.h"
#include "common/types.h"
@ -17,110 +16,11 @@
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/ir/type.h"
#include "shader_recompiler/params.h"
#include "shader_recompiler/profile.h"
#include "shader_recompiler/resource.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/resource.h"
namespace Shader {
static constexpr size_t NumUserDataRegs = 16;
static constexpr size_t NumImages = 64;
static constexpr size_t NumBuffers = 40;
static constexpr size_t NumSamplers = 16;
static constexpr size_t NumFMasks = 8;
enum class BufferType : u32 {
Guest,
Flatbuf,
BdaPagetable,
FaultBuffer,
GdsBuffer,
SharedMemory,
};
struct Info;
struct BufferResource {
u32 sharp_idx;
IR::Type used_types;
AmdGpu::Buffer inline_cbuf;
BufferType buffer_type;
u8 instance_attrib{};
bool is_written{};
bool is_formatted{};
bool IsSpecial() const noexcept {
return buffer_type != BufferType::Guest;
}
bool IsStorage(const AmdGpu::Buffer& buffer, const Profile& profile) const noexcept {
// When using uniform buffers, a size is required at compilation time, so we need to
// either compile a lot of shader specializations to handle each size or just force it to
// the maximum possible size always. However, for some vendors the shader-supplied size is
// used for bounds checking uniform buffer accesses, so the latter would effectively turn
// off buffer robustness behavior. Instead, force storage buffers which are bounds checked
// using the actual buffer size. We are assuming the performance hit from this is
// acceptable.
return true; // buffer.GetSize() > profile.max_ubo_size || is_written;
}
[[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept;
};
using BufferResourceList = boost::container::small_vector<BufferResource, NumBuffers>;
struct ImageResource {
u32 sharp_idx;
bool is_depth{};
bool is_atomic{};
bool is_array{};
bool is_written{};
bool is_r128{};
[[nodiscard]] constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept;
};
using ImageResourceList = boost::container::small_vector<ImageResource, NumImages>;
struct SamplerResource {
u32 sharp_idx;
AmdGpu::Sampler inline_sampler;
u32 is_inline_sampler : 1;
u32 associated_image : 4;
u32 disable_aniso : 1;
constexpr AmdGpu::Sampler GetSharp(const Info& info) const noexcept;
};
using SamplerResourceList = boost::container::small_vector<SamplerResource, NumSamplers>;
struct FMaskResource {
u32 sharp_idx;
constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept;
};
using FMaskResourceList = boost::container::small_vector<FMaskResource, NumFMasks>;
struct PushData {
static constexpr u32 XOffsetIndex = 0;
static constexpr u32 YOffsetIndex = 1;
static constexpr u32 XScaleIndex = 2;
static constexpr u32 YScaleIndex = 3;
static constexpr u32 UdRegsIndex = 4;
static constexpr u32 BufOffsetIndex = UdRegsIndex + NumUserDataRegs / 4;
float xoffset;
float yoffset;
float xscale;
float yscale;
std::array<u32, NumUserDataRegs> ud_regs;
std::array<u8, NumBuffers> buf_offsets;
void AddOffset(u32 binding, u32 offset) {
ASSERT(offset < 256 && binding < buf_offsets.size());
buf_offsets[binding] = offset;
}
};
static_assert(sizeof(PushData) <= 128,
"PushData size is greater than minimum size guaranteed by Vulkan spec");
enum class Qualifier : u8 {
None,
Smooth,
@ -235,7 +135,7 @@ struct Info {
Dynamic = 1 << 1,
};
ReadConstType readconst_types{};
bool uses_dma{false};
bool uses_dma{};
explicit Info(Stage stage_, LogicalStage l_stage_, ShaderParams params)
: stage{stage_}, l_stage{l_stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},
@ -262,7 +162,7 @@ struct Info {
u32 mask = ud_mask.mask;
while (mask) {
const u32 index = std::countr_zero(mask);
ASSERT(bnd.user_data < NumUserDataRegs && index < NumUserDataRegs);
ASSERT(bnd.user_data < NUM_USER_DATA_REGS && index < NUM_USER_DATA_REGS);
mask &= ~(1U << index);
push.ud_regs[bnd.user_data++] = user_data[index];
}
@ -276,9 +176,8 @@ struct Info {
void RefreshFlatBuf() {
flattened_ud_buf.resize(srt_info.flattened_bufsize_dw);
ASSERT(user_data.size() <= NumUserDataRegs);
ASSERT(user_data.size() <= NUM_USER_DATA_REGS);
std::memcpy(flattened_ud_buf.data(), user_data.data(), user_data.size_bytes());
// Run the JIT program to walk the SRT and write the leaves to a flat buffer
if (srt_info.walker_func) {
srt_info.walker_func(user_data.data(), flattened_ud_buf.data());
}
@ -296,42 +195,4 @@ struct Info {
};
DECLARE_ENUM_FLAG_OPERATORS(Info::ReadConstType);
constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept {
const auto buffer = inline_cbuf ? inline_cbuf : info.ReadUdSharp<AmdGpu::Buffer>(sharp_idx);
if (!buffer.Valid()) {
LOG_DEBUG(Render, "Encountered invalid buffer sharp");
return AmdGpu::Buffer::Null();
}
return buffer;
}
constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept {
AmdGpu::Image image{};
if (!is_r128) {
image = info.ReadUdSharp<AmdGpu::Image>(sharp_idx);
} else {
const auto raw = info.ReadUdSharp<u128>(sharp_idx);
std::memcpy(&image, &raw, sizeof(raw));
}
if (!image.Valid()) {
LOG_DEBUG(Render_Vulkan, "Encountered invalid image sharp");
image = AmdGpu::Image::Null(is_depth);
} else if (is_depth) {
const auto data_fmt = image.GetDataFmt();
if (data_fmt != AmdGpu::DataFormat::Format16 && data_fmt != AmdGpu::DataFormat::Format32) {
LOG_DEBUG(Render_Vulkan, "Encountered non-depth image used with depth instruction!");
image = AmdGpu::Image::Null(true);
}
}
return image;
}
constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noexcept {
return is_inline_sampler ? inline_sampler : info.ReadUdSharp<AmdGpu::Sampler>(sharp_idx);
}
constexpr AmdGpu::Image FMaskResource::GetSharp(const Info& info) const noexcept {
return info.ReadUdSharp<AmdGpu::Image>(sharp_idx);
}
} // namespace Shader

View File

@ -30,7 +30,7 @@ Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op,
const auto result_it{instructions.insert(insertion_point, *inst)};
if (inst->NumArgs() != args.size()) {
throw InvalidArgument("Invalid number of arguments {} in {}", args.size(), op);
UNREACHABLE_MSG("Invalid number of arguments {} in {}", args.size(), op);
}
std::ranges::for_each(args, [inst, index = size_t{0}](const Value& arg) mutable {
inst->SetArg(index, arg);

View File

@ -5,8 +5,6 @@
#include <source_location>
#include <boost/container/small_vector.hpp>
#include "common/assert.h"
#include "ir_emitter.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/ir/debug_print.h"
#include "shader_recompiler/ir/ir_emitter.h"
#include "shader_recompiler/ir/opcodes.h"
@ -196,7 +194,7 @@ U1 IREmitter::Condition(IR::Condition cond) {
case IR::Condition::Execnz:
return GetExec();
default:
throw NotImplementedException("");
UNREACHABLE_MSG("");
}
}
@ -1828,7 +1826,7 @@ U32U64 IREmitter::ConvertFToS(size_t bitsize, const F32F64& value) {
default:
break;
}
throw NotImplementedException("Invalid destination bitsize {}", bitsize);
UNREACHABLE_MSG("Invalid destination bitsize {}", bitsize);
}
U32U64 IREmitter::ConvertFToU(size_t bitsize, const F32F64& value) {
@ -1929,7 +1927,7 @@ U8U16U32U64 IREmitter::UConvert(size_t result_bitsize, const U8U16U32U64& value)
default:
break;
}
throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
UNREACHABLE_MSG("Conversion from {} to {} bits", value.Type(), result_bitsize);
}
U8U16U32U64 IR::IREmitter::SConvert(size_t result_bitsize, const U8U16U32U64& value) {
@ -1946,8 +1944,7 @@ U8U16U32U64 IR::IREmitter::SConvert(size_t result_bitsize, const U8U16U32U64& va
default:
break;
}
throw NotImplementedException("Signed Conversion from {} to {} bits", value.Type(),
result_bitsize);
UNREACHABLE_MSG("Signed Conversion from {} to {} bits", value.Type(), result_bitsize);
}
F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) {
@ -1978,7 +1975,7 @@ F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) {
default:
break;
}
throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
UNREACHABLE_MSG("Conversion from {} to {} bits", value.Type(), result_bitsize);
}
Value IREmitter::ImageAtomicIAdd(const Value& handle, const Value& coords, const Value& value,

View File

@ -2,10 +2,8 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <any>
#include <memory>
#include "shader_recompiler/exception.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/type.h"
#include "shader_recompiler/ir/value.h"
@ -21,9 +19,7 @@ Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} {
}
Inst::Inst(const Inst& base) : op{base.op}, flags{base.flags} {
if (base.op == Opcode::Phi) {
throw NotImplementedException("Copying phi node");
}
ASSERT_MSG(base.op != Opcode::Phi, "Copying phi node");
std::construct_at(&args);
const size_t num_args{base.NumArgs()};
for (size_t index = 0; index < num_args; ++index) {
@ -150,7 +146,7 @@ IR::Type Inst::Type() const {
void Inst::SetArg(size_t index, Value value) {
if (index >= NumArgs()) {
throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op);
UNREACHABLE_MSG("Out of bounds argument index {} in opcode {}", index, op);
}
const IR::Value arg{Arg(index)};
if (!arg.IsImmediate()) {
@ -171,7 +167,7 @@ Block* Inst::PhiBlock(size_t index) const {
UNREACHABLE_MSG("{} is not a Phi instruction", op);
}
if (index >= phi_args.size()) {
throw InvalidArgument("Out of bounds argument index {} in phi instruction");
UNREACHABLE_MSG("Out of bounds argument index {} in phi instruction");
}
return phi_args[index].first;
}

View File

@ -205,7 +205,7 @@ static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {
}
info.srt_info.walker_func = c.getCurr<PFN_SrtWalker>();
pass_info.dst_off_dw = NumUserDataRegs;
pass_info.dst_off_dw = NUM_USER_DATA_REGS;
ASSERT(pass_info.dst_off_dw == info.srt_info.flattened_bufsize_dw);
for (const auto& [sgpr_base, root] : pass_info.srt_roots) {

View File

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <unordered_map>
#include <queue>
#include "shader_recompiler/ir/program.h"
namespace Shader::Optimization {

View File

@ -96,7 +96,7 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
if (info.gs_copy_data.output_vertices &&
info.gs_copy_data.output_vertices != output_vertices) {
ASSERT_MSG(output_vertices > info.gs_copy_data.output_vertices &&
gs_info.mode == AmdGpu::Liverpool::GsMode::Mode::ScenarioG,
gs_info.mode == AmdGpu::GsScenario::ScenarioG,
"Invalid geometry shader vertex configuration scenario = {}, max_vert_out = "
"{}, output_vertices = {}",
u32(gs_info.mode), output_vertices, info.gs_copy_data.output_vertices);

View File

@ -3,6 +3,7 @@
#include "common/config.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/profile.h"
#include "video_core/buffer_cache/buffer_cache.h"
namespace Shader::Optimization {

View File

@ -13,7 +13,6 @@
#include <boost/intrusive/list.hpp>
#include "common/assert.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/opcodes.h"
#include "shader_recompiler/ir/patch.h"
@ -105,7 +104,7 @@ public:
explicit TypedValue(const Value& value) : Value(value) {
if ((value.Type() & type_) == IR::Type::Void) {
throw InvalidArgument("Incompatible types {} and {}", type_, value.Type());
UNREACHABLE_MSG("Incompatible types {} and {}", type_, value.Type());
}
}

View File

@ -6,6 +6,7 @@
#include "shader_recompiler/frontend/structured_control_flow.h"
#include "shader_recompiler/ir/passes/ir_passes.h"
#include "shader_recompiler/ir/post_order.h"
#include "shader_recompiler/profile.h"
#include "shader_recompiler/recompiler.h"
namespace Shader {

View File

@ -0,0 +1,146 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/types.h"
#include "shader_recompiler/ir/type.h"
#include "video_core/amdgpu/resource.h"
#include <boost/container/static_vector.hpp>
namespace Shader {
static constexpr u32 NUM_USER_DATA_REGS = 16;
static constexpr u32 NUM_IMAGES = 64;
static constexpr u32 NUM_BUFFERS = 40;
static constexpr u32 NUM_SAMPLERS = 16;
static constexpr u32 NUM_FMASKS = 8;
enum class BufferType : u32 {
Guest,
Flatbuf,
BdaPagetable,
FaultBuffer,
GdsBuffer,
SharedMemory,
};
struct Info;
struct BufferResource {
u32 sharp_idx;
IR::Type used_types;
AmdGpu::Buffer inline_cbuf;
BufferType buffer_type;
u8 instance_attrib{};
bool is_written{};
bool is_formatted{};
bool IsSpecial() const noexcept {
return buffer_type != BufferType::Guest;
}
bool IsStorage([[maybe_unused]] const AmdGpu::Buffer buffer) const noexcept {
// When using uniform buffers, a size is required at compilation time, so we would need to
// either compile many shader specializations to handle each size or always force the
// maximum possible size. However, for some vendors the shader-supplied size is used for
// bounds checking uniform buffer accesses, so the latter would effectively turn off
// buffer robustness behavior. Instead, force storage buffers, which are bounds checked
// against the actual buffer size. We assume the performance hit from this is acceptable.
return true; // buffer.GetSize() > profile.max_ubo_size || is_written;
}
constexpr AmdGpu::Buffer GetSharp(const auto& info) const noexcept {
const auto buffer =
inline_cbuf ? inline_cbuf : info.template ReadUdSharp<AmdGpu::Buffer>(sharp_idx);
if (!buffer.Valid()) {
LOG_DEBUG(Render, "Encountered invalid buffer sharp");
return AmdGpu::Buffer::Null();
}
return buffer;
}
};
using BufferResourceList = boost::container::static_vector<BufferResource, NUM_BUFFERS>;
struct ImageResource {
u32 sharp_idx;
bool is_depth{};
bool is_atomic{};
bool is_array{};
bool is_written{};
bool is_r128{};
constexpr AmdGpu::Image GetSharp(const auto& info) const noexcept {
AmdGpu::Image image{};
if (!is_r128) {
image = info.template ReadUdSharp<AmdGpu::Image>(sharp_idx);
} else {
const auto raw = info.template ReadUdSharp<u128>(sharp_idx);
std::memcpy(&image, &raw, sizeof(raw));
}
if (!image.Valid()) {
LOG_DEBUG(Render_Vulkan, "Encountered invalid image sharp");
image = AmdGpu::Image::Null(is_depth);
} else if (is_depth) {
const auto data_fmt = image.GetDataFmt();
if (data_fmt != AmdGpu::DataFormat::Format16 &&
data_fmt != AmdGpu::DataFormat::Format32) {
LOG_DEBUG(Render_Vulkan,
"Encountered non-depth image used with depth instruction!");
image = AmdGpu::Image::Null(true);
}
}
return image;
}
};
using ImageResourceList = boost::container::static_vector<ImageResource, NUM_IMAGES>;
struct SamplerResource {
u32 sharp_idx;
AmdGpu::Sampler inline_sampler;
u32 is_inline_sampler : 1;
u32 associated_image : 4;
u32 disable_aniso : 1;
constexpr AmdGpu::Sampler GetSharp(const auto& info) const noexcept {
return is_inline_sampler ? inline_sampler
: info.template ReadUdSharp<AmdGpu::Sampler>(sharp_idx);
}
};
using SamplerResourceList = boost::container::static_vector<SamplerResource, NUM_SAMPLERS>;
struct FMaskResource {
u32 sharp_idx;
constexpr AmdGpu::Image GetSharp(const auto& info) const noexcept {
return info.template ReadUdSharp<AmdGpu::Image>(sharp_idx);
}
};
using FMaskResourceList = boost::container::static_vector<FMaskResource, NUM_FMASKS>;
struct PushData {
static constexpr u32 XOffsetIndex = 0;
static constexpr u32 YOffsetIndex = 1;
static constexpr u32 XScaleIndex = 2;
static constexpr u32 YScaleIndex = 3;
static constexpr u32 UdRegsIndex = 4;
static constexpr u32 BufOffsetIndex = UdRegsIndex + NUM_USER_DATA_REGS / 4;
float xoffset;
float yoffset;
float xscale;
float yscale;
std::array<u32, NUM_USER_DATA_REGS> ud_regs;
std::array<u8, NUM_BUFFERS> buf_offsets;
void AddOffset(u32 binding, u32 offset) {
ASSERT(offset < 256 && binding < buf_offsets.size());
buf_offsets[binding] = offset;
}
};
static_assert(sizeof(PushData) <= 128,
"PushData size is greater than minimum size guaranteed by Vulkan spec");
} // namespace Shader
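
The static_assert above is easy to verify by hand: with the constants declared in this header (16 user-data registers, 40 buffer slots), PushData occupies 120 bytes, comfortably inside the 128-byte push-constant minimum Vulkan guarantees. The stand-alone mirror below is only an illustrative sketch, not part of this commit; it copies NUM_USER_DATA_REGS and NUM_BUFFERS from the header and everything else is hypothetical.

#include <array>
#include <cstdint>

// Sketch: mirror of the PushData layout above, used only to spell out the size arithmetic.
constexpr std::uint32_t NUM_USER_DATA_REGS = 16; // copied from resource.h
constexpr std::uint32_t NUM_BUFFERS = 40;        // copied from resource.h

struct PushDataMirror {
    float xoffset, yoffset, xscale, yscale;                //  4 * 4 = 16 bytes
    std::array<std::uint32_t, NUM_USER_DATA_REGS> ud_regs; // 16 * 4 = 64 bytes
    std::array<std::uint8_t, NUM_BUFFERS> buf_offsets;     // 40 * 1 = 40 bytes
};

// 16 + 64 + 40 = 120 bytes and the struct alignment is 4, so no tail padding is added;
// 120 <= 128, the minimum maxPushConstantsSize required by the Vulkan specification.
static_assert(sizeof(PushDataMirror) == 120);
static_assert(sizeof(PushDataMirror) <= 128);

int main() {}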

View File

@ -3,13 +3,12 @@
#pragma once
#include <algorithm>
#include <span>
#include <boost/container/static_vector.hpp>
#include "common/types.h"
#include "shader_recompiler/frontend/tessellation.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/amdgpu/types.h"
#include "video_core/amdgpu/pixel_format.h"
#include "video_core/amdgpu/regs_shader.h"
#include "video_core/amdgpu/regs_vertex.h"
namespace Shader {
@ -36,7 +35,7 @@ enum class LogicalStage : u32 {
constexpr u32 MaxStageTypes = static_cast<u32>(LogicalStage::NumLogicalStages);
[[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept {
constexpr Stage StageFromIndex(size_t index) noexcept {
return static_cast<Stage>(index);
}
@ -87,7 +86,6 @@ struct VertexRuntimeInfo {
bool clip_disable{};
u32 step_rate_0;
u32 step_rate_1;
// Domain
AmdGpu::TessellationType tess_type;
AmdGpu::TessellationTopology tess_topology;
AmdGpu::TessellationPartitioning tess_partitioning;
@ -110,22 +108,24 @@ struct VertexRuntimeInfo {
};
struct HullRuntimeInfo {
// from registers
u32 num_input_control_points;
u32 num_threads;
AmdGpu::TessellationType tess_type;
bool offchip_lds_enable;
// from tess constants buffer
u32 ls_stride;
u32 hs_output_cp_stride;
u32 hs_output_base;
auto operator<=>(const HullRuntimeInfo&) const noexcept = default;
void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) {
ls_stride = tess_constants.ls_stride;
hs_output_cp_stride = tess_constants.hs_cp_stride;
hs_output_base = tess_constants.hs_output_base;
}
// It might be possible for a non-passthrough TCS to have these conditions, in some
// dumb situation.
// In that case, it should be fine to assume passthrough and declare some extra
bool operator==(const HullRuntimeInfo&) const = default;
// It might be possible for a non-passthrough TCS to have these conditions, in some dumb
// situation. In that case, it should be fine to assume passthrough and declare some extra
// output control points and attributes that shouldn't be read by the TES anyway
bool IsPassthrough() const {
return hs_output_base == 0 && ls_stride == hs_output_cp_stride && num_threads == 1;
@ -138,12 +138,6 @@ struct HullRuntimeInfo {
u32 NumOutputControlPoints() const {
return IsPassthrough() ? num_input_control_points : num_threads;
}
void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) {
ls_stride = tess_constants.ls_stride;
hs_output_cp_stride = tess_constants.hs_cp_stride;
hs_output_base = tess_constants.hs_output_base;
}
};
static constexpr auto GsMaxOutputStreams = 4u;
@ -157,11 +151,11 @@ struct GeometryRuntimeInfo {
u32 out_vertex_data_size{};
AmdGpu::PrimitiveType in_primitive;
GsOutputPrimTypes out_primitive;
AmdGpu::Liverpool::GsMode::Mode mode;
AmdGpu::GsScenario mode;
std::span<const u32> vs_copy;
u64 vs_copy_hash;
bool operator==(const GeometryRuntimeInfo& other) const noexcept {
bool operator==(const GeometryRuntimeInfo& other) const {
return num_outputs == other.num_outputs && outputs == other.outputs && num_invocations &&
other.num_invocations && output_vertices == other.output_vertices &&
in_primitive == other.in_primitive &&
@ -181,10 +175,10 @@ struct PsColorBuffer {
AmdGpu::DataFormat data_format : 6;
AmdGpu::NumberFormat num_format : 4;
AmdGpu::NumberConversion num_conversion : 3;
AmdGpu::Liverpool::ShaderExportFormat export_format : 4;
AmdGpu::ShaderExportFormat export_format : 4;
AmdGpu::CompMapping swizzle;
bool operator==(const PsColorBuffer& other) const noexcept = default;
bool operator==(const PsColorBuffer& other) const = default;
};
struct FragmentRuntimeInfo {
@ -200,18 +194,18 @@ struct FragmentRuntimeInfo {
bool operator==(const PsInput&) const noexcept = default;
};
AmdGpu::Liverpool::PsInput en_flags;
AmdGpu::Liverpool::PsInput addr_flags;
AmdGpu::PsInput en_flags;
AmdGpu::PsInput addr_flags;
u32 num_inputs;
std::array<PsInput, 32> inputs;
std::array<PsColorBuffer, MaxColorBuffers> color_buffers;
AmdGpu::Liverpool::ShaderExportFormat z_export_format;
AmdGpu::ShaderExportFormat z_export_format;
u8 mrtz_mask;
bool dual_source_blending;
bool operator==(const FragmentRuntimeInfo& other) const noexcept {
return std::ranges::equal(color_buffers, other.color_buffers) &&
en_flags.raw == other.en_flags.raw && addr_flags.raw == other.addr_flags.raw &&
en_flags == other.en_flags && addr_flags == other.addr_flags &&
num_inputs == other.num_inputs && z_export_format == other.z_export_format &&
mrtz_mask == other.mrtz_mask && dual_source_blending == other.dual_source_blending &&
std::ranges::equal(inputs.begin(), inputs.begin() + num_inputs, other.inputs.begin(),

View File

@ -9,6 +9,7 @@
#include "shader_recompiler/backend/bindings.h"
#include "shader_recompiler/frontend/fetch_shader.h"
#include "shader_recompiler/info.h"
#include "shader_recompiler/profile.h"
namespace Shader {
@ -114,9 +115,9 @@ struct StageSpecialization {
}
u32 binding{};
ForEachSharp(binding, buffers, info->buffers,
[profile_](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
spec.stride = sharp.GetStride();
spec.is_storage = desc.IsStorage(sharp, profile_);
spec.is_storage = desc.IsStorage(sharp);
spec.is_formatted = desc.is_formatted;
spec.swizzle_enable = sharp.swizzle_enable;
if (spec.is_formatted) {

View File

@ -0,0 +1,22 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/types.h"
namespace AmdGpu {
union CbDbExtent {
struct {
u16 width;
u16 height;
};
u32 raw;
bool Valid() const {
return raw != 0;
}
};
} // namespace AmdGpu
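
A short sketch of how this union is meant to be read, assuming the little-endian layout the rest of the register structs rely on (width in the low half of the word); it is not part of the commit and the 1920x1080 extent is invented.

#include <cstdint>
#include <cstring>

int main() {
    // Pack a hypothetical 1920x1080 extent the way the register word stores it.
    const std::uint16_t width = 1920, height = 1080;
    std::uint32_t raw = 0;
    std::memcpy(reinterpret_cast<char*>(&raw) + 0, &width, sizeof(width));
    std::memcpy(reinterpret_cast<char*>(&raw) + 2, &height, sizeof(height));

    // Valid() is simply `raw != 0`: an extent of 0x0 means the surface was never bound.
    const bool valid = raw != 0;
    const std::uint32_t low_half = raw & 0xFFFFu; // width on little-endian hosts
    const std::uint32_t high_half = raw >> 16;    // height
    return (valid && low_half == width && high_half == height) ? 0 : 1;
}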

View File

@ -1,55 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/types.h"
#include "video_core/amdgpu/liverpool.h"
#include <array>
namespace AmdGpu {
// The following values are taken from fpPS4:
// https://github.com/red-prig/fpPS4/blob/436b43064be4c78229500f3d3c054fc76639247d/chip/pm4_pfp.pas#L410
//
static constexpr std::array reg_array_default{
0x00000000u, 0x80000000u, 0x40004000u, 0xdeadbeefu, 0x00000000u, 0x40004000u, 0x00000000u,
0x40004000u, 0x00000000u, 0x40004000u, 0x00000000u, 0x40004000u, 0xaa99aaaau, 0x00000000u,
0xdeadbeefu, 0xdeadbeefu, 0x80000000u, 0x40004000u, 0x00000000u, 0x00000000u, 0x80000000u,
0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u,
0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u,
0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u,
0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u,
0x40004000u, 0x80000000u, 0x40004000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u,
0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u,
0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u,
0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u,
0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u,
0x2a00161au,
};
void Liverpool::Regs::SetDefaults() {
std::memset(reg_array.data(), 0, reg_array.size() * sizeof(u32));
std::memcpy(&reg_array[ContextRegWordOffset + 0x80], reg_array_default.data(),
reg_array_default.size() * sizeof(u32));
// Individual context regs values
reg_array[ContextRegWordOffset + 0x000d] = 0x40004000u;
reg_array[ContextRegWordOffset + 0x01b6] = 0x00000002u;
reg_array[ContextRegWordOffset + 0x0204] = 0x00090000u;
reg_array[ContextRegWordOffset + 0x0205] = 0x00000004u;
reg_array[ContextRegWordOffset + 0x0295] = 0x00000100u;
reg_array[ContextRegWordOffset + 0x0296] = 0x00000080u;
reg_array[ContextRegWordOffset + 0x0297] = 0x00000002u;
reg_array[ContextRegWordOffset + 0x02aa] = 0x00001000u;
reg_array[ContextRegWordOffset + 0x02f7] = 0x00001000u;
reg_array[ContextRegWordOffset + 0x02f9] = 0x00000005u;
reg_array[ContextRegWordOffset + 0x02fa] = 0x3f800000u;
reg_array[ContextRegWordOffset + 0x02fb] = 0x3f800000u;
reg_array[ContextRegWordOffset + 0x02fc] = 0x3f800000u;
reg_array[ContextRegWordOffset + 0x02fd] = 0x3f800000u;
reg_array[ContextRegWordOffset + 0x0316] = 0x0000000eu;
reg_array[ContextRegWordOffset + 0x0317] = 0x00000010u;
}
} // namespace AmdGpu

View File

@ -12,6 +12,7 @@
#include "core/libraries/kernel/process.h"
#include "core/libraries/videoout/driver.h"
#include "core/memory.h"
#include "core/platform.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/amdgpu/pm4_cmds.h"
#include "video_core/renderdoc.h"
@ -305,14 +306,14 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
}
case PM4ItOpcode::SetConfigReg: {
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
const auto reg_addr = ConfigRegWordOffset + set_data->reg_offset;
const auto reg_addr = Regs::ConfigRegWordOffset + set_data->reg_offset;
const auto* payload = reinterpret_cast<const u32*>(header + 2);
std::memcpy(&regs.reg_array[reg_addr], payload, (count - 1) * sizeof(u32));
break;
}
case PM4ItOpcode::SetContextReg: {
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
const auto reg_addr = ContextRegWordOffset + set_data->reg_offset;
const auto reg_addr = Regs::ContextRegWordOffset + set_data->reg_offset;
const auto* payload = reinterpret_cast<const u32*>(header + 2);
std::memcpy(&regs.reg_array[reg_addr], payload, (count - 1) * sizeof(u32));
@ -335,7 +336,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
case ContextRegs::CbColor7Base: {
const auto col_buf_id = (reg_addr - ContextRegs::CbColor0Base) /
(ContextRegs::CbColor1Base - ContextRegs::CbColor0Base);
ASSERT(col_buf_id < NumColorBuffers);
ASSERT(col_buf_id < NUM_COLOR_BUFFERS);
const auto nop_offset = header->type3.count;
if (nop_offset == 0x0e || nop_offset == 0x0d || nop_offset == 0x0b) {
@ -358,7 +359,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
const auto col_buf_id =
(reg_addr - ContextRegs::CbColor0Cmask) /
(ContextRegs::CbColor1Cmask - ContextRegs::CbColor0Cmask);
ASSERT(col_buf_id < NumColorBuffers);
ASSERT(col_buf_id < NUM_COLOR_BUFFERS);
const auto nop_offset = header->type3.count;
if (nop_offset == 0x04) {
@ -394,14 +395,14 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
(set_data->reg_offset - 0x200);
std::memcpy(addr, header + 2, set_size);
} else {
std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
set_size);
std::memcpy(&regs.reg_array[Regs::ShRegWordOffset + set_data->reg_offset],
header + 2, set_size);
}
break;
}
case PM4ItOpcode::SetUconfigReg: {
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
std::memcpy(&regs.reg_array[UconfigRegWordOffset + set_data->reg_offset],
std::memcpy(&regs.reg_array[Regs::UconfigRegWordOffset + set_data->reg_offset],
header + 2, (count - 1) * sizeof(u32));
break;
}
@ -418,7 +419,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
const auto* draw_index = reinterpret_cast<const PM4CmdDrawIndex2*>(header);
regs.max_index_size = draw_index->max_size;
regs.index_base_address.base_addr_lo = draw_index->index_base_lo;
regs.index_base_address.base_addr_hi.Assign(draw_index->index_base_hi);
regs.index_base_address.base_addr_hi = draw_index->index_base_hi;
regs.num_indices = draw_index->index_count;
regs.draw_initiator = draw_index->draw_initiator;
if (DebugState.DumpingCurrentReg()) {
@ -582,7 +583,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
case PM4ItOpcode::IndexBase: {
const auto* index_base = reinterpret_cast<const PM4CmdDrawIndexBase*>(header);
regs.index_base_address.base_addr_lo = index_base->addr_lo;
regs.index_base_address.base_addr_hi.Assign(index_base->addr_hi);
regs.index_base_address.base_addr_hi = index_base->addr_hi;
break;
}
case PM4ItOpcode::IndexBufferSize: {
@ -638,12 +639,14 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
}
case PM4ItOpcode::EventWriteEop: {
const auto* event_eop = reinterpret_cast<const PM4CmdEventWriteEop*>(header);
event_eop->SignalFence([](void* address, u64 data, u32 num_bytes) {
auto* memory = Core::Memory::Instance();
if (!memory->TryWriteBacking(address, &data, num_bytes)) {
memcpy(address, &data, num_bytes);
}
});
event_eop->SignalFence(
[](void* address, u64 data, u32 num_bytes) {
auto* memory = Core::Memory::Instance();
if (!memory->TryWriteBacking(address, &data, num_bytes)) {
memcpy(address, &data, num_bytes);
}
},
[] { Platform::IrqC::Instance()->Signal(Platform::InterruptId::GfxEop); });
break;
}
case PM4ItOpcode::DmaData: {
@ -947,8 +950,8 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
(set_data->reg_offset - 0x200);
std::memcpy(addr, header + 2, set_size);
} else {
std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
set_size);
std::memcpy(&regs.reg_array[Regs::ShRegWordOffset + set_data->reg_offset],
header + 2, set_size);
}
break;
}
@ -1030,7 +1033,9 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
}
case PM4ItOpcode::ReleaseMem: {
const auto* release_mem = reinterpret_cast<const PM4CmdReleaseMem*>(header);
release_mem->SignalFence(static_cast<Platform::InterruptId>(queue.pipe_id));
release_mem->SignalFence([pipe_id = queue.pipe_id] {
Platform::IrqC::Instance()->Signal(static_cast<Platform::InterruptId>(pipe_id));
});
break;
}
case PM4ItOpcode::EventWrite: {
@ -1053,11 +1058,8 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
FIBER_EXIT;
}
std::pair<std::span<const u32>, std::span<const u32>> Liverpool::CopyCmdBuffers(
std::span<const u32> dcb, std::span<const u32> ccb) {
Liverpool::CmdBuffer Liverpool::CopyCmdBuffers(std::span<const u32> dcb, std::span<const u32> ccb) {
auto& queue = mapped_queues[GfxQueueId];
// std::vector resize can invalidate spans for commands in flight
ASSERT_MSG(queue.dcb_buffer.capacity() >= queue.dcb_buffer_offset + dcb.size(),
"dcb copy buffer out of reserved space");
ASSERT_MSG(queue.ccb_buffer.capacity() >= queue.ccb_buffer_offset + ccb.size(),
@ -1068,8 +1070,8 @@ std::pair<std::span<const u32>, std::span<const u32>> Liverpool::CopyCmdBuffers(
queue.ccb_buffer.resize(
std::max(queue.ccb_buffer.size(), queue.ccb_buffer_offset + ccb.size()));
u32 prev_dcb_buffer_offset = queue.dcb_buffer_offset;
u32 prev_ccb_buffer_offset = queue.ccb_buffer_offset;
const u32 prev_dcb_buffer_offset = queue.dcb_buffer_offset;
const u32 prev_ccb_buffer_offset = queue.ccb_buffer_offset;
if (!dcb.empty()) {
std::memcpy(queue.dcb_buffer.data() + queue.dcb_buffer_offset, dcb.data(),
dcb.size_bytes());

File diff suppressed because it is too large

View File

@ -4,26 +4,24 @@
#pragma once
#include <cstring>
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/types.h"
#include "common/uint128.h"
#include "core/libraries/gnmdriver/gnmdriver.h"
#include "core/libraries/kernel/time.h"
#include "core/platform.h"
#include "video_core/amdgpu/pm4_opcodes.h"
namespace AmdGpu {
/// This enum defines the Shader types supported in PM4 type 3 header
enum class PM4ShaderType : u32 {
ShaderGraphics = 0, ///< Graphics shader
ShaderCompute = 1 ///< Compute shader
ShaderGraphics = 0,
ShaderCompute = 1,
};
/// This enum defines the predicate value supported in PM4 type 3 header
enum class PM4Predicate : u32 {
PredDisable = 0, ///< Predicate disabled
PredEnable = 1 ///< Predicate enabled
PredDisable = 0,
PredEnable = 1,
};
union PM4Type0Header {
@ -466,7 +464,7 @@ struct PM4CmdEventWriteEop {
return data_lo | u64(data_hi) << 32;
}
void SignalFence(auto&& write_mem) const {
void SignalFence(auto&& write_mem, auto&& signal_irq) const {
u32* address = Address<u32>();
switch (data_sel.Value()) {
case DataSelect::None: {
@ -502,7 +500,7 @@ struct PM4CmdEventWriteEop {
ASSERT(data_sel == DataSelect::None);
[[fallthrough]];
case InterruptSelect::IrqWhenWriteConfirm: {
Platform::IrqC::Instance()->Signal(Platform::InterruptId::GfxEop);
signal_irq();
break;
}
default: {
@ -682,7 +680,7 @@ struct PM4CmdWaitRegMem {
return reg.Value();
}
bool Test(const std::array<u32, Liverpool::NumRegs>& regs) const {
bool Test(std::span<const u32> regs) const {
u32 value = mem_space.Value() == MemSpace::Memory ? *Address() : regs[Reg()];
switch (function.Value()) {
case Function::Always: {
@ -934,7 +932,7 @@ struct PM4CmdReleaseMem {
return data_lo | u64(data_hi) << 32;
}
void SignalFence(Platform::InterruptId irq_id) const {
void SignalFence(auto&& signal_irq) const {
switch (data_sel.Value()) {
case DataSelect::Data32Low: {
*Address<u32>() = DataDWord();
@ -965,7 +963,7 @@ struct PM4CmdReleaseMem {
case InterruptSelect::IrqUndocumented:
[[fallthrough]];
case InterruptSelect::IrqWhenWriteConfirm: {
Platform::IrqC::Instance()->Signal(irq_id);
signal_irq();
break;
}
default: {

View File

@ -0,0 +1,128 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "video_core/amdgpu/regs.h"
namespace AmdGpu {
// The following values are taken from fpPS4:
// https://github.com/red-prig/fpPS4/blob/436b43064be4c78229500f3d3c054fc76639247d/chip/pm4_pfp.pas#L410
static constexpr std::array REG_ARRAY_DEFAULT = {
0x00000000u, 0x80000000u, 0x40004000u, 0xdeadbeefu, 0x00000000u, 0x40004000u, 0x00000000u,
0x40004000u, 0x00000000u, 0x40004000u, 0x00000000u, 0x40004000u, 0xaa99aaaau, 0x00000000u,
0xdeadbeefu, 0xdeadbeefu, 0x80000000u, 0x40004000u, 0x00000000u, 0x00000000u, 0x80000000u,
0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u,
0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u,
0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u,
0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u,
0x40004000u, 0x80000000u, 0x40004000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u,
0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u,
0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u,
0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u,
0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u,
0x2a00161au,
};
void Regs::SetDefaults() {
std::memset(reg_array.data(), 0, reg_array.size() * sizeof(u32));
std::memcpy(&reg_array[ContextRegWordOffset + 0x80], REG_ARRAY_DEFAULT.data(),
REG_ARRAY_DEFAULT.size() * sizeof(u32));
// Individual context regs values
reg_array[ContextRegWordOffset + 0x000d] = 0x40004000u;
reg_array[ContextRegWordOffset + 0x01b6] = 0x00000002u;
reg_array[ContextRegWordOffset + 0x0204] = 0x00090000u;
reg_array[ContextRegWordOffset + 0x0205] = 0x00000004u;
reg_array[ContextRegWordOffset + 0x0295] = 0x00000100u;
reg_array[ContextRegWordOffset + 0x0296] = 0x00000080u;
reg_array[ContextRegWordOffset + 0x0297] = 0x00000002u;
reg_array[ContextRegWordOffset + 0x02aa] = 0x00001000u;
reg_array[ContextRegWordOffset + 0x02f7] = 0x00001000u;
reg_array[ContextRegWordOffset + 0x02f9] = 0x00000005u;
reg_array[ContextRegWordOffset + 0x02fa] = 0x3f800000u;
reg_array[ContextRegWordOffset + 0x02fb] = 0x3f800000u;
reg_array[ContextRegWordOffset + 0x02fc] = 0x3f800000u;
reg_array[ContextRegWordOffset + 0x02fd] = 0x3f800000u;
reg_array[ContextRegWordOffset + 0x0316] = 0x0000000eu;
reg_array[ContextRegWordOffset + 0x0317] = 0x00000010u;
}
#define GFX6_3D_REG_INDEX(field_name) (offsetof(AmdGpu::Regs, field_name) / sizeof(u32))
static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);
static_assert(GFX6_3D_REG_INDEX(vs_program) == 0x2C48);
static_assert(GFX6_3D_REG_INDEX(vs_program.user_data) == 0x2C4C);
static_assert(GFX6_3D_REG_INDEX(gs_program) == 0x2C88);
static_assert(GFX6_3D_REG_INDEX(es_program) == 0x2CC8);
static_assert(GFX6_3D_REG_INDEX(hs_program) == 0x2D08);
static_assert(GFX6_3D_REG_INDEX(ls_program) == 0x2D48);
static_assert(GFX6_3D_REG_INDEX(cs_program) == 0x2E00);
static_assert(GFX6_3D_REG_INDEX(cs_program.dim_z) == 0x2E03);
static_assert(GFX6_3D_REG_INDEX(cs_program.user_data) == 0x2E40);
static_assert(GFX6_3D_REG_INDEX(depth_render_control) == 0xA000);
static_assert(GFX6_3D_REG_INDEX(depth_view) == 0xA002);
static_assert(GFX6_3D_REG_INDEX(depth_htile_data_base) == 0xA005);
static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C);
static_assert(GFX6_3D_REG_INDEX(depth_buffer.z_info) == 0xA010);
static_assert(GFX6_3D_REG_INDEX(depth_buffer.depth_slice) == 0xA017);
static_assert(GFX6_3D_REG_INDEX(ta_bc_base) == 0xA020);
static_assert(GFX6_3D_REG_INDEX(window_offset) == 0xA080);
static_assert(GFX6_3D_REG_INDEX(window_scissor) == 0xA081);
static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E);
static_assert(GFX6_3D_REG_INDEX(color_shader_mask) == 0xA08F);
static_assert(GFX6_3D_REG_INDEX(generic_scissor) == 0xA090);
static_assert(GFX6_3D_REG_INDEX(viewport_scissors) == 0xA094);
static_assert(GFX6_3D_REG_INDEX(index_offset) == 0xA102);
static_assert(GFX6_3D_REG_INDEX(primitive_restart_index) == 0xA103);
static_assert(GFX6_3D_REG_INDEX(stencil_control) == 0xA10B);
static_assert(GFX6_3D_REG_INDEX(viewports) == 0xA10F);
static_assert(GFX6_3D_REG_INDEX(clip_user_data) == 0xA16F);
static_assert(GFX6_3D_REG_INDEX(ps_inputs) == 0xA191);
static_assert(GFX6_3D_REG_INDEX(vs_output_config) == 0xA1B1);
static_assert(GFX6_3D_REG_INDEX(ps_input_ena) == 0xA1B3);
static_assert(GFX6_3D_REG_INDEX(ps_input_addr) == 0xA1B4);
static_assert(GFX6_3D_REG_INDEX(shader_pos_format) == 0xA1C3);
static_assert(GFX6_3D_REG_INDEX(z_export_format) == 0xA1C4);
static_assert(GFX6_3D_REG_INDEX(color_export_format) == 0xA1C5);
static_assert(GFX6_3D_REG_INDEX(blend_control) == 0xA1E0);
static_assert(GFX6_3D_REG_INDEX(index_base_address) == 0xA1F9);
static_assert(GFX6_3D_REG_INDEX(draw_initiator) == 0xA1FC);
static_assert(GFX6_3D_REG_INDEX(depth_control) == 0xA200);
static_assert(GFX6_3D_REG_INDEX(color_control) == 0xA202);
static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204);
static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206);
static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207);
static_assert(GFX6_3D_REG_INDEX(line_control) == 0xA282);
static_assert(GFX6_3D_REG_INDEX(hs_clamp) == 0xA287);
static_assert(GFX6_3D_REG_INDEX(vgt_gs_mode) == 0xA290);
static_assert(GFX6_3D_REG_INDEX(mode_control) == 0xA292);
static_assert(GFX6_3D_REG_INDEX(vgt_gs_out_prim_type) == 0xA29B);
static_assert(GFX6_3D_REG_INDEX(index_size) == 0xA29D);
static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F);
static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1);
static_assert(GFX6_3D_REG_INDEX(enable_primitive_restart) == 0xA2A5);
static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_0) == 0xA2A8);
static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_1) == 0xA2A9);
static_assert(GFX6_3D_REG_INDEX(vgt_esgs_ring_itemsize) == 0xA2AB);
static_assert(GFX6_3D_REG_INDEX(vgt_gsvs_ring_itemsize) == 0xA2AC);
static_assert(GFX6_3D_REG_INDEX(stage_enable) == 0xA2D5);
static_assert(GFX6_3D_REG_INDEX(vgt_gs_vert_itemsize[0]) == 0xA2D7);
static_assert(GFX6_3D_REG_INDEX(tess_config) == 0xA2DB);
static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF);
static_assert(GFX6_3D_REG_INDEX(vgt_gs_instance_cnt) == 0xA2E4);
static_assert(GFX6_3D_REG_INDEX(vgt_strmout_config) == 0xA2E5);
static_assert(GFX6_3D_REG_INDEX(vgt_strmout_buffer_config) == 0xA2E6);
static_assert(GFX6_3D_REG_INDEX(aa_config) == 0xA2F8);
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].base_address) == 0xA318);
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].pitch) == 0xA319);
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].slice) == 0xA31A);
static_assert(GFX6_3D_REG_INDEX(color_buffers[7].base_address) == 0xA381);
static_assert(GFX6_3D_REG_INDEX(cp_strmout_cntl) == 0xC03F);
static_assert(GFX6_3D_REG_INDEX(primitive_type) == 0xC242);
static_assert(GFX6_3D_REG_INDEX(num_instances) == 0xC24D);
static_assert(GFX6_3D_REG_INDEX(vgt_tf_memory_base) == 0xc250);
#undef GFX6_3D_REG_INDEX
} // namespace AmdGpu
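
The asserts above pin each named field to its GCN register word; combined with the SetContextReg handling in liverpool.cpp (which writes at Regs::ContextRegWordOffset + reg_offset), they imply, for example, that a packet with reg_offset 0x200 updates depth_control. The snippet below is only an illustrative sketch of that aliasing, not code from the commit; the stand-in array and the example payload value are invented.

#include <array>
#include <cstdint>
#include <cstring>

// Constants as declared/asserted in this commit.
constexpr std::uint32_t NumRegs = 0xD000;
constexpr std::uint32_t ContextRegWordOffset = 0xA000;
constexpr std::uint32_t DepthControlIndex = 0xA200; // GFX6_3D_REG_INDEX(depth_control)

int main() {
    static std::array<std::uint32_t, NumRegs> reg_array{}; // stand-in for Regs::reg_array

    // A SetContextReg packet carries an offset relative to the context block,
    // so the command processor writes at ContextRegWordOffset + reg_offset.
    const std::uint32_t reg_offset = 0x200;
    const std::uint32_t payload = 0x0000000Fu; // hypothetical depth/stencil enables
    std::memcpy(&reg_array[ContextRegWordOffset + reg_offset], &payload, sizeof(payload));

    // Because Regs is a union of reg_array and the named structs, this same word is
    // what the renderer later reads back as regs.depth_control.
    return reg_array[DepthControlIndex] == payload ? 0 : 1;
}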

View File

@ -0,0 +1,189 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "video_core/amdgpu/regs_color.h"
#include "video_core/amdgpu/regs_depth.h"
#include "video_core/amdgpu/regs_primitive.h"
#include "video_core/amdgpu/regs_shader.h"
#include "video_core/amdgpu/regs_texture.h"
#include "video_core/amdgpu/regs_vertex.h"
namespace AmdGpu {
#define DO_CONCAT2(x, y) x##y
#define CONCAT2(x, y) DO_CONCAT2(x, y)
#define INSERT_PADDING_WORDS(num_words) \
[[maybe_unused]] std::array<u32, num_words> CONCAT2(pad, __LINE__)
union Regs {
static constexpr u32 NumRegs = 0xD000;
static constexpr u32 UconfigRegWordOffset = 0xC000;
static constexpr u32 ContextRegWordOffset = 0xA000;
static constexpr u32 ConfigRegWordOffset = 0x2000;
static constexpr u32 ShRegWordOffset = 0x2C00;
struct {
INSERT_PADDING_WORDS(11272);
ShaderProgram ps_program;
INSERT_PADDING_WORDS(44);
ShaderProgram vs_program;
INSERT_PADDING_WORDS(44);
ShaderProgram gs_program;
INSERT_PADDING_WORDS(44);
ShaderProgram es_program;
INSERT_PADDING_WORDS(44);
ShaderProgram hs_program;
INSERT_PADDING_WORDS(44);
ShaderProgram ls_program;
INSERT_PADDING_WORDS(164);
ComputeProgram cs_program;
INSERT_PADDING_WORDS(29104);
DepthRenderControl depth_render_control;
INSERT_PADDING_WORDS(1);
DepthView depth_view;
DepthRenderOverride depth_render_override;
INSERT_PADDING_WORDS(1);
Address depth_htile_data_base;
INSERT_PADDING_WORDS(2);
float depth_bounds_min;
float depth_bounds_max;
u32 stencil_clear;
float depth_clear;
Scissor screen_scissor;
INSERT_PADDING_WORDS(2);
DepthBuffer depth_buffer;
INSERT_PADDING_WORDS(8);
BorderColorBuffer ta_bc_base;
INSERT_PADDING_WORDS(94);
WindowOffset window_offset;
ViewportScissor window_scissor;
INSERT_PADDING_WORDS(11);
ColorBufferMask color_target_mask;
ColorBufferMask color_shader_mask;
ViewportScissor generic_scissor;
INSERT_PADDING_WORDS(2);
std::array<ViewportScissor, NUM_VIEWPORTS> viewport_scissors;
std::array<ViewportDepth, NUM_VIEWPORTS> viewport_depths;
INSERT_PADDING_WORDS(46);
u32 index_offset;
u32 primitive_restart_index;
INSERT_PADDING_WORDS(1);
BlendConstants blend_constants;
INSERT_PADDING_WORDS(2);
StencilControl stencil_control;
StencilRefMask stencil_ref_front;
StencilRefMask stencil_ref_back;
INSERT_PADDING_WORDS(1);
std::array<ViewportBounds, NUM_VIEWPORTS> viewports;
std::array<ClipUserData, NUM_CLIP_PLANES> clip_user_data;
INSERT_PADDING_WORDS(10);
std::array<PsInputControl, 32> ps_inputs;
VsOutputConfig vs_output_config;
INSERT_PADDING_WORDS(1);
PsInput ps_input_ena;
PsInput ps_input_addr;
INSERT_PADDING_WORDS(1);
u32 num_interp : 6;
INSERT_PADDING_WORDS(12);
ShaderPosFormat shader_pos_format;
ShaderExportFormat z_export_format;
ColorExportFormat color_export_format;
INSERT_PADDING_WORDS(26);
std::array<BlendControl, NUM_COLOR_BUFFERS> blend_control;
INSERT_PADDING_WORDS(17);
IndexBufferBase index_base_address;
INSERT_PADDING_WORDS(1);
u32 draw_initiator;
INSERT_PADDING_WORDS(3);
DepthControl depth_control;
INSERT_PADDING_WORDS(1);
ColorControl color_control;
DepthShaderControl depth_shader_control;
ClipperControl clipper_control;
PolygonControl polygon_control;
ViewportControl viewport_control;
VsOutputControl vs_output_control;
INSERT_PADDING_WORDS(122);
LineControl line_control;
INSERT_PADDING_WORDS(4);
TessFactorClamp hs_clamp;
INSERT_PADDING_WORDS(7);
GsMode vgt_gs_mode;
INSERT_PADDING_WORDS(1);
ModeControl mode_control;
INSERT_PADDING_WORDS(8);
GsOutPrimitiveType vgt_gs_out_prim_type;
INSERT_PADDING_WORDS(1);
u32 index_size;
u32 max_index_size;
IndexBufferType index_buffer_type;
INSERT_PADDING_WORDS(1);
u32 enable_primitive_id;
INSERT_PADDING_WORDS(3);
u32 enable_primitive_restart;
INSERT_PADDING_WORDS(2);
u32 vgt_instance_step_rate_0;
u32 vgt_instance_step_rate_1;
INSERT_PADDING_WORDS(1);
u32 vgt_esgs_ring_itemsize;
u32 vgt_gsvs_ring_itemsize;
INSERT_PADDING_WORDS(33);
u32 vgt_gs_max_vert_out : 11;
INSERT_PADDING_WORDS(6);
ShaderStageEnable stage_enable;
LsHsConfig ls_hs_config;
u32 vgt_gs_vert_itemsize[4];
TessellationConfig tess_config;
INSERT_PADDING_WORDS(3);
PolygonOffset poly_offset;
GsInstances vgt_gs_instance_cnt;
StreamOutConfig vgt_strmout_config;
StreamOutBufferConfig vgt_strmout_buffer_config;
INSERT_PADDING_WORDS(17);
AaConfig aa_config;
INSERT_PADDING_WORDS(31);
ColorBuffer color_buffers[NUM_COLOR_BUFFERS];
INSERT_PADDING_WORDS(7343);
StreamOutControl cp_strmout_cntl;
INSERT_PADDING_WORDS(514);
PrimitiveType primitive_type;
INSERT_PADDING_WORDS(9);
u32 num_indices;
VgtNumInstances num_instances;
INSERT_PADDING_WORDS(2);
TessFactorMemoryBase vgt_tf_memory_base;
};
std::array<u32, NumRegs> reg_array;
const ShaderProgram* ProgramForStage(u32 index) const {
switch (index) {
case 0:
return &ps_program;
case 1:
return &vs_program;
case 2:
return &gs_program;
case 3:
return &es_program;
case 4:
return &hs_program;
case 5:
return &ls_program;
}
return nullptr;
}
bool IsClipDisabled() const {
return clipper_control.clip_disable || primitive_type == PrimitiveType::RectList;
}
void SetDefaults();
};
#undef DO_CONCAT2
#undef CONCAT2
#undef INSERT_PADDING_WORDS
} // namespace AmdGpu

View File

@ -0,0 +1,307 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "video_core/amdgpu/pixel_format.h"
#include "video_core/amdgpu/tiling.h"
namespace AmdGpu {
static constexpr u32 NUM_COLOR_BUFFERS = 8;
using BlendConstants = std::array<float, 4>;
struct BlendControl {
enum class BlendFactor : u32 {
Zero = 0,
One = 1,
SrcColor = 2,
OneMinusSrcColor = 3,
SrcAlpha = 4,
OneMinusSrcAlpha = 5,
DstAlpha = 6,
OneMinusDstAlpha = 7,
DstColor = 8,
OneMinusDstColor = 9,
SrcAlphaSaturate = 10,
ConstantColor = 13,
OneMinusConstantColor = 14,
Src1Color = 15,
InvSrc1Color = 16,
Src1Alpha = 17,
InvSrc1Alpha = 18,
ConstantAlpha = 19,
OneMinusConstantAlpha = 20,
};
enum class BlendFunc : u32 {
Add = 0,
Subtract = 1,
Min = 2,
Max = 3,
ReverseSubtract = 4,
};
BlendFactor color_src_factor : 5;
BlendFunc color_func : 3;
BlendFactor color_dst_factor : 5;
u32 : 3;
BlendFactor alpha_src_factor : 5;
BlendFunc alpha_func : 3;
BlendFactor alpha_dst_factor : 5;
u32 separate_alpha_blend : 1;
u32 enable : 1;
u32 disable_rop3 : 1;
bool operator==(const BlendControl& other) const = default;
};
struct ColorControl {
enum class OperationMode : u32 {
Disable = 0u,
Normal = 1u,
EliminateFastClear = 2u,
Resolve = 3u,
Err = 4u,
FmaskDecompress = 5u,
};
enum class LogicOp : u32 {
Clear = 0x00,
Nor = 0x11,
AndInverted = 0x22,
CopyInverted = 0x33,
AndReverse = 0x44,
Invert = 0x55,
Xor = 0x66,
Nand = 0x77,
And = 0x88,
Equiv = 0x99,
Noop = 0xAA,
OrInverted = 0xBB,
Copy = 0xCC,
OrReverse = 0xDD,
Or = 0xEE,
Set = 0xFF,
};
u32 disable_dual_quad : 1;
u32 : 2;
u32 degamma_enable : 1;
OperationMode mode : 3;
u32 : 9;
LogicOp rop3 : 8;
};
struct ColorBufferMask {
enum ColorComponent : u32 {
ComponentR = (1u << 0),
ComponentG = (1u << 1),
ComponentB = (1u << 2),
ComponentA = (1u << 3),
};
u32 raw;
u32 GetMask(u32 buf_id) const {
return (raw >> (buf_id * 4)) & 0xfu;
}
void SetMask(u32 buf_id, u32 mask) {
raw &= ~(0xf << (buf_id * 4));
raw |= (mask << (buf_id * 4));
}
};
struct ColorBuffer {
enum class EndianSwap : u32 {
None = 0,
Swap8In16 = 1,
Swap8In32 = 2,
Swap8In64 = 3,
};
enum class SwapMode : u32 {
Standard = 0,
Alternate = 1,
StandardReverse = 2,
AlternateReverse = 3,
};
enum class RoundMode : u32 {
ByHalf = 0,
Truncate = 1,
};
u32 base_address;
struct {
u32 tile_max : 11;
u32 : 9;
u32 fmask_tile_max : 11;
} pitch;
struct {
u32 tile_max : 22;
} slice;
struct {
u32 slice_start : 11;
u32 : 2;
u32 slice_max : 11;
} view;
union Color0Info {
u32 raw;
struct {
EndianSwap endian : 2;
u32 format : 5;
u32 linear_general : 1;
u32 number_type : 3;
SwapMode comp_swap : 2;
u32 fast_clear : 1;
u32 compression : 1;
u32 blend_clamp : 1;
u32 blend_bypass : 1;
u32 simple_float : 1;
RoundMode round_mode : 1;
u32 cmask_is_linear : 1;
u32 blend_opt_dont_rd_dst : 3;
u32 blend_opt_discard_pixel : 3;
u32 fmask_compression_disable_ci : 1;
u32 fmask_compress_1frag_only : 1;
u32 dcc_enable : 1;
u32 cmask_addr_type : 2;
u32 alt_tile_mode : 1;
};
} info;
union Color0Attrib {
u32 raw;
struct {
TileMode tile_mode_index : 5;
u32 fmask_tile_mode_index : 5;
u32 fmask_bank_height : 2;
u32 num_samples_log2 : 3;
u32 num_fragments_log2 : 2;
u32 force_dst_alpha_1 : 1;
};
} attrib;
u32 pad0;
u32 cmask_base_address;
struct {
u32 tile_max : 14;
} cmask_slice;
u32 fmask_base_address;
struct {
u32 tile_max : 14;
} fmask_slice;
u32 clear_word0;
u32 clear_word1;
std::array<u32, 2> pad1;
operator bool() const {
return base_address && info.format;
}
u32 Pitch() const {
return (pitch.tile_max + 1) << 3;
}
u32 Height() const {
return (slice.tile_max + 1) * 64 / Pitch();
}
u64 Address() const {
return u64(base_address) << 8 | (info.linear_general ? (view.slice_start & 0xff) : 0);
}
VAddr CmaskAddress() const {
return VAddr(cmask_base_address) << 8;
}
VAddr FmaskAddress() const {
return VAddr(fmask_base_address) << 8;
}
u32 NumSamples() const {
return 1 << attrib.num_fragments_log2;
}
u32 BaseSlice() const {
return info.linear_general ? 0 : view.slice_start;
}
u32 NumSlices() const {
return view.slice_max + 1;
}
u32 GetColorSliceSize() const {
const auto num_bytes_per_element = NumBitsPerBlock(DataFormat(info.format)) / 8u;
const auto slice_size = num_bytes_per_element * (slice.tile_max + 1) * 64u * NumSamples();
return slice_size;
}
TileMode GetTileMode() const {
return info.linear_general ? TileMode::DisplayLinearGeneral : attrib.tile_mode_index;
}
bool IsTiled() const {
return GetTileMode() != TileMode::DisplayLinearAligned &&
GetTileMode() != TileMode::DisplayLinearGeneral;
}
DataFormat GetDataFmt() const {
return RemapDataFormat(DataFormat(info.format));
}
NumberFormat GetNumberFmt() const {
return RemapNumberFormat(GetFixedNumberFormat(), DataFormat(info.format));
}
NumberConversion GetNumberConversion() const {
return MapNumberConversion(GetFixedNumberFormat(), DataFormat(info.format));
}
CompMapping Swizzle() const {
// clang-format off
static constexpr std::array<std::array<CompMapping, 4>, 4> mrt_swizzles{{
// Standard
std::array<CompMapping, 4>{{
{.r = CompSwizzle::Red, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero},
{.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero},
{.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Blue, .a = CompSwizzle::Zero},
{.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Blue, .a = CompSwizzle::Alpha},
}},
// Alternate
std::array<CompMapping, 4>{{
{.r = CompSwizzle::Green, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero},
{.r = CompSwizzle::Red, .g = CompSwizzle::Alpha, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero},
{.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Alpha, .a = CompSwizzle::Zero},
{.r = CompSwizzle::Blue, .g = CompSwizzle::Green, .b = CompSwizzle::Red, .a = CompSwizzle::Alpha},
}},
// StandardReverse
std::array<CompMapping, 4>{{
{.r = CompSwizzle::Blue, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero},
{.r = CompSwizzle::Green, .g = CompSwizzle::Red, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero},
{.r = CompSwizzle::Blue, .g = CompSwizzle::Green, .b = CompSwizzle::Red, .a = CompSwizzle::Zero},
{.r = CompSwizzle::Alpha, .g = CompSwizzle::Blue, .b = CompSwizzle::Green, .a = CompSwizzle::Red},
}},
// AlternateReverse
std::array<CompMapping, 4>{{
{.r = CompSwizzle::Alpha, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero},
{.r = CompSwizzle::Alpha, .g = CompSwizzle::Red, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero},
{.r = CompSwizzle::Alpha, .g = CompSwizzle::Green, .b = CompSwizzle::Red, .a = CompSwizzle::Zero},
{.r = CompSwizzle::Alpha, .g = CompSwizzle::Red, .b = CompSwizzle::Green, .a = CompSwizzle::Blue},
}},
}};
// clang-format on
const auto swap_idx = static_cast<u32>(info.comp_swap);
const auto components_idx = NumComponents(DataFormat(info.format)) - 1;
const auto mrt_swizzle = mrt_swizzles[swap_idx][components_idx];
return RemapSwizzle(DataFormat(info.format), mrt_swizzle);
}
NumberFormat GetFixedNumberFormat() const {
// There is a small difference between T# and CB number types, account for it.
const auto number_fmt = NumberFormat(info.number_type);
return number_fmt == NumberFormat::SnormNz ? NumberFormat::Srgb : number_fmt;
}
};
} // namespace AmdGpu
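
Pitch(), Height() and GetColorSliceSize() above all count in 8x8-pixel micro-tiles; the worked example below (the 1280x720 RGBA8 single-sample target is invented purely for illustration, not taken from the commit) shows how the tile_max fields round-trip back to pixel dimensions and a byte size.

#include <cstdint>

int main() {
    const std::uint32_t width = 1280, height = 720;
    const std::uint32_t bytes_per_element = 4; // 32-bit color format, 1 sample

    // pitch.tile_max holds (pitch / 8) - 1, so Pitch() = (tile_max + 1) << 3.
    const std::uint32_t pitch_tile_max = width / 8 - 1;    // 159
    const std::uint32_t pitch = (pitch_tile_max + 1) << 3; // 1280

    // slice.tile_max holds the 8x8 tile count of a slice minus one,
    // so Height() = (tile_max + 1) * 64 / Pitch().
    const std::uint32_t slice_tile_max = (width * height) / 64 - 1;          // 14399
    const std::uint32_t computed_height = (slice_tile_max + 1) * 64 / pitch; // 720

    // GetColorSliceSize() = bytes_per_element * (tile_max + 1) * 64 * NumSamples().
    const std::uint32_t slice_size = bytes_per_element * (slice_tile_max + 1) * 64 * 1;
    // = 4 * 14400 * 64 = 3686400 bytes for one 1280x720 slice.

    return (pitch == 1280 && computed_height == 720 && slice_size == 3686400u) ? 0 : 1;
}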

View File

@ -0,0 +1,291 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/assert.h"
#include "common/types.h"
#include "video_core/amdgpu/tiling.h"
namespace AmdGpu {
enum class ZOrder : u32 {
LateZ = 0,
EarlyZLateZ = 1,
ReZ = 2,
EarlyZReZ = 3,
};
enum class ConservativeDepth : u32 {
Any = 0,
LessThanZ = 1,
GreaterThanZ = 2,
};
struct DepthShaderControl {
u32 z_export_enable : 1;
u32 stencil_test_val_export_enable : 1;
u32 stencil_op_val_export_enable : 1;
u32 : 1;
ZOrder z_order : 2;
u32 kill_enable : 1;
u32 coverage_to_mask_enable : 1;
u32 mask_export_enable : 1;
u32 exec_on_hier_fail : 1;
u32 exec_on_noop : 1;
u32 alpha_to_mask_disable : 1;
u32 depth_before_shader : 1;
ConservativeDepth conservative_z_export : 2;
};
enum class CompareFunc : u32 {
Never = 0,
Less = 1,
Equal = 2,
LessEqual = 3,
Greater = 4,
NotEqual = 5,
GreaterEqual = 6,
Always = 7,
};
struct DepthControl {
u32 stencil_enable : 1;
u32 depth_enable : 1;
u32 depth_write_enable : 1;
u32 depth_bounds_enable : 1;
CompareFunc depth_func : 3;
u32 backface_enable : 1;
CompareFunc stencil_ref_func : 3;
u32 : 9;
CompareFunc stencil_bf_func : 3;
u32 : 7;
u32 enable_color_writes_on_depth_fail : 1;
u32 disable_color_writes_on_depth_pass : 1;
};
enum class StencilFunc : u32 {
Keep = 0,
Zero = 1,
Ones = 2,
ReplaceTest = 3,
ReplaceOp = 4,
AddClamp = 5,
SubClamp = 6,
Invert = 7,
AddWrap = 8,
SubWrap = 9,
And = 10,
Or = 11,
Xor = 12,
Nand = 13,
Nor = 14,
Xnor = 15,
};
struct StencilControl {
StencilFunc stencil_fail_front : 4;
StencilFunc stencil_zpass_front : 4;
StencilFunc stencil_zfail_front : 4;
StencilFunc stencil_fail_back : 4;
StencilFunc stencil_zpass_back : 4;
StencilFunc stencil_zfail_back : 4;
};
struct StencilRefMask {
u8 stencil_test_val;
u8 stencil_mask;
u8 stencil_write_mask;
u8 stencil_op_val;
};
struct DepthRenderControl {
u32 depth_clear_enable : 1;
u32 stencil_clear_enable : 1;
u32 depth_copy : 1;
u32 stencil_copy : 1;
u32 resummarize_enable : 1;
u32 stencil_compress_disable : 1;
u32 depth_compress_disable : 1;
u32 copy_centroid : 1;
u32 copy_sample : 1;
u32 decompress_enable : 1;
};
struct DepthView {
u32 slice_start : 11;
u32 : 2;
u32 slice_max : 11;
u32 z_read_only : 1;
u32 stencil_read_only : 1;
u32 NumSlices() const {
return slice_max + 1u;
}
};
enum class ForceEnable : u32 {
Off = 0,
Enable = 1,
Disable = 2,
};
enum class ForceSumm : u32 {
Off = 0,
MinZ = 1,
MaxZ = 2,
Both = 3,
};
struct DepthRenderOverride {
ForceEnable force_hiz_enable : 2;
ForceEnable force_his_enable0 : 2;
ForceEnable force_his_enable1 : 2;
u32 force_shader_z_order : 1;
u32 fast_z_disable : 1;
u32 fast_stencil_disable : 1;
u32 noop_cull_disable : 1;
u32 force_color_kill : 1;
u32 force_z_read : 1;
u32 force_stencil_read : 1;
ForceEnable force_full_z_range : 2;
u32 force_qc_smask_conflict : 1;
u32 disable_viewport_clamp : 1;
u32 ignore_sc_zrange : 1;
u32 disable_fully_covered : 1;
ForceSumm force_z_limit_summ : 2;
u32 max_tiles_in_dtt : 5;
u32 disable_tile_rate_tiles : 1;
u32 force_z_dirty : 1;
u32 force_stencil_dirty : 1;
u32 force_z_valid : 1;
u32 force_stencil_valid : 1;
u32 preserve_compression : 1;
};
struct Eqaa {
u32 max_anchor_samples : 1;
u32 : 3;
u32 ps_iter_samples : 3;
u32 : 1;
u32 mask_export_num_samples : 3;
u32 : 1;
u32 alpha_to_mask_num_samples : 3;
u32 : 1;
u32 high_quality_intersections : 1;
u32 incoherent_eqaa_reads : 1;
u32 interpolate_comp_z : 1;
u32 interpolate_src_z : 1;
u32 static_anchor_associations : 1;
u32 alpha_to_mask_eqaa_disable : 1;
u32 : 2;
u32 overrasterization_amount : 3;
u32 enable_postz_overrasterization : 1;
};
struct DepthBuffer {
enum class ZFormat : u32 {
Invalid = 0,
Z16 = 1,
Z32Float = 3,
};
enum class StencilFormat : u32 {
Invalid = 0,
Stencil8 = 1,
};
struct ZInfo {
ZFormat format : 2;
u32 num_samples : 2;
u32 : 9;
u32 tile_split : 3;
u32 : 4;
u32 tile_mode_index : 3;
u32 decompress_on_n_zplanes : 4;
u32 allow_expclear : 1;
u32 read_size : 1;
u32 tile_surface_enable : 1;
u32 clear_disallowed : 1;
u32 zrange_precision : 1;
} z_info;
struct {
StencilFormat format : 1;
} stencil_info;
u32 z_read_base;
u32 stencil_read_base;
u32 z_write_base;
u32 stencil_write_base;
struct {
u32 pitch_tile_max : 11;
u32 height_tile_max : 11;
} depth_size;
struct {
u32 tile_max : 22;
} depth_slice;
bool DepthValid() const {
return DepthAddress() != 0 && z_info.format != ZFormat::Invalid;
}
bool StencilValid() const {
return StencilAddress() != 0 && stencil_info.format != StencilFormat::Invalid;
}
bool DepthWriteValid() const {
return DepthWriteAddress() != 0 && z_info.format != ZFormat::Invalid;
}
bool StencilWriteValid() const {
return StencilWriteAddress() != 0 && stencil_info.format != StencilFormat::Invalid;
}
u32 Pitch() const {
return (depth_size.pitch_tile_max + 1) << 3;
}
u32 Height() const {
return (depth_size.height_tile_max + 1) << 3;
}
u64 DepthAddress() const {
return u64(z_read_base) << 8;
}
u64 StencilAddress() const {
return u64(stencil_read_base) << 8;
}
u64 DepthWriteAddress() const {
return u64(z_write_base) << 8;
}
u64 StencilWriteAddress() const {
return u64(stencil_write_base) << 8;
}
u32 NumSamples() const {
return 1u << z_info.num_samples; // spec doesn't say it is a log2
}
u32 NumBits() const {
return z_info.format == ZFormat::Z32Float ? 32 : 16;
}
u32 GetDepthSliceSize() const {
ASSERT(z_info.format != ZFormat::Invalid);
const auto bpe = NumBits() >> 3; // in bytes
return (depth_slice.tile_max + 1) * 64 * bpe * NumSamples();
}
TileMode GetTileMode() const {
return static_cast<TileMode>(z_info.tile_mode_index);
}
bool IsTiled() const {
return GetTileMode() != TileMode::DisplayLinearAligned &&
GetTileMode() != TileMode::DisplayLinearGeneral;
}
};
} // namespace AmdGpu

View File

@ -0,0 +1,237 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/types.h"
namespace AmdGpu {
static constexpr u32 NUM_VIEWPORTS = 16;
static constexpr u32 NUM_CLIP_PLANES = 6;
enum class ClipSpace : u32 {
MinusWToW = 0,
ZeroToW = 1,
};
enum class PrimKillCond : u32 {
AllVtx = 0,
AnyVtx = 1,
};
struct ClipperControl {
u32 user_clip_plane_enable : 6;
u32 : 10;
u32 clip_disable : 1;
u32 : 2;
ClipSpace clip_space : 1;
u32 : 1;
PrimKillCond vtx_kill_or : 1;
u32 dx_rasterization_kill : 1;
u32 : 1;
u32 dx_linear_attr_clip_enable : 1;
u32 : 1;
u32 zclip_near_disable : 1;
u32 zclip_far_disable : 1;
bool ZclipEnable() const {
if (zclip_near_disable != zclip_far_disable) {
return false;
}
return !zclip_near_disable;
}
};
enum class PolygonMode : u32 {
Point = 0,
Line = 1,
Fill = 2,
};
enum class ProvokingVtxLast : u32 {
First = 0,
Last = 1,
};
enum class CullMode : u32 {
None = 0,
Front = 1,
Back = 2,
FrontAndBack = 3,
};
enum class FrontFace : u32 {
CounterClockwise = 0,
Clockwise = 1,
};
struct PolygonControl {
u32 cull_front : 1;
u32 cull_back : 1;
FrontFace front_face : 1;
u32 enable_polygon_mode : 2;
PolygonMode polygon_mode_front : 3;
PolygonMode polygon_mode_back : 3;
u32 enable_polygon_offset_front : 1;
u32 enable_polygon_offset_back : 1;
u32 enable_polygon_offset_para : 1;
u32 : 2;
u32 enable_window_offset : 1;
u32 : 2;
ProvokingVtxLast provoking_vtx_last : 1;
u32 persp_corr_dis : 1;
u32 multi_prim_ib_ena : 1;
PolygonMode PolyMode() const {
return enable_polygon_mode ? polygon_mode_front : PolygonMode::Fill;
}
CullMode CullingMode() const {
return static_cast<CullMode>(cull_front | cull_back << 1);
}
bool NeedsBias() const {
return enable_polygon_offset_back || enable_polygon_offset_front ||
enable_polygon_offset_para;
}
};
struct VsOutputControl {
u32 clip_distance_enable : 8;
u32 cull_distance_enable : 8;
u32 use_vtx_point_size : 1;
u32 use_vtx_edge_flag : 1;
u32 use_vtx_render_target_idx : 1;
u32 use_vtx_viewport_idx : 1;
u32 use_vtx_kill_flag : 1;
u32 vs_out_misc_enable : 1;
u32 vs_out_ccdist0_enable : 1;
u32 vs_out_ccdist1_enable : 1;
u32 vs_out_misc_side_bus_ena : 1;
u32 use_vtx_gs_cut_flag : 1;
bool IsClipDistEnabled(u32 index) const {
return (clip_distance_enable >> index) & 1;
}
bool IsCullDistEnabled(u32 index) const {
return (cull_distance_enable >> index) & 1;
}
};
struct LineControl {
u32 width_fixed_point;
float Width() const {
return static_cast<float>(width_fixed_point) / 8.0;
}
};
struct ModeControl {
u32 msaa_enable : 1;
u32 vport_scissor_enable : 1;
u32 line_stripple_enable : 1;
u32 send_unlit_stiles_to_pkr : 1;
};
struct Scissor {
struct {
s16 top_left_x;
s16 top_left_y;
};
struct {
s16 bottom_right_x;
s16 bottom_right_y;
};
static u16 Clamp(s16 value) {
return std::max(s16(0), value);
}
u32 GetWidth() const {
return static_cast<u32>(Clamp(bottom_right_x) - Clamp(top_left_x));
}
u32 GetHeight() const {
return static_cast<u32>(Clamp(bottom_right_y) - Clamp(top_left_y));
}
};
struct WindowOffset {
s32 window_x_offset : 16;
s32 window_y_offset : 16;
};
struct ViewportScissor {
struct {
u16 top_left_x : 15;
u16 top_left_y : 15;
u16 window_offset_disable : 1;
};
struct {
u16 bottom_right_x : 15;
u16 bottom_right_y : 15;
};
u32 GetWidth() const {
return bottom_right_x - top_left_x;
}
u32 GetHeight() const {
return bottom_right_y - top_left_y;
}
};
struct ViewportDepth {
float zmin;
float zmax;
};
struct ViewportBounds {
float xscale;
float xoffset;
float yscale;
float yoffset;
float zscale;
float zoffset;
};
struct ViewportControl {
u32 xscale_enable : 1;
u32 xoffset_enable : 1;
u32 yscale_enable : 1;
u32 yoffset_enable : 1;
u32 zscale_enable : 1;
u32 zoffset_enable : 1;
u32 : 2;
u32 xy_transformed : 1;
u32 z_transformed : 1;
u32 w_transformed : 1;
u32 perfcounter_ref : 1;
};
struct ClipUserData {
u32 data_x;
u32 data_y;
u32 data_z;
u32 data_w;
};
struct AaConfig {
u32 msaa_num_samples : 3;
u32 : 1;
u32 aa_mask_centroid_dtmn : 1;
u32 : 8;
u32 max_sample_dst : 4;
u32 : 3;
u32 msaa_exposed_samples : 3;
u32 : 1;
u32 detail_to_exposed_mode : 2;
u32 NumSamples() const {
return 1 << msaa_num_samples;
}
};
} // namespace AmdGpu
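
// Hypothetical usage sketch (not part of the commit): expanding ViewportBounds into an
// origin/extent rectangle, assuming the usual hardware convention where the scale fields
// hold half the extent and the offset fields hold the centre.
struct ViewportRect {
    float x, y, width, height;
};
inline ViewportRect ToRect(const AmdGpu::ViewportBounds& vp) {
    return {vp.xoffset - vp.xscale, vp.yoffset - vp.yscale, vp.xscale * 2.0f, vp.yscale * 2.0f};
}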

View File

@ -0,0 +1,241 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/assert.h"
#include "common/types.h"
#include "shader_recompiler/params.h"
namespace AmdGpu {
static constexpr u32 NUM_USER_DATA = 16;
using UserData = std::array<u32, NUM_USER_DATA>;
struct BinaryInfo {
static constexpr std::array<u8, 7> signature_ref = {0x4f, 0x72, 0x62, 0x53,
0x68, 0x64, 0x72}; // OrbShdr
std::array<u8, sizeof(signature_ref)> signature;
u8 version;
u32 pssl_or_cg : 1;
u32 cached : 1;
u32 type : 4;
u32 source_type : 2;
u32 length : 24;
u8 chunk_usage_base_offset_in_dw;
u8 num_input_usage_slots;
u8 is_srt : 1;
u8 is_srt_used_info_valid : 1;
u8 is_extended_usage_info : 1;
u8 reserved2 : 5;
u8 reserved3;
u64 shader_hash;
u32 crc32;
bool Valid() const {
return signature == signature_ref;
}
};
enum class FpRoundMode : u32 {
NearestEven = 0,
PlusInf = 1,
MinInf = 2,
ToZero = 3,
};
enum class FpDenormMode : u32 {
InOutFlush = 0,
InAllowOutFlush = 1,
InFlushOutAllow = 2,
InOutAllow = 3,
};
struct ShaderProgram {
u64 address : 40;
struct {
u32 num_vgprs : 6;
u32 num_sgprs : 4;
u32 priority : 2;
FpRoundMode fp_round_mode32 : 2;
FpRoundMode fp_round_mode64 : 2;
FpDenormMode fp_denorm_mode32 : 2;
FpDenormMode fp_denorm_mode64 : 2;
u32 : 4;
u32 vgpr_comp_cnt : 2;
u32 : 6;
u32 scratch_en : 1;
u32 num_user_regs : 5;
u32 : 1;
u32 oc_lds_en : 1;
} settings;
UserData user_data;
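// The code address is stored in 256-byte units; Address() shifts it back into a byte pointer.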
template <typename T = u8*>
const T Address() const {
return std::bit_cast<T>(address << 8);
}
[[nodiscard]] u32 NumVgprs() const {
// Each increment allocates 4 registers, where 0 = 4 registers.
return (settings.num_vgprs + 1) * 4;
}
};
struct VsOutputConfig {
u32 : 1;
u32 export_count_min_one : 5;
u32 half_pack : 1;
u32 NumExports() const {
return export_count_min_one + 1;
}
};
struct PsInputControl {
u32 input_offset : 5;
u32 use_default : 1;
u32 : 2;
u32 default_value : 2;
u32 flat_shade : 1;
};
struct PsInput {
u32 persp_sample_ena : 1;
u32 persp_center_ena : 1;
u32 persp_centroid_ena : 1;
u32 persp_pull_model_ena : 1;
u32 linear_sample_ena : 1;
u32 linear_center_ena : 1;
u32 linear_centroid_ena : 1;
u32 line_stipple_tex_ena : 1;
u32 pos_x_float_ena : 1;
u32 pos_y_float_ena : 1;
u32 pos_z_float_ena : 1;
u32 pos_w_float_ena : 1;
u32 front_face_ena : 1;
u32 ancillary_ena : 1;
u32 sample_coverage_ena : 1;
u32 pos_fixed_pt_ena : 1;
bool operator==(const PsInput&) const = default;
};
enum class ShaderExportComp : u32 {
None = 0,
OneComp = 1,
TwoComp = 2,
FourCompCompressed = 3,
FourComp = 4,
};
struct ShaderPosFormat {
ShaderExportComp pos0 : 4;
ShaderExportComp pos1 : 4;
ShaderExportComp pos2 : 4;
ShaderExportComp pos3 : 4;
};
enum class ShaderExportFormat : u32 {
Zero = 0,
R_32 = 1,
GR_32 = 2,
AR_32 = 3,
ABGR_FP16 = 4,
ABGR_UNORM16 = 5,
ABGR_SNORM16 = 6,
ABGR_UINT16 = 7,
ABGR_SINT16 = 8,
ABGR_32 = 9,
};
struct ColorExportFormat {
u32 raw;
[[nodiscard]] ShaderExportFormat GetFormat(const u32 buf_idx) const {
return static_cast<ShaderExportFormat>((raw >> (buf_idx * 4)) & 0xfu);
}
};
struct ComputeProgram {
u32 dispatch_initiator;
u32 dim_x;
u32 dim_y;
u32 dim_z;
u32 start_x;
u32 start_y;
u32 start_z;
struct {
u16 full;
u16 partial;
} num_thread_x, num_thread_y, num_thread_z;
u32 pad0;
u32 max_wave_id : 12;
u64 address : 40;
std::array<u32, 4> pad1;
struct {
u64 num_vgprs : 6;
u64 num_sgprs : 4;
u64 : 23;
u64 num_user_regs : 5;
u64 : 1;
u64 tgid_enable : 3;
u64 : 5;
u64 lds_dwords : 9;
} settings;
u32 pad2;
u32 resource_limits;
std::array<u32, 42> pad3;
UserData user_data;
template <typename T = u8*>
const T Address() const {
return std::bit_cast<T>(address << 8);
}
u32 SharedMemSize() const noexcept {
// lds_dwords is in units of 128 dwords. We return bytes.
return settings.lds_dwords * 128 * 4;
}
u32 NumWorkgroups() const noexcept {
return dim_x * dim_y * dim_z;
}
bool IsTgidEnabled(u32 i) const noexcept {
return (settings.tgid_enable >> i) & 1;
}
};
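// Locates the OrbShdr trailer: either the first dword is the mov-vcc_hi marker whose literal
// encodes the trailer offset, or the code is scanned forward until a valid signature is found.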
static constexpr const BinaryInfo& SearchBinaryInfo(const u32* code) {
constexpr u32 token_mov_vcchi = 0xBEEB03FF;
if (code[0] == token_mov_vcchi) {
const auto* info = std::bit_cast<const BinaryInfo*>(code + (code[1] + 1) * 2);
if (info->Valid()) {
return *info;
}
}
constexpr u32 signature_size = sizeof(BinaryInfo::signature_ref) / sizeof(u8);
constexpr u32 search_limit = 0x4000;
const u32* end = code + search_limit;
for (const u32* it = code; it < end; ++it) {
if (const BinaryInfo* info = std::bit_cast<const BinaryInfo*>(it); info->Valid()) {
return *info;
}
}
UNREACHABLE_MSG("Shader binary info not found.");
}
static constexpr Shader::ShaderParams GetParams(const auto& sh) {
const auto* code = sh.template Address<u32*>();
const auto& bininfo = SearchBinaryInfo(code);
return {
.user_data = sh.user_data,
.code = std::span{code, bininfo.length / sizeof(u32)},
.hash = bininfo.shader_hash,
};
}
} // namespace AmdGpu
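
// Hypothetical usage sketch (not part of the commit): extracting the hash of a bound vertex
// program, assuming a populated AmdGpu::ShaderProgram named `vs_program`.
inline u64 VertexShaderHash(const AmdGpu::ShaderProgram& vs_program) {
    const auto params = AmdGpu::GetParams(vs_program); // walks the code to find BinaryInfo
    return params.hash;                                // 64-bit hash written by the shader compiler
}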

View File

@ -0,0 +1,20 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <bit>
#include "common/types.h"
namespace AmdGpu {
struct BorderColorBuffer {
u64 base_addr : 40;
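// base_addr is stored in 256-byte units; shifting by 8 recovers the byte address.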
template <typename T = VAddr>
const T Address() const {
return std::bit_cast<T>(base_addr << 8);
}
};
} // namespace AmdGpu

View File

@ -0,0 +1,257 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/assert.h"
#include "common/types.h"
namespace AmdGpu {
enum class PrimitiveType : u32 {
None = 0,
PointList = 1,
LineList = 2,
LineStrip = 3,
TriangleList = 4,
TriangleFan = 5,
TriangleStrip = 6,
PatchPrimitive = 9,
AdjLineList = 10,
AdjLineStrip = 11,
AdjTriangleList = 12,
AdjTriangleStrip = 13,
RectList = 17,
LineLoop = 18,
QuadList = 19,
QuadStrip = 20,
Polygon = 21,
};
struct IndexBufferBase {
u32 base_addr_hi : 8;
u32 base_addr_lo;
template <typename T = VAddr>
T Address() const {
return std::bit_cast<T>((base_addr_lo & ~1U) | u64(base_addr_hi) << 32);
}
};
enum class IndexType : u32 {
Index16 = 0,
Index32 = 1,
};
enum class IndexSwapMode : u32 {
None = 0,
Swap16 = 1,
Swap32 = 2,
SwapWord = 3,
};
union IndexBufferType {
u32 raw;
struct {
IndexType index_type : 2;
IndexSwapMode swap_mode : 2;
};
};
struct VgtNumInstances {
u32 num_instances;
u32 NumInstances() const {
return num_instances == 0 ? 1 : num_instances;
}
};
struct PolygonOffset {
float depth_bias;
float front_scale;
float front_offset;
float back_scale;
float back_offset;
};
struct Address {
u32 address;
VAddr GetAddress() const {
return u64(address) << 8;
}
};
union ShaderStageEnable {
enum VgtStages : u32 {
Vs = 0u, // always enabled
EsGs = 0xB0u,
LsHs = 0x45u,
};
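// The raw enumerators are pre-packed bitfield combinations: EsGs sets es_en/gs_en/vs_en,
// LsHs sets ls_en/hs_en/vs_en.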
VgtStages raw;
struct {
u32 ls_en : 2;
u32 hs_en : 1;
u32 es_en : 2;
u32 gs_en : 1;
u32 vs_en : 2;
u32 dynamic_hs : 1;
};
bool IsStageEnabled(u32 stage) const {
switch (stage) {
case 0:
case 1:
return true;
case 2:
return gs_en;
case 3:
return es_en;
case 4:
return hs_en;
case 5:
return ls_en;
default:
UNREACHABLE();
}
}
};
union GsInstances {
u32 raw;
struct {
u32 enable : 2;
u32 count : 6;
};
bool IsEnabled() const {
return enable && count > 0;
}
};
enum class GsOutputPrimitiveType : u32 {
PointList = 0,
LineStrip = 1,
TriangleStrip = 2,
};
union GsOutPrimitiveType {
u32 raw;
struct {
GsOutputPrimitiveType outprim_type : 6;
GsOutputPrimitiveType outprim_type1 : 6;
GsOutputPrimitiveType outprim_type2 : 6;
GsOutputPrimitiveType outprim_type3 : 6;
u32 reserved : 3;
u32 unique_type_per_stream : 1;
};
GsOutputPrimitiveType GetPrimitiveType(u32 stream) const {
if (unique_type_per_stream == 0) {
return outprim_type;
}
switch (stream) {
case 0:
return outprim_type;
case 1:
return outprim_type1;
case 2:
return outprim_type2;
case 3:
return outprim_type3;
default:
UNREACHABLE();
}
}
};
enum class GsScenario : u32 {
Off = 0,
ScenarioA = 1,
ScenarioB = 2,
ScenarioG = 3,
ScenarioC = 4,
};
struct GsMode {
GsScenario mode : 3;
u32 cut_mode : 2;
u32 : 17;
u32 onchip : 2;
};
struct StreamOutControl {
u32 offset_update_done : 1;
u32 : 31;
};
union StreamOutConfig {
u32 raw;
struct {
u32 streamout_0_en : 1;
u32 streamout_1_en : 1;
u32 streamout_2_en : 1;
u32 streamout_3_en : 1;
u32 rast_stream : 3;
u32 : 1;
u32 rast_stream_mask : 4;
u32 : 19;
u32 use_rast_stream_mask : 1;
};
};
struct StreamOutBufferConfig {
u32 stream_0_buf_en : 4;
u32 stream_1_buf_en : 4;
u32 stream_2_buf_en : 4;
u32 stream_3_buf_en : 4;
};
struct LsHsConfig {
u32 num_patches : 8;
u32 hs_input_control_points : 6;
u32 hs_output_control_points : 6;
};
enum class TessellationType : u32 {
Isoline = 0,
Triangle = 1,
Quad = 2,
};
enum class TessellationPartitioning : u32 {
Integer = 0,
Pow2 = 1,
FracOdd = 2,
FracEven = 3,
};
enum class TessellationTopology : u32 {
Point = 0,
Line = 1,
TriangleCw = 2,
TriangleCcw = 3,
};
struct TessellationConfig {
TessellationType type : 2;
TessellationPartitioning partitioning : 3;
TessellationTopology topology : 3;
};
struct TessFactorMemoryBase {
u32 base;
u64 MemoryBase() const {
return static_cast<u64>(base) << 8;
}
};
struct TessFactorClamp {
float hs_max_tess;
float hs_min_tess;
};
} // namespace AmdGpu
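
// Hypothetical usage sketch (not part of the commit): computing the byte size of a bound
// index buffer, assuming a populated AmdGpu::IndexBufferType `index_type` and an index count.
inline u64 IndexBufferSizeBytes(const AmdGpu::IndexBufferType& index_type, u32 num_indices) {
    const u32 index_size =
        index_type.index_type == AmdGpu::IndexType::Index16 ? sizeof(u16) : sizeof(u32);
    return u64(num_indices) * index_size;
}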

View File

@ -1,146 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <string_view>
#include <fmt/format.h>
#include "common/types.h"
namespace AmdGpu {
enum class FpRoundMode : u32 {
NearestEven = 0,
PlusInf = 1,
MinInf = 2,
ToZero = 3,
};
enum class FpDenormMode : u32 {
InOutFlush = 0,
InAllowOutFlush = 1,
InFlushOutAllow = 2,
InOutAllow = 3,
};
enum class TessellationType : u32 {
Isoline = 0,
Triangle = 1,
Quad = 2,
};
constexpr std::string_view NameOf(TessellationType type) {
switch (type) {
case TessellationType::Isoline:
return "Isoline";
case TessellationType::Triangle:
return "Triangle";
case TessellationType::Quad:
return "Quad";
default:
return "Unknown";
}
}
enum class TessellationPartitioning : u32 {
Integer = 0,
Pow2 = 1,
FracOdd = 2,
FracEven = 3,
};
constexpr std::string_view NameOf(TessellationPartitioning partitioning) {
switch (partitioning) {
case TessellationPartitioning::Integer:
return "Integer";
case TessellationPartitioning::Pow2:
return "Pow2";
case TessellationPartitioning::FracOdd:
return "FracOdd";
case TessellationPartitioning::FracEven:
return "FracEven";
default:
return "Unknown";
}
}
enum class TessellationTopology : u32 {
Point = 0,
Line = 1,
TriangleCw = 2,
TriangleCcw = 3,
};
constexpr std::string_view NameOf(TessellationTopology topology) {
switch (topology) {
case TessellationTopology::Point:
return "Point";
case TessellationTopology::Line:
return "Line";
case TessellationTopology::TriangleCw:
return "TriangleCw";
case TessellationTopology::TriangleCcw:
return "TriangleCcw";
default:
return "Unknown";
}
}
// See `VGT_PRIMITIVE_TYPE` description in [Radeon Sea Islands 3D/Compute Register Reference Guide]
enum class PrimitiveType : u32 {
None = 0,
PointList = 1,
LineList = 2,
LineStrip = 3,
TriangleList = 4,
TriangleFan = 5,
TriangleStrip = 6,
PatchPrimitive = 9,
AdjLineList = 10,
AdjLineStrip = 11,
AdjTriangleList = 12,
AdjTriangleStrip = 13,
RectList = 17,
LineLoop = 18,
QuadList = 19,
QuadStrip = 20,
Polygon = 21,
};
enum class GsOutputPrimitiveType : u32 {
PointList = 0,
LineStrip = 1,
TriangleStrip = 2,
};
} // namespace AmdGpu
template <>
struct fmt::formatter<AmdGpu::TessellationType> {
constexpr auto parse(format_parse_context& ctx) {
return ctx.begin();
}
auto format(AmdGpu::TessellationType type, format_context& ctx) const {
return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type));
}
};
template <>
struct fmt::formatter<AmdGpu::TessellationPartitioning> {
constexpr auto parse(format_parse_context& ctx) {
return ctx.begin();
}
auto format(AmdGpu::TessellationPartitioning type, format_context& ctx) const {
return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type));
}
};
template <>
struct fmt::formatter<AmdGpu::TessellationTopology> {
constexpr auto parse(format_parse_context& ctx) {
return ctx.begin();
}
auto format(AmdGpu::TessellationTopology type, format_context& ctx) const {
return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type));
}
};

View File

@ -23,7 +23,7 @@ namespace VideoCore {
static constexpr size_t DataShareBufferSize = 64_KB;
static constexpr size_t StagingBufferSize = 512_MB;
static constexpr size_t UboStreamBufferSize = 128_MB;
static constexpr size_t UboStreamBufferSize = 64_MB;
static constexpr size_t DownloadBufferSize = 128_MB;
static constexpr size_t DeviceBufferSize = 128_MB;
static constexpr size_t MaxPageFaults = 1024;
@ -329,8 +329,7 @@ void BufferCache::BindIndexBuffer(u32 index_offset) {
const auto& regs = liverpool->regs;
// Figure out index type and size.
const bool is_index16 =
regs.index_buffer_type.index_type == AmdGpu::Liverpool::IndexType::Index16;
const bool is_index16 = regs.index_buffer_type.index_type == AmdGpu::IndexType::Index16;
const vk::IndexType index_type = is_index16 ? vk::IndexType::eUint16 : vk::IndexType::eUint32;
const u32 index_size = is_index16 ? sizeof(u16) : sizeof(u32);
const VAddr index_address =

View File

@ -13,27 +13,27 @@
namespace Vulkan::LiverpoolToVK {
using DepthBuffer = Liverpool::DepthBuffer;
using DepthBuffer = AmdGpu::DepthBuffer;
vk::StencilOp StencilOp(Liverpool::StencilFunc op) {
vk::StencilOp StencilOp(AmdGpu::StencilFunc op) {
switch (op) {
case Liverpool::StencilFunc::Keep:
case AmdGpu::StencilFunc::Keep:
return vk::StencilOp::eKeep;
case Liverpool::StencilFunc::Zero:
case AmdGpu::StencilFunc::Zero:
return vk::StencilOp::eZero;
case Liverpool::StencilFunc::ReplaceTest:
case AmdGpu::StencilFunc::ReplaceTest:
return vk::StencilOp::eReplace;
case Liverpool::StencilFunc::AddClamp:
case AmdGpu::StencilFunc::AddClamp:
return vk::StencilOp::eIncrementAndClamp;
case Liverpool::StencilFunc::SubClamp:
case AmdGpu::StencilFunc::SubClamp:
return vk::StencilOp::eDecrementAndClamp;
case Liverpool::StencilFunc::Invert:
case AmdGpu::StencilFunc::Invert:
return vk::StencilOp::eInvert;
case Liverpool::StencilFunc::AddWrap:
case AmdGpu::StencilFunc::AddWrap:
return vk::StencilOp::eIncrementAndWrap;
case Liverpool::StencilFunc::SubWrap:
case AmdGpu::StencilFunc::SubWrap:
return vk::StencilOp::eDecrementAndWrap;
case Liverpool::StencilFunc::ReplaceOp:
case AmdGpu::StencilFunc::ReplaceOp:
return vk::StencilOp::eReplace;
default:
UNREACHABLE();
@ -41,23 +41,23 @@ vk::StencilOp StencilOp(Liverpool::StencilFunc op) {
}
}
vk::CompareOp CompareOp(Liverpool::CompareFunc func) {
vk::CompareOp CompareOp(AmdGpu::CompareFunc func) {
switch (func) {
case Liverpool::CompareFunc::Always:
case AmdGpu::CompareFunc::Always:
return vk::CompareOp::eAlways;
case Liverpool::CompareFunc::Equal:
case AmdGpu::CompareFunc::Equal:
return vk::CompareOp::eEqual;
case Liverpool::CompareFunc::GreaterEqual:
case AmdGpu::CompareFunc::GreaterEqual:
return vk::CompareOp::eGreaterOrEqual;
case Liverpool::CompareFunc::Greater:
case AmdGpu::CompareFunc::Greater:
return vk::CompareOp::eGreater;
case Liverpool::CompareFunc::LessEqual:
case AmdGpu::CompareFunc::LessEqual:
return vk::CompareOp::eLessOrEqual;
case Liverpool::CompareFunc::Less:
case AmdGpu::CompareFunc::Less:
return vk::CompareOp::eLess;
case Liverpool::CompareFunc::NotEqual:
case AmdGpu::CompareFunc::NotEqual:
return vk::CompareOp::eNotEqual;
case Liverpool::CompareFunc::Never:
case AmdGpu::CompareFunc::Never:
return vk::CompareOp::eNever;
default:
UNREACHABLE();
@ -126,13 +126,13 @@ vk::PrimitiveTopology PrimitiveType(AmdGpu::PrimitiveType type) {
}
}
vk::PolygonMode PolygonMode(Liverpool::PolygonMode mode) {
vk::PolygonMode PolygonMode(AmdGpu::PolygonMode mode) {
switch (mode) {
case Liverpool::PolygonMode::Point:
case AmdGpu::PolygonMode::Point:
return vk::PolygonMode::ePoint;
case Liverpool::PolygonMode::Line:
case AmdGpu::PolygonMode::Line:
return vk::PolygonMode::eLine;
case Liverpool::PolygonMode::Fill:
case AmdGpu::PolygonMode::Fill:
return vk::PolygonMode::eFill;
default:
UNREACHABLE();
@ -140,15 +140,15 @@ vk::PolygonMode PolygonMode(Liverpool::PolygonMode mode) {
}
}
vk::CullModeFlags CullMode(Liverpool::CullMode mode) {
vk::CullModeFlags CullMode(AmdGpu::CullMode mode) {
switch (mode) {
case Liverpool::CullMode::None:
case AmdGpu::CullMode::None:
return vk::CullModeFlagBits::eNone;
case Liverpool::CullMode::Front:
case AmdGpu::CullMode::Front:
return vk::CullModeFlagBits::eFront;
case Liverpool::CullMode::Back:
case AmdGpu::CullMode::Back:
return vk::CullModeFlagBits::eBack;
case Liverpool::CullMode::FrontAndBack:
case AmdGpu::CullMode::FrontAndBack:
return vk::CullModeFlagBits::eFrontAndBack;
default:
UNREACHABLE();
@ -156,11 +156,11 @@ vk::CullModeFlags CullMode(Liverpool::CullMode mode) {
}
}
vk::FrontFace FrontFace(Liverpool::FrontFace face) {
vk::FrontFace FrontFace(AmdGpu::FrontFace face) {
switch (face) {
case Liverpool::FrontFace::Clockwise:
case AmdGpu::FrontFace::Clockwise:
return vk::FrontFace::eClockwise;
case Liverpool::FrontFace::CounterClockwise:
case AmdGpu::FrontFace::CounterClockwise:
return vk::FrontFace::eCounterClockwise;
default:
UNREACHABLE();
@ -168,8 +168,8 @@ vk::FrontFace FrontFace(Liverpool::FrontFace face) {
}
}
vk::BlendFactor BlendFactor(Liverpool::BlendControl::BlendFactor factor) {
using BlendFactor = Liverpool::BlendControl::BlendFactor;
vk::BlendFactor BlendFactor(AmdGpu::BlendControl::BlendFactor factor) {
using BlendFactor = AmdGpu::BlendControl::BlendFactor;
switch (factor) {
case BlendFactor::Zero:
return vk::BlendFactor::eZero;
@ -214,8 +214,8 @@ vk::BlendFactor BlendFactor(Liverpool::BlendControl::BlendFactor factor) {
}
}
bool IsDualSourceBlendFactor(Liverpool::BlendControl::BlendFactor factor) {
using BlendFactor = Liverpool::BlendControl::BlendFactor;
bool IsDualSourceBlendFactor(AmdGpu::BlendControl::BlendFactor factor) {
using BlendFactor = AmdGpu::BlendControl::BlendFactor;
switch (factor) {
case BlendFactor::Src1Color:
case BlendFactor::Src1Alpha:
@ -227,8 +227,8 @@ bool IsDualSourceBlendFactor(Liverpool::BlendControl::BlendFactor factor) {
}
}
vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func) {
using BlendFunc = Liverpool::BlendControl::BlendFunc;
vk::BlendOp BlendOp(AmdGpu::BlendControl::BlendFunc func) {
using BlendFunc = AmdGpu::BlendControl::BlendFunc;
switch (func) {
case BlendFunc::Add:
return vk::BlendOp::eAdd;
@ -245,8 +245,8 @@ vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func) {
}
}
vk::LogicOp LogicOp(Liverpool::ColorControl::LogicOp logic_op) {
using LogicOp = Liverpool::ColorControl::LogicOp;
vk::LogicOp LogicOp(AmdGpu::ColorControl::LogicOp logic_op) {
using LogicOp = AmdGpu::ColorControl::LogicOp;
switch (logic_op) {
case LogicOp::Clear:
return vk::LogicOp::eClear;
@ -805,9 +805,9 @@ vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat
return format->vk_format;
}
vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color_buffer) {
vk::ClearValue ColorBufferClearValue(const AmdGpu::ColorBuffer& color_buffer) {
const auto comp_swizzle = color_buffer.Swizzle();
const auto format = color_buffer.info.format.Value();
const auto format = AmdGpu::DataFormat(color_buffer.info.format);
const auto number_type = color_buffer.GetFixedNumberFormat();
const auto& c0 = color_buffer.clear_word0;

View File

@ -5,36 +5,37 @@
#include <span>
#include "common/assert.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/amdgpu/pixel_format.h"
#include "video_core/amdgpu/regs_color.h"
#include "video_core/amdgpu/regs_depth.h"
#include "video_core/amdgpu/regs_primitive.h"
#include "video_core/amdgpu/regs_vertex.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan::LiverpoolToVK {
using Liverpool = AmdGpu::Liverpool;
vk::StencilOp StencilOp(AmdGpu::StencilFunc op);
vk::StencilOp StencilOp(Liverpool::StencilFunc op);
vk::CompareOp CompareOp(Liverpool::CompareFunc func);
vk::CompareOp CompareOp(AmdGpu::CompareFunc func);
bool IsPrimitiveCulled(AmdGpu::PrimitiveType type);
vk::PrimitiveTopology PrimitiveType(AmdGpu::PrimitiveType type);
vk::PolygonMode PolygonMode(Liverpool::PolygonMode mode);
vk::PolygonMode PolygonMode(AmdGpu::PolygonMode mode);
vk::CullModeFlags CullMode(Liverpool::CullMode mode);
vk::CullModeFlags CullMode(AmdGpu::CullMode mode);
vk::FrontFace FrontFace(Liverpool::FrontFace mode);
vk::FrontFace FrontFace(AmdGpu::FrontFace mode);
vk::BlendFactor BlendFactor(Liverpool::BlendControl::BlendFactor factor);
vk::BlendFactor BlendFactor(AmdGpu::BlendControl::BlendFactor factor);
bool IsDualSourceBlendFactor(Liverpool::BlendControl::BlendFactor factor);
bool IsDualSourceBlendFactor(AmdGpu::BlendControl::BlendFactor factor);
vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func);
vk::BlendOp BlendOp(AmdGpu::BlendControl::BlendFunc func);
vk::LogicOp LogicOp(Liverpool::ColorControl::LogicOp logic_op);
vk::LogicOp LogicOp(AmdGpu::ColorControl::LogicOp logic_op);
vk::SamplerAddressMode ClampMode(AmdGpu::ClampMode mode);
@ -63,17 +64,17 @@ std::span<const SurfaceFormatInfo> SurfaceFormats();
vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format);
struct DepthFormatInfo {
Liverpool::DepthBuffer::ZFormat z_format;
Liverpool::DepthBuffer::StencilFormat stencil_format;
AmdGpu::DepthBuffer::ZFormat z_format;
AmdGpu::DepthBuffer::StencilFormat stencil_format;
vk::Format vk_format;
vk::FormatFeatureFlags2 flags;
};
std::span<const DepthFormatInfo> DepthFormats();
vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format,
Liverpool::DepthBuffer::StencilFormat stencil_format);
vk::Format DepthFormat(AmdGpu::DepthBuffer::ZFormat z_format,
AmdGpu::DepthBuffer::StencilFormat stencil_format);
vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color_buffer);
vk::ClearValue ColorBufferClearValue(const AmdGpu::ColorBuffer& color_buffer);
vk::SampleCountFlagBits NumSamples(u32 num_samples, vk::SampleCountFlags supported_flags);

View File

@ -3,6 +3,7 @@
#include <boost/container/small_vector.hpp>
#include "shader_recompiler/info.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
@ -31,8 +32,8 @@ ComputePipeline::ComputePipeline(const Instance& instance, Scheduler& scheduler,
const auto sharp = buffer.GetSharp(*info);
bindings.push_back({
.binding = binding++,
.descriptorType = buffer.IsStorage(sharp, profile) ? vk::DescriptorType::eStorageBuffer
: vk::DescriptorType::eUniformBuffer,
.descriptorType = buffer.IsStorage(sharp) ? vk::DescriptorType::eStorageBuffer
: vk::DescriptorType::eUniformBuffer,
.descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eCompute,
});

View File

@ -4,12 +4,10 @@
#include <algorithm>
#include <utility>
#include <boost/container/small_vector.hpp>
#include <boost/container/static_vector.hpp>
#include "common/assert.h"
#include "shader_recompiler/backend/spirv/emit_spirv_quad_rect.h"
#include "shader_recompiler/frontend/fetch_shader.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
@ -118,7 +116,7 @@ GraphicsPipeline::GraphicsPipeline(
.lineWidth = 1.0f,
},
vk::PipelineRasterizationProvokingVertexStateCreateInfoEXT{
.provokingVertexMode = key.provoking_vtx_last == Liverpool::ProvokingVtxLast::First
.provokingVertexMode = key.provoking_vtx_last == AmdGpu::ProvokingVtxLast::First
? vk::ProvokingVertexModeEXT::eFirstVertex
: vk::ProvokingVertexModeEXT::eLastVertex,
},
@ -142,7 +140,7 @@ GraphicsPipeline::GraphicsPipeline(
};
const vk::PipelineViewportDepthClipControlCreateInfoEXT clip_control = {
.negativeOneToOne = key.clip_space == Liverpool::ClipSpace::MinusWToW,
.negativeOneToOne = key.clip_space == AmdGpu::ClipSpace::MinusWToW,
};
const vk::PipelineViewportStateCreateInfo viewport_info = {
@ -259,7 +257,7 @@ GraphicsPipeline::GraphicsPipeline(
color_formats[i] = color_format;
}
std::array<vk::SampleCountFlagBits, Liverpool::NumColorBuffers> color_samples;
std::array<vk::SampleCountFlagBits, AmdGpu::NUM_COLOR_BUFFERS> color_samples;
std::ranges::transform(key.color_samples, color_samples.begin(), [&instance](u8 num_samples) {
return num_samples ? LiverpoolToVK::NumSamples(num_samples, instance.GetColorSampleCounts())
: vk::SampleCountFlagBits::e1;
@ -275,16 +273,15 @@ GraphicsPipeline::GraphicsPipeline(
.pNext = instance.IsMixedDepthSamplesSupported() ? &mixed_samples : nullptr,
.colorAttachmentCount = key.num_color_attachments,
.pColorAttachmentFormats = color_formats.data(),
.depthAttachmentFormat = key.z_format != Liverpool::DepthBuffer::ZFormat::Invalid
.depthAttachmentFormat = key.z_format != AmdGpu::DepthBuffer::ZFormat::Invalid
? depth_format
: vk::Format::eUndefined,
.stencilAttachmentFormat =
key.stencil_format != Liverpool::DepthBuffer::StencilFormat::Invalid
? depth_format
: vk::Format::eUndefined,
.stencilAttachmentFormat = key.stencil_format != AmdGpu::DepthBuffer::StencilFormat::Invalid
? depth_format
: vk::Format::eUndefined,
};
std::array<vk::PipelineColorBlendAttachmentState, Liverpool::NumColorBuffers> attachments;
std::array<vk::PipelineColorBlendAttachmentState, AmdGpu::NUM_COLOR_BUFFERS> attachments;
for (u32 i = 0; i < key.num_color_attachments; i++) {
const auto& control = key.blend_controls[i];
@ -335,7 +332,7 @@ GraphicsPipeline::GraphicsPipeline(
// Unfortunately, Vulkan doesn't provide any control over blend inputs, so below we detect
// such cases and override the alpha value in order to emulate HW behaviour.
const auto has_alpha_masked_out =
(key.cb_shader_mask.GetMask(i) & Liverpool::ColorBufferMask::ComponentA) == 0;
(key.cb_shader_mask.GetMask(i) & AmdGpu::ColorBufferMask::ComponentA) == 0;
const auto has_src_alpha_in_src_blend = src_color == vk::BlendFactor::eSrcAlpha ||
src_color == vk::BlendFactor::eOneMinusSrcAlpha;
const auto has_src_alpha_in_dst_blend = dst_color == vk::BlendFactor::eSrcAlpha ||
@ -354,7 +351,7 @@ GraphicsPipeline::GraphicsPipeline(
const vk::PipelineColorBlendStateCreateInfo color_blending = {
.logicOpEnable =
instance.IsLogicOpSupported() && key.logic_op != Liverpool::ColorControl::LogicOp::Copy,
instance.IsLogicOpSupported() && key.logic_op != AmdGpu::ColorControl::LogicOp::Copy,
.logicOp = LiverpoolToVK::LogicOp(key.logic_op),
.attachmentCount = key.num_color_attachments,
.pAttachments = attachments.data(),
@ -451,9 +448,8 @@ void GraphicsPipeline::BuildDescSetLayout() {
const auto sharp = buffer.GetSharp(*stage);
bindings.push_back({
.binding = binding++,
.descriptorType = buffer.IsStorage(sharp, profile)
? vk::DescriptorType::eStorageBuffer
: vk::DescriptorType::eUniformBuffer,
.descriptorType = buffer.IsStorage(sharp) ? vk::DescriptorType::eStorageBuffer
: vk::DescriptorType::eUniformBuffer,
.descriptorCount = 1,
.stageFlags = stage_bit,
});

View File

@ -6,10 +6,10 @@
#include <boost/container/static_vector.hpp>
#include <xxhash.h>
#include "common/types.h"
#include "shader_recompiler/frontend/fetch_shader.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_common.h"
#include "video_core/amdgpu/regs_color.h"
#include "video_core/amdgpu/regs_depth.h"
#include "video_core/amdgpu/regs_primitive.h"
#include "video_core/renderer_vulkan/vk_pipeline_common.h"
namespace VideoCore {
@ -26,8 +26,6 @@ class Instance;
class Scheduler;
class DescriptorHeap;
using Liverpool = AmdGpu::Liverpool;
template <typename T>
using VertexInputs = boost::container::static_vector<T, MaxVertexBufferCount>;
@ -36,25 +34,25 @@ struct GraphicsPipelineKey {
std::array<vk::Format, MaxVertexBufferCount> vertex_buffer_formats;
u32 patch_control_points;
u32 num_color_attachments;
std::array<Shader::PsColorBuffer, Liverpool::NumColorBuffers> color_buffers;
std::array<Liverpool::BlendControl, Liverpool::NumColorBuffers> blend_controls;
std::array<vk::ColorComponentFlags, Liverpool::NumColorBuffers> write_masks;
Liverpool::ColorBufferMask cb_shader_mask;
Liverpool::ColorControl::LogicOp logic_op;
std::array<Shader::PsColorBuffer, AmdGpu::NUM_COLOR_BUFFERS> color_buffers;
std::array<AmdGpu::BlendControl, AmdGpu::NUM_COLOR_BUFFERS> blend_controls;
std::array<vk::ColorComponentFlags, AmdGpu::NUM_COLOR_BUFFERS> write_masks;
AmdGpu::ColorBufferMask cb_shader_mask;
AmdGpu::ColorControl::LogicOp logic_op;
u8 num_samples;
u8 depth_samples;
std::array<u8, Liverpool::NumColorBuffers> color_samples;
std::array<u8, AmdGpu::NUM_COLOR_BUFFERS> color_samples;
u32 mrt_mask;
struct {
Liverpool::DepthBuffer::ZFormat z_format : 2;
Liverpool::DepthBuffer::StencilFormat stencil_format : 1;
AmdGpu::DepthBuffer::ZFormat z_format : 2;
AmdGpu::DepthBuffer::StencilFormat stencil_format : 1;
u32 depth_clamp_enable : 1;
};
struct {
AmdGpu::PrimitiveType prim_type : 5;
Liverpool::PolygonMode polygon_mode : 2;
Liverpool::ClipSpace clip_space : 1;
Liverpool::ProvokingVtxLast provoking_vtx_last : 1;
AmdGpu::PolygonMode polygon_mode : 2;
AmdGpu::ClipSpace clip_space : 1;
AmdGpu::ProvokingVtxLast provoking_vtx_last : 1;
u32 depth_clip_enable : 1;
};

View File

@ -12,14 +12,13 @@
#include "shader_recompiler/info.h"
#include "shader_recompiler/recompiler.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_presenter.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
extern std::unique_ptr<Vulkan::Presenter> presenter;
namespace Vulkan {
using Shader::LogicalStage;
@ -36,8 +35,7 @@ constexpr static std::array DescriptorHeapSizes = {
vk::DescriptorPoolSize{vk::DescriptorType::eSampler, 1024},
};
static u32 MapOutputs(std::span<Shader::OutputMap, 3> outputs,
const AmdGpu::Liverpool::VsOutputControl& ctl) {
static u32 MapOutputs(std::span<Shader::OutputMap, 3> outputs, const AmdGpu::VsOutputControl& ctl) {
u32 num_outputs = 0;
if (ctl.vs_out_misc_enable) {
@ -110,10 +108,10 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
}
case Stage::Hull: {
BuildCommon(regs.hs_program);
info.hs_info.num_input_control_points = regs.ls_hs_config.hs_input_control_points.Value();
info.hs_info.num_threads = regs.ls_hs_config.hs_output_control_points.Value();
info.hs_info.num_input_control_points = regs.ls_hs_config.hs_input_control_points;
info.hs_info.num_threads = regs.ls_hs_config.hs_output_control_points;
info.hs_info.tess_type = regs.tess_config.type;
info.hs_info.offchip_lds_enable = regs.hs_program.settings.rsrc2_hs.oc_lds_en.Value();
info.hs_info.offchip_lds_enable = regs.hs_program.settings.oc_lds_en;
// We need to initialize most hs_info fields after finding the V# with tess constants
break;
@ -130,7 +128,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
info.vs_info.num_outputs = MapOutputs(info.vs_info.outputs, regs.vs_output_control);
info.vs_info.emulate_depth_negative_one_to_one =
!instance.IsDepthClipControlSupported() &&
regs.clipper_control.clip_space == Liverpool::ClipSpace::MinusWToW;
regs.clipper_control.clip_space == AmdGpu::ClipSpace::MinusWToW;
info.vs_info.tess_emulated_primitive =
regs.primitive_type == AmdGpu::PrimitiveType::RectList ||
regs.primitive_type == AmdGpu::PrimitiveType::QuadList;
@ -157,7 +155,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
gs_info.in_vertex_data_size = regs.vgt_esgs_ring_itemsize;
gs_info.out_vertex_data_size = regs.vgt_gs_vert_itemsize[0];
gs_info.mode = regs.vgt_gs_mode.mode;
const auto params_vc = Liverpool::GetParams(regs.vs_program);
const auto params_vc = AmdGpu::GetParams(regs.vs_program);
gs_info.vs_copy = params_vc.code;
gs_info.vs_copy_hash = params_vc.hash;
DumpShader(gs_info.vs_copy, gs_info.vs_copy_hash, Shader::Stage::Vertex, 0, "copy.bin");
@ -191,7 +189,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
const auto& ps_inputs = regs.ps_inputs;
for (u32 i = 0; i < regs.num_interp; i++) {
info.fs_info.inputs[i] = {
.param_index = u8(ps_inputs[i].input_offset.Value()),
.param_index = u8(ps_inputs[i].input_offset),
.is_default = bool(ps_inputs[i].use_default),
.is_flat = bool(ps_inputs[i].flat_shade),
.default_value = u8(ps_inputs[i].default_value),
@ -327,11 +325,11 @@ bool PipelineCache::RefreshGraphicsKey() {
const bool db_enabled = regs.depth_buffer.DepthValid() || regs.depth_buffer.StencilValid();
key.z_format = regs.depth_buffer.DepthValid() ? regs.depth_buffer.z_info.format.Value()
: Liverpool::DepthBuffer::ZFormat::Invalid;
key.z_format = regs.depth_buffer.DepthValid() ? regs.depth_buffer.z_info.format
: AmdGpu::DepthBuffer::ZFormat::Invalid;
key.stencil_format = regs.depth_buffer.StencilValid()
? regs.depth_buffer.stencil_info.format.Value()
: Liverpool::DepthBuffer::StencilFormat::Invalid;
? regs.depth_buffer.stencil_info.format
: AmdGpu::DepthBuffer::StencilFormat::Invalid;
key.depth_clamp_enable = !regs.depth_render_override.disable_viewport_clamp;
key.depth_clip_enable = regs.clipper_control.ZclipEnable();
key.clip_space = regs.clipper_control.clip_space;
@ -339,17 +337,17 @@ bool PipelineCache::RefreshGraphicsKey() {
key.prim_type = regs.primitive_type;
key.polygon_mode = regs.polygon_control.PolyMode();
key.patch_control_points =
regs.stage_enable.hs_en ? regs.ls_hs_config.hs_input_control_points.Value() : 0;
regs.stage_enable.hs_en ? regs.ls_hs_config.hs_input_control_points : 0;
key.logic_op = regs.color_control.rop3;
key.depth_samples = db_enabled ? regs.depth_buffer.NumSamples() : 1;
key.num_samples = key.depth_samples;
key.cb_shader_mask = regs.color_shader_mask;
const bool skip_cb_binding =
regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable;
regs.color_control.mode == AmdGpu::ColorControl::OperationMode::Disable;
// First pass to fill render target information needed by shader recompiler
for (s32 cb = 0; cb < Liverpool::NumColorBuffers && !skip_cb_binding; ++cb) {
for (s32 cb = 0; cb < AmdGpu::NUM_COLOR_BUFFERS && !skip_cb_binding; ++cb) {
const auto& col_buf = regs.color_buffers[cb];
if (!col_buf || !regs.color_target_mask.GetMask(cb)) {
// No attachment bound or writing to it is disabled.
@ -436,15 +434,7 @@ bool PipelineCache::RefreshGraphicsStages() {
return false;
}
const auto& bininfo = Liverpool::GetBinaryInfo(*pgm);
if (!bininfo.Valid()) {
LOG_WARNING(Render_Vulkan, "Invalid binary info structure!");
key.stage_hashes[stage_out_idx] = 0;
infos[stage_out_idx] = nullptr;
return false;
}
auto params = Liverpool::GetParams(*pgm);
const auto params = AmdGpu::GetParams(*pgm);
std::optional<Shader::Gcn::FetchShaderData> fetch_shader_;
std::tie(infos[stage_out_idx], modules[stage_out_idx], fetch_shader_,
key.stage_hashes[stage_out_idx]) =
@ -463,7 +453,7 @@ bool PipelineCache::RefreshGraphicsStages() {
key.num_color_attachments = std::bit_width(key.mrt_mask);
switch (regs.stage_enable.raw) {
case Liverpool::ShaderStageEnable::VgtStages::EsGs:
case AmdGpu::ShaderStageEnable::VgtStages::EsGs:
if (!instance.IsGeometryStageSupported()) {
LOG_WARNING(Render_Vulkan, "Geometry shader stage unsupported, skipping");
return false;
@ -479,7 +469,7 @@ bool PipelineCache::RefreshGraphicsStages() {
return false;
}
break;
case Liverpool::ShaderStageEnable::VgtStages::LsHs:
case AmdGpu::ShaderStageEnable::VgtStages::LsHs:
if (!instance.IsTessellationSupported() ||
(regs.tess_config.type == AmdGpu::TessellationType::Isoline &&
!instance.IsTessellationIsolinesSupported())) {
@ -519,7 +509,7 @@ bool PipelineCache::RefreshGraphicsStages() {
bool PipelineCache::RefreshComputeKey() {
Shader::Backend::Bindings binding{};
const auto& cs_pgm = liverpool->GetCsRegs();
const auto cs_params = Liverpool::GetParams(cs_pgm);
const auto cs_params = AmdGpu::GetParams(cs_pgm);
std::tie(infos[0], modules[0], fetch_shader, compute_key.value) =
GetProgram(Shader::Stage::Compute, LogicalStage::Compute, cs_params, binding);
return true;

View File

@ -19,6 +19,10 @@ struct std::hash<vk::ShaderModule> {
}
};
namespace AmdGpu {
class Liverpool;
}
namespace Shader {
struct Info;
}

View File

@ -3,13 +3,11 @@
#include <boost/container/static_vector.hpp>
#include "shader_recompiler/info.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "shader_recompiler/resource.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_pipeline_common.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/texture_cache/texture_cache.h"
namespace Vulkan {

View File

@ -3,15 +3,16 @@
#pragma once
#include "shader_recompiler/backend/bindings.h"
#include "shader_recompiler/info.h"
#include "shader_recompiler/profile.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/renderer_vulkan/vk_common.h"
#include "video_core/texture_cache/texture_cache.h"
namespace VideoCore {
class BufferCache;
} // namespace VideoCore
#include <boost/container/small_vector.hpp>
namespace Shader {
struct Info;
struct PushData;
} // namespace Shader
namespace Vulkan {
@ -74,7 +75,7 @@ protected:
vk::UniqueDescriptorSetLayout desc_layout;
std::array<const Shader::Info*, Shader::MaxStageTypes> stages{};
bool uses_push_descriptors{};
const bool is_compute;
bool is_compute;
};
} // namespace Vulkan

View File

@ -15,6 +15,7 @@
#include <vector>
#include <fmt/ranges.h>
#include "common/assert.h"
#include "common/config.h"
#include "common/logging/log.h"
@ -459,4 +460,4 @@ vk::UniqueDebugUtilsMessengerEXT CreateDebugCallback(vk::Instance instance) {
return std::move(messenger);
}
} // namespace Vulkan
} // namespace Vulkan

View File

@ -146,6 +146,10 @@ Presenter::~Presenter() {
ImGui::Core::Shutdown(device);
}
bool Presenter::IsVideoOutSurface(const AmdGpu::ColorBuffer& color_buffer) const {
return std::ranges::find(vo_buffers_addr, color_buffer.Address()) != vo_buffers_addr.cend();
}
void Presenter::RecreateFrame(Frame* frame, u32 width, u32 height) {
const vk::Device device = instance.GetDevice();
if (frame->imgui_texture) {
@ -288,7 +292,7 @@ static vk::Format GetFrameViewFormat(const Libraries::VideoOut::PixelFormat form
Frame* Presenter::PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute,
VAddr cpu_address) {
auto desc = VideoCore::TextureCache::VideoOutDesc{attribute, cpu_address};
auto desc = VideoCore::TextureCache::ImageDesc{attribute, cpu_address};
const auto image_id = texture_cache.FindImage(desc);
texture_cache.UpdateImage(image_id);

View File

@ -6,9 +6,7 @@
#include <condition_variable>
#include "core/libraries/videoout/buffer.h"
#include "imgui/imgui_config.h"
#include "imgui/imgui_texture.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/renderer_vulkan/host_passes/fsr_pass.h"
#include "video_core/renderer_vulkan/host_passes/pp_pass.h"
#include "video_core/renderer_vulkan/vk_instance.h"
@ -82,20 +80,18 @@ public:
pp_settings.hdr = enable ? 1 : 0;
}
bool IsVideoOutSurface(const AmdGpu::Liverpool::ColorBuffer& color_buffer) const {
return std::ranges::find(vo_buffers_addr, color_buffer.Address()) != vo_buffers_addr.cend();
}
VideoCore::Image& RegisterVideoOutSurface(
const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address) {
vo_buffers_addr.emplace_back(cpu_address);
auto desc = VideoCore::TextureCache::VideoOutDesc{attribute, cpu_address};
auto desc = VideoCore::TextureCache::ImageDesc{attribute, cpu_address};
const auto image_id = texture_cache.FindImage(desc);
auto& image = texture_cache.GetImage(image_id);
image.usage.vo_surface = 1u;
return image;
}
bool IsVideoOutSurface(const AmdGpu::ColorBuffer& color_buffer) const;
Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute,
VAddr cpu_address);

View File

@ -6,6 +6,7 @@
#include "core/memory.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
@ -19,7 +20,7 @@
namespace Vulkan {
static Shader::PushData MakeUserData(const AmdGpu::Liverpool::Regs& regs) {
static Shader::PushData MakeUserData(const AmdGpu::Regs& regs) {
// TODO(roamic): Add support for multiple viewports and geometry shaders when ViewportIndex
// is encountered and implemented in the recompiler.
Shader::PushData push_data{};
@ -60,20 +61,18 @@ void Rasterizer::CpSync() {
bool Rasterizer::FilterDraw() {
const auto& regs = liverpool->regs;
// There are several cases (e.g. FCE, FMask/HTile decompression) where we don't need to do an
// actual draw and hence can skip pipeline creation.
if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::EliminateFastClear) {
if (regs.color_control.mode == AmdGpu::ColorControl::OperationMode::EliminateFastClear) {
// Clears the render target if FCE is launched before any draws
EliminateFastClear();
return false;
}
if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::FmaskDecompress) {
if (regs.color_control.mode == AmdGpu::ColorControl::OperationMode::FmaskDecompress) {
// TODO: check for a valid MRT1 to promote the draw to the resolve pass.
LOG_TRACE(Render_Vulkan, "FMask decompression pass skipped");
ScopedMarkerInsert("FmaskDecompress");
return false;
}
if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::Resolve) {
if (regs.color_control.mode == AmdGpu::ColorControl::OperationMode::Resolve) {
LOG_TRACE(Render_Vulkan, "Resolve pass");
Resolve();
return false;
@ -85,7 +84,7 @@ bool Rasterizer::FilterDraw() {
}
const bool cb_disabled =
regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable;
regs.color_control.mode == AmdGpu::ColorControl::OperationMode::Disable;
const auto depth_copy =
regs.depth_render_override.force_z_dirty && regs.depth_render_override.force_z_valid &&
regs.depth_buffer.DepthValid() && regs.depth_buffer.DepthWriteValid() &&
@ -116,7 +115,7 @@ void Rasterizer::PrepareRenderState(const GraphicsPipeline* pipeline) {
}
const bool skip_cb_binding =
regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable;
regs.color_control.mode == AmdGpu::ColorControl::OperationMode::Disable;
for (s32 cb = 0; cb < std::bit_width(key.mrt_mask); ++cb) {
auto& [image_id, desc] = cb_descs[cb];
const auto& col_buf = regs.color_buffers[cb];
@ -147,8 +146,8 @@ void Rasterizer::PrepareRenderState(const GraphicsPipeline* pipeline) {
}
}
[[nodiscard]] std::pair<u32, u32> GetDrawOffsets(
const AmdGpu::Liverpool::Regs& regs, const Shader::Info& info,
static std::pair<u32, u32> GetDrawOffsets(
const AmdGpu::Regs& regs, const Shader::Info& info,
const std::optional<Shader::Gcn::FetchShaderData>& fetch_shader) {
u32 vertex_offset = regs.index_offset;
u32 instance_offset = 0;
@ -168,7 +167,7 @@ void Rasterizer::EliminateFastClear() {
if (!col_buf || !col_buf.info.fast_clear) {
return;
}
VideoCore::TextureCache::RenderTargetDesc desc(col_buf, liverpool->last_cb_extent[0]);
VideoCore::TextureCache::ImageDesc desc(col_buf, liverpool->last_cb_extent[0]);
const auto image_id = texture_cache.FindImage(desc);
const auto& image_view = texture_cache.FindRenderTarget(image_id, desc);
if (!texture_cache.IsMetaCleared(col_buf.CmaskAddress(), col_buf.view.slice_start)) {
@ -540,7 +539,7 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
for (u32 i = 0; i < buffer_bindings.size(); i++) {
const auto& [buffer_id, vsharp, size] = buffer_bindings[i];
const auto& desc = stage.buffers[i];
const bool is_storage = desc.IsStorage(vsharp, pipeline_cache.GetProfile());
const bool is_storage = desc.IsStorage(vsharp);
const u32 alignment =
is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment();
// Buffer is not from the cache, either a special buffer or unbound.
@ -846,37 +845,27 @@ RenderState Rasterizer::BeginRendering(const GraphicsPipeline* pipeline) {
}
void Rasterizer::Resolve() {
// Read from MRT0, average all samples, and write to MRT1, which is one-sample
const auto& mrt0_hint = liverpool->last_cb_extent[0];
const auto& mrt1_hint = liverpool->last_cb_extent[1];
VideoCore::TextureCache::RenderTargetDesc mrt0_desc{liverpool->regs.color_buffers[0],
mrt0_hint};
VideoCore::TextureCache::RenderTargetDesc mrt1_desc{liverpool->regs.color_buffers[1],
mrt1_hint};
VideoCore::TextureCache::ImageDesc mrt0_desc{liverpool->regs.color_buffers[0], mrt0_hint};
VideoCore::TextureCache::ImageDesc mrt1_desc{liverpool->regs.color_buffers[1], mrt1_hint};
auto& mrt0_image = texture_cache.GetImage(texture_cache.FindImage(mrt0_desc, true));
auto& mrt1_image = texture_cache.GetImage(texture_cache.FindImage(mrt1_desc, true));
VideoCore::SubresourceRange mrt0_range;
mrt0_range.base.layer = liverpool->regs.color_buffers[0].view.slice_start;
mrt0_range.extent.layers = liverpool->regs.color_buffers[0].NumSlices() - mrt0_range.base.layer;
VideoCore::SubresourceRange mrt1_range;
mrt1_range.base.layer = liverpool->regs.color_buffers[1].view.slice_start;
mrt1_range.extent.layers = liverpool->regs.color_buffers[1].NumSlices() - mrt1_range.base.layer;
ScopeMarkerBegin(fmt::format("Resolve:MRT0={:#x}:MRT1={:#x}",
liverpool->regs.color_buffers[0].Address(),
liverpool->regs.color_buffers[1].Address()));
mrt1_image.Resolve(mrt0_image, mrt0_range, mrt1_range);
mrt1_image.Resolve(mrt0_image, mrt0_desc.view_info.range, mrt1_desc.view_info.range);
ScopeMarkerEnd();
}
void Rasterizer::DepthStencilCopy(bool is_depth, bool is_stencil) {
auto& regs = liverpool->regs;
auto read_desc = VideoCore::TextureCache::DepthTargetDesc(
auto read_desc = VideoCore::TextureCache::ImageDesc(
regs.depth_buffer, regs.depth_view, regs.depth_control,
regs.depth_htile_data_base.GetAddress(), liverpool->last_db_extent, false);
auto write_desc = VideoCore::TextureCache::DepthTargetDesc(
auto write_desc = VideoCore::TextureCache::ImageDesc(
regs.depth_buffer, regs.depth_view, regs.depth_control,
regs.depth_htile_data_base.GetAddress(), liverpool->last_db_extent, true);
@ -904,6 +893,7 @@ void Rasterizer::DepthStencilCopy(bool is_depth, bool is_stencil) {
if (is_stencil) {
aspect_mask |= vk::ImageAspectFlagBits::eStencil;
}
vk::ImageCopy region = {
.srcSubresource =
{
@ -1013,16 +1003,16 @@ void Rasterizer::UpdateViewportScissorState() const {
const auto combined_scissor_value_br = [](s16 scr, s16 win, s16 gen, s16 win_offset) {
return std::min({scr, s16(win + win_offset), s16(gen + win_offset)});
};
const bool enable_offset = !regs.window_scissor.window_offset_disable.Value();
const bool enable_offset = !regs.window_scissor.window_offset_disable;
Liverpool::Scissor scsr{};
AmdGpu::Scissor scsr{};
scsr.top_left_x = combined_scissor_value_tl(
regs.screen_scissor.top_left_x, s16(regs.window_scissor.top_left_x.Value()),
s16(regs.generic_scissor.top_left_x.Value()),
regs.screen_scissor.top_left_x, s16(regs.window_scissor.top_left_x),
s16(regs.generic_scissor.top_left_x),
enable_offset ? regs.window_offset.window_x_offset : 0);
scsr.top_left_y = combined_scissor_value_tl(
regs.screen_scissor.top_left_y, s16(regs.window_scissor.top_left_y.Value()),
s16(regs.generic_scissor.top_left_y.Value()),
regs.screen_scissor.top_left_y, s16(regs.window_scissor.top_left_y),
s16(regs.generic_scissor.top_left_y),
enable_offset ? regs.window_offset.window_y_offset : 0);
scsr.bottom_right_x = combined_scissor_value_br(
regs.screen_scissor.bottom_right_x, regs.window_scissor.bottom_right_x,
@ -1033,8 +1023,8 @@ void Rasterizer::UpdateViewportScissorState() const {
regs.generic_scissor.bottom_right_y,
enable_offset ? regs.window_offset.window_y_offset : 0);
boost::container::static_vector<vk::Viewport, Liverpool::NumViewports> viewports;
boost::container::static_vector<vk::Rect2D, Liverpool::NumViewports> scissors;
boost::container::static_vector<vk::Viewport, AmdGpu::NUM_VIEWPORTS> viewports;
boost::container::static_vector<vk::Rect2D, AmdGpu::NUM_VIEWPORTS> scissors;
if (regs.polygon_control.enable_window_offset &&
(regs.window_offset.window_x_offset != 0 || regs.window_offset.window_y_offset != 0)) {
@ -1043,7 +1033,7 @@ void Rasterizer::UpdateViewportScissorState() const {
}
const auto& vp_ctl = regs.viewport_control;
for (u32 i = 0; i < Liverpool::NumViewports; i++) {
for (u32 i = 0; i < AmdGpu::NUM_VIEWPORTS; i++) {
const auto& vp = regs.viewports[i];
const auto& vp_d = regs.viewport_depths[i];
if (vp.xscale == 0) {
@ -1059,7 +1049,7 @@ void Rasterizer::UpdateViewportScissorState() const {
// https://gitlab.freedesktop.org/mesa/mesa/-/blob/209a0ed/src/amd/vulkan/radv_cmd_buffer.c#L3103-3109
// When the clip space is ranged [-1...1], the zoffset is centered.
// By reversing the above viewport calculations, we get the following:
if (regs.clipper_control.clip_space == AmdGpu::Liverpool::ClipSpace::MinusWToW) {
if (regs.clipper_control.clip_space == AmdGpu::ClipSpace::MinusWToW) {
viewport.minDepth = zoffset - zscale;
viewport.maxDepth = zoffset + zscale;
} else {
@ -1098,13 +1088,13 @@ void Rasterizer::UpdateViewportScissorState() const {
auto vp_scsr = scsr;
if (regs.mode_control.vport_scissor_enable) {
vp_scsr.top_left_x =
std::max(vp_scsr.top_left_x, s16(regs.viewport_scissors[i].top_left_x.Value()));
std::max(vp_scsr.top_left_x, s16(regs.viewport_scissors[i].top_left_x));
vp_scsr.top_left_y =
std::max(vp_scsr.top_left_y, s16(regs.viewport_scissors[i].top_left_y.Value()));
vp_scsr.bottom_right_x =
std::min(vp_scsr.bottom_right_x, regs.viewport_scissors[i].bottom_right_x);
vp_scsr.bottom_right_y =
std::min(vp_scsr.bottom_right_y, regs.viewport_scissors[i].bottom_right_y);
std::max(vp_scsr.top_left_y, s16(regs.viewport_scissors[i].top_left_y));
vp_scsr.bottom_right_x = std::min(AmdGpu::Scissor::Clamp(vp_scsr.bottom_right_x),
regs.viewport_scissors[i].bottom_right_x);
vp_scsr.bottom_right_y = std::min(AmdGpu::Scissor::Clamp(vp_scsr.bottom_right_y),
regs.viewport_scissors[i].bottom_right_y);
}
scissors.push_back({
.offset = {vp_scsr.top_left_x, vp_scsr.top_left_y},
@ -1187,8 +1177,8 @@ void Rasterizer::UpdateDepthStencilState() const {
const auto back =
regs.depth_control.backface_enable ? regs.stencil_ref_back : regs.stencil_ref_front;
dynamic_state.SetStencilReferences(front.stencil_test_val, back.stencil_test_val);
dynamic_state.SetStencilWriteMasks(!stencil_clear ? front.stencil_write_mask.Value() : 0U,
!stencil_clear ? back.stencil_write_mask.Value() : 0U);
dynamic_state.SetStencilWriteMasks(!stencil_clear ? front.stencil_write_mask : 0U,
!stencil_clear ? back.stencil_write_mask : 0U);
dynamic_state.SetStencilCompareMasks(front.stencil_mask, back.stencil_mask);
}
}

View File

@ -127,22 +127,21 @@ private:
Common::SharedFirstMutex mapped_ranges_mutex;
PipelineCache pipeline_cache;
using RenderTargetInfo =
std::pair<VideoCore::ImageId, VideoCore::TextureCache::RenderTargetDesc>;
std::array<RenderTargetInfo, Liverpool::NumColorBuffers> cb_descs;
std::pair<VideoCore::ImageId, VideoCore::TextureCache::DepthTargetDesc> db_desc;
boost::container::static_vector<vk::DescriptorImageInfo, Shader::NumImages> image_infos;
boost::container::static_vector<vk::DescriptorBufferInfo, Shader::NumBuffers> buffer_infos;
boost::container::static_vector<VideoCore::ImageId, Shader::NumImages> bound_images;
using RenderTargetInfo = std::pair<VideoCore::ImageId, VideoCore::TextureCache::ImageDesc>;
std::array<RenderTargetInfo, AmdGpu::NUM_COLOR_BUFFERS> cb_descs;
std::pair<VideoCore::ImageId, VideoCore::TextureCache::ImageDesc> db_desc;
boost::container::static_vector<vk::DescriptorImageInfo, Shader::NUM_IMAGES> image_infos;
boost::container::static_vector<vk::DescriptorBufferInfo, Shader::NUM_BUFFERS> buffer_infos;
boost::container::static_vector<VideoCore::ImageId, Shader::NUM_IMAGES> bound_images;
Pipeline::DescriptorWrites set_writes;
Pipeline::BufferBarriers buffer_barriers;
Shader::PushData push_data;
using BufferBindingInfo = std::tuple<VideoCore::BufferId, AmdGpu::Buffer, u64>;
boost::container::static_vector<BufferBindingInfo, Shader::NumBuffers> buffer_bindings;
using ImageBindingInfo = std::pair<VideoCore::ImageId, VideoCore::TextureCache::TextureDesc>;
boost::container::static_vector<ImageBindingInfo, Shader::NumImages> image_bindings;
boost::container::static_vector<BufferBindingInfo, Shader::NUM_BUFFERS> buffer_bindings;
using ImageBindingInfo = std::pair<VideoCore::ImageId, VideoCore::TextureCache::ImageDesc>;
boost::container::static_vector<ImageBindingInfo, Shader::NUM_IMAGES> image_bindings;
bool fault_process_pending{};
bool attachment_feedback_loop{};
};

View File

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
@ -152,20 +152,20 @@ void Scheduler::SubmitExecution(SubmitInfo& info) {
};
const vk::TimelineSemaphoreSubmitInfo timeline_si = {
.waitSemaphoreValueCount = static_cast<u32>(info.wait_ticks.size()),
.waitSemaphoreValueCount = info.num_wait_semas,
.pWaitSemaphoreValues = info.wait_ticks.data(),
.signalSemaphoreValueCount = static_cast<u32>(info.signal_ticks.size()),
.signalSemaphoreValueCount = info.num_signal_semas,
.pSignalSemaphoreValues = info.signal_ticks.data(),
};
const vk::SubmitInfo submit_info = {
.pNext = &timeline_si,
.waitSemaphoreCount = static_cast<u32>(info.wait_semas.size()),
.waitSemaphoreCount = info.num_wait_semas,
.pWaitSemaphores = info.wait_semas.data(),
.pWaitDstStageMask = wait_stage_masks.data(),
.commandBufferCount = 1U,
.pCommandBuffers = &current_cmdbuf,
.signalSemaphoreCount = static_cast<u32>(info.signal_semas.size()),
.signalSemaphoreCount = info.num_signal_semas,
.pSignalSemaphores = info.signal_semas.data(),
};

View File

@ -1,14 +1,15 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <condition_variable>
#include <boost/container/static_vector.hpp>
#include <mutex>
#include <queue>
#include "common/types.h"
#include "common/unique_function.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/amdgpu/regs_color.h"
#include "video_core/amdgpu/regs_primitive.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_resource_pool.h"
@ -45,20 +46,22 @@ struct RenderState {
};
struct SubmitInfo {
boost::container::static_vector<vk::Semaphore, 3> wait_semas;
boost::container::static_vector<u64, 3> wait_ticks;
boost::container::static_vector<vk::Semaphore, 3> signal_semas;
boost::container::static_vector<u64, 3> signal_ticks;
std::array<vk::Semaphore, 3> wait_semas;
std::array<u64, 3> wait_ticks;
std::array<vk::Semaphore, 3> signal_semas;
std::array<u64, 3> signal_ticks;
vk::Fence fence;
u32 num_wait_semas;
u32 num_signal_semas;
void AddWait(vk::Semaphore semaphore, u64 tick = 1) {
wait_semas.emplace_back(semaphore);
wait_ticks.emplace_back(tick);
wait_semas[num_wait_semas] = semaphore;
wait_ticks[num_wait_semas++] = tick;
}
void AddSignal(vk::Semaphore semaphore, u64 tick = 1) {
signal_semas.emplace_back(semaphore);
signal_ticks.emplace_back(tick);
signal_semas[num_signal_semas] = semaphore;
signal_ticks[num_signal_semas++] = tick;
}
void AddSignal(vk::Fence fence) {
@ -66,9 +69,9 @@ struct SubmitInfo {
}
};
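The hunk above replaces boost::static_vector members with plain std::array plus running counters. A minimal sketch of that accumulator pattern, with stand-in names (TimelinePairs, Add): when the capacity is known up front and the container never shrinks, a fixed array and a count are enough, and the count doubles as the value handed to Vulkan.

#include <array>
#include <cassert>
#include <cstdint>

struct TimelinePairs {
    std::array<uint64_t, 3> handles{}; // placeholder for vk::Semaphore handles
    std::array<uint64_t, 3> ticks{};
    uint32_t count = 0;

    void Add(uint64_t handle, uint64_t tick = 1) {
        assert(count < handles.size()); // static_vector asserted this implicitly
        handles[count] = handle;
        ticks[count++] = tick;
    }
};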
using Viewports = boost::container::static_vector<vk::Viewport, AmdGpu::Liverpool::NumViewports>;
using Scissors = boost::container::static_vector<vk::Rect2D, AmdGpu::Liverpool::NumViewports>;
using ColorWriteMasks = std::array<vk::ColorComponentFlags, AmdGpu::Liverpool::NumColorBuffers>;
using Viewports = boost::container::static_vector<vk::Viewport, AmdGpu::NUM_VIEWPORTS>;
using Scissors = boost::container::static_vector<vk::Rect2D, AmdGpu::NUM_VIEWPORTS>;
using ColorWriteMasks = std::array<vk::ColorComponentFlags, AmdGpu::NUM_COLOR_BUFFERS>;
struct StencilOps {
vk::StencilOp fail_op{};
vk::StencilOp pass_op{};
@ -413,6 +416,7 @@ private:
const Instance& instance;
MasterSemaphore master_semaphore;
CommandPool command_pool;
DynamicState dynamic_state;
vk::CommandBuffer current_cmdbuf;
std::condition_variable_any event_cv;
struct PendingOp {
@ -421,7 +425,6 @@ private:
};
std::queue<PendingOp> pending_ops;
RenderState render_state;
DynamicState dynamic_state;
bool is_rendering = false;
tracy::VkCtxScope* profiler_scope{};
};

View File

@ -12,8 +12,7 @@ namespace Vulkan {
static constexpr u64 COPY_SHADER_HASH = 0xfefebf9f;
static bool ExecuteCopyShaderHLE(const Shader::Info& info,
const AmdGpu::Liverpool::ComputeProgram& cs_program,
static bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::ComputeProgram& cs_program,
Rasterizer& rasterizer) {
auto& scheduler = rasterizer.GetScheduler();
auto& buffer_cache = rasterizer.GetBufferCache();
@ -121,8 +120,8 @@ static bool ExecuteCopyShaderHLE(const Shader::Info& info,
return true;
}
bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs,
const AmdGpu::Liverpool::ComputeProgram& cs_program, Rasterizer& rasterizer) {
bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Regs& regs,
const AmdGpu::ComputeProgram& cs_program, Rasterizer& rasterizer) {
switch (info.pgm_hash) {
case COPY_SHADER_HASH:
return ExecuteCopyShaderHLE(info, cs_program, rasterizer);
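As a rough illustration of the hash-keyed HLE dispatch above (GuestShader, HleHandler and TryExecuteHle are invented for this sketch; the real code switches directly on info.pgm_hash): the guest program's hash selects a native replacement, and a miss falls back to normal recompilation.

#include <cstdint>
#include <functional>
#include <unordered_map>

// Stand-in types; the real emulator passes Shader::Info, AmdGpu::Regs, etc.
struct GuestShader {
    uint64_t hash;
};

using HleHandler = std::function<bool(const GuestShader&)>;

bool TryExecuteHle(const GuestShader& shader,
                   const std::unordered_map<uint64_t, HleHandler>& handlers) {
    // Look up a native replacement by the guest program's hash; return false
    // (i.e. "not handled") when no handler is registered for it.
    const auto it = handlers.find(shader.hash);
    return it != handlers.end() && it->second(shader);
}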

View File

@ -3,7 +3,10 @@
#pragma once
#include "video_core/amdgpu/liverpool.h"
namespace AmdGpu {
struct ComputeProgram;
union Regs;
} // namespace AmdGpu
namespace Shader {
struct Info;
@ -14,7 +17,7 @@ namespace Vulkan {
class Rasterizer;
/// Attempts to execute a shader using HLE if possible.
bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs,
const AmdGpu::Liverpool::ComputeProgram& cs_program, Rasterizer& rasterizer);
bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Regs& regs,
const AmdGpu::ComputeProgram& cs_program, Rasterizer& rasterizer);
} // namespace Vulkan

View File

@ -11,6 +11,8 @@
#include <deque>
#include <optional>
#include <boost/container/small_vector.hpp>
#include <boost/container/static_vector.hpp>
namespace Vulkan {
class Instance;

View File

@ -4,7 +4,7 @@
#include "common/assert.h"
#include "core/libraries/kernel/process.h"
#include "core/libraries/videoout/buffer.h"
#include "shader_recompiler/info.h"
#include "shader_recompiler/resource.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/texture_cache/image_info.h"
#include "video_core/texture_cache/tile.h"
@ -54,8 +54,7 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group,
UpdateSize();
}
ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint /*= {}*/) noexcept {
ImageInfo::ImageInfo(const AmdGpu::ColorBuffer& buffer, AmdGpu::CbDbExtent hint) noexcept {
props.is_tiled = buffer.IsTiled();
tile_mode = buffer.GetTileMode();
array_mode = AmdGpu::GetArrayMode(tile_mode);
@ -74,27 +73,25 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
guest_address = buffer.Address();
if (props.is_tiled) {
guest_size = buffer.GetColorSliceSize() * resources.layers;
mips_layout.emplace_back(guest_size, pitch, buffer.Height(), 0);
mips_layout[0] = MipInfo(guest_size, pitch, buffer.Height(), 0);
} else {
std::tie(std::ignore, std::ignore, guest_size) =
ImageSizeLinearAligned(pitch, size.height, num_bits, num_samples);
guest_size *= resources.layers;
mips_layout.emplace_back(guest_size, pitch, size.height, 0);
mips_layout[0] = MipInfo(guest_size, pitch, size.height, 0);
}
alt_tile = Libraries::Kernel::sceKernelIsNeoMode() && buffer.info.alt_tile_mode;
}
ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices,
VAddr htile_address, const AmdGpu::Liverpool::CbDbExtent& hint,
bool write_buffer) noexcept {
ImageInfo::ImageInfo(const AmdGpu::DepthBuffer& buffer, u32 num_slices, VAddr htile_address,
AmdGpu::CbDbExtent hint, bool write_buffer) noexcept {
tile_mode = buffer.GetTileMode();
array_mode = AmdGpu::GetArrayMode(tile_mode);
pixel_format = LiverpoolToVK::DepthFormat(buffer.z_info.format, buffer.stencil_info.format);
type = AmdGpu::ImageType::Color2D;
props.is_tiled = buffer.IsTiled();
props.is_depth = true;
props.has_stencil =
buffer.stencil_info.format != AmdGpu::Liverpool::DepthBuffer::StencilFormat::Invalid;
props.has_stencil = buffer.stencil_info.format != AmdGpu::DepthBuffer::StencilFormat::Invalid;
num_samples = buffer.NumSamples();
num_bits = buffer.NumBits();
size.width = hint.Valid() ? hint.width : buffer.Pitch();
@ -102,7 +99,7 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice
size.depth = 1;
pitch = buffer.Pitch();
resources.layers = num_slices;
meta_info.htile_addr = buffer.z_info.tile_surface_en ? htile_address : 0;
meta_info.htile_addr = buffer.z_info.tile_surface_enable ? htile_address : 0;
stencil_addr = write_buffer ? buffer.StencilWriteAddress() : buffer.StencilAddress();
stencil_size = pitch * size.height * sizeof(u8);
@ -110,12 +107,12 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice
guest_address = write_buffer ? buffer.DepthWriteAddress() : buffer.DepthAddress();
if (props.is_tiled) {
guest_size = buffer.GetDepthSliceSize() * resources.layers;
mips_layout.emplace_back(guest_size, pitch, buffer.Height(), 0);
mips_layout[0] = MipInfo(guest_size, pitch, buffer.Height(), 0);
} else {
std::tie(std::ignore, std::ignore, guest_size) =
ImageSizeLinearAligned(pitch, size.height, num_bits, num_samples);
guest_size *= resources.layers;
mips_layout.emplace_back(guest_size, pitch, size.height, 0);
mips_layout[0] = MipInfo(guest_size, pitch, size.height, 0);
}
}
@ -154,8 +151,6 @@ bool ImageInfo::IsCompatible(const ImageInfo& info) const {
}
void ImageInfo::UpdateSize() {
mips_layout.clear();
MipInfo mip_info{};
guest_size = 0;
for (s32 mip = 0; mip < resources.levels; ++mip) {
u32 mip_w = pitch >> mip;
@ -175,6 +170,7 @@ void ImageInfo::UpdateSize() {
mip_d = std::bit_ceil(mip_d);
}
auto& mip_info = mips_layout[mip];
switch (array_mode) {
case AmdGpu::ArrayMode::ArrayLinearAligned: {
std::tie(mip_info.pitch, mip_info.height, mip_info.size) =
@ -210,7 +206,6 @@ void ImageInfo::UpdateSize() {
}
mip_info.size *= mip_d * resources.layers;
mip_info.offset = guest_size;
mips_layout.emplace_back(mip_info);
guest_size += mip_info.size;
}
}
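A minimal sketch of the mip-offset accumulation above, using stand-in names and a deliberately simplified size formula (the real code also handles tiling, alignment and depth): each mip's offset is the running total of the sizes before it, written into a fixed array indexed by mip level instead of an emplace_back'd vector.

#include <algorithm>
#include <array>
#include <cstdint>

struct Mip {
    uint32_t size, pitch, height, offset;
};

uint32_t FillMips(std::array<Mip, 16>& mips, uint32_t base_pitch, uint32_t base_height,
                  uint32_t levels, uint32_t bytes_per_pixel) {
    uint32_t total = 0;
    for (uint32_t mip = 0; mip < levels; ++mip) {
        const uint32_t w = std::max(base_pitch >> mip, 1u);
        const uint32_t h = std::max(base_height >> mip, 1u);
        mips[mip] = {
            .size = w * h * bytes_per_pixel, // simplified; no tiling or padding here
            .pitch = w,
            .height = h,
            .offset = total,
        };
        total += mips[mip].size;
    }
    return total; // corresponds to guest_size in the diff
}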
@ -229,13 +224,9 @@ s32 ImageInfo::MipOf(const ImageInfo& info) const {
return -1;
}
if (info.mips_layout.empty()) {
UNREACHABLE();
}
// Find mip
auto mip = -1;
for (auto m = 0; m < info.mips_layout.size(); ++m) {
for (auto m = 0; m < info.resources.levels; ++m) {
const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = info.mips_layout[m];
const VAddr mip_base = info.guest_address + mip_ofs;
const VAddr mip_end = mip_base + mip_size;
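The loop above now walks resources.levels entries instead of the vector's size. A self-contained sketch of that lookup (stand-in Mip type, assumed semantics): return the mip whose [base, base + size) range contains the candidate address, or -1 when none does.

#include <array>
#include <cstdint>

struct Mip {
    uint32_t size, pitch, height, offset;
};
using VAddr = uint64_t;

int32_t MipOf(const std::array<Mip, 16>& mips, uint32_t levels, VAddr image_base,
              VAddr candidate) {
    for (uint32_t m = 0; m < levels; ++m) {
        const VAddr mip_base = image_base + mips[m].offset;
        const VAddr mip_end = mip_base + mips[m].size;
        if (candidate >= mip_base && candidate < mip_end) {
            return static_cast<int32_t>(m);
        }
    }
    return -1; // address does not fall inside any mip of this image
}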

View File

@ -3,16 +3,18 @@
#pragma once
#include <boost/container/static_vector.hpp>
#include "common/types.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/amdgpu/cb_db_extent.h"
#include "video_core/amdgpu/tiling.h"
#include "video_core/renderer_vulkan/vk_common.h"
#include "video_core/texture_cache/types.h"
namespace AmdGpu {
struct ColorBuffer;
struct DepthBuffer;
struct Image;
enum class ImageType : u64;
}
} // namespace AmdGpu
namespace Libraries::VideoOut {
struct BufferAttributeGroup;
@ -36,10 +38,9 @@ struct ImageProperties {
struct ImageInfo {
ImageInfo() = default;
ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group, VAddr cpu_address) noexcept;
ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept;
ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices, VAddr htile_address,
const AmdGpu::Liverpool::CbDbExtent& hint = {}, bool write_buffer = false) noexcept;
ImageInfo(const AmdGpu::ColorBuffer& buffer, AmdGpu::CbDbExtent hint) noexcept;
ImageInfo(const AmdGpu::DepthBuffer& buffer, u32 num_slices, VAddr htile_address,
AmdGpu::CbDbExtent hint, bool write_buffer = false) noexcept;
ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept;
bool IsTiled() const {
@ -60,7 +61,7 @@ struct ImageInfo {
VAddr cmask_addr;
VAddr fmask_addr;
VAddr htile_addr;
u32 htile_clear_mask = u32(-1);
s32 htile_clear_mask = -1;
} meta_info{};
ImageProperties props{};
@ -79,7 +80,7 @@ struct ImageInfo {
u32 height;
u32 offset;
};
boost::container::static_vector<MipInfo, 16> mips_layout;
std::array<MipInfo, 16> mips_layout;
VAddr guest_address{};
u32 guest_size{};
u8 bank_swizzle{};

View File

@ -2,8 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/logging/log.h"
#include "shader_recompiler/info.h"
#include "video_core/amdgpu/resource.h"
#include "shader_recompiler/resource.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/texture_cache/image.h"
@ -71,17 +70,16 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageReso
}
}
ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer) noexcept {
range.base.layer = col_buffer.view.slice_start;
ImageViewInfo::ImageViewInfo(const AmdGpu::ColorBuffer& col_buffer) noexcept {
range.base.layer = col_buffer.BaseSlice();
range.extent.layers = col_buffer.NumSlices() - range.base.layer;
type = range.extent.layers > 1 ? AmdGpu::ImageType::Color2DArray : AmdGpu::ImageType::Color2D;
format =
Vulkan::LiverpoolToVK::SurfaceFormat(col_buffer.GetDataFmt(), col_buffer.GetNumberFmt());
}
ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer,
AmdGpu::Liverpool::DepthView view,
AmdGpu::Liverpool::DepthControl ctl) {
ImageViewInfo::ImageViewInfo(const AmdGpu::DepthBuffer& depth_buffer, AmdGpu::DepthView view,
AmdGpu::DepthControl ctl) {
format = Vulkan::LiverpoolToVK::DepthFormat(depth_buffer.z_info.format,
depth_buffer.stencil_info.format);
is_storage = ctl.depth_write_enable;

View File

@ -3,12 +3,19 @@
#pragma once
#include "shader_recompiler/info.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/amdgpu/regs_depth.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/renderer_vulkan/vk_common.h"
#include "video_core/texture_cache/types.h"
namespace AmdGpu {
struct ColorBuffer;
}
namespace Shader {
struct ImageResource;
}
namespace Vulkan {
class Instance;
class Scheduler;
@ -19,9 +26,9 @@ namespace VideoCore {
struct ImageViewInfo {
ImageViewInfo() = default;
ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept;
ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer) noexcept;
ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer,
AmdGpu::Liverpool::DepthView view, AmdGpu::Liverpool::DepthControl ctl);
ImageViewInfo(const AmdGpu::ColorBuffer& col_buffer) noexcept;
ImageViewInfo(const AmdGpu::DepthBuffer& depth_buffer, AmdGpu::DepthView view,
AmdGpu::DepthControl ctl);
AmdGpu::ImageType type = AmdGpu::ImageType::Color2D;
vk::Format format = vk::Format::eR8G8B8A8Unorm;

View File

@ -2,7 +2,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include "video_core/amdgpu/resource.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/texture_cache/sampler.h"
@ -10,7 +9,7 @@
namespace VideoCore {
Sampler::Sampler(const Vulkan::Instance& instance, const AmdGpu::Sampler& sampler,
const AmdGpu::Liverpool::BorderColorBufferBase& border_color_base) {
const AmdGpu::BorderColorBuffer border_color_base) {
using namespace Vulkan;
const bool anisotropy_enable = instance.IsAnisotropicFilteringSupported() &&
(AmdGpu::IsAnisoFilter(sampler.xy_mag_filter) ||

View File

@ -3,6 +3,7 @@
#pragma once
#include "video_core/amdgpu/regs_texture.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/renderer_vulkan/vk_common.h"
@ -15,7 +16,7 @@ namespace VideoCore {
class Sampler {
public:
explicit Sampler(const Vulkan::Instance& instance, const AmdGpu::Sampler& sampler,
const AmdGpu::Liverpool::BorderColorBufferBase& border_color_base);
const AmdGpu::BorderColorBuffer border_color_base);
~Sampler();
Sampler(const Sampler&) = delete;

View File

@ -6,7 +6,6 @@
#include "common/assert.h"
#include "common/config.h"
#include "common/debug.h"
#include "common/polyfill_thread.h"
#include "common/scope_exit.h"
#include "core/memory.h"
#include "video_core/buffer_cache/buffer_cache.h"
@ -140,8 +139,8 @@ void TextureCache::DownloadedImagesThread(const std::stop_token& token) {
DownloadedImage image;
{
std::unique_lock lock{downloaded_images_mutex};
Common::CondvarWait(downloaded_images_cv, lock, token,
[this] { return !downloaded_images_queue.empty(); });
downloaded_images_cv.wait(lock, token,
[this] { return !downloaded_images_queue.empty(); });
if (token.stop_requested()) {
break;
}
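The change above swaps the Common::CondvarWait polyfill for the C++20 std::condition_variable_any overload that takes a std::stop_token. A self-contained sketch of that pattern (WorkQueue and the int payload are stand-ins): the wait wakes on either a notify or a stop request, so no hand-rolled stop handling is needed.

#include <condition_variable>
#include <mutex>
#include <queue>
#include <stop_token>

struct WorkQueue {
    std::mutex mutex;
    std::condition_variable_any cv;
    std::queue<int> items;

    // Typically launched as std::jthread{&WorkQueue::Worker, &queue}, which
    // supplies the stop_token automatically.
    void Worker(std::stop_token token) {
        while (!token.stop_requested()) {
            int item;
            {
                std::unique_lock lock{mutex};
                // Returns false if woken by the stop request while the
                // predicate is still unsatisfied.
                if (!cv.wait(lock, token, [this] { return !items.empty(); })) {
                    break;
                }
                item = items.front();
                items.pop();
            }
            (void)item; // process outside the lock
        }
    }
};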
@ -212,7 +211,7 @@ void TextureCache::InvalidateMemoryFromGPU(VAddr address, size_t max_size) {
void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) {
std::scoped_lock lk{mutex};
boost::container::small_vector<ImageId, 16> deleted_images;
ImageIds deleted_images;
ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); });
for (const ImageId id : deleted_images) {
// TODO: Download image data back to host.
@ -440,7 +439,7 @@ ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) {
return new_image_id;
}
ImageId TextureCache::FindImage(BaseDesc& desc, bool exact_fmt) {
ImageId TextureCache::FindImage(ImageDesc& desc, bool exact_fmt) {
const auto& info = desc.info;
if (info.guest_address == 0) [[unlikely]] {
@ -448,7 +447,7 @@ ImageId TextureCache::FindImage(BaseDesc& desc, bool exact_fmt) {
}
std::scoped_lock lock{mutex};
boost::container::small_vector<ImageId, 8> image_ids;
ImageIds image_ids;
ForEachImageInRegion(info.guest_address, info.guest_size,
[&](ImageId image_id, Image& image) { image_ids.push_back(image_id); });
@ -529,13 +528,12 @@ ImageId TextureCache::FindImage(BaseDesc& desc, bool exact_fmt) {
}
ImageId TextureCache::FindImageFromRange(VAddr address, size_t size, bool ensure_valid) {
boost::container::small_vector<ImageId, 4> image_ids;
ImageIds image_ids;
ForEachImageInRegion(address, size, [&](ImageId image_id, Image& image) {
if (image.info.guest_address != address) {
return;
}
if (ensure_valid && (False(image.flags & ImageFlagBits::GpuModified) ||
True(image.flags & ImageFlagBits::Dirty))) {
if (ensure_valid && !image.SafeToDownload()) {
return;
}
image_ids.push_back(image_id);
@ -559,7 +557,7 @@ ImageId TextureCache::FindImageFromRange(VAddr address, size_t size, bool ensure
return {};
}
ImageView& TextureCache::FindTexture(ImageId image_id, const BaseDesc& desc) {
ImageView& TextureCache::FindTexture(ImageId image_id, const ImageDesc& desc) {
Image& image = slot_images[image_id];
if (desc.type == BindingType::Storage) {
image.flags |= ImageFlagBits::GpuModified;
@ -572,7 +570,7 @@ ImageView& TextureCache::FindTexture(ImageId image_id, const BaseDesc& desc) {
return image.FindView(desc.view_info);
}
ImageView& TextureCache::FindRenderTarget(ImageId image_id, const BaseDesc& desc) {
ImageView& TextureCache::FindRenderTarget(ImageId image_id, const ImageDesc& desc) {
Image& image = slot_images[image_id];
image.flags |= ImageFlagBits::GpuModified;
if (Config::readbackLinearImages() && !image.info.props.is_tiled) {
@ -597,7 +595,7 @@ ImageView& TextureCache::FindRenderTarget(ImageId image_id, const BaseDesc& desc
return image.FindView(desc.view_info, false);
}
ImageView& TextureCache::FindDepthTarget(ImageId image_id, const BaseDesc& desc) {
ImageView& TextureCache::FindDepthTarget(ImageId image_id, const ImageDesc& desc) {
Image& image = slot_images[image_id];
image.flags |= ImageFlagBits::GpuModified;
image.usage.depth_target = 1u;
@ -662,10 +660,8 @@ void TextureCache::RefreshImage(Image& image) {
image.hash = hash;
}
const auto& num_layers = image.info.resources.layers;
const auto& num_mips = image.info.resources.levels;
ASSERT(num_mips == image.info.mips_layout.size());
const u32 num_layers = image.info.resources.layers;
const u32 num_mips = image.info.resources.levels;
const bool is_gpu_modified = True(image.flags & ImageFlagBits::GpuModified);
const bool is_gpu_dirty = True(image.flags & ImageFlagBits::GpuDirty);
@ -731,9 +727,8 @@ void TextureCache::RefreshImage(Image& image) {
image.Upload(image_copies, buffer, offset);
}
vk::Sampler TextureCache::GetSampler(
const AmdGpu::Sampler& sampler,
const AmdGpu::Liverpool::BorderColorBufferBase& border_color_base) {
vk::Sampler TextureCache::GetSampler(const AmdGpu::Sampler& sampler,
AmdGpu::BorderColorBuffer border_color_base) {
const u64 hash = XXH3_64bits(&sampler, sizeof(sampler));
const auto [it, new_sampler] = samplers.try_emplace(hash, instance, sampler, border_color_base);
return it->second.Handle();
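A hedged sketch of the sampler-cache lookup above, with stand-in types (SamplerDesc, CachedSampler): the raw descriptor bytes are hashed with XXH3, and try_emplace constructs the cached object only on a miss, so repeated draws with the same S# reuse one handle.

#include <cstdint>
#include <unordered_map>
#include <xxhash.h>

struct SamplerDesc {
    uint64_t raw[4]; // stand-in for the packed AmdGpu::Sampler descriptor
};
struct CachedSampler { // stand-in for the Vulkan-side sampler wrapper
    explicit CachedSampler(const SamplerDesc&) {}
    uint64_t Handle() const {
        return 0;
    }
};

uint64_t GetSampler(std::unordered_map<uint64_t, CachedSampler>& cache,
                    const SamplerDesc& desc) {
    const uint64_t hash = XXH3_64bits(&desc, sizeof(desc));
    // try_emplace only constructs CachedSampler when the hash is not present.
    const auto [it, inserted] = cache.try_emplace(hash, desc);
    return it->second.Handle();
}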

View File

@ -3,13 +3,17 @@
#pragma once
#include <condition_variable>
#include <mutex>
#include <thread>
#include <unordered_set>
#include <boost/container/small_vector.hpp>
#include <queue>
#include <tsl/robin_map.h>
#include "common/lru_cache.h"
#include "common/slot_vector.h"
#include "video_core/amdgpu/resource.h"
#include "shader_recompiler/resource.h"
#include "video_core/multi_level_page_table.h"
#include "video_core/texture_cache/blit_helper.h"
#include "video_core/texture_cache/image.h"
@ -32,8 +36,10 @@ class TextureCache {
static constexpr s64 DEFAULT_CRITICAL_GC_MEMORY = 3_GB;
static constexpr s64 TARGET_GC_THRESHOLD = 8_GB;
using ImageIds = boost::container::small_vector<ImageId, 16>;
struct Traits {
using Entry = boost::container::small_vector<ImageId, 16>;
using Entry = ImageIds;
static constexpr size_t AddressSpaceBits = 40;
static constexpr size_t FirstLevelBits = 10;
static constexpr size_t PageBits = 20;
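One possible reading of the Traits constants above (an assumption, not the MultiLevelPageTable implementation): a 40-bit address space with 2^20-byte pages yields 20-bit page indices, which a two-level table can split into a 10-bit first level and the remaining bits.

#include <cstdint>

constexpr uint64_t AddressSpaceBits = 40;
constexpr uint64_t FirstLevelBits = 10;
constexpr uint64_t PageBits = 20;

constexpr uint64_t PageIndex(uint64_t addr) {
    return addr >> PageBits; // 2^20-byte pages
}
constexpr uint64_t FirstLevel(uint64_t addr) {
    return PageIndex(addr) >> (AddressSpaceBits - PageBits - FirstLevelBits);
}
constexpr uint64_t SecondLevel(uint64_t addr) {
    return PageIndex(addr) & ((1ull << (AddressSpaceBits - PageBits - FirstLevelBits)) - 1);
}

static_assert(PageIndex(1ull << PageBits) == 1);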
@ -49,44 +55,24 @@ public:
VideoOut,
};
struct BaseDesc {
struct ImageDesc {
ImageInfo info;
ImageViewInfo view_info;
BindingType type{BindingType::Texture};
BaseDesc() = default;
BaseDesc(BindingType type_, ImageInfo info_, ImageViewInfo view_info_) noexcept
: info{std::move(info_)}, view_info{std::move(view_info_)}, type{type_} {}
};
struct TextureDesc : public BaseDesc {
TextureDesc() = default;
TextureDesc(const AmdGpu::Image& image, const Shader::ImageResource& desc)
: BaseDesc{desc.is_written ? BindingType::Storage : BindingType::Texture,
ImageInfo{image, desc}, ImageViewInfo{image, desc}} {}
};
struct RenderTargetDesc : public BaseDesc {
RenderTargetDesc() = default;
RenderTargetDesc(const AmdGpu::Liverpool::ColorBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint = {})
: BaseDesc{BindingType::RenderTarget, ImageInfo{buffer, hint}, ImageViewInfo{buffer}} {}
};
struct DepthTargetDesc : public BaseDesc {
DepthTargetDesc() = default;
DepthTargetDesc(const AmdGpu::Liverpool::DepthBuffer& buffer,
const AmdGpu::Liverpool::DepthView& view,
const AmdGpu::Liverpool::DepthControl& ctl, VAddr htile_address,
const AmdGpu::Liverpool::CbDbExtent& hint = {}, bool write_buffer = false)
: BaseDesc{BindingType::DepthTarget,
ImageInfo{buffer, view.NumSlices(), htile_address, hint, write_buffer},
ImageViewInfo{buffer, view, ctl}} {}
};
struct VideoOutDesc : public BaseDesc {
VideoOutDesc(const Libraries::VideoOut::BufferAttributeGroup& group, VAddr cpu_address)
: BaseDesc{BindingType::VideoOut, ImageInfo{group, cpu_address}, ImageViewInfo{}} {}
ImageDesc() = default;
ImageDesc(const AmdGpu::Image& image, const Shader::ImageResource& desc)
: info{image, desc}, view_info{image, desc},
type{desc.is_written ? BindingType::Storage : BindingType::Texture} {}
ImageDesc(const AmdGpu::ColorBuffer& buffer, AmdGpu::CbDbExtent hint)
: info{buffer, hint}, view_info{buffer}, type{BindingType::RenderTarget} {}
ImageDesc(const AmdGpu::DepthBuffer& buffer, AmdGpu::DepthView view,
AmdGpu::DepthControl ctl, VAddr htile_address, AmdGpu::CbDbExtent hint,
bool write_buffer = false)
: info{buffer, view.NumSlices(), htile_address, hint, write_buffer},
view_info{buffer, view, ctl}, type{BindingType::DepthTarget} {}
ImageDesc(const Libraries::VideoOut::BufferAttributeGroup& group, VAddr cpu_address)
: info{group, cpu_address}, type{BindingType::VideoOut} {}
};
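A sketch of the consolidation idea above, with made-up stand-in types: instead of one BaseDesc subclass per binding kind, a single descriptor struct carries a BindingType tag that each constructor fills in, so the cache's Find* functions can all take the same type.

#include <cstdint>

struct ImageHandle { uint32_t id; };     // stand-in for AmdGpu::Image
struct ColorBufferRegs { uint32_t id; }; // stand-in for AmdGpu::ColorBuffer
struct DepthBufferRegs { uint32_t id; }; // stand-in for AmdGpu::DepthBuffer

enum class BindingType : uint32_t { Texture, Storage, RenderTarget, DepthTarget, VideoOut };

struct Desc {
    uint32_t info{}; // stand-in for ImageInfo
    uint32_t view{}; // stand-in for ImageViewInfo
    BindingType type{BindingType::Texture};

    Desc() = default;
    Desc(ImageHandle image, bool is_written)
        : info{image.id}, type{is_written ? BindingType::Storage : BindingType::Texture} {}
    explicit Desc(ColorBufferRegs cb) : info{cb.id}, type{BindingType::RenderTarget} {}
    Desc(DepthBufferRegs db, uint32_t depth_view)
        : info{db.id}, view{depth_view}, type{BindingType::DepthTarget} {}
};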
public:
@ -111,19 +97,19 @@ public:
void ProcessDownloadImages();
/// Retrieves the handle of the image with the provided attributes.
[[nodiscard]] ImageId FindImage(BaseDesc& desc, bool exact_fmt = false);
[[nodiscard]] ImageId FindImage(ImageDesc& desc, bool exact_fmt = false);
/// Retrieves the image whose address matches the provided one
[[nodiscard]] ImageId FindImageFromRange(VAddr address, size_t size, bool ensure_valid = true);
/// Retrieves an image view with the properties of the specified image id.
[[nodiscard]] ImageView& FindTexture(ImageId image_id, const BaseDesc& desc);
[[nodiscard]] ImageView& FindTexture(ImageId image_id, const ImageDesc& desc);
/// Retrieves the render target with the specified properties
[[nodiscard]] ImageView& FindRenderTarget(ImageId image_id, const BaseDesc& desc);
[[nodiscard]] ImageView& FindRenderTarget(ImageId image_id, const ImageDesc& desc);
/// Retrieves the depth target with the specified properties
[[nodiscard]] ImageView& FindDepthTarget(ImageId image_id, const BaseDesc& desc);
[[nodiscard]] ImageView& FindDepthTarget(ImageId image_id, const ImageDesc& desc);
/// Updates the image contents if the image was modified by the CPU.
void UpdateImage(ImageId image_id) {
@ -151,9 +137,8 @@ public:
void RefreshImage(Image& image);
/// Retrieves the sampler that matches the provided S# descriptor.
[[nodiscard]] vk::Sampler GetSampler(
const AmdGpu::Sampler& sampler,
const AmdGpu::Liverpool::BorderColorBufferBase& border_color_base);
[[nodiscard]] vk::Sampler GetSampler(const AmdGpu::Sampler& sampler,
AmdGpu::BorderColorBuffer border_color_base);
/// Retrieves the image with the specified id.
[[nodiscard]] Image& GetImage(ImageId id) {
@ -212,7 +197,7 @@ public:
void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) {
using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
boost::container::small_vector<ImageId, 32> images;
ImageIds images;
ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) {
const auto it = page_table.find(page);
if (it == nullptr) {
@ -329,7 +314,6 @@ private:
Common::LeastRecentlyUsedCache<ImageId, u64> lru_cache;
PageTable page_table;
std::mutex mutex;
struct DownloadedImage {
u64 tick;
VAddr device_addr;
@ -340,7 +324,6 @@ private:
std::mutex downloaded_images_mutex;
std::condition_variable_any downloaded_images_cv;
std::jthread downloaded_images_thread;
struct MetaDataInfo {
enum class Type {
CMask,
@ -348,7 +331,7 @@ private:
HTile,
};
Type type;
u32 clear_mask{u32(-1)};
s32 clear_mask = -1;
};
tsl::robin_map<VAddr, MetaDataInfo> surface_metas;
};