From f9ef57f74be3127d76875c2bbdb5d23a6297f986 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quang=20Ng=C3=B4?= Date: Fri, 28 Nov 2025 23:36:11 +0700 Subject: [PATCH 01/12] Fix metainfo (#3834) --- dist/net.shadps4.shadPS4.metainfo.xml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dist/net.shadps4.shadPS4.metainfo.xml b/dist/net.shadps4.shadPS4.metainfo.xml index 5798876f6..c85fcf003 100644 --- a/dist/net.shadps4.shadPS4.metainfo.xml +++ b/dist/net.shadps4.shadPS4.metainfo.xml @@ -18,19 +18,19 @@ https://cdn.jsdelivr.net/gh/shadps4-emu/shadps4@main/documents/Screenshots/1.png - Bloodborne + Bloodborne by From Software https://cdn.jsdelivr.net/gh/shadps4-emu/shadps4@main/documents/Screenshots/2.png - Hatsune Miku: Project DIVA Future Tone + Hatsune Miku Project DIVA Future Tone by SEGA https://cdn.jsdelivr.net/gh/shadps4-emu/shadps4@main/documents/Screenshots/3.png - Yakuza 0 + Yakuza 0 by SEGA https://cdn.jsdelivr.net/gh/shadps4-emu/shadps4@main/documents/Screenshots/4.png - Persona 4 Golden + DRIVECLUB™ by Evolution Studios From a9f8eaf77855a52be29e1b8c5ec2449e2ef35d8a Mon Sep 17 00:00:00 2001 From: psucien <168137814+psucien@users.noreply.github.com> Date: Sat, 29 Nov 2025 10:52:08 +0100 Subject: [PATCH 02/12] video_core: Initial implementation of pipeline cache (#3816) * Initial implementation * Fix for crash caused by stale stages data; cosmetics applied * Someone mentioned the assert * Async blob writer * Fix for memory leak * Remain stuff * Async changed to `packaged_task` --- .gitmodules | 3 + CMakeLists.txt | 12 +- externals/CMakeLists.txt | 3 + externals/miniz | 1 + src/common/config.cpp | 24 + src/common/config.h | 4 + src/common/path_util.cpp | 1 + src/common/path_util.h | 2 + src/common/serdes.h | 140 +++++ src/emulator.cpp | 2 + .../frontend/fetch_shader.cpp | 2 +- src/shader_recompiler/frontend/fetch_shader.h | 8 +- .../frontend/structured_control_flow.cpp | 8 +- .../frontend/translate/translate.cpp | 3 +- 
src/shader_recompiler/info.h | 117 +++-- .../passes/flatten_extended_userdata_pass.cpp | 18 +- .../ir/passes/hull_shader_transform.cpp | 6 +- src/shader_recompiler/ir/passes/ir_passes.h | 4 +- .../ir/passes/resource_tracking_pass.cpp | 3 +- src/shader_recompiler/ir/passes/srt.h | 9 + src/shader_recompiler/profile.h | 9 +- src/shader_recompiler/recompiler.cpp | 6 +- src/shader_recompiler/recompiler.h | 5 +- src/shader_recompiler/resource.h | 11 +- src/shader_recompiler/runtime_info.h | 3 +- src/shader_recompiler/specialization.h | 48 +- src/video_core/amdgpu/pixel_format.h | 8 +- src/video_core/cache_storage.cpp | 264 ++++++++++ src/video_core/cache_storage.h | 50 ++ .../renderer_vulkan/vk_compute_pipeline.cpp | 9 +- .../renderer_vulkan/vk_compute_pipeline.h | 16 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 75 +-- .../renderer_vulkan/vk_graphics_pipeline.h | 20 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 74 ++- .../renderer_vulkan/vk_pipeline_cache.h | 36 +- .../vk_pipeline_serialization.cpp | 480 ++++++++++++++++++ .../vk_pipeline_serialization.h | 21 + 37 files changed, 1339 insertions(+), 166 deletions(-) create mode 160000 externals/miniz create mode 100644 src/common/serdes.h create mode 100644 src/video_core/cache_storage.cpp create mode 100644 src/video_core/cache_storage.h create mode 100644 src/video_core/renderer_vulkan/vk_pipeline_serialization.cpp create mode 100644 src/video_core/renderer_vulkan/vk_pipeline_serialization.h diff --git a/.gitmodules b/.gitmodules index b8d1544e4..c5d05edd3 100644 --- a/.gitmodules +++ b/.gitmodules @@ -117,3 +117,6 @@ path = externals/sdl3_mixer url = https://github.com/libsdl-org/SDL_mixer shallow = true +[submodule "externals/miniz"] + path = externals/miniz + url = https://github.com/richgel999/miniz diff --git a/CMakeLists.txt b/CMakeLists.txt index 7c1ebca79..cf78e92bf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -512,7 +512,7 @@ set(PAD_LIB src/core/libraries/pad/pad.cpp 
src/core/libraries/pad/pad_errors.h ) -set(SYSTEM_GESTURE_LIB +set(SYSTEM_GESTURE_LIB src/core/libraries/system_gesture/system_gesture.cpp src/core/libraries/system_gesture/system_gesture.h ) @@ -693,7 +693,6 @@ set(COMMON src/common/logging/backend.cpp src/common/lru_cache.h src/common/error.cpp src/common/error.h - src/common/scope_exit.h src/common/fixed_value.h src/common/func_traits.h src/common/native_clock.cpp @@ -707,6 +706,8 @@ set(COMMON src/common/logging/backend.cpp src/common/rdtsc.h src/common/recursive_lock.cpp src/common/recursive_lock.h + src/common/scope_exit.h + src/common/serdes.h src/common/sha1.h src/common/shared_first_mutex.h src/common/signal_context.h @@ -986,6 +987,8 @@ set(VIDEO_CORE src/video_core/amdgpu/cb_db_extent.h src/video_core/renderer_vulkan/vk_pipeline_cache.h src/video_core/renderer_vulkan/vk_pipeline_common.cpp src/video_core/renderer_vulkan/vk_pipeline_common.h + src/video_core/renderer_vulkan/vk_pipeline_serialization.cpp + src/video_core/renderer_vulkan/vk_pipeline_serialization.h src/video_core/renderer_vulkan/vk_platform.cpp src/video_core/renderer_vulkan/vk_platform.h src/video_core/renderer_vulkan/vk_presenter.cpp @@ -1023,6 +1026,8 @@ set(VIDEO_CORE src/video_core/amdgpu/cb_db_extent.h src/video_core/texture_cache/tile_manager.cpp src/video_core/texture_cache/tile_manager.h src/video_core/texture_cache/types.h + src/video_core/cache_storage.cpp + src/video_core/cache_storage.h src/video_core/page_manager.cpp src/video_core/page_manager.h src/video_core/multi_level_page_table.h @@ -1077,7 +1082,8 @@ add_executable(shadps4 create_target_directory_groups(shadps4) target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient RenderDoc::API FFmpeg::ffmpeg Dear_ImGui gcn half::half ZLIB::ZLIB PNG::PNG) -target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis 
glslang::glslang SDL3::SDL3 SDL3_mixer::SDL3_mixer pugixml::pugixml stb::headers libusb::usb lfreist-hwinfo::hwinfo nlohmann_json::nlohmann_json) +target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::glslang SDL3::SDL3 SDL3_mixer::SDL3_mixer pugixml::pugixml) +target_link_libraries(shadps4 PRIVATE stb::headers libusb::usb lfreist-hwinfo::hwinfo nlohmann_json::nlohmann_json miniz) target_compile_definitions(shadps4 PRIVATE IMGUI_USER_CONFIG="imgui/imgui_config.h") target_compile_definitions(Dear_ImGui PRIVATE IMGUI_USER_CONFIG="${PROJECT_SOURCE_DIR}/src/imgui/imgui_config.h") diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index b6c7c746e..eb3723f2c 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -261,3 +261,6 @@ endif() #nlohmann json set(JSON_BuildTests OFF CACHE INTERNAL "") add_subdirectory(json) + +# miniz +add_subdirectory(miniz) diff --git a/externals/miniz b/externals/miniz new file mode 160000 index 000000000..174573d60 --- /dev/null +++ b/externals/miniz @@ -0,0 +1 @@ +Subproject commit 174573d60290f447c13a2b1b3405de2b96e27d6c diff --git a/src/common/config.cpp b/src/common/config.cpp index b0f068142..e79652b32 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -191,6 +191,8 @@ static ConfigEntry vkCrashDiagnostic(false); static ConfigEntry vkHostMarkers(false); static ConfigEntry vkGuestMarkers(false); static ConfigEntry rdocEnable(false); +static ConfigEntry pipelineCacheEnable(false); +static ConfigEntry pipelineCacheArchive(false); // Debug static ConfigEntry isDebugDump(false); @@ -452,6 +454,14 @@ bool isRdocEnabled() { return rdocEnable.get(); } +bool isPipelineCacheEnabled() { + return pipelineCacheEnable.get(); +} + +bool isPipelineCacheArchived() { + return pipelineCacheArchive.get(); +} + bool fpsColor() { return isFpsColor.get(); } @@ -603,6 +613,14 @@ void setRdocEnabled(bool enable, bool 
is_game_specific) { rdocEnable.set(enable, is_game_specific); } +void setPipelineCacheEnabled(bool enable, bool is_game_specific) { + pipelineCacheEnable.set(enable, is_game_specific); +} + +void setPipelineCacheArchived(bool enable, bool is_game_specific) { + pipelineCacheArchive.set(enable, is_game_specific); +} + void setVblankFreq(u32 value, bool is_game_specific) { vblankFrequency.set(value, is_game_specific); } @@ -939,6 +957,8 @@ void load(const std::filesystem::path& path, bool is_game_specific) { vkHostMarkers.setFromToml(vk, "hostMarkers", is_game_specific); vkGuestMarkers.setFromToml(vk, "guestMarkers", is_game_specific); rdocEnable.setFromToml(vk, "rdocEnable", is_game_specific); + pipelineCacheEnable.setFromToml(vk, "pipelineCacheEnable", is_game_specific); + pipelineCacheArchive.setFromToml(vk, "pipelineCacheArchive", is_game_specific); } string current_version = {}; @@ -1107,6 +1127,8 @@ void save(const std::filesystem::path& path, bool is_game_specific) { vkHostMarkers.setTomlValue(data, "Vulkan", "hostMarkers", is_game_specific); vkGuestMarkers.setTomlValue(data, "Vulkan", "guestMarkers", is_game_specific); rdocEnable.setTomlValue(data, "Vulkan", "rdocEnable", is_game_specific); + pipelineCacheEnable.setTomlValue(data, "Vulkan", "pipelineCacheEnable", is_game_specific); + pipelineCacheArchive.setTomlValue(data, "Vulkan", "pipelineCacheArchive", is_game_specific); isDebugDump.setTomlValue(data, "Debug", "DebugDump", is_game_specific); isShaderDebug.setTomlValue(data, "Debug", "CollectShader", is_game_specific); @@ -1237,6 +1259,8 @@ void setDefaultValues(bool is_game_specific) { vkHostMarkers.set(false, is_game_specific); vkGuestMarkers.set(false, is_game_specific); rdocEnable.set(false, is_game_specific); + pipelineCacheEnable.set(false, is_game_specific); + pipelineCacheArchive.set(false, is_game_specific); // GS - Debug isDebugDump.set(false, is_game_specific); diff --git a/src/common/config.h b/src/common/config.h index 5c9f89ae6..481ef6444 
100644 --- a/src/common/config.h +++ b/src/common/config.h @@ -94,7 +94,11 @@ void setVkGuestMarkersEnabled(bool enable, bool is_game_specific = false); bool getEnableDiscordRPC(); void setEnableDiscordRPC(bool enable); bool isRdocEnabled(); +bool isPipelineCacheEnabled(); +bool isPipelineCacheArchived(); void setRdocEnabled(bool enable, bool is_game_specific = false); +void setPipelineCacheEnabled(bool enable, bool is_game_specific = false); +void setPipelineCacheArchived(bool enable, bool is_game_specific = false); std::string getLogType(); void setLogType(const std::string& type, bool is_game_specific = false); std::string getLogFilter(); diff --git a/src/common/path_util.cpp b/src/common/path_util.cpp index bd0aff040..b0cbb10cf 100644 --- a/src/common/path_util.cpp +++ b/src/common/path_util.cpp @@ -127,6 +127,7 @@ static auto UserPaths = [] { create_path(PathType::MetaDataDir, user_dir / METADATA_DIR); create_path(PathType::CustomTrophy, user_dir / CUSTOM_TROPHY); create_path(PathType::CustomConfigs, user_dir / CUSTOM_CONFIGS); + create_path(PathType::CacheDir, user_dir / CACHE_DIR); std::ofstream notice_file(user_dir / CUSTOM_TROPHY / "Notice.txt"); if (notice_file.is_open()) { diff --git a/src/common/path_util.h b/src/common/path_util.h index 0a0234eba..fd2c18baa 100644 --- a/src/common/path_util.h +++ b/src/common/path_util.h @@ -24,6 +24,7 @@ enum class PathType { MetaDataDir, // Where game metadata (e.g. trophies and menu backgrounds) is stored. CustomTrophy, // Where custom files for trophies are stored. CustomConfigs, // Where custom files for different games are stored. + CacheDir, // Where pipeline and shader cache is stored. 
}; constexpr auto PORTABLE_DIR = "user"; @@ -42,6 +43,7 @@ constexpr auto PATCHES_DIR = "patches"; constexpr auto METADATA_DIR = "game_data"; constexpr auto CUSTOM_TROPHY = "custom_trophy"; constexpr auto CUSTOM_CONFIGS = "custom_configs"; +constexpr auto CACHE_DIR = "cache"; // Filenames constexpr auto LOG_FILE = "shad_log.txt"; diff --git a/src/common/serdes.h b/src/common/serdes.h new file mode 100644 index 000000000..a36fed4d3 --- /dev/null +++ b/src/common/serdes.h @@ -0,0 +1,140 @@ +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/assert.h" +#include "common/types.h" + +#include + +namespace Serialization { + +template +concept Container = requires(T t) { + typename T::iterator; + { t.begin() } -> std::same_as; + { t.end() } -> std::same_as; + { t.size() } -> std::convertible_to; +}; + +struct Archive { + void Alloc(size_t size) { + container.resize(size); + } + + void Grow(size_t size) { + container.resize(container.size() + size); + } + + void Merge(const Archive& ar) { + container.insert(container.end(), ar.container.cbegin(), ar.container.cend()); + offset = container.size(); + } + + [[nodiscard]] size_t SizeBytes() const { + return container.size(); + } + + u8* CurrPtr() { + return container.data() + offset; + } + + void Advance(size_t size) { + ASSERT(offset + size <= container.size()); + offset += size; + } + + std::vector&& TakeOff() { + offset = 0; + return std::move(container); + } + + [[nodiscard]] bool IsEoS() const { + return offset >= container.size(); + } + + Archive() = default; + explicit Archive(std::vector&& v) : container{v} {} + +private: + u32 offset{}; + std::vector container{}; + + friend struct Writer; + friend struct Reader; +}; + +struct Writer { + template + void Write(const T* ptr, size_t size) { + if (ar.offset + size >= ar.container.size()) { + ar.Grow(size); + } + std::memcpy(ar.CurrPtr(), reinterpret_cast(ptr), size); + 
ar.Advance(size); + } + + template + requires(!Container) + void Write(const T& value) { + const auto size = sizeof(value); + Write(&value, size); + } + + void Write(const auto& v) { + Write(v.size()); + for (const auto& elem : v) { + Write(elem); + } + } + + void Write(const std::string& s) { + Write(s.size()); + Write(s.c_str(), s.size()); + } + + Writer() = delete; + explicit Writer(Archive& ar_) : ar{ar_} {} + + Archive& ar; +}; + +struct Reader { + template + void Read(T* ptr, size_t size) { + ASSERT(ar.offset + size <= ar.container.size()); + std::memcpy(reinterpret_cast(ptr), ar.CurrPtr(), size); + ar.Advance(size); + } + + template + requires(!Container) + void Read(T& value) { + const auto size = sizeof(value); + Read(&value, size); + } + + void Read(auto& v) { + size_t num_elements{}; + Read(num_elements); + for (int i = 0; i < num_elements; ++i) { + v.emplace_back(); + Read(v.back()); + } + } + + void Read(std::string& s) { + size_t length{}; + Read(length); + s.resize(length); + Read(s.data(), length); + } + + Reader() = delete; + explicit Reader(Archive& ar_) : ar{ar_} {} + + Archive& ar; +}; + +} // namespace Serialization diff --git a/src/emulator.cpp b/src/emulator.cpp index fb187cfae..f0026068c 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -42,6 +42,7 @@ #include "core/linker.h" #include "core/memory.h" #include "emulator.h" +#include "video_core/cache_storage.h" #include "video_core/renderdoc.h" #ifdef _WIN32 @@ -387,6 +388,7 @@ void Emulator::Run(std::filesystem::path file, std::vector args, } UpdatePlayTime(id); + Storage::DataBase::Instance().Close(); std::quick_exit(0); } diff --git a/src/shader_recompiler/frontend/fetch_shader.cpp b/src/shader_recompiler/frontend/fetch_shader.cpp index 35bea1c1b..ba0635546 100644 --- a/src/shader_recompiler/frontend/fetch_shader.cpp +++ b/src/shader_recompiler/frontend/fetch_shader.cpp @@ -51,7 +51,7 @@ std::optional ParseFetchShader(const Shader::Info& info) { } const auto* code = 
GetFetchShaderCode(info, info.fetch_shader_sgpr_base); - FetchShaderData data{.code = code}; + FetchShaderData data{}; GcnCodeSlice code_slice(code, code + std::numeric_limits::max()); GcnDecodeContext decoder; diff --git a/src/shader_recompiler/frontend/fetch_shader.h b/src/shader_recompiler/frontend/fetch_shader.h index 442a9af2f..a57bbb0c6 100644 --- a/src/shader_recompiler/frontend/fetch_shader.h +++ b/src/shader_recompiler/frontend/fetch_shader.h @@ -8,6 +8,10 @@ #include "common/types.h" #include "shader_recompiler/info.h" +namespace Serialization { +struct Archive; +} + namespace Shader::Gcn { struct VertexAttribute { @@ -50,7 +54,6 @@ struct VertexAttribute { }; struct FetchShaderData { - const u32* code; u32 size = 0; std::vector attributes; s8 vertex_offset_sgpr = -1; ///< SGPR of vertex offset from VADDR @@ -60,6 +63,9 @@ struct FetchShaderData { return attributes == other.attributes && vertex_offset_sgpr == other.vertex_offset_sgpr && instance_offset_sgpr == other.instance_offset_sgpr; } + + void Serialize(Serialization::Archive& ar) const; + bool Deserialize(Serialization::Archive& buffer); }; const u32* GetFetchShaderCode(const Info& info, u32 sgpr_base); diff --git a/src/shader_recompiler/frontend/structured_control_flow.cpp b/src/shader_recompiler/frontend/structured_control_flow.cpp index 963b2c0d5..1dc186c64 100644 --- a/src/shader_recompiler/frontend/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/structured_control_flow.cpp @@ -596,9 +596,8 @@ public: IR::AbstractSyntaxList& syntax_list_, std::span inst_list_, Info& info_, const RuntimeInfo& runtime_info_, const Profile& profile_) : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, - syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_}, - runtime_info{runtime_info_}, profile{profile_}, - translator{info_, runtime_info_, profile_} { + syntax_list{syntax_list_}, inst_list{inst_list_}, runtime_info{runtime_info_}, + profile{profile_}, translator{info_, 
runtime_info_, profile_} { Visit(root_stmt, nullptr, nullptr); IR::Block* first_block = syntax_list.front().data.block; @@ -782,7 +781,7 @@ private: } } - IR::Block* MergeBlock(Statement& parent, Statement& stmt) { + IR::Block* MergeBlock(Statement& parent, Statement& stmt) const { Statement* merge_stmt{TryFindForwardBlock(stmt)}; if (!merge_stmt) { // Create a merge block we can visit later @@ -798,7 +797,6 @@ private: IR::AbstractSyntaxList& syntax_list; const Block dummy_flow_block{.is_dummy = true}; std::span inst_list; - Info& info; const RuntimeInfo& runtime_info; const Profile& profile; Translator translator; diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 57b50a3e1..3aa70e2ec 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -560,7 +560,8 @@ void Translator::EmitFetch(const GcnInst& inst) { } const auto filename = fmt::format("vs_{:#018x}.fetch.bin", info.pgm_hash); const auto file = IOFile{dump_dir / filename, FileAccessMode::Create}; - file.WriteRaw(fetch_data->code, fetch_data->size); + const auto* code = GetFetchShaderCode(info, code_sgpr_base); + file.WriteRaw(code, fetch_data->size); } for (const auto& attrib : fetch_data->attributes) { diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index a3be34390..8d89537cb 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -19,6 +19,10 @@ #include "shader_recompiler/resource.h" #include "shader_recompiler/runtime_info.h" +namespace Serialization { +struct Archive; +} + namespace Shader { enum class Qualifier : u8 { @@ -34,7 +38,49 @@ enum class Qualifier : u8 { /** * Contains general information generated by the shader recompiler for an input program. 
*/ -struct Info { +struct InfoPersistent { + BufferResourceList buffers; + ImageResourceList images; + SamplerResourceList samplers; + FMaskResourceList fmasks; + + struct UserDataMask { + void Set(IR::ScalarReg reg) noexcept { + mask |= 1 << static_cast(reg); + } + + u32 Index(IR::ScalarReg reg) const noexcept { + const u32 reg_mask = (1 << static_cast(reg)) - 1; + return std::popcount(mask & reg_mask); + } + + u32 NumRegs() const noexcept { + return std::popcount(mask); + } + + u32 mask; + }; + UserDataMask ud_mask{}; + u32 fetch_shader_sgpr_base{}; + + u64 pgm_hash{}; + + s32 tess_consts_dword_offset = -1; + IR::ScalarReg tess_consts_ptr_base = IR::ScalarReg::Max; + Stage stage; + LogicalStage l_stage; + + u8 mrt_mask{}; + bool has_fetch_shader{}; + bool has_bitwise_xor{}; + bool uses_dma{}; + + InfoPersistent() = default; + InfoPersistent(Stage stage_, LogicalStage l_stage_, u64 pgm_hash_) + : stage{stage_}, l_stage{l_stage_}, pgm_hash{pgm_hash_} {} +}; + +struct Info : InfoPersistent { struct AttributeFlags { bool Get(IR::Attribute attrib, u32 comp = 0) const { return flags[Index(attrib)] & (1 << comp); @@ -58,56 +104,32 @@ struct Info { std::array flags; }; - AttributeFlags loads{}; - AttributeFlags stores{}; - struct UserDataMask { - void Set(IR::ScalarReg reg) noexcept { - mask |= 1 << static_cast(reg); - } - - u32 Index(IR::ScalarReg reg) const noexcept { - const u32 reg_mask = (1 << static_cast(reg)) - 1; - return std::popcount(mask & reg_mask); - } - - u32 NumRegs() const noexcept { - return std::popcount(mask); - } - - u32 mask; + enum class ReadConstType { + None = 0, + Immediate = 1 << 0, + Dynamic = 1 << 1, }; - UserDataMask ud_mask{}; - - CopyShaderData gs_copy_data; - u32 uses_patches{}; - - BufferResourceList buffers; - ImageResourceList images; - SamplerResourceList samplers; - FMaskResourceList fmasks; - - PersistentSrtInfo srt_info; - std::vector flattened_ud_buf; struct Interpolation { Qualifier primary; Qualifier auxiliary; }; - std::array 
fs_interpolation{}; - - IR::ScalarReg tess_consts_ptr_base = IR::ScalarReg::Max; - s32 tess_consts_dword_offset = -1; std::span user_data; - Stage stage; - LogicalStage l_stage; + std::vector flattened_ud_buf; + PersistentSrtInfo srt_info; + + AttributeFlags loads{}; + AttributeFlags stores{}; + + ReadConstType readconst_types{}; + CopyShaderData gs_copy_data; + u32 uses_patches{}; - u64 pgm_hash{}; VAddr pgm_base; bool has_storage_images{}; bool has_discard{}; - bool has_bitwise_xor{}; bool has_image_gather{}; bool has_image_query{}; bool uses_buffer_atomic_float_min_max{}; @@ -125,20 +147,12 @@ struct Info { bool stores_tess_level_outer{}; bool stores_tess_level_inner{}; bool translation_failed{}; - u8 mrt_mask{0u}; - bool has_fetch_shader{false}; - u32 fetch_shader_sgpr_base{0u}; - enum class ReadConstType { - None = 0, - Immediate = 1 << 0, - Dynamic = 1 << 1, - }; - ReadConstType readconst_types{}; - bool uses_dma{}; + std::array fs_interpolation{}; - explicit Info(Stage stage_, LogicalStage l_stage_, ShaderParams params) - : stage{stage_}, l_stage{l_stage_}, pgm_hash{params.hash}, pgm_base{params.Base()}, + Info() = default; + Info(Stage stage_, LogicalStage l_stage_, ShaderParams params) + : InfoPersistent(stage_, l_stage_, params.hash), pgm_base{params.Base()}, user_data{params.user_data} {} template @@ -192,6 +206,9 @@ struct Info { reinterpret_cast(tess_constants_addr), sizeof(tess_constants)); } + + void Serialize(Serialization::Archive& ar) const; + bool Deserialize(Serialization::Archive& ar); }; DECLARE_ENUM_FLAG_OPERATORS(Info::ReadConstType); diff --git a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp index 7626b9c9f..e1f9f2c5a 100644 --- a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp +++ b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp @@ -28,6 +28,17 @@ using namespace Xbyak::util; static Xbyak::CodeGenerator 
g_srt_codegen(32_MB); static const u8* g_srt_codegen_start = nullptr; +namespace Shader { + +PFN_SrtWalker RegisterWalkerCode(const u8* ptr, size_t size) { + const auto func_addr = (PFN_SrtWalker)g_srt_codegen.getCurr(); + g_srt_codegen.db(ptr, size); + g_srt_codegen.ready(); + return func_addr; +} + +} // namespace Shader + namespace { static void DumpSrtProgram(const Shader::Info& info, const u8* code, size_t codesize) { @@ -215,9 +226,12 @@ static void GenerateSrtProgram(Info& info, PassInfo& pass_info) { c.ret(); c.ready(); + info.srt_info.walker_func_size = + c.getCurr() - reinterpret_cast(info.srt_info.walker_func); + if (Config::dumpShaders()) { - size_t codesize = c.getCurr() - reinterpret_cast(info.srt_info.walker_func); - DumpSrtProgram(info, reinterpret_cast(info.srt_info.walker_func), codesize); + DumpSrtProgram(info, reinterpret_cast(info.srt_info.walker_func), + info.srt_info.walker_func_size); } info.srt_info.flattened_bufsize_dw = pass_info.dst_off_dw; diff --git a/src/shader_recompiler/ir/passes/hull_shader_transform.cpp b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp index 2f8e1d7b1..48b496727 100644 --- a/src/shader_recompiler/ir/passes/hull_shader_transform.cpp +++ b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp @@ -363,7 +363,7 @@ static IR::F32 ReadTessControlPointAttribute(IR::U32 addr, const u32 stride, IR: } // namespace -void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) { +void HullShaderTransform(IR::Program& program, const RuntimeInfo& runtime_info) { const Info& info = program.info; for (IR::Block* block : program.blocks) { @@ -561,8 +561,8 @@ void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) { } } -void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) { - Info& info = program.info; +void DomainShaderTransform(const IR::Program& program, const RuntimeInfo& runtime_info) { + const Info& info = program.info; for (IR::Block* block : 
program.blocks) { for (IR::Inst& inst : block->Instructions()) { diff --git a/src/shader_recompiler/ir/passes/ir_passes.h b/src/shader_recompiler/ir/passes/ir_passes.h index fdae9d3cf..5bf362284 100644 --- a/src/shader_recompiler/ir/passes/ir_passes.h +++ b/src/shader_recompiler/ir/passes/ir_passes.h @@ -24,8 +24,8 @@ void LowerBufferFormatToRaw(IR::Program& program); void LowerFp64ToFp32(IR::Program& program); void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info); void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info); -void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info); -void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info); +void HullShaderTransform(IR::Program& program, const RuntimeInfo& runtime_info); +void DomainShaderTransform(const IR::Program& program, const RuntimeInfo& runtime_info); void SharedMemoryBarrierPass(IR::Program& program, const RuntimeInfo& runtime_info, const Profile& profile); void SharedMemorySimplifyPass(IR::Program& program, const Profile& profile); diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index b9b4e9726..53b161149 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -498,7 +498,8 @@ void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& // buffer_load_format_xyz v[8:10], v1, s[32:35], 0 ... 
// is used to define an inline buffer resource std::array raw; - raw[0] = info.pgm_base + (handle->Arg(0).U32() | u64(handle->Arg(1).U32()) << 32); + // Keep relative address, we'll do fixup of the address at buffer fetch later + raw[0] = (handle->Arg(0).U32() | u64(handle->Arg(1).U32()) << 32); raw[1] = handle->Arg(2).U32() | u64(handle->Arg(3).U32()) << 32; const auto buffer = std::bit_cast(raw); buffer_binding = descriptors.Add(BufferResource{ diff --git a/src/shader_recompiler/ir/passes/srt.h b/src/shader_recompiler/ir/passes/srt.h index 4dce38674..918b832e0 100644 --- a/src/shader_recompiler/ir/passes/srt.h +++ b/src/shader_recompiler/ir/passes/srt.h @@ -7,9 +7,14 @@ #include #include "common/types.h" +namespace Serialization { +struct Archive; +} + namespace Shader { using PFN_SrtWalker = void PS4_SYSV_ABI (*)(const u32* /*user_data*/, u32* /*flat_dst*/); +PFN_SrtWalker RegisterWalkerCode(const u8* ptr, size_t size); struct PersistentSrtInfo { // Special case when fetch shader uses step rates. 
@@ -20,7 +25,11 @@ struct PersistentSrtInfo { }; PFN_SrtWalker walker_func{}; + size_t walker_func_size{}; u32 flattened_bufsize_dw = 16; // NumUserDataRegs + + void Serialize(Serialization::Archive& ar) const; + bool Deserialize(Serialization::Archive& ar); }; } // namespace Shader diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index c51e00088..52e37bbf0 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -8,6 +8,10 @@ namespace Shader { struct Profile { + u64 max_ubo_size{}; + u32 max_viewport_width{}; + u32 max_viewport_height{}; + u32 max_shared_memory_size{}; u32 supported_spirv{0x00010000}; u32 subgroup_size{}; bool support_int8{}; @@ -37,10 +41,7 @@ struct Profile { bool needs_lds_barriers{}; bool needs_buffer_offsets{}; bool needs_unorm_fixup{}; - u64 max_ubo_size{}; - u32 max_viewport_width{}; - u32 max_viewport_height{}; - u32 max_shared_memory_size{}; + bool _pad0{}; }; } // namespace Shader diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index 547d4524f..4764ddbec 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -29,7 +29,7 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { return blocks; } -IR::Program TranslateProgram(std::span code, Pools& pools, Info& info, +IR::Program TranslateProgram(const std::span& code, Pools& pools, Info& info, RuntimeInfo& runtime_info, const Profile& profile) { // Ensure first instruction is expected. constexpr u32 token_mov_vcchi = 0xBEEB03FF; @@ -55,8 +55,8 @@ IR::Program TranslateProgram(std::span code, Pools& pools, Info& info Gcn::CFG cfg{gcn_block_pool, program.ins_list}; // Structurize control flow graph and create program. 
- program.syntax_list = Shader::Gcn::BuildASL(pools.inst_pool, pools.block_pool, cfg, - program.info, runtime_info, profile); + program.syntax_list = + Shader::Gcn::BuildASL(pools.inst_pool, pools.block_pool, cfg, info, runtime_info, profile); program.blocks = GenerateBlocks(program.syntax_list); program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front()); diff --git a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h index 8180c29b3..80c63447a 100644 --- a/src/shader_recompiler/recompiler.h +++ b/src/shader_recompiler/recompiler.h @@ -27,7 +27,8 @@ struct Pools { } }; -[[nodiscard]] IR::Program TranslateProgram(std::span code, Pools& pools, Info& info, - RuntimeInfo& runtime_info, const Profile& profile); +[[nodiscard]] IR::Program TranslateProgram(const std::span& code, Pools& pools, + Info& info, RuntimeInfo& runtime_info, + const Profile& profile); } // namespace Shader diff --git a/src/shader_recompiler/resource.h b/src/shader_recompiler/resource.h index 29545d0bb..5d9965105 100644 --- a/src/shader_recompiler/resource.h +++ b/src/shader_recompiler/resource.h @@ -53,8 +53,15 @@ struct BufferResource { } constexpr AmdGpu::Buffer GetSharp(const auto& info) const noexcept { - const auto buffer = - inline_cbuf ? 
inline_cbuf : info.template ReadUdSharp(sharp_idx); + AmdGpu::Buffer buffer{}; + if (inline_cbuf) { + buffer = inline_cbuf; + if (inline_cbuf.base_address > 1) { + buffer.base_address += info.pgm_base; // address fixup + } + } else { + buffer = info.template ReadUdSharp(sharp_idx); + } if (!buffer.Valid()) { LOG_DEBUG(Render, "Encountered invalid buffer sharp"); return AmdGpu::Buffer::Null(); diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 9624c465f..8620ab970 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -159,7 +159,8 @@ struct GeometryRuntimeInfo { return num_outputs == other.num_outputs && outputs == other.outputs && num_invocations && other.num_invocations && output_vertices == other.output_vertices && in_primitive == other.in_primitive && - std::ranges::equal(out_primitive, other.out_primitive); + std::ranges::equal(out_primitive, other.out_primitive) && + vs_copy_hash == other.vs_copy_hash; } }; diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index a7215e29e..4f6bb44bf 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -79,8 +79,8 @@ struct SamplerSpecialization { struct StageSpecialization { static constexpr size_t MaxStageResources = 128; - const Shader::Info* info; - RuntimeInfo runtime_info; + const Info* info{}; + RuntimeInfo runtime_info{}; std::bitset bitset{}; std::optional fetch_shader_data{}; boost::container::small_vector vs_attribs; @@ -90,6 +90,7 @@ struct StageSpecialization { boost::container::small_vector samplers; Backend::Bindings start{}; + StageSpecialization() = default; StageSpecialization(const Info& info_, RuntimeInfo runtime_info_, const Profile& profile_, Backend::Bindings start_) : info{&info_}, runtime_info{runtime_info_}, start{start_} { @@ -158,7 +159,7 @@ struct StageSpecialization { // Initialize runtime_info fields that rely on analysis in 
tessellation passes if (info->l_stage == LogicalStage::TessellationControl || info->l_stage == LogicalStage::TessellationEval) { - Shader::TessellationDataConstantBuffer tess_constants; + TessellationDataConstantBuffer tess_constants{}; info->ReadTessConstantBuffer(tess_constants); if (info->l_stage == LogicalStage::TessellationControl) { runtime_info.hs_info.InitFromTessConstants(tess_constants); @@ -192,21 +193,43 @@ struct StageSpecialization { } } + [[nodiscard]] bool Valid() const { + return info != nullptr; + } + bool operator==(const StageSpecialization& other) const { - if (start != other.start) { + if (!Valid()) { return false; } + + if (vs_attribs != other.vs_attribs) { + return false; + } + if (runtime_info != other.runtime_info) { return false; } + if (fetch_shader_data != other.fetch_shader_data) { return false; } - for (u32 i = 0; i < vs_attribs.size(); i++) { - if (vs_attribs[i] != other.vs_attribs[i]) { - return false; - } + + if (fmasks != other.fmasks) { + return false; } + + // For VS which only generates geometry and doesn't have any inputs, its start + // bindings still may change as they depend on previously processed FS. The check below + // handles this case and prevents generation of redundant permutations. This is also safe + // for other types of shaders with no bindings. 
+ if (bitset.none() && other.bitset.none()) { + return true; + } + + if (start != other.start) { + return false; + } + u32 binding{}; for (u32 i = 0; i < buffers.size(); i++) { if (other.bitset[binding++] && buffers[i] != other.buffers[i]) { @@ -218,11 +241,7 @@ struct StageSpecialization { return false; } } - for (u32 i = 0; i < fmasks.size(); i++) { - if (other.bitset[binding++] && fmasks[i] != other.fmasks[i]) { - return false; - } - } + for (u32 i = 0; i < samplers.size(); i++) { if (samplers[i] != other.samplers[i]) { return false; @@ -230,6 +249,9 @@ struct StageSpecialization { } return true; } + + void Serialize(Serialization::Archive& ar) const; + bool Deserialize(Serialization::Archive& ar); }; } // namespace Shader diff --git a/src/video_core/amdgpu/pixel_format.h b/src/video_core/amdgpu/pixel_format.h index 21c2eee2a..69e082edb 100644 --- a/src/video_core/amdgpu/pixel_format.h +++ b/src/video_core/amdgpu/pixel_format.h @@ -79,10 +79,10 @@ enum class NumberFormat : u32 { Ubscaled = 13, }; -enum class NumberClass { - Float, - Sint, - Uint, +enum class NumberClass : u8 { + Float = 0, + Sint = 1, + Uint = 2, }; enum class CompSwizzle : u8 { diff --git a/src/video_core/cache_storage.cpp b/src/video_core/cache_storage.cpp new file mode 100644 index 000000000..1c46a4cf5 --- /dev/null +++ b/src/video_core/cache_storage.cpp @@ -0,0 +1,264 @@ +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/config.h" +#include "common/elf_info.h" +#include "common/io_file.h" +#include "common/polyfill_thread.h" +#include "common/thread.h" + +#include "video_core/cache_storage.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_pipeline_cache.h" + +#include + +#include +#include +#include +#include +#include + +namespace { + +std::mutex submit_mutex{}; +u32 num_requests{}; +std::condition_variable_any request_cv{}; +std::queue> req_queue{}; +std::mutex 
m_request{}; + +mz_zip_archive zip_ar{}; +bool ar_is_read_only{true}; + +} // namespace + +namespace Storage { + +void ProcessIO(const std::stop_token& stoken) { + Common::SetCurrentThreadName("shadPS4:PipelineCacheIO"); + + while (!stoken.stop_requested()) { + { + std::unique_lock lk{submit_mutex}; + Common::CondvarWait(request_cv, lk, stoken, [&] { return num_requests; }); + } + + if (stoken.stop_requested()) { + break; + } + + while (num_requests) { + std::packaged_task request{}; + { + std::scoped_lock lock{m_request}; + if (req_queue.empty()) { + continue; + } + request = std::move(req_queue.front()); + req_queue.pop(); + } + + if (request.valid()) { + request(); + request.get_future().wait(); + } + + --num_requests; + } + } +} + +constexpr std::string GetBlobFileExtension(BlobType type) { + switch (type) { + case BlobType::ShaderMeta: { + return "meta"; + } + case BlobType::ShaderBinary: { + return "spv"; + } + case BlobType::PipelineKey: { + return "key"; + } + case BlobType::ShaderProfile: { + return "bin"; + } + default: + UNREACHABLE(); + } +} + +void DataBase::Open() { + if (opened) { + return; + } + + const auto& game_info = Common::ElfInfo::Instance(); + + using namespace Common::FS; + if (Config::isPipelineCacheArchived()) { + mz_zip_zero_struct(&zip_ar); + + cache_path = GetUserPath(PathType::CacheDir) / + std::filesystem::path{game_info.GameSerial()}.replace_extension(".zip"); + + if (!mz_zip_reader_init_file(&zip_ar, cache_path.string().c_str(), + MZ_ZIP_FLAG_READ_ALLOW_WRITING) || + !mz_zip_validate_archive(&zip_ar, 0)) { + LOG_INFO(Render, "Cache archive {} is not found or archive is corrupted", + cache_path.string().c_str()); + mz_zip_reader_end(&zip_ar); + mz_zip_writer_init_file(&zip_ar, cache_path.string().c_str(), 0); + } + } else { + cache_path = GetUserPath(PathType::CacheDir) / game_info.GameSerial(); + if (!std::filesystem::exists(cache_path)) { + std::filesystem::create_directories(cache_path); + } + } + + io_worker = 
std::jthread{ProcessIO}; + opened = true; +} + +void DataBase::Close() { + if (!IsOpened()) { + return; + } + + io_worker.request_stop(); + io_worker.join(); + + if (Config::isPipelineCacheArchived()) { + mz_zip_writer_finalize_archive(&zip_ar); + mz_zip_writer_end(&zip_ar); + } + + LOG_INFO(Render, "Cache dumped"); +} + +template +bool WriteVector(const BlobType type, std::filesystem::path&& path_, std::vector&& v) { + { + auto request = std::packaged_task{[=]() { + auto path{path_}; + path.replace_extension(GetBlobFileExtension(type)); + if (Config::isPipelineCacheArchived()) { + ASSERT_MSG(!ar_is_read_only, + "The archive is read-only. Did you forget to call `FinishPreload`?"); + if (!mz_zip_writer_add_mem(&zip_ar, path.string().c_str(), v.data(), + v.size() * sizeof(T), MZ_BEST_COMPRESSION)) { + LOG_ERROR(Render, "Failed to add {} to the archive", path.string().c_str()); + } + } else { + using namespace Common::FS; + const auto file = IOFile{path, FileAccessMode::Create}; + file.Write(v); + } + }}; + std::scoped_lock lock{m_request}; + req_queue.emplace(std::move(request)); + } + + std::scoped_lock lk{submit_mutex}; + ++num_requests; + request_cv.notify_one(); + return true; +} + +template +void LoadVector(BlobType type, std::filesystem::path& path, std::vector& v) { + using namespace Common::FS; + path.replace_extension(GetBlobFileExtension(type)); + if (Config::isPipelineCacheArchived()) { + int index{-1}; + index = mz_zip_reader_locate_file(&zip_ar, path.string().c_str(), nullptr, 0); + if (index < 0) { + LOG_WARNING(Render, "File {} is not found in the archive", path.string().c_str()); + return; + } + mz_zip_archive_file_stat stat{}; + mz_zip_reader_file_stat(&zip_ar, index, &stat); + v.resize(stat.m_uncomp_size / sizeof(T)); + mz_zip_reader_extract_to_mem(&zip_ar, index, v.data(), stat.m_uncomp_size, 0); + } else { + const auto file = IOFile{path, FileAccessMode::Read}; + v.resize(file.GetSize() / sizeof(T)); + file.Read(v); + } +} + +bool 
DataBase::Save(BlobType type, const std::string& name, std::vector&& data) { + if (!opened) { + return false; + } + + auto path = Config::isPipelineCacheArchived() ? std::filesystem::path{name} : cache_path / name; + return WriteVector(type, std::move(path), std::move(data)); +} + +bool DataBase::Save(BlobType type, const std::string& name, std::vector&& data) { + if (!opened) { + return false; + } + + auto path = Config::isPipelineCacheArchived() ? std::filesystem::path{name} : cache_path / name; + return WriteVector(type, std::move(path), std::move(data)); +} + +void DataBase::Load(BlobType type, const std::string& name, std::vector& data) { + if (!opened) { + return; + } + + auto path = Config::isPipelineCacheArchived() ? std::filesystem::path{name} : cache_path / name; + return LoadVector(type, path, data); +} + +void DataBase::Load(BlobType type, const std::string& name, std::vector& data) { + if (!opened) { + return; + } + + auto path = Config::isPipelineCacheArchived() ? std::filesystem::path{name} : cache_path / name; + return LoadVector(type, path, data); +} + +void DataBase::ForEachBlob(BlobType type, const std::function&& data)>& func) { + const auto& ext = GetBlobFileExtension(type); + if (Config::isPipelineCacheArchived()) { + const auto num_files = mz_zip_reader_get_num_files(&zip_ar); + for (int index = 0; index < num_files; ++index) { + std::array file_name{}; + file_name.fill(0); + mz_zip_reader_get_filename(&zip_ar, index, file_name.data(), file_name.size()); + if (std::string{file_name.data()}.ends_with(ext)) { + mz_zip_archive_file_stat stat{}; + mz_zip_reader_file_stat(&zip_ar, index, &stat); + std::vector data(stat.m_uncomp_size); + mz_zip_reader_extract_to_mem(&zip_ar, index, data.data(), data.size(), 0); + func(std::move(data)); + } + } + } else { + for (const auto& file_name : std::filesystem::directory_iterator{cache_path}) { + if (file_name.path().extension().string().ends_with(ext)) { + using namespace Common::FS; + const auto& file = 
IOFile{file_name, FileAccessMode::Read}; + if (file.IsOpen()) { + std::vector data(file.GetSize()); + file.Read(data); + func(std::move(data)); + } + } + } + } +} + +void DataBase::FinishPreload() { + if (Config::isPipelineCacheArchived()) { + mz_zip_writer_init_from_reader(&zip_ar, cache_path.string().c_str()); + ar_is_read_only = false; + } +} + +} // namespace Storage diff --git a/src/video_core/cache_storage.h b/src/video_core/cache_storage.h new file mode 100644 index 000000000..91f2136e9 --- /dev/null +++ b/src/video_core/cache_storage.h @@ -0,0 +1,50 @@ +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/path_util.h" +#include "common/singleton.h" +#include "common/types.h" + +#include +#include +#include + +namespace Storage { + +enum class BlobType : u32 { + ShaderMeta, + ShaderBinary, + PipelineKey, + ShaderProfile, +}; + +class DataBase { +public: + static DataBase& Instance() { + return *Common::Singleton::Instance(); + } + + void Open(); + void Close(); + [[nodiscard]] bool IsOpened() const { + return opened; + } + void FinishPreload(); + + bool Save(BlobType type, const std::string& name, std::vector&& data); + bool Save(BlobType type, const std::string& name, std::vector&& data); + + void Load(BlobType type, const std::string& name, std::vector& data); + void Load(BlobType type, const std::string& name, std::vector& data); + + void ForEachBlob(BlobType type, const std::function&& data)>& func); + +private: + std::jthread io_worker{}; + std::filesystem::path cache_path{}; + bool opened{}; +}; + +} // namespace Storage diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 2b93eb7f3..35eda86da 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -13,7 +13,8 @@ namespace Vulkan { 
ComputePipeline::ComputePipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap, const Shader::Profile& profile, vk::PipelineCache pipeline_cache, ComputePipelineKey compute_key_, - const Shader::Info& info_, vk::ShaderModule module) + const Shader::Info& info_, vk::ShaderModule module, + SerializationSupport& sdata, bool preloading /*=false*/) : Pipeline{instance, scheduler, desc_heap, profile, pipeline_cache, true}, compute_key{compute_key_} { auto& info = stages[int(Shader::LogicalStage::Compute)]; @@ -29,7 +30,11 @@ ComputePipeline::ComputePipeline(const Instance& instance, Scheduler& scheduler, u32 binding{}; boost::container::small_vector bindings; for (const auto& buffer : info->buffers) { - const auto sharp = buffer.GetSharp(*info); + // During deserialization, we don't have access to the UD to fetch sharp data. To address + // this properly we need to track sharps or a portion of them in `sdata`, but since we're + // interested only in the "is storage" flag (which is not even effective atm), we can take a + // shortcut here. + const auto sharp = preloading ? AmdGpu::Buffer{} : buffer.GetSharp(*info); bindings.push_back({ .binding = binding++, .descriptorType = buffer.IsStorage(sharp) ?
vk::DescriptorType::eStorageBuffer diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 79059b509..1cac7204c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -11,6 +11,10 @@ class BufferCache; class TextureCache; } // namespace VideoCore +namespace Serialization { +struct Archive; +} + namespace Vulkan { class Instance; @@ -26,14 +30,24 @@ struct ComputePipelineKey { friend bool operator!=(const ComputePipelineKey& lhs, const ComputePipelineKey& rhs) { return !(lhs == rhs); } + + void Serialize(Serialization::Archive& ar) const; + bool Deserialize(Serialization::Archive& ar); }; class ComputePipeline : public Pipeline { public: + struct SerializationSupport { + u32 dummy{}; + + void Serialize(Serialization::Archive& ar) const; + bool Deserialize(Serialization::Archive& ar); + }; + ComputePipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap, const Shader::Profile& profile, vk::PipelineCache pipeline_cache, ComputePipelineKey compute_key, const Shader::Info& info, - vk::ShaderModule module); + vk::ShaderModule module, SerializationSupport& sdata, bool preloading); ~ComputePipeline(); private: diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index e2531456c..242c9b6f2 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -41,12 +41,12 @@ GraphicsPipeline::GraphicsPipeline( vk::PipelineCache pipeline_cache, std::span infos, std::span runtime_infos, std::optional fetch_shader_, - std::span modules) + std::span modules, SerializationSupport& sdata, bool preloading) : Pipeline{instance, scheduler, desc_heap, profile, pipeline_cache}, key{key_}, fetch_shader{std::move(fetch_shader_)} { const vk::Device device = instance.GetDevice(); 
std::ranges::copy(infos, stages.begin()); - BuildDescSetLayout(); + BuildDescSetLayout(preloading); const auto debug_str = GetDebugString(); const vk::PushConstantRange push_constants = { @@ -68,27 +68,26 @@ GraphicsPipeline::GraphicsPipeline( pipeline_layout = std::move(layout); SetObjectName(device, *pipeline_layout, "Graphics PipelineLayout {}", debug_str); - VertexInputs vertex_attributes; - VertexInputs vertex_bindings; - VertexInputs divisors; - VertexInputs guest_buffers; - if (!instance.IsVertexInputDynamicState()) { - const auto& vs_info = runtime_infos[u32(Shader::LogicalStage::Vertex)].vs_info; - GetVertexInputs(vertex_attributes, vertex_bindings, divisors, guest_buffers, - vs_info.step_rate_0, vs_info.step_rate_1); + if (!preloading) { + VertexInputs guest_buffers; + if (!instance.IsVertexInputDynamicState()) { + const auto& vs_info = runtime_infos[u32(Shader::LogicalStage::Vertex)].vs_info; + GetVertexInputs(sdata.vertex_attributes, sdata.vertex_bindings, sdata.divisors, + guest_buffers, vs_info.step_rate_0, vs_info.step_rate_1); + } } const vk::PipelineVertexInputDivisorStateCreateInfo divisor_state = { - .vertexBindingDivisorCount = static_cast(divisors.size()), - .pVertexBindingDivisors = divisors.data(), + .vertexBindingDivisorCount = static_cast(sdata.divisors.size()), + .pVertexBindingDivisors = sdata.divisors.data(), }; const vk::PipelineVertexInputStateCreateInfo vertex_input_info = { - .pNext = divisors.empty() ? nullptr : &divisor_state, - .vertexBindingDescriptionCount = static_cast(vertex_bindings.size()), - .pVertexBindingDescriptions = vertex_bindings.data(), - .vertexAttributeDescriptionCount = static_cast(vertex_attributes.size()), - .pVertexAttributeDescriptions = vertex_attributes.data(), + .pNext = sdata.divisors.empty() ? 
nullptr : &divisor_state, + .vertexBindingDescriptionCount = static_cast(sdata.vertex_bindings.size()), + .pVertexBindingDescriptions = sdata.vertex_bindings.data(), + .vertexAttributeDescriptionCount = static_cast(sdata.vertex_attributes.size()), + .pVertexAttributeDescriptions = sdata.vertex_attributes.data(), }; const auto topology = LiverpoolToVK::PrimitiveType(key.prim_type); @@ -98,7 +97,6 @@ GraphicsPipeline::GraphicsPipeline( const bool is_rect_list = key.prim_type == AmdGpu::PrimitiveType::RectList; const bool is_quad_list = key.prim_type == AmdGpu::PrimitiveType::QuadList; - const auto& fs_info = runtime_infos[u32(Shader::LogicalStage::Fragment)].fs_info; const vk::PipelineTessellationStateCreateInfo tessellation_state = { .patchControlPoints = is_rect_list ? 3U : (is_quad_list ? 4U : key.patch_control_points), }; @@ -128,12 +126,15 @@ GraphicsPipeline::GraphicsPipeline( raster_chain.unlink(); } - const vk::PipelineMultisampleStateCreateInfo multisampling = { - .rasterizationSamples = LiverpoolToVK::NumSamples( - key.num_samples, instance.GetColorSampleCounts() & instance.GetDepthSampleCounts()), - .sampleShadingEnable = - fs_info.addr_flags.persp_sample_ena || fs_info.addr_flags.linear_sample_ena, - }; + if (!preloading) { + const auto& fs_info = runtime_infos[u32(Shader::LogicalStage::Fragment)].fs_info; + sdata.multisampling = { + .rasterizationSamples = LiverpoolToVK::NumSamples( + key.num_samples, instance.GetColorSampleCounts() & instance.GetDepthSampleCounts()), + .sampleShadingEnable = + fs_info.addr_flags.persp_sample_ena || fs_info.addr_flags.linear_sample_ena, + }; + } const vk::PipelineViewportDepthClipControlCreateInfoEXT clip_control = { .negativeOneToOne = key.clip_space == AmdGpu::ClipSpace::MinusWToW, @@ -164,7 +165,7 @@ GraphicsPipeline::GraphicsPipeline( } if (instance.IsVertexInputDynamicState()) { dynamic_states.push_back(vk::DynamicState::eVertexInputEXT); - } else if (!vertex_bindings.empty()) { + } else if 
(!sdata.vertex_bindings.empty()) { dynamic_states.push_back(vk::DynamicState::eVertexInputBindingStride); } @@ -200,10 +201,13 @@ GraphicsPipeline::GraphicsPipeline( }); } else if (is_rect_list || is_quad_list) { const auto type = is_quad_list ? AuxShaderType::QuadListTCS : AuxShaderType::RectListTCS; - auto tcs = Shader::Backend::SPIRV::EmitAuxilaryTessShader(type, fs_info); + if (!preloading) { + const auto& fs_info = runtime_infos[u32(Shader::LogicalStage::Fragment)].fs_info; + sdata.tcs = Shader::Backend::SPIRV::EmitAuxilaryTessShader(type, fs_info); + } shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eTessellationControl, - .module = CompileSPV(tcs, instance.GetDevice()), + .module = CompileSPV(sdata.tcs, instance.GetDevice()), .pName = "main", }); } @@ -215,11 +219,14 @@ GraphicsPipeline::GraphicsPipeline( .pName = "main", }); } else if (is_rect_list || is_quad_list) { - auto tes = - Shader::Backend::SPIRV::EmitAuxilaryTessShader(AuxShaderType::PassthroughTES, fs_info); + if (!preloading) { + const auto& fs_info = runtime_infos[u32(Shader::LogicalStage::Fragment)].fs_info; + sdata.tes = Shader::Backend::SPIRV::EmitAuxilaryTessShader( + AuxShaderType::PassthroughTES, fs_info); + } shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eTessellationEvaluation, - .module = CompileSPV(tes, instance.GetDevice()), + .module = CompileSPV(sdata.tes, instance.GetDevice()), .pName = "main", }); } @@ -360,7 +367,7 @@ GraphicsPipeline::GraphicsPipeline( .pTessellationState = &tessellation_state, .pViewportState = &viewport_info, .pRasterizationState = &raster_chain.get(), - .pMultisampleState = &multisampling, + .pMultisampleState = &sdata.multisampling, .pColorBlendState = &color_blending, .pDynamicState = &dynamic_info, .layout = *pipeline_layout, @@ -428,7 +435,7 @@ template void GraphicsPipeline::GetVertexInputs( VertexInputs& divisors, VertexInputs& guest_buffers, u32 
step_rate_0, u32 step_rate_1) const; -void GraphicsPipeline::BuildDescSetLayout() { +void GraphicsPipeline::BuildDescSetLayout(bool preloading) { boost::container::small_vector bindings; u32 binding{}; @@ -438,7 +445,9 @@ void GraphicsPipeline::BuildDescSetLayout() { } const auto stage_bit = LogicalStageToStageBit[u32(stage->l_stage)]; for (const auto& buffer : stage->buffers) { - const auto sharp = buffer.GetSharp(*stage); + const auto sharp = + preloading ? AmdGpu::Buffer{} + : buffer.GetSharp(*stage); // See the comment in compute pipeline creation bindings.push_back({ .binding = binding++, .descriptorType = buffer.IsStorage(sharp) ? vk::DescriptorType::eStorageBuffer diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 8254605cb..0dea92864 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -63,17 +63,33 @@ struct GraphicsPipelineKey { bool operator==(const GraphicsPipelineKey& key) const noexcept { return std::memcmp(this, &key, sizeof(key)) == 0; } + + void Serialize(Serialization::Archive& ar) const; + bool Deserialize(Serialization::Archive& ar); }; class GraphicsPipeline : public Pipeline { public: + struct SerializationSupport { + VertexInputs vertex_attributes{}; + VertexInputs vertex_bindings{}; + VertexInputs divisors{}; + vk::PipelineMultisampleStateCreateInfo multisampling{}; + std::vector tcs{}; + std::vector tes{}; + + void Serialize(Serialization::Archive& ar) const; + bool Deserialize(Serialization::Archive& ar); + }; + GraphicsPipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap, const Shader::Profile& profile, const GraphicsPipelineKey& key, vk::PipelineCache pipeline_cache, std::span stages, std::span runtime_infos, std::optional fetch_shader, - std::span modules); + std::span modules, SerializationSupport& sdata, + bool preloading); ~GraphicsPipeline(); const
std::optional& GetFetchShader() const noexcept { @@ -92,7 +108,7 @@ public: u32 step_rate_1) const; private: - void BuildDescSetLayout(); + void BuildDescSetLayout(bool preloading); private: GraphicsPipelineKey key; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 4706bff24..a0ea58817 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -13,9 +13,10 @@ #include "shader_recompiler/recompiler.h" #include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/liverpool.h" +#include "video_core/cache_storage.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" -#include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_pipeline_serialization.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" @@ -223,6 +224,13 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, desc_heap{instance, scheduler.GetMasterSemaphore(), DescriptorHeapSizes} { const auto& vk12_props = instance.GetVk12Properties(); profile = Shader::Profile{ + // When binding a UBO, we calculate its size considering the offset in the larger buffer + // cache underlying resource. In some cases, it may produce sizes exceeding the system + // maximum allowed UBO range, so we need to reduce the threshold to prevent issues. 
+ .max_ubo_size = instance.UniformMaxSize() - instance.UniformMinAlignment(), + .max_viewport_width = instance.GetMaxViewportWidth(), + .max_viewport_height = instance.GetMaxViewportHeight(), + .max_shared_memory_size = instance.MaxComputeSharedMemorySize(), .supported_spirv = SpirvVersion1_6, .subgroup_size = instance.SubgroupSize(), .support_int8 = instance.IsShaderInt8Supported(), @@ -258,14 +266,10 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, instance.GetDriverID() == vk::DriverId::eMoltenvk, .needs_buffer_offsets = instance.StorageMinAlignment() > 4, .needs_unorm_fixup = instance.GetDriverID() == vk::DriverId::eMoltenvk, - // When binding a UBO, we calculate its size considering the offset in the larger buffer - // cache underlying resource. In some cases, it may produce sizes exceeding the system - // maximum allowed UBO range, so we need to reduce the threshold to prevent issues. - .max_ubo_size = instance.UniformMaxSize() - instance.UniformMinAlignment(), - .max_viewport_width = instance.GetMaxViewportWidth(), - .max_viewport_height = instance.GetMaxViewportHeight(), - .max_shared_memory_size = instance.MaxComputeSharedMemorySize(), }; + + WarmUp(); + auto [cache_result, cache] = instance.GetDevice().createPipelineCacheUnique({}); ASSERT_MSG(cache_result == vk::Result::eSuccess, "Failed to create pipeline cache: {}", vk::to_string(cache_result)); @@ -283,9 +287,14 @@ const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() { const auto pipeline_hash = std::hash{}(graphics_key); LOG_INFO(Render_Vulkan, "Compiling graphics pipeline {:#x}", pipeline_hash); - it.value() = std::make_unique(instance, scheduler, desc_heap, profile, - graphics_key, *pipeline_cache, infos, - runtime_infos, fetch_shader, modules); + GraphicsPipeline::SerializationSupport sdata{}; + it.value() = std::make_unique( + instance, scheduler, desc_heap, profile, graphics_key, *pipeline_cache, infos, + runtime_infos, fetch_shader, modules, sdata, 
false); + + RegisterPipelineData(graphics_key, pipeline_hash, sdata); + ++num_new_pipelines; + if (Config::collectShadersForDebug()) { for (auto stage = 0; stage < MaxShaderStages; ++stage) { if (infos[stage]) { @@ -294,6 +303,7 @@ const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() { } } } + fetch_shader.reset(); } return it->second.get(); } @@ -307,9 +317,13 @@ const ComputePipeline* PipelineCache::GetComputePipeline() { const auto pipeline_hash = std::hash{}(compute_key); LOG_INFO(Render_Vulkan, "Compiling compute pipeline {:#x}", pipeline_hash); - it.value() = - std::make_unique(instance, scheduler, desc_heap, profile, - *pipeline_cache, compute_key, *infos[0], modules[0]); + ComputePipeline::SerializationSupport sdata{}; + it.value() = std::make_unique(instance, scheduler, desc_heap, profile, + *pipeline_cache, compute_key, *infos[0], + modules[0], sdata, false); + RegisterPipelineData(compute_key, sdata); + ++num_new_pipelines; + if (Config::collectShadersForDebug()) { auto& m = modules[0]; module_related_pipelines[m].emplace_back(compute_key); @@ -445,6 +459,7 @@ bool PipelineCache::RefreshGraphicsStages() { }; infos.fill(nullptr); + modules.fill(nullptr); bind_stage(Stage::Fragment, LogicalStage::Fragment); const auto* fs_info = infos[static_cast(LogicalStage::Fragment)]; @@ -515,7 +530,7 @@ bool PipelineCache::RefreshComputeKey() { } vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, Shader::RuntimeInfo& runtime_info, - std::span code, size_t perm_idx, + const std::span& code, size_t perm_idx, Shader::Backend::Bindings& binding) { LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash, perm_idx != 0 ? 
"(permutation)" : ""); @@ -536,6 +551,8 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, Shader::Runtim module = CompileSPV(spv, instance.GetDevice()); } + RegisterShaderBinary(std::move(spv), info.pgm_hash, perm_idx); + const auto name = GetShaderName(info.stage, info.pgm_hash, perm_idx); Vulkan::SetObjectName(instance.GetDevice(), module, name); if (Config::collectShadersForDebug()) { @@ -546,7 +563,7 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, Shader::Runtim } PipelineCache::Result PipelineCache::GetProgram(Stage stage, LogicalStage l_stage, - Shader::ShaderParams params, + const Shader::ShaderParams& params, Shader::Backend::Bindings& binding) { auto runtime_info = BuildRuntimeInfo(stage, l_stage); auto [it_pgm, new_program] = program_cache.try_emplace(params.hash); @@ -555,32 +572,42 @@ PipelineCache::Result PipelineCache::GetProgram(Stage stage, LogicalStage l_stag auto& program = it_pgm.value(); auto start = binding; const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding); - const auto spec = Shader::StageSpecialization(program->info, runtime_info, profile, start); + auto spec = Shader::StageSpecialization(program->info, runtime_info, profile, start); + const auto perm_hash = HashCombine(params.hash, 0); + + RegisterShaderMeta(program->info, spec.fetch_shader_data, spec, perm_hash, 0); program->AddPermut(module, std::move(spec)); - return std::make_tuple(&program->info, module, spec.fetch_shader_data, - HashCombine(params.hash, 0)); + return std::make_tuple(&program->info, module, program->modules[0].spec.fetch_shader_data, + perm_hash); } - it_pgm.value()->info.user_data = params.user_data; auto& program = it_pgm.value(); auto& info = program->info; + info.pgm_base = params.Base(); // Needs to be actualized for inline cbuffer address fixup + info.user_data = params.user_data; info.RefreshFlatBuf(); - const auto spec = Shader::StageSpecialization(info, runtime_info, profile, 
binding); + auto spec = Shader::StageSpecialization(info, runtime_info, profile, binding); + size_t perm_idx = program->modules.size(); + u64 perm_hash = HashCombine(params.hash, perm_idx); + vk::ShaderModule module{}; const auto it = std::ranges::find(program->modules, spec, &Program::Module::spec); if (it == program->modules.end()) { auto new_info = Shader::Info(stage, l_stage, params); module = CompileModule(new_info, runtime_info, params.code, perm_idx, binding); + + RegisterShaderMeta(info, spec.fetch_shader_data, spec, perm_hash, perm_idx); program->AddPermut(module, std::move(spec)); } else { info.AddBindings(binding); module = it->module; perm_idx = std::distance(program->modules.begin(), it); + perm_hash = HashCombine(params.hash, perm_idx); } - return std::make_tuple(&info, module, spec.fetch_shader_data, - HashCombine(params.hash, perm_idx)); + return std::make_tuple(&program->info, module, + program->modules[perm_idx].spec.fetch_shader_data, perm_hash); } std::optional PipelineCache::ReplaceShader(vk::ShaderModule module, @@ -654,5 +681,4 @@ std::optional> PipelineCache::GetShaderPatch(u64 hash, Shader:: file.Read(code); return code; } - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 706b99536..754397214 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -23,6 +23,10 @@ namespace AmdGpu { class Liverpool; } +namespace Serialization { +struct Archive; +} + namespace Shader { struct Info; } @@ -38,17 +42,25 @@ struct Program { vk::ShaderModule module; Shader::StageSpecialization spec; }; - using ModuleList = boost::container::small_vector; + static constexpr size_t MaxPermutations = 8; + using ModuleList = boost::container::small_vector; Shader::Info info; - ModuleList modules; + ModuleList modules{}; - explicit Program(Shader::Stage stage, Shader::LogicalStage l_stage, 
Shader::ShaderParams params) + Program() = default; + Program(Shader::Stage stage, Shader::LogicalStage l_stage, Shader::ShaderParams params) : info{stage, l_stage, params} {} - void AddPermut(vk::ShaderModule module, const Shader::StageSpecialization&& spec) { + void AddPermut(vk::ShaderModule module, Shader::StageSpecialization&& spec) { modules.emplace_back(module, std::move(spec)); } + + void InsertPermut(vk::ShaderModule module, Shader::StageSpecialization&& spec, + size_t perm_idx) { + modules.resize(std::max(modules.size(), perm_idx + 1)); // <-- beware of realloc + modules[perm_idx] = {module, std::move(spec)}; + } }; class PipelineCache { @@ -57,6 +69,13 @@ public: AmdGpu::Liverpool* liverpool); ~PipelineCache(); + void WarmUp(); + void Sync(); + + bool LoadComputePipeline(Serialization::Archive& ar); + bool LoadGraphicsPipeline(Serialization::Archive& ar); + bool LoadPipelineStage(Serialization::Archive& ar, size_t stage); + const GraphicsPipeline* GetGraphicsPipeline(); const ComputePipeline* GetComputePipeline(); @@ -64,7 +83,7 @@ public: using Result = std::tuple, u64>; Result GetProgram(Shader::Stage stage, Shader::LogicalStage l_stage, - Shader::ShaderParams params, Shader::Backend::Bindings& binding); + const Shader::ShaderParams& params, Shader::Backend::Bindings& binding); std::optional ReplaceShader(vk::ShaderModule module, std::span spv_code); @@ -86,10 +105,14 @@ private: std::optional> GetShaderPatch(u64 hash, Shader::Stage stage, size_t perm_idx, std::string_view ext); vk::ShaderModule CompileModule(Shader::Info& info, Shader::RuntimeInfo& runtime_info, - std::span code, size_t perm_idx, + const std::span& code, size_t perm_idx, Shader::Backend::Bindings& binding); const Shader::RuntimeInfo& BuildRuntimeInfo(Shader::Stage stage, Shader::LogicalStage l_stage); + [[nodiscard]] bool IsPipelineCacheDirty() const { + return num_new_pipelines > 0; + } + private: const Instance& instance; Scheduler& scheduler; @@ -108,6 +131,7 @@ private: 
std::optional fetch_shader{}; GraphicsPipelineKey graphics_key{}; ComputePipelineKey compute_key{}; + u32 num_new_pipelines{}; // new pipelines added to the cache since the game start // Only if Config::collectShadersForDebug() tsl::robin_map& fetch_shader_data, + const Shader::StageSpecialization& spec, size_t perm_hash, + size_t perm_idx) { + if (!Storage::DataBase::Instance().IsOpened()) { + return; + } + + Serialization::Archive ar; + Serialization::Writer meta{ar}; + + meta.Write(Serialization::ShaderMetaVersion); + meta.Write(Serialization::ShaderBinaryVersion); + + meta.Write(perm_hash); + meta.Write(perm_idx); + + spec.Serialize(ar); + info.Serialize(ar); + + Storage::DataBase::Instance().Save(Storage::BlobType::ShaderMeta, + fmt::format("{:#018x}", perm_hash), ar.TakeOff()); +} + +void RegisterShaderBinary(std::vector&& spv, u64 pgm_hash, size_t perm_idx) { + if (!Storage::DataBase::Instance().IsOpened()) { + return; + } + + Storage::DataBase::Instance().Save(Storage::BlobType::ShaderBinary, + fmt::format("{:#018x}_{}", pgm_hash, perm_idx), + std::move(spv)); +} + +bool LoadShaderMeta(Serialization::Archive& ar, Shader::Info& info, + std::optional& fetch_shader_data, + Shader::StageSpecialization& spec, size_t& perm_idx) { + Serialization::Reader meta{ar}; + + u32 meta_version{}; + meta.Read(meta_version); + if (meta_version != Serialization::ShaderMetaVersion) { + return false; + } + + u32 binary_version{}; + meta.Read(binary_version); + if (binary_version != Serialization::ShaderBinaryVersion) { + return false; + } + + u64 perm_hash_ar{}; + meta.Read(perm_hash_ar); + meta.Read(perm_idx); + + spec.Deserialize(ar); + info.Deserialize(ar); + + fetch_shader_data = spec.fetch_shader_data; + return true; +} + +void ComputePipelineKey::Serialize(Serialization::Archive& ar) const { + Serialization::Writer key{ar}; + key.Write(value); +} + +bool ComputePipelineKey::Deserialize(Serialization::Archive& ar) { + Serialization::Reader key{ar}; + key.Read(value); + 
return true; +} + +void ComputePipeline::SerializationSupport::Serialize(Serialization::Archive& ar) const { + // Nothing here yet + return; +} + +bool ComputePipeline::SerializationSupport::Deserialize(Serialization::Archive& ar) { + // Nothing here yet + return true; +} + +bool PipelineCache::LoadComputePipeline(Serialization::Archive& ar) { + compute_key.Deserialize(ar); + + ComputePipeline::SerializationSupport sdata{}; + sdata.Deserialize(ar); + + std::vector meta_blob; + Storage::DataBase::Instance().Load(Storage::BlobType::ShaderMeta, + fmt::format("{:#018x}", compute_key.value), meta_blob); + if (meta_blob.empty()) { + return false; + } + + Serialization::Archive meta_ar{std::move(meta_blob)}; + + if (!LoadPipelineStage(meta_ar, 0)) { + return false; + } + + const auto [it, is_new] = compute_pipelines.try_emplace(compute_key); + ASSERT(is_new); + + it.value() = + std::make_unique(instance, scheduler, desc_heap, profile, *pipeline_cache, + compute_key, *infos[0], modules[0], sdata, true); + + infos.fill(nullptr); + modules.fill(nullptr); + + return true; +} + +void GraphicsPipelineKey::Serialize(Serialization::Archive& ar) const { + Serialization::Writer key{ar}; + + key.Write(this, sizeof(*this)); +} + +bool GraphicsPipelineKey::Deserialize(Serialization::Archive& ar) { + Serialization::Reader key{ar}; + + key.Read(this, sizeof(*this)); + return true; +} + +void GraphicsPipeline::SerializationSupport::Serialize(Serialization::Archive& ar) const { + Serialization::Writer sdata{ar}; + + sdata.Write(&vertex_attributes, sizeof(vertex_attributes)); + sdata.Write(&vertex_bindings, sizeof(vertex_bindings)); + sdata.Write(&divisors, sizeof(divisors)); + sdata.Write(multisampling); + sdata.Write(tcs); + sdata.Write(tes); +} + +bool GraphicsPipeline::SerializationSupport::Deserialize(Serialization::Archive& ar) { + Serialization::Reader sdata{ar}; + + sdata.Read(&vertex_attributes, sizeof(vertex_attributes)); + sdata.Read(&vertex_bindings, sizeof(vertex_bindings)); + 
sdata.Read(&divisors, sizeof(divisors)); + sdata.Read(multisampling); + sdata.Read(tcs); + sdata.Read(tes); + return true; +} + +bool PipelineCache::LoadGraphicsPipeline(Serialization::Archive& ar) { + graphics_key.Deserialize(ar); + + GraphicsPipeline::SerializationSupport sdata{}; + sdata.Deserialize(ar); + + for (int stage_idx = 0; stage_idx < MaxShaderStages; ++stage_idx) { + const auto& hash = graphics_key.stage_hashes[stage_idx]; + if (!hash) { + continue; + } + + std::vector meta_blob; + Storage::DataBase::Instance().Load(Storage::BlobType::ShaderMeta, + fmt::format("{:#018x}", hash), meta_blob); + if (meta_blob.empty()) { + return false; + } + + Serialization::Archive meta_ar{std::move(meta_blob)}; + + if (!LoadPipelineStage(meta_ar, stage_idx)) { + return false; + } + } + + const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key); + ASSERT(is_new); + + it.value() = std::make_unique( + instance, scheduler, desc_heap, profile, graphics_key, *pipeline_cache, infos, + runtime_infos, fetch_shader, modules, sdata, true); + + infos.fill(nullptr); + modules.fill(nullptr); + fetch_shader.reset(); + + return true; +} + +bool PipelineCache::LoadPipelineStage(Serialization::Archive& ar, size_t stage) { + auto program = std::make_unique(); + Shader::StageSpecialization spec{}; + spec.info = &program->info; + size_t perm_idx{}; + if (!LoadShaderMeta(ar, program->info, fetch_shader, spec, perm_idx)) { + return false; + } + + std::vector spv{}; + Storage::DataBase::Instance().Load(Storage::BlobType::ShaderBinary, + fmt::format("{:#018x}_{}", program->info.pgm_hash, perm_idx), + spv); + if (spv.empty()) { + return false; + } + + // Permutation hash depends on shader variation index. 
To prevent collisions, we need insert it + // at the exact position rather than append + + vk::ShaderModule module{}; + + auto [it_pgm, new_program] = program_cache.try_emplace(program->info.pgm_hash); + if (new_program) { + module = CompileSPV(spv, instance.GetDevice()); + it_pgm.value() = std::move(program); + } else { + const auto& it = std::ranges::find(it_pgm.value()->modules, spec, &Program::Module::spec); + if (it != it_pgm.value()->modules.end()) { + // If the permutation is already preloaded, make sure it has the same permutation index + const auto idx = std::distance(it_pgm.value()->modules.begin(), it); + ASSERT_MSG(perm_idx == idx, "Permutation {} is already inserted at {}! ({}_{:x})", + perm_idx, idx, program->info.stage, program->info.pgm_hash); + module = it->module; + } else { + module = CompileSPV(spv, instance.GetDevice()); + } + } + it_pgm.value()->InsertPermut(module, std::move(spec), perm_idx); + + infos[stage] = &it_pgm.value()->info; + modules[stage] = module; + + return true; +} + +void PipelineCache::WarmUp() { + if (!Config::isPipelineCacheEnabled()) { + return; + } + + Storage::DataBase::Instance().Open(); + + // Check if cache is compatible + std::vector profile_data{}; + Storage::DataBase::Instance().Load(Storage::BlobType::ShaderProfile, "profile", profile_data); + if (profile_data.empty()) { + Storage::DataBase::Instance().FinishPreload(); + + profile_data.resize(sizeof(profile)); + std::memcpy(profile_data.data(), &profile, sizeof(profile)); + Storage::DataBase::Instance().Save(Storage::BlobType::ShaderProfile, "profile", + std::move(profile_data)); + return; + } + if (std::memcmp(profile_data.data(), &profile, sizeof(profile)) != 0) { + LOG_WARNING(Render, + "Pipeline cache isn't compatible with current system. 
Ignoring the cache"); + return; + } + + u32 num_pipelines{}; + u32 num_total_pipelines{}; + + Storage::DataBase::Instance().ForEachBlob( + Storage::BlobType::PipelineKey, [&](std::vector&& data) { + ++num_total_pipelines; + + Serialization::Archive ar{std::move(data)}; + Serialization::Reader pldata{ar}; + + u32 version{}; + pldata.Read(version); + if (version != Serialization::PipelineKeyVersion) { + return; + } + + u32 is_compute{}; + pldata.Read(is_compute); + + bool result{}; + if (is_compute) { + result = LoadComputePipeline(ar); + } else { + result = LoadGraphicsPipeline(ar); + } + + if (result) { + ++num_pipelines; + } + }); + + LOG_INFO(Render, "Preloaded {} pipelines", num_pipelines); + if (num_total_pipelines > num_pipelines) { + LOG_WARNING(Render, "{} stale pipelines were found. Consider re-generating the cache", + num_total_pipelines - num_pipelines); + } + + Storage::DataBase::Instance().FinishPreload(); +} + +void PipelineCache::Sync() { + Storage::DataBase::Instance().Close(); +} + +} // namespace Vulkan + +namespace Shader { + +void Info::Serialize(Serialization::Archive& ar) const { + Serialization::Writer info{ar}; + + info.Write(this, sizeof(InfoPersistent)); + info.Write(flattened_ud_buf); + srt_info.Serialize(ar); +} + +bool Info::Deserialize(Serialization::Archive& ar) { + Serialization::Reader info{ar}; + + info.Read(this, sizeof(Shader::InfoPersistent)); + info.Read(flattened_ud_buf); + + return srt_info.Deserialize(ar); +} + +void Gcn::FetchShaderData::Serialize(Serialization::Archive& ar) const { + Serialization::Writer fetch{ar}; + ar.Grow(6 + attributes.size() * sizeof(VertexAttribute)); + + fetch.Write(size); + fetch.Write(vertex_offset_sgpr); + fetch.Write(instance_offset_sgpr); + fetch.Write(attributes); +} + +bool Gcn::FetchShaderData::Deserialize(Serialization::Archive& ar) { + Serialization::Reader fetch{ar}; + + fetch.Read(size); + fetch.Read(vertex_offset_sgpr); + fetch.Read(instance_offset_sgpr); + fetch.Read(attributes); + + 
return true; +} + +void PersistentSrtInfo::Serialize(Serialization::Archive& ar) const { + Serialization::Writer srt{ar}; + + srt.Write(this, sizeof(*this)); + if (walker_func_size) { + srt.Write(reinterpret_cast(walker_func), walker_func_size); + } +} + +bool PersistentSrtInfo::Deserialize(Serialization::Archive& ar) { + Serialization::Reader srt{ar}; + + srt.Read(this, sizeof(*this)); + + if (walker_func_size) { + walker_func = RegisterWalkerCode(ar.CurrPtr(), walker_func_size); + ar.Advance(walker_func_size); + } + + return true; +} + +void StageSpecialization::Serialize(Serialization::Archive& ar) const { + Serialization::Writer spec{ar}; + + spec.Write(start); + spec.Write(runtime_info); + + spec.Write(bitset.to_string()); + + if (fetch_shader_data) { + spec.Write(sizeof(*fetch_shader_data)); + fetch_shader_data->Serialize(ar); + } else { + spec.Write(size_t{0}); + } + + spec.Write(vs_attribs); + spec.Write(buffers); + spec.Write(images); + spec.Write(fmasks); + spec.Write(samplers); +} + +bool StageSpecialization::Deserialize(Serialization::Archive& ar) { + Serialization::Reader spec{ar}; + + spec.Read(start); + spec.Read(runtime_info); + + std::string bits{}; + spec.Read(bits); + bitset = std::bitset(bits); + + u64 fetch_data_size{}; + spec.Read(fetch_data_size); + + if (fetch_data_size) { + Gcn::FetchShaderData fetch_data; + fetch_data.Deserialize(ar); + fetch_shader_data = fetch_data; + } + + spec.Read(vs_attribs); + spec.Read(buffers); + spec.Read(images); + spec.Read(fmasks); + spec.Read(samplers); + + return true; +} + +} // namespace Shader diff --git a/src/video_core/renderer_vulkan/vk_pipeline_serialization.h b/src/video_core/renderer_vulkan/vk_pipeline_serialization.h new file mode 100644 index 000000000..31ea4e357 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_pipeline_serialization.h @@ -0,0 +1,21 @@ +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include 
"shader_recompiler/frontend/fetch_shader.h" +#include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" + +namespace Vulkan { + +void RegisterPipelineData(const ComputePipelineKey& key, + ComputePipeline::SerializationSupport& sdata); +void RegisterPipelineData(const GraphicsPipelineKey& key, u64 hash, + GraphicsPipeline::SerializationSupport& sdata); +void RegisterShaderMeta(const Shader::Info& info, + const std::optional& fetch_shader_data, + const Shader::StageSpecialization& spec, size_t perm_hash, size_t perm_idx); +void RegisterShaderBinary(std::vector&& spv, u64 pgm_hash, size_t perm_idx); + +} // namespace Vulkan From 78e301c3db87cf517a4bde5542e397ff803859ce Mon Sep 17 00:00:00 2001 From: TheThunderTurner <64212185+thethunderturner@users.noreply.github.com> Date: Sat, 29 Nov 2025 22:47:15 +0100 Subject: [PATCH 03/12] libSceNpCommerce (#3839) * libSceNpCommerce * copyright notice --- CMakeLists.txt | 2 + src/common/logging/filter.cpp | 1 + src/common/logging/types.h | 1 + src/core/libraries/libs.cpp | 2 + src/core/libraries/np/np_commerce.cpp | 88 +++++++++++++++++++++++++++ src/core/libraries/np/np_commerce.h | 16 +++++ 6 files changed, 110 insertions(+) create mode 100644 src/core/libraries/np/np_commerce.cpp create mode 100644 src/core/libraries/np/np_commerce.h diff --git a/CMakeLists.txt b/CMakeLists.txt index cf78e92bf..04534ec26 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -573,6 +573,8 @@ set(VDEC_LIB src/core/libraries/videodec/videodec2_impl.cpp set(NP_LIBS src/core/libraries/np/np_error.h src/core/libraries/np/np_common.cpp src/core/libraries/np/np_common.h + src/core/libraries/np/np_commerce.cpp + src/core/libraries/np/np_commerce.h src/core/libraries/np/np_manager.cpp src/core/libraries/np/np_manager.h src/core/libraries/np/np_score.cpp diff --git a/src/common/logging/filter.cpp b/src/common/logging/filter.cpp index bf6844c7d..fd8386aff 100644 --- a/src/common/logging/filter.cpp +++ 
b/src/common/logging/filter.cpp @@ -104,6 +104,7 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) { SUB(Lib, Move) \ SUB(Lib, NpAuth) \ SUB(Lib, NpCommon) \ + SUB(Lib, NpCommerce) \ SUB(Lib, NpManager) \ SUB(Lib, NpScore) \ SUB(Lib, NpTrophy) \ diff --git a/src/common/logging/types.h b/src/common/logging/types.h index 035a959db..82db477ed 100644 --- a/src/common/logging/types.h +++ b/src/common/logging/types.h @@ -70,6 +70,7 @@ enum class Class : u8 { Lib_Http2, ///< The LibSceHttp2 implementation. Lib_SysModule, ///< The LibSceSysModule implementation Lib_NpCommon, ///< The LibSceNpCommon implementation + Lib_NpCommerce, ///< The LibSceNpCommerce implementation Lib_NpAuth, ///< The LibSceNpAuth implementation Lib_NpManager, ///< The LibSceNpManager implementation Lib_NpScore, ///< The LibSceNpScore implementation diff --git a/src/core/libraries/libs.cpp b/src/core/libraries/libs.cpp index eec9ee7c8..1f7ecb75e 100644 --- a/src/core/libraries/libs.cpp +++ b/src/core/libraries/libs.cpp @@ -32,6 +32,7 @@ #include "core/libraries/network/ssl.h" #include "core/libraries/network/ssl2.h" #include "core/libraries/np/np_auth.h" +#include "core/libraries/np/np_commerce.h" #include "core/libraries/np/np_common.h" #include "core/libraries/np/np_manager.h" #include "core/libraries/np/np_party.h" @@ -93,6 +94,7 @@ void InitHLELibs(Core::Loader::SymbolsResolver* sym) { Libraries::SysModule::RegisterLib(sym); Libraries::Posix::RegisterLib(sym); Libraries::AudioIn::RegisterLib(sym); + Libraries::Np::NpCommerce::RegisterLib(sym); Libraries::Np::NpCommon::RegisterLib(sym); Libraries::Np::NpManager::RegisterLib(sym); Libraries::Np::NpScore::RegisterLib(sym); diff --git a/src/core/libraries/np/np_commerce.cpp b/src/core/libraries/np/np_commerce.cpp new file mode 100644 index 000000000..1e8440ec0 --- /dev/null +++ b/src/core/libraries/np/np_commerce.cpp @@ -0,0 +1,88 @@ +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project +// 
SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/logging/log.h" +#include "core/libraries/error_codes.h" +#include "core/libraries/libs.h" + +namespace Libraries::Np::NpCommerce { +s32 PS4_SYSV_ABI sceNpCommerceDialogClose() { + LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceNpCommerceDialogGetResult(s32* result) { + LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); + return ORBIS_OK; +} + +s8 PS4_SYSV_ABI sceNpCommerceDialogGetStatus() { + LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceNpCommerceDialogInitialize() { + LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceNpCommerceDialogInitializeInternal() { + LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); + return ORBIS_OK; +} + +s16 PS4_SYSV_ABI sceNpCommerceDialogOpen(s64 check) { + LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceNpCommerceDialogTerminate() { + LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceNpCommerceDialogUpdateStatus() { + LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceNpCommerceHidePsStoreIcon() { + LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceNpCommerceSetPsStoreIconLayout(s32 layout) { + LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceNpCommerceShowPsStoreIcon(s16 icon) { + LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); + return ORBIS_OK; +} + +void RegisterLib(Core::Loader::SymbolsResolver* sym) { + LIB_FUNCTION("NU3ckGHMFXo", "libSceNpCommerce", 1, "libSceNpCommerce", + sceNpCommerceDialogClose); + LIB_FUNCTION("r42bWcQbtZY", "libSceNpCommerce", 1, "libSceNpCommerce", + sceNpCommerceDialogGetResult); + LIB_FUNCTION("CCbC+lqqvF0", "libSceNpCommerce", 1, "libSceNpCommerce", + sceNpCommerceDialogGetStatus); + 
LIB_FUNCTION("0aR2aWmQal4", "libSceNpCommerce", 1, "libSceNpCommerce", + sceNpCommerceDialogInitialize); + LIB_FUNCTION("9ZiLXAGG5rg", "libSceNpCommerce", 1, "libSceNpCommerce", + sceNpCommerceDialogInitializeInternal); + LIB_FUNCTION("DfSCDRA3EjY", "libSceNpCommerce", 1, "libSceNpCommerce", sceNpCommerceDialogOpen); + LIB_FUNCTION("m-I92Ab50W8", "libSceNpCommerce", 1, "libSceNpCommerce", + sceNpCommerceDialogTerminate); + LIB_FUNCTION("LR5cwFMMCVE", "libSceNpCommerce", 1, "libSceNpCommerce", + sceNpCommerceDialogUpdateStatus); + LIB_FUNCTION("dsqCVsNM0Zg", "libSceNpCommerce", 1, "libSceNpCommerce", + sceNpCommerceHidePsStoreIcon); + LIB_FUNCTION("uKTDW8hk-ts", "libSceNpCommerce", 1, "libSceNpCommerce", + sceNpCommerceSetPsStoreIconLayout); + LIB_FUNCTION("DHmwsa6S8Tc", "libSceNpCommerce", 1, "libSceNpCommerce", + sceNpCommerceShowPsStoreIcon); +}; + +} // namespace Libraries::Np::NpCommerce diff --git a/src/core/libraries/np/np_commerce.h b/src/core/libraries/np/np_commerce.h new file mode 100644 index 000000000..003e85a58 --- /dev/null +++ b/src/core/libraries/np/np_commerce.h @@ -0,0 +1,16 @@ +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/types.h" + +namespace Core::Loader { +class SymbolsResolver; +} + +namespace Libraries::Np::NpCommerce { + +void RegisterLib(Core::Loader::SymbolsResolver* sym); + +} // namespace Libraries::Np::NpCommerce \ No newline at end of file From 052f3260f391491c29caac6133c2eeb8c236437f Mon Sep 17 00:00:00 2001 From: Connor Garey Date: Sun, 30 Nov 2025 17:57:14 +0000 Subject: [PATCH 04/12] Sdl message box when no args provided (#3843) * Added a message box when no arguments are passed. 
* clang-fix * clang-fix episode 2 * Output message box error to stderr instead of stdout --- src/main.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/main.cpp b/src/main.cpp index 4d05dfe5a..f1e5ce932 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include "functional" #include "iostream" #include "string" @@ -182,6 +183,10 @@ int main(int argc, char* argv[]) { }}}; if (argc == 1) { + if (!SDL_ShowSimpleMessageBox( + SDL_MESSAGEBOX_INFORMATION, "shadPS4", + "This is a CLI application. Please use the QTLauncher for a GUI.", nullptr)) + std::cerr << "Could not display SDL message box! Error: " << SDL_GetError() << "\n"; int dummy = 0; // one does not simply pass 0 directly arg_map.at("-h")(dummy); return -1; From cf866ab294469874e805561a36a7ef2948c43081 Mon Sep 17 00:00:00 2001 From: kalaposfos13 <153381648+kalaposfos13@users.noreply.github.com> Date: Sun, 30 Nov 2025 21:40:58 +0100 Subject: [PATCH 05/12] Don't bother trying to restart the emulator if sceSystemServiceLoadExec is called with an invalid path (#3845) --- src/core/libraries/system/systemservice.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/core/libraries/system/systemservice.cpp b/src/core/libraries/system/systemservice.cpp index c02c4b3c3..ce5542fc8 100644 --- a/src/core/libraries/system/systemservice.cpp +++ b/src/core/libraries/system/systemservice.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include "common/config.h" #include "common/logging/log.h" #include "common/singleton.h" @@ -1874,6 +1875,10 @@ int PS4_SYSV_ABI sceSystemServiceLoadExec(const char* path, const char* argv[]) auto emu = Common::Singleton::Instance(); auto mnt = Common::Singleton::Instance(); auto hostPath = mnt->GetHostPath(std::string_view(path)); + if 
(hostPath.empty()) { + LOG_INFO(Lib_SystemService, "Restart called with invalid file '{}', exiting.", path); + std::quick_exit(0); + } std::vector args; if (argv != nullptr) { for (const char** ptr = argv; *ptr != nullptr; ptr++) { From a5f928084123c2b4862bcc9c9977ffb7898f3734 Mon Sep 17 00:00:00 2001 From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com> Date: Mon, 1 Dec 2025 02:21:19 -0600 Subject: [PATCH 06/12] Return CPU mode based on param.sfo attributes (#3846) Values are based on hardware observations. --- src/core/libraries/kernel/process.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/core/libraries/kernel/process.cpp b/src/core/libraries/kernel/process.cpp index 02da041c3..e88446e02 100644 --- a/src/core/libraries/kernel/process.cpp +++ b/src/core/libraries/kernel/process.cpp @@ -42,6 +42,16 @@ s32 PS4_SYSV_ABI sceKernelGetCompiledSdkVersion(s32* ver) { } s32 PS4_SYSV_ABI sceKernelGetCpumode() { + LOG_DEBUG(Lib_Kernel, "called"); + auto& attrs = Common::ElfInfo::Instance().GetPSFAttributes(); + u32 is_cpu6 = attrs.six_cpu_mode.Value(); + u32 is_cpu7 = attrs.seven_cpu_mode.Value(); + if (is_cpu6 == 1 && is_cpu7 == 1) { + return 2; + } + if (is_cpu7 == 1) { + return 5; + } return 0; } From c3f7a4301cecbe773f0415bf41d5ea9c9eca868e Mon Sep 17 00:00:00 2001 From: kalaposfos13 <153381648+kalaposfos13@users.noreply.github.com> Date: Tue, 2 Dec 2025 09:21:01 +0100 Subject: [PATCH 07/12] Add basic mouse-to-touchpad emulation (#3842) --- src/common/config.cpp | 1 + src/input/input_handler.cpp | 4 ++++ src/input/input_handler.h | 11 +++++++---- src/input/input_mouse.cpp | 18 +++++++++++++++++- src/input/input_mouse.h | 1 + src/sdl_window.cpp | 5 +++++ 6 files changed, 35 insertions(+), 5 deletions(-) diff --git a/src/common/config.cpp b/src/common/config.cpp index e79652b32..94d8b488c 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -1310,6 +1310,7 @@ hotkey_pause = f9 hotkey_reload_inputs = f8 
hotkey_toggle_mouse_to_joystick = f7 hotkey_toggle_mouse_to_gyro = f6 +hotkey_toggle_mouse_to_touchpad = delete hotkey_quit = lctrl, lshift, end )"; } diff --git a/src/input/input_handler.cpp b/src/input/input_handler.cpp index d38b45ddd..01c6d1fa4 100644 --- a/src/input/input_handler.cpp +++ b/src/input/input_handler.cpp @@ -106,6 +106,7 @@ auto output_array = std::array{ ControllerOutput(HOTKEY_RELOAD_INPUTS), ControllerOutput(HOTKEY_TOGGLE_MOUSE_TO_JOYSTICK), ControllerOutput(HOTKEY_TOGGLE_MOUSE_TO_GYRO), + ControllerOutput(HOTKEY_TOGGLE_MOUSE_TO_TOUCHPAD), ControllerOutput(HOTKEY_RENDERDOC), ControllerOutput(SDL_GAMEPAD_BUTTON_INVALID, SDL_GAMEPAD_AXIS_INVALID), @@ -579,6 +580,9 @@ void ControllerOutput::FinalizeUpdate() { case HOTKEY_TOGGLE_MOUSE_TO_GYRO: PushSDLEvent(SDL_EVENT_MOUSE_TO_GYRO); break; + case HOTKEY_TOGGLE_MOUSE_TO_TOUCHPAD: + PushSDLEvent(SDL_EVENT_MOUSE_TO_TOUCHPAD); + break; case HOTKEY_RENDERDOC: PushSDLEvent(SDL_EVENT_RDOC_CAPTURE); break; diff --git a/src/input/input_handler.h b/src/input/input_handler.h index 0d95d1c4a..eaadd164e 100644 --- a/src/input/input_handler.h +++ b/src/input/input_handler.h @@ -34,9 +34,10 @@ #define SDL_EVENT_RELOAD_INPUTS SDL_EVENT_USER + 5 #define SDL_EVENT_MOUSE_TO_JOYSTICK SDL_EVENT_USER + 6 #define SDL_EVENT_MOUSE_TO_GYRO SDL_EVENT_USER + 7 -#define SDL_EVENT_RDOC_CAPTURE SDL_EVENT_USER + 8 -#define SDL_EVENT_QUIT_DIALOG SDL_EVENT_USER + 9 -#define SDL_EVENT_MOUSE_WHEEL_OFF SDL_EVENT_USER + 10 +#define SDL_EVENT_MOUSE_TO_TOUCHPAD SDL_EVENT_USER + 8 +#define SDL_EVENT_RDOC_CAPTURE SDL_EVENT_USER + 9 +#define SDL_EVENT_QUIT_DIALOG SDL_EVENT_USER + 10 +#define SDL_EVENT_MOUSE_WHEEL_OFF SDL_EVENT_USER + 11 #define LEFTJOYSTICK_HALFMODE 0x00010000 #define RIGHTJOYSTICK_HALFMODE 0x00020000 @@ -52,7 +53,8 @@ #define HOTKEY_RELOAD_INPUTS 0xf0000005 #define HOTKEY_TOGGLE_MOUSE_TO_JOYSTICK 0xf0000006 #define HOTKEY_TOGGLE_MOUSE_TO_GYRO 0xf0000007 -#define HOTKEY_RENDERDOC 0xf0000008 +#define 
HOTKEY_TOGGLE_MOUSE_TO_TOUCHPAD 0xf0000008 +#define HOTKEY_RENDERDOC 0xf0000009 #define SDL_UNMAPPED UINT32_MAX - 1 @@ -141,6 +143,7 @@ const std::map string_to_cbutton_map = { {"hotkey_reload_inputs", HOTKEY_RELOAD_INPUTS}, {"hotkey_toggle_mouse_to_joystick", HOTKEY_TOGGLE_MOUSE_TO_JOYSTICK}, {"hotkey_toggle_mouse_to_gyro", HOTKEY_TOGGLE_MOUSE_TO_GYRO}, + {"hotkey_toggle_mouse_to_touchpad", HOTKEY_TOGGLE_MOUSE_TO_TOUCHPAD}, {"hotkey_renderdoc_capture", HOTKEY_RENDERDOC}, }; diff --git a/src/input/input_mouse.cpp b/src/input/input_mouse.cpp index 3c718dbd5..55489283c 100644 --- a/src/input/input_mouse.cpp +++ b/src/input/input_mouse.cpp @@ -8,8 +8,12 @@ #include "input/controller.h" #include "input_mouse.h" +#include +#include #include "SDL3/SDL.h" +extern Frontend::WindowSDL* g_window; + namespace Input { int mouse_joystick_binding = 0; @@ -80,7 +84,6 @@ void EmulateJoystick(GameController* controller, u32 interval) { constexpr float constant_down_accel[3] = {0.0f, 10.0f, 0.0f}; void EmulateGyro(GameController* controller, u32 interval) { - // LOG_INFO(Input, "todo gyro"); float d_x = 0, d_y = 0; SDL_GetRelativeMouseState(&d_x, &d_y); controller->Acceleration(1, constant_down_accel); @@ -92,6 +95,16 @@ void EmulateGyro(GameController* controller, u32 interval) { controller->Gyro(1, gyro_from_mouse); } +void EmulateTouchpad(GameController* controller, u32 interval) { + float x, y; + SDL_MouseButtonFlags mouse_buttons = SDL_GetMouseState(&x, &y); + controller->SetTouchpadState(0, (mouse_buttons & SDL_BUTTON_LMASK) != 0, + std::clamp(x / g_window->GetWidth(), 0.0f, 1.0f), + std::clamp(y / g_window->GetHeight(), 0.0f, 1.0f)); + controller->CheckButton(0, Libraries::Pad::OrbisPadButtonDataOffset::TouchPad, + (mouse_buttons & SDL_BUTTON_RMASK) != 0); +} + Uint32 MousePolling(void* param, Uint32 id, Uint32 interval) { auto* controller = (GameController*)param; switch (mouse_mode) { @@ -101,6 +114,9 @@ Uint32 MousePolling(void* param, Uint32 id, Uint32 interval) { case 
MouseMode::Gyro: EmulateGyro(controller, interval); break; + case MouseMode::Touchpad: + EmulateTouchpad(controller, interval); + break; default: break; diff --git a/src/input/input_mouse.h b/src/input/input_mouse.h index a56ef2d8f..995f836f2 100644 --- a/src/input/input_mouse.h +++ b/src/input/input_mouse.h @@ -12,6 +12,7 @@ enum MouseMode { Off = 0, Joystick, Gyro, + Touchpad, }; bool ToggleMouseModeTo(MouseMode m); diff --git a/src/sdl_window.cpp b/src/sdl_window.cpp index 449defdd1..476a56b52 100644 --- a/src/sdl_window.cpp +++ b/src/sdl_window.cpp @@ -457,6 +457,11 @@ void WindowSDL::WaitEvent() { SDL_SetWindowRelativeMouseMode(this->GetSDLWindow(), Input::ToggleMouseModeTo(Input::MouseMode::Gyro)); break; + case SDL_EVENT_MOUSE_TO_TOUCHPAD: + SDL_SetWindowRelativeMouseMode(this->GetSDLWindow(), + Input::ToggleMouseModeTo(Input::MouseMode::Touchpad)); + SDL_SetWindowRelativeMouseMode(this->GetSDLWindow(), false); + break; case SDL_EVENT_RDOC_CAPTURE: VideoCore::TriggerCapture(); break; From e5ea55e42588407e92a82dad47e84b83df5b4114 Mon Sep 17 00:00:00 2001 From: Pirky <92021796+Pirky10@users.noreply.github.com> Date: Tue, 2 Dec 2025 09:22:41 +0100 Subject: [PATCH 08/12] np: Add dialog state tracking for NpCommerce (#3841) --- src/core/libraries/np/np_commerce.cpp | 63 ++++++++++++++++++++++----- 1 file changed, 52 insertions(+), 11 deletions(-) diff --git a/src/core/libraries/np/np_commerce.cpp b/src/core/libraries/np/np_commerce.cpp index 1e8440ec0..99b03384a 100644 --- a/src/core/libraries/np/np_commerce.cpp +++ b/src/core/libraries/np/np_commerce.cpp @@ -4,46 +4,87 @@ #include "common/logging/log.h" #include "core/libraries/error_codes.h" #include "core/libraries/libs.h" +#include "core/libraries/system/commondialog.h" namespace Libraries::Np::NpCommerce { + +using CommonDialog::Error; +using CommonDialog::Result; +using CommonDialog::Status; + +static Status g_dialog_status = Status::NONE; +static Result g_dialog_result = Result::OK; + s32 PS4_SYSV_ABI 
sceNpCommerceDialogClose() { - LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); + LOG_INFO(Lib_NpCommerce, "called"); + if (g_dialog_status == Status::NONE) { + return static_cast(Error::NOT_INITIALIZED); + } + if (g_dialog_status != Status::FINISHED) { + return static_cast(Error::NOT_FINISHED); + } + g_dialog_status = Status::INITIALIZED; return ORBIS_OK; } s32 PS4_SYSV_ABI sceNpCommerceDialogGetResult(s32* result) { - LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); + LOG_INFO(Lib_NpCommerce, "called"); + if (result == nullptr) { + return static_cast(Error::ARG_NULL); + } + if (g_dialog_status != Status::FINISHED) { + return static_cast(Error::NOT_FINISHED); + } + *result = static_cast(g_dialog_result); return ORBIS_OK; } s8 PS4_SYSV_ABI sceNpCommerceDialogGetStatus() { - LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); - return ORBIS_OK; + LOG_DEBUG(Lib_NpCommerce, "called, status = {}", static_cast(g_dialog_status)); + return static_cast(g_dialog_status); } s32 PS4_SYSV_ABI sceNpCommerceDialogInitialize() { - LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); + LOG_INFO(Lib_NpCommerce, "called"); + if (g_dialog_status != Status::NONE) { + return static_cast(Error::ALREADY_INITIALIZED); + } + g_dialog_status = Status::INITIALIZED; return ORBIS_OK; } s32 PS4_SYSV_ABI sceNpCommerceDialogInitializeInternal() { - LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); - return ORBIS_OK; + LOG_INFO(Lib_NpCommerce, "called"); + return sceNpCommerceDialogInitialize(); } s16 PS4_SYSV_ABI sceNpCommerceDialogOpen(s64 check) { - LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); + LOG_INFO(Lib_NpCommerce, "called, check = {}", check); + if (g_dialog_status != Status::INITIALIZED) { + LOG_WARNING(Lib_NpCommerce, "Dialog not initialized"); + return ORBIS_OK; + } + + g_dialog_status = Status::FINISHED; + g_dialog_result = Result::USER_CANCELED; return ORBIS_OK; } s32 PS4_SYSV_ABI sceNpCommerceDialogTerminate() { - LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); + LOG_INFO(Lib_NpCommerce, "called"); 
+ if (g_dialog_status == Status::NONE) { + return static_cast(Error::NOT_INITIALIZED); + } + if (g_dialog_status == Status::RUNNING) { + return static_cast(Error::NOT_FINISHED); + } + g_dialog_status = Status::NONE; return ORBIS_OK; } s32 PS4_SYSV_ABI sceNpCommerceDialogUpdateStatus() { - LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); - return ORBIS_OK; + LOG_DEBUG(Lib_NpCommerce, "called, status = {}", static_cast(g_dialog_status)); + return static_cast(g_dialog_status); } s32 PS4_SYSV_ABI sceNpCommerceHidePsStoreIcon() { From dc6013cf0e19a66f489178da2235d2468fbb0186 Mon Sep 17 00:00:00 2001 From: kalaposfos13 <153381648+kalaposfos13@users.noreply.github.com> Date: Tue, 2 Dec 2025 09:41:06 +0100 Subject: [PATCH 09/12] Block normal mouse inputs in mouse-to-touchpad mode shadow sniped my PR. :( --- src/input/input_handler.cpp | 3 +++ src/input/input_mouse.cpp | 19 +++++++++++++++++++ src/input/input_mouse.h | 2 ++ 3 files changed, 24 insertions(+) diff --git a/src/input/input_handler.cpp b/src/input/input_handler.cpp index 01c6d1fa4..e74569737 100644 --- a/src/input/input_handler.cpp +++ b/src/input/input_handler.cpp @@ -777,6 +777,9 @@ void ActivateOutputsFromInputs() { it.ResetUpdate(); } + // Check for input blockers + ApplyMouseInputBlockers(); + // Iterate over all inputs, and update their respecive outputs accordingly for (auto& it : connections) { it.output->AddUpdate(it.ProcessBinding()); diff --git a/src/input/input_mouse.cpp b/src/input/input_mouse.cpp index 55489283c..cead87e53 100644 --- a/src/input/input_mouse.cpp +++ b/src/input/input_mouse.cpp @@ -6,6 +6,7 @@ #include "common/assert.h" #include "common/types.h" #include "input/controller.h" +#include "input/input_handler.h" #include "input_mouse.h" #include @@ -16,6 +17,8 @@ extern Frontend::WindowSDL* g_window; namespace Input { +extern std::list> pressed_keys; + int mouse_joystick_binding = 0; float mouse_deadzone_offset = 0.5, mouse_speed = 1, mouse_speed_offset = 0.1250; bool mouse_gyro_roll_mode 
= false; @@ -105,6 +108,22 @@ void EmulateTouchpad(GameController* controller, u32 interval) { (mouse_buttons & SDL_BUTTON_RMASK) != 0); } +void ApplyMouseInputBlockers() { + switch (mouse_mode) { + case MouseMode::Touchpad: + LOG_INFO(Input, "Blocking mouse inputs"); + for (auto& k : pressed_keys) { + if (k.first.input.sdl_id == SDL_BUTTON_LEFT || + k.first.input.sdl_id == SDL_BUTTON_RIGHT) { + k.second = true; + } + } + break; + default: + break; + } +} + Uint32 MousePolling(void* param, Uint32 id, Uint32 interval) { auto* controller = (GameController*)param; switch (mouse_mode) { diff --git a/src/input/input_mouse.h b/src/input/input_mouse.h index 995f836f2..da1d874ec 100644 --- a/src/input/input_mouse.h +++ b/src/input/input_mouse.h @@ -23,6 +23,8 @@ void SetMouseGyroRollMode(bool mode); void EmulateJoystick(GameController* controller, u32 interval); void EmulateGyro(GameController* controller, u32 interval); +void ApplyMouseInputBlockers(); + // Polls the mouse for changes Uint32 MousePolling(void* param, Uint32 id, Uint32 interval); From b135a056ba457c7d3ea1a4cfc15f5f340532f41d Mon Sep 17 00:00:00 2001 From: kalaposfos13 <153381648+kalaposfos13@users.noreply.github.com> Date: Tue, 2 Dec 2025 09:50:11 +0100 Subject: [PATCH 10/12] Remove debug logging --- src/input/input_mouse.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/input/input_mouse.cpp b/src/input/input_mouse.cpp index cead87e53..cbb07721b 100644 --- a/src/input/input_mouse.cpp +++ b/src/input/input_mouse.cpp @@ -111,7 +111,6 @@ void EmulateTouchpad(GameController* controller, u32 interval) { void ApplyMouseInputBlockers() { switch (mouse_mode) { case MouseMode::Touchpad: - LOG_INFO(Input, "Blocking mouse inputs"); for (auto& k : pressed_keys) { if (k.first.input.sdl_id == SDL_BUTTON_LEFT || k.first.input.sdl_id == SDL_BUTTON_RIGHT) { From 9db4642f666c1c46dd4f9f816472929cbe765bb7 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Tue, 2 Dec 2025 22:27:01 +0100 Subject: [PATCH 11/12] 
video_core: Scheduler priority pending operation queue (#3848) * Priority pending ops * Use priority operations on image download * clang-format * Simplify thread * I'm tired, it's too late :( --- .../renderer_vulkan/vk_scheduler.cpp | 29 +++++++++++++++ src/video_core/renderer_vulkan/vk_scheduler.h | 18 ++++++++++ .../texture_cache/texture_cache.cpp | 35 +++---------------- src/video_core/texture_cache/texture_cache.h | 10 ------ 4 files changed, 52 insertions(+), 40 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index cc8f6956d..fee0b408e 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -3,6 +3,7 @@ #include "common/assert.h" #include "common/debug.h" +#include "common/thread.h" #include "imgui/renderer/texture_manager.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -17,6 +18,8 @@ Scheduler::Scheduler(const Instance& instance) profiler_scope = reinterpret_cast(std::malloc(sizeof(tracy::VkCtxScope))); #endif AllocateWorkerCommandBuffers(); + priority_pending_ops_thread = + std::jthread(std::bind_front(&Scheduler::PriorityPendingOpsThread, this)); } Scheduler::~Scheduler() { @@ -167,6 +170,32 @@ void Scheduler::SubmitExecution(SubmitInfo& info) { PopPendingOperations(); } +void Scheduler::PriorityPendingOpsThread(std::stop_token stoken) { + Common::SetCurrentThreadName("shadPS4:GpuSchedPriorityPendingOpsRunner"); + + while (!stoken.stop_requested()) { + PendingOp op; + { + std::unique_lock lk(priority_pending_ops_mutex); + priority_pending_ops_cv.wait(lk, stoken, + [this] { return !priority_pending_ops.empty(); }); + if (stoken.stop_requested()) { + break; + } + + op = std::move(priority_pending_ops.front()); + priority_pending_ops.pop(); + } + + master_semaphore.Wait(op.gpu_tick); + if (stoken.stop_requested()) { + break; + } + + op.callback(); + } +} + void 
DynamicState::Commit(const Instance& instance, const vk::CommandBuffer& cmdbuf) { if (dirty_state.viewports) { dirty_state.viewports = false; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 506b84159..aff299e54 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -5,6 +5,7 @@ #include #include +#include #include #include "common/unique_function.h" @@ -401,10 +402,21 @@ public: } /// Defers an operation until the gpu has reached the current cpu tick. + /// Will be run when submitting or calling PopPendingOperations. void DeferOperation(Common::UniqueFunction&& func) { pending_ops.emplace(std::move(func), CurrentTick()); } + /// Defers an operation until the gpu has reached the current cpu tick. + /// Runs as soon as possible in another thread. + void DeferPriorityOperation(Common::UniqueFunction&& func) { + { + std::unique_lock lk(priority_pending_ops_mutex); + priority_pending_ops.emplace(std::move(func), CurrentTick()); + } + priority_pending_ops_cv.notify_one(); + } + static std::mutex submit_mutex; private: @@ -412,6 +424,8 @@ private: void SubmitExecution(SubmitInfo& info); + void PriorityPendingOpsThread(std::stop_token stoken); + private: const Instance& instance; MasterSemaphore master_semaphore; @@ -424,6 +438,10 @@ private: u64 gpu_tick; }; std::queue pending_ops; + std::queue priority_pending_ops; + std::mutex priority_pending_ops_mutex; + std::condition_variable_any priority_pending_ops_cv; + std::jthread priority_pending_ops_thread; RenderState render_state; bool is_rendering = false; tracy::VkCtxScope* profiler_scope{}; diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index c7604995a..17c7e67b3 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -52,9 +52,6 @@ TextureCache::TextureCache(const 
Vulkan::Instance& instance_, Vulkan::Scheduler& std::max(std::min(device_local_memory - min_vacancy_critical, min_spacing_critical), DEFAULT_CRITICAL_GC_MEMORY)); trigger_gc_memory = static_cast((device_local_memory - mem_threshold) / 2); - - downloaded_images_thread = - std::jthread([&](const std::stop_token& token) { DownloadedImagesThread(token); }); } TextureCache::~TextureCache() = default; @@ -125,33 +122,11 @@ void TextureCache::DownloadImageMemory(ImageId image_id) { cmdbuf.copyImageToBuffer(image.GetImage(), vk::ImageLayout::eTransferSrcOptimal, download_buffer.Handle(), image_download); - { - std::unique_lock lock(downloaded_images_mutex); - downloaded_images_queue.emplace(scheduler.CurrentTick(), image.info.guest_address, download, - download_size); - downloaded_images_cv.notify_one(); - } -} - -void TextureCache::DownloadedImagesThread(const std::stop_token& token) { - auto* memory = Core::Memory::Instance(); - while (!token.stop_requested()) { - DownloadedImage image; - { - std::unique_lock lock{downloaded_images_mutex}; - downloaded_images_cv.wait(lock, token, - [this] { return !downloaded_images_queue.empty(); }); - if (token.stop_requested()) { - break; - } - image = downloaded_images_queue.front(); - downloaded_images_queue.pop(); - } - - scheduler.GetMasterSemaphore()->Wait(image.tick); - memory->TryWriteBacking(std::bit_cast(image.device_addr), image.download, - image.download_size); - } + scheduler.DeferPriorityOperation( + [this, device_addr = image.info.guest_address, download, download_size] { + Core::Memory::Instance()->TryWriteBacking(std::bit_cast(device_addr), download, + download_size); + }); } void TextureCache::MarkAsMaybeDirty(ImageId image_id, Image& image) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 9d25069db..141ac938f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -314,16 +314,6 @@ private: 
Common::LeastRecentlyUsedCache lru_cache; PageTable page_table; std::mutex mutex; - struct DownloadedImage { - u64 tick; - VAddr device_addr; - void* download; - size_t download_size; - }; - std::queue downloaded_images_queue; - std::mutex downloaded_images_mutex; - std::condition_variable_any downloaded_images_cv; - std::jthread downloaded_images_thread; struct MetaDataInfo { enum class Type { CMask, From 98fd0689ac46250debd536dcd16b6cf11dfb159d Mon Sep 17 00:00:00 2001 From: kalaposfos13 <153381648+kalaposfos13@users.noreply.github.com> Date: Wed, 3 Dec 2025 14:05:19 +0100 Subject: [PATCH 12/12] Revert non-Linux parts of #3819 (#3852) * Revert non-Linux parts of #3819 * More OpenOrbis stuff that I couldn't be bothered to put in a new PR --- src/core/libraries/fiber/fiber.cpp | 6 +++--- src/core/libraries/kernel/threads/pthread.cpp | 4 ++++ src/core/linker.cpp | 4 ++-- src/core/tls.cpp | 18 ++++++++++++++++++ src/core/tls.h | 3 +++ 5 files changed, 30 insertions(+), 5 deletions(-) diff --git a/src/core/libraries/fiber/fiber.cpp b/src/core/libraries/fiber/fiber.cpp index 776792041..2ebfbd244 100644 --- a/src/core/libraries/fiber/fiber.cpp +++ b/src/core/libraries/fiber/fiber.cpp @@ -6,8 +6,8 @@ #include "common/elf_info.h" #include "common/logging/log.h" #include "core/libraries/fiber/fiber_error.h" -#include "core/libraries/kernel/threads/pthread.h" #include "core/libraries/libs.h" +#include "core/tls.h" namespace Libraries::Fiber { @@ -20,7 +20,7 @@ static constexpr u64 kFiberStackSizeCheck = 0xdeadbeefdeadbeef; static std::atomic context_size_check = false; OrbisFiberContext* GetFiberContext() { - return Libraries::Kernel::g_curthread->tcb->tcb_fiber; + return Core::GetTcbBase()->tcb_fiber; } extern "C" s32 PS4_SYSV_ABI _sceFiberSetJmp(OrbisFiberContext* ctx) asm("_sceFiberSetJmp"); @@ -269,7 +269,7 @@ s32 PS4_SYSV_ABI sceFiberRunImpl(OrbisFiber* fiber, void* addr_context, u64 size return ORBIS_FIBER_ERROR_INVALID; } - Core::Tcb* tcb = 
Libraries::Kernel::g_curthread->tcb; + Core::Tcb* tcb = Core::GetTcbBase(); if (tcb->tcb_fiber) { return ORBIS_FIBER_ERROR_PERMISSION; } diff --git a/src/core/libraries/kernel/threads/pthread.cpp b/src/core/libraries/kernel/threads/pthread.cpp index 8ab8b72c3..6c11eebc2 100644 --- a/src/core/libraries/kernel/threads/pthread.cpp +++ b/src/core/libraries/kernel/threads/pthread.cpp @@ -663,6 +663,10 @@ void RegisterThread(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("Z4QosVuAsA0", "libkernel", 1, "libkernel", posix_pthread_once); LIB_FUNCTION("EotR8a3ASf4", "libkernel", 1, "libkernel", posix_pthread_self); LIB_FUNCTION("OxhIB8LB-PQ", "libkernel", 1, "libkernel", posix_pthread_create); + LIB_FUNCTION("lZzFeSxPl08", "libkernel", 1, "libkernel", posix_pthread_setcancelstate); + LIB_FUNCTION("CBNtXOoef-E", "libkernel", 1, "libkernel", posix_sched_get_priority_max); + LIB_FUNCTION("m0iS6jNsXds", "libkernel", 1, "libkernel", posix_sched_get_priority_min); + LIB_FUNCTION("Xs9hdiD7sAA", "libkernel", 1, "libkernel", posix_pthread_setschedparam); LIB_FUNCTION("+U1R4WtXvoc", "libkernel", 1, "libkernel", posix_pthread_detach); LIB_FUNCTION("7Xl257M4VNI", "libkernel", 1, "libkernel", posix_pthread_equal); LIB_FUNCTION("h9CcP3J0oVM", "libkernel", 1, "libkernel", posix_pthread_join); diff --git a/src/core/linker.cpp b/src/core/linker.cpp index b7c9a2895..ac6b37769 100644 --- a/src/core/linker.cpp +++ b/src/core/linker.cpp @@ -368,7 +368,7 @@ bool Linker::Resolve(const std::string& name, Loader::SymbolType sym_type, Modul void* Linker::TlsGetAddr(u64 module_index, u64 offset) { std::scoped_lock lk{mutex}; - DtvEntry* dtv_table = Libraries::Kernel::g_curthread->tcb->tcb_dtv; + DtvEntry* dtv_table = GetTcbBase()->tcb_dtv; if (dtv_table[0].counter != dtv_generation_counter) { // Generation counter changed, a dynamic module was either loaded or unloaded. 
const u32 old_num_dtvs = dtv_table[1].counter; @@ -381,7 +381,7 @@ void* Linker::TlsGetAddr(u64 module_index, u64 offset) { delete[] dtv_table; // Update TCB pointer. - Libraries::Kernel::g_curthread->tcb->tcb_dtv = new_dtv_table; + GetTcbBase()->tcb_dtv = new_dtv_table; dtv_table = new_dtv_table; } diff --git a/src/core/tls.cpp b/src/core/tls.cpp index bcefd6f25..57ed20f38 100644 --- a/src/core/tls.cpp +++ b/src/core/tls.cpp @@ -46,6 +46,10 @@ void SetTcbBase(void* image_address) { ASSERT(result != 0); } +Tcb* GetTcbBase() { + return reinterpret_cast(TlsGetValue(GetTcbKey())); +} + #elif defined(__APPLE__) && defined(ARCH_X86_64) // Apple x86_64 @@ -145,6 +149,12 @@ void SetTcbBase(void* image_address) { "Failed to store thread LDT page pointer: {}", errno); } +Tcb* GetTcbBase() { + Tcb* tcb; + asm volatile("mov %%fs:0x0, %0" : "=r"(tcb)); + return tcb; +} + #elif defined(ARCH_X86_64) // Other POSIX x86_64 @@ -154,6 +164,10 @@ void SetTcbBase(void* image_address) { ASSERT_MSG(ret == 0, "Failed to set GS base: errno {}", errno); } +Tcb* GetTcbBase() { + return Libraries::Kernel::g_curthread->tcb; +} + #else // POSIX non-x86_64 @@ -176,6 +190,10 @@ void SetTcbBase(void* image_address) { ASSERT(pthread_setspecific(GetTcbKey(), image_address) == 0); } +Tcb* GetTcbBase() { + return static_cast(pthread_getspecific(GetTcbKey())); +} + #endif thread_local std::once_flag init_tls_flag; diff --git a/src/core/tls.h b/src/core/tls.h index 0ae512a04..83940be7a 100644 --- a/src/core/tls.h +++ b/src/core/tls.h @@ -39,6 +39,9 @@ u32 GetTcbKey(); /// Sets the data pointer to the TCB block. void SetTcbBase(void* image_address); +/// Retrieves Tcb structure for the calling thread. +Tcb* GetTcbBase(); + /// Makes sure TLS is initialized for the thread before entering guest. void EnsureThreadInitialized();