From 6612a325230b6639ffee3e4519f93ba902886594 Mon Sep 17 00:00:00 2001 From: marecl Date: Thu, 20 Nov 2025 18:41:01 +0100 Subject: [PATCH 01/25] Prevent writing to directories (#3820) * Prevent writing to directories * Prevent writing to directories --- src/core/file_sys/directories/base_directory.h | 12 ++++++++++++ src/core/libraries/kernel/file_system.cpp | 11 +++++++++++ 2 files changed, 23 insertions(+) diff --git a/src/core/file_sys/directories/base_directory.h b/src/core/file_sys/directories/base_directory.h index 8900ac32b..b412865a2 100644 --- a/src/core/file_sys/directories/base_directory.h +++ b/src/core/file_sys/directories/base_directory.h @@ -36,6 +36,18 @@ public: return ORBIS_KERNEL_ERROR_EBADF; } + virtual s64 write(const void* buf, u64 nbytes) { + return ORBIS_KERNEL_ERROR_EBADF; + } + + virtual s64 writev(const Libraries::Kernel::OrbisKernelIovec* iov, s32 iovcnt) { + return ORBIS_KERNEL_ERROR_EBADF; + } + + virtual s64 pwritev(const Libraries::Kernel::OrbisKernelIovec* iov, s32 iovcnt, s64 offset) { + return ORBIS_KERNEL_ERROR_EBADF; + } + virtual s64 lseek(s64 offset, s32 whence) { return ORBIS_KERNEL_ERROR_EBADF; } diff --git a/src/core/libraries/kernel/file_system.cpp b/src/core/libraries/kernel/file_system.cpp index 7ded1f33e..b4c342f18 100644 --- a/src/core/libraries/kernel/file_system.cpp +++ b/src/core/libraries/kernel/file_system.cpp @@ -311,6 +311,9 @@ s64 PS4_SYSV_ABI write(s32 fd, const void* buf, u64 nbytes) { } else if (file->type == Core::FileSys::FileType::Socket) { // Socket functions handle errnos internally. 
return file->socket->SendPacket(buf, nbytes, 0, nullptr, 0); + } else if (file->type == Core::FileSys::FileType::Directory) { + *__Error() = POSIX_EBADF; + return -1; } return file->f.WriteRaw(buf, nbytes); @@ -405,7 +408,11 @@ s64 PS4_SYSV_ABI writev(s32 fd, const OrbisKernelIovec* iov, s32 iovcnt) { return -1; } return result; + } else if (file->type == Core::FileSys::FileType::Directory) { + *__Error() = POSIX_EBADF; + return -1; } + s64 total_written = 0; for (s32 i = 0; i < iovcnt; i++) { total_written += file->f.WriteRaw(iov[i].iov_base, iov[i].iov_len); @@ -1047,7 +1054,11 @@ s64 PS4_SYSV_ABI posix_pwritev(s32 fd, const OrbisKernelIovec* iov, s32 iovcnt, return -1; } return result; + } else if (file->type == Core::FileSys::FileType::Directory) { + *__Error() = POSIX_EBADF; + return -1; } + const s64 pos = file->f.Tell(); SCOPE_EXIT { file->f.Seek(pos); From 544a22a43115175d3a4970dfaaf75bc673a05306 Mon Sep 17 00:00:00 2001 From: Osyotr Date: Thu, 20 Nov 2025 23:35:35 +0300 Subject: [PATCH 02/25] emulator: crash faster (#2360) By disabling Windows Error Reporting. 
--- src/emulator.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/emulator.cpp b/src/emulator.cpp index ad407f9b6..fb187cfae 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -58,10 +58,11 @@ Frontend::WindowSDL* g_window = nullptr; namespace Core { Emulator::Emulator() { - // Initialize NT API functions and set high priority + // Initialize NT API functions, set high priority and disable WER #ifdef _WIN32 Common::NtApi::Initialize(); SetPriorityClass(GetCurrentProcess(), ABOVE_NORMAL_PRIORITY_CLASS); + SetErrorMode(SetErrorMode(0) | SEM_NOGPFAULTERRORBOX); // need to init this in order for winsock2 to work WORD versionWanted = MAKEWORD(2, 2); WSADATA wsaData; From 56109a13313158e15ec8bb57b0fbc4ec1338750b Mon Sep 17 00:00:00 2001 From: kalaposfos13 <153381648+kalaposfos13@users.noreply.github.com> Date: Fri, 21 Nov 2025 09:42:49 +0100 Subject: [PATCH 03/25] Avoid storing the Tcb pointer on the stack (#3819) * Avoid storing the Tcb pointer on the stack * Just return the already stored pointer in GetTcbBase * Replace uses of GetTcbBase with g_curthread->tcb * copyright 2025 * sir clang offnir, the all-formatting --- src/core/libraries/fiber/fiber.cpp | 8 ++++---- src/core/linker.cpp | 4 ++-- src/core/tls.cpp | 23 +---------------------- src/core/tls.h | 5 +---- 4 files changed, 8 insertions(+), 32 deletions(-) diff --git a/src/core/libraries/fiber/fiber.cpp b/src/core/libraries/fiber/fiber.cpp index 7d35add4e..776792041 100644 --- a/src/core/libraries/fiber/fiber.cpp +++ b/src/core/libraries/fiber/fiber.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include "fiber.h" @@ -6,8 +6,8 @@ #include "common/elf_info.h" #include "common/logging/log.h" #include "core/libraries/fiber/fiber_error.h" +#include "core/libraries/kernel/threads/pthread.h" #include "core/libraries/libs.h" -#include 
"core/tls.h" namespace Libraries::Fiber { @@ -20,7 +20,7 @@ static constexpr u64 kFiberStackSizeCheck = 0xdeadbeefdeadbeef; static std::atomic context_size_check = false; OrbisFiberContext* GetFiberContext() { - return Core::GetTcbBase()->tcb_fiber; + return Libraries::Kernel::g_curthread->tcb->tcb_fiber; } extern "C" s32 PS4_SYSV_ABI _sceFiberSetJmp(OrbisFiberContext* ctx) asm("_sceFiberSetJmp"); @@ -269,7 +269,7 @@ s32 PS4_SYSV_ABI sceFiberRunImpl(OrbisFiber* fiber, void* addr_context, u64 size return ORBIS_FIBER_ERROR_INVALID; } - Core::Tcb* tcb = Core::GetTcbBase(); + Core::Tcb* tcb = Libraries::Kernel::g_curthread->tcb; if (tcb->tcb_fiber) { return ORBIS_FIBER_ERROR_PERMISSION; } diff --git a/src/core/linker.cpp b/src/core/linker.cpp index ac6b37769..b7c9a2895 100644 --- a/src/core/linker.cpp +++ b/src/core/linker.cpp @@ -368,7 +368,7 @@ bool Linker::Resolve(const std::string& name, Loader::SymbolType sym_type, Modul void* Linker::TlsGetAddr(u64 module_index, u64 offset) { std::scoped_lock lk{mutex}; - DtvEntry* dtv_table = GetTcbBase()->tcb_dtv; + DtvEntry* dtv_table = Libraries::Kernel::g_curthread->tcb->tcb_dtv; if (dtv_table[0].counter != dtv_generation_counter) { // Generation counter changed, a dynamic module was either loaded or unloaded. const u32 old_num_dtvs = dtv_table[1].counter; @@ -381,7 +381,7 @@ void* Linker::TlsGetAddr(u64 module_index, u64 offset) { delete[] dtv_table; // Update TCB pointer. 
- GetTcbBase()->tcb_dtv = new_dtv_table; + Libraries::Kernel::g_curthread->tcb->tcb_dtv = new_dtv_table; dtv_table = new_dtv_table; } diff --git a/src/core/tls.cpp b/src/core/tls.cpp index 2f7e1a1fd..bcefd6f25 100644 --- a/src/core/tls.cpp +++ b/src/core/tls.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include @@ -46,10 +46,6 @@ void SetTcbBase(void* image_address) { ASSERT(result != 0); } -Tcb* GetTcbBase() { - return reinterpret_cast<Tcb*>(TlsGetValue(GetTcbKey())); -} - #elif defined(__APPLE__) && defined(ARCH_X86_64) // Apple x86_64 @@ -149,12 +145,6 @@ void SetTcbBase(void* image_address) { "Failed to store thread LDT page pointer: {}", errno); } -Tcb* GetTcbBase() { - Tcb* tcb; - asm volatile("mov %%fs:0x0, %0" : "=r"(tcb)); - return tcb; -} - #elif defined(ARCH_X86_64) // Other POSIX x86_64 @@ -164,13 +154,6 @@ void SetTcbBase(void* image_address) { ASSERT_MSG(ret == 0, "Failed to set GS base: errno {}", errno); } -Tcb* GetTcbBase() { - void* tcb = nullptr; - const int ret = syscall(SYS_arch_prctl, ARCH_GET_GS, &tcb); - ASSERT_MSG(ret == 0, "Failed to get GS base: errno {}", errno); - return static_cast<Tcb*>(tcb); -} - #else // POSIX non-x86_64 @@ -193,10 +176,6 @@ void SetTcbBase(void* image_address) { ASSERT(pthread_setspecific(GetTcbKey(), image_address) == 0); } -Tcb* GetTcbBase() { - return static_cast<Tcb*>(pthread_getspecific(GetTcbKey())); -} - #endif thread_local std::once_flag init_tls_flag; diff --git a/src/core/tls.h b/src/core/tls.h index 787744cd3..6be9752b0 100644 --- a/src/core/tls.h +++ b/src/core/tls.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once @@ -36,9 +36,6 @@ u32 GetTcbKey(); /// Sets the data pointer to the TCB block. 
void SetTcbBase(void* image_address); -/// Retrieves Tcb structure for the calling thread. -Tcb* GetTcbBase(); - /// Makes sure TLS is initialized for the thread before entering guest. void EnsureThreadInitialized(); From 4922d526feb41bae0353994e907e343972acd4ef Mon Sep 17 00:00:00 2001 From: oltolm Date: Sat, 22 Nov 2025 09:32:29 +0100 Subject: [PATCH 04/25] msys2: fix build (#3818) * cmake: fix mingw-w64 build * time.cpp: fix build with Clang on Windows * tls.h: include malloc.h for alloca --- CMakeLists.txt | 18 +++++++++++------- externals/CMakeLists.txt | 2 +- src/core/libraries/kernel/time.cpp | 23 ++++++++++++++++++++--- src/core/tls.h | 3 +++ 4 files changed, 35 insertions(+), 11 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ddaf2422c..7c1ebca79 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -228,8 +228,10 @@ find_package(half 1.12.0 MODULE) find_package(magic_enum 0.9.7 CONFIG) find_package(PNG 1.6 MODULE) find_package(RenderDoc 1.6.0 MODULE) -find_package(SDL3 3.1.2 CONFIG) find_package(SDL3_mixer 2.8.1 CONFIG) +if (SDL3_mixer_FOUND) + find_package(SDL3 3.1.2 CONFIG) +endif() find_package(stb MODULE) find_package(toml11 4.2.0 CONFIG) find_package(tsl-robin-map 1.3.0 CONFIG) @@ -554,6 +556,8 @@ set(FIBER_LIB src/core/libraries/fiber/fiber_context.s src/core/libraries/fiber/fiber_error.h ) +set_source_files_properties(src/core/libraries/fiber/fiber_context.s PROPERTIES COMPILE_OPTIONS -Wno-unused-command-line-argument) + set(VDEC_LIB src/core/libraries/videodec/videodec2_impl.cpp src/core/libraries/videodec/videodec2_impl.h src/core/libraries/videodec/videodec2.cpp @@ -1122,22 +1126,22 @@ if (APPLE) endif() if (WIN32) - target_link_libraries(shadps4 PRIVATE mincore wepoll) + target_link_libraries(shadps4 PRIVATE mincore wepoll wbemuuid) if (MSVC) # MSVC likes putting opinions on what people can use, disable: - add_definitions(-D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE -D_SCL_SECURE_NO_WARNINGS) + 
+ add_compile_definitions(_CRT_SECURE_NO_WARNINGS _CRT_NONSTDC_NO_DEPRECATE _SCL_SECURE_NO_WARNINGS) endif() - add_definitions(-DNOMINMAX -DWIN32_LEAN_AND_MEAN) + add_compile_definitions(NOMINMAX WIN32_LEAN_AND_MEAN) if (MSVC) # Needed for conflicts with time.h of windows.h - add_definitions(-D_TIMESPEC_DEFINED) + add_compile_definitions(_TIMESPEC_DEFINED) endif() # Target Windows 10 RS5 - add_definitions(-DNTDDI_VERSION=0x0A000006 -D_WIN32_WINNT=0x0A00 -DWINVER=0x0A00) + add_compile_definitions(NTDDI_VERSION=0x0A000006 _WIN32_WINNT=0x0A00 WINVER=0x0A00) if (MSVC) target_link_libraries(shadps4 PRIVATE clang_rt.builtins-x86_64.lib) @@ -1169,7 +1173,7 @@ if (WIN32) target_sources(shadps4 PRIVATE src/shadps4.rc) endif() -add_definitions(-DBOOST_ASIO_STANDALONE) +add_compile_definitions(BOOST_ASIO_STANDALONE) target_include_directories(shadps4 PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 5f7ae94c4..b6c7c746e 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -152,7 +152,7 @@ endif() # sirit add_subdirectory(sirit) if (WIN32) - target_compile_options(sirit PUBLIC "-Wno-error=unused-command-line-argument") + target_compile_options(sirit PRIVATE "-Wno-error=unused-command-line-argument") endif() # half diff --git a/src/core/libraries/kernel/time.cpp b/src/core/libraries/kernel/time.cpp index ad07678b2..51f86e2c7 100644 --- a/src/core/libraries/kernel/time.cpp +++ b/src/core/libraries/kernel/time.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include #include "common/assert.h" @@ -485,25 +486,41 @@ Common::NativeClock* GetClock() { s32 PS4_SYSV_ABI sceKernelConvertUtcToLocaltime(time_t time, time_t* local_time, struct OrbisTimesec* st, u64* dst_sec) { LOG_TRACE(Kernel, "Called"); +#ifdef _WIN32 + TIME_ZONE_INFORMATION tz{}; + DWORD res = GetTimeZoneInformation(&tz); + *local_time = time - tz.Bias * 60 + (res == TIME_ZONE_ID_DAYLIGHT ? -_dstbias : 0); 
+ + if (st != nullptr) { + st->t = time; + st->west_sec = -tz.Bias * 60; + st->dst_sec = res == TIME_ZONE_ID_DAYLIGHT ? -_dstbias : 0; + } + + if (dst_sec != nullptr) { + *dst_sec = res == TIME_ZONE_ID_DAYLIGHT ? -_dstbias : 0; + } +#else #ifdef __APPLE__ // std::chrono::current_zone() not available yet. const auto* time_zone = date::current_zone(); #else const auto* time_zone = std::chrono::current_zone(); -#endif +#endif // __APPLE__ auto info = time_zone->get_info(std::chrono::system_clock::now()); *local_time = info.offset.count() + info.save.count() * 60 + time; if (st != nullptr) { st->t = time; - st->west_sec = info.offset.count() * 60; + st->west_sec = info.offset.count(); st->dst_sec = info.save.count() * 60; } if (dst_sec != nullptr) { *dst_sec = info.save.count() * 60; } +#endif // _WIN32 return ORBIS_OK; } @@ -565,4 +582,4 @@ void RegisterTime(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("-o5uEDpN+oY", "libkernel", 1, "libkernel", sceKernelConvertUtcToLocaltime); } -} // namespace Libraries::Kernel \ No newline at end of file +} // namespace Libraries::Kernel diff --git a/src/core/tls.h b/src/core/tls.h index 6be9752b0..0ae512a04 100644 --- a/src/core/tls.h +++ b/src/core/tls.h @@ -5,6 +5,9 @@ #include #include "common/types.h" +#ifdef _WIN32 +#include <malloc.h> +#endif namespace Xbyak { class CodeGenerator; From f6ae5544fdcf781d9578d4a7b6c5089705929c8f Mon Sep 17 00:00:00 2001 From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com> Date: Sat, 22 Nov 2025 02:32:53 -0600 Subject: [PATCH 05/25] Kernel.Vmm: Protect Fixes (#3822) * Some mprotect fixes The biggest thing here is preventing mprotect on memory that isn't mapped in address space. This would cause exceptions before, but succeeds on real hardware. I've also included a couple other minor fixes, mostly based around some tests I recently performed. Note: All changes to memory pools in this PR are assumed. 
I have not yet tested memory pools with any of this logic, but I do at least want to prevent mprotect on pool reserved memory to avoid crashes. * Update memory.cpp * clang --- src/core/memory.cpp | 48 ++++++++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 7db25391a..b9fd7fd7d 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -587,6 +587,10 @@ s32 MemoryManager::MapFile(void** out_addr, VAddr virtual_addr, u64 size, Memory // On real hardware, GPU file mmaps cause a full system crash due to an internal error. ASSERT_MSG(false, "Files cannot be mapped to GPU memory"); } + if (True(prot & MemoryProt::CpuExec)) { + // On real hardware, execute permissions are silently removed. + prot &= ~MemoryProt::CpuExec; + } // Add virtual memory area auto& new_vma = CarveVMA(mapped_addr, size)->second; @@ -793,10 +797,9 @@ s32 MemoryManager::QueryProtection(VAddr addr, void** start, void** end, u32* pr s64 MemoryManager::ProtectBytes(VAddr addr, VirtualMemoryArea& vma_base, u64 size, MemoryProt prot) { const auto start_in_vma = addr - vma_base.base; - const auto adjusted_size = - vma_base.size - start_in_vma < size ? vma_base.size - start_in_vma : size; + const auto adjusted_size = std::min(vma_base.size - start_in_vma, size); - if (vma_base.type == VMAType::Free) { + if (vma_base.type == VMAType::Free || vma_base.type == VMAType::PoolReserved) { // On PS4, protecting freed memory does nothing. return adjusted_size; } @@ -828,8 +831,9 @@ s64 MemoryManager::ProtectBytes(VAddr addr, VirtualMemoryArea& vma_base, u64 siz perms |= Core::MemoryPermission::ReadWrite; } - if (vma_base.type == VMAType::Direct || vma_base.type == VMAType::Pooled) { - // On PS4, execute permissions are hidden from direct memory mappings. 
+ if (vma_base.type == VMAType::Direct || vma_base.type == VMAType::Pooled || + vma_base.type == VMAType::File) { + // On PS4, execute permissions are hidden from direct memory and file mappings. // Tests show that execute permissions still apply, so handle this after reading perms. prot &= ~MemoryProt::CpuExec; } @@ -837,6 +841,12 @@ s64 MemoryManager::ProtectBytes(VAddr addr, VirtualMemoryArea& vma_base, u64 siz // Change protection vma_base.prot = prot; + if (vma_base.type == VMAType::Reserved) { + // On PS4, protections change vma_map, but don't apply. + // Return early to avoid protecting memory that isn't mapped in address space. + return adjusted_size; + } + impl.Protect(addr, size, perms); return adjusted_size; @@ -853,22 +863,20 @@ s32 MemoryManager::Protect(VAddr addr, u64 size, MemoryProt prot) { // Ensure the range to modify is valid ASSERT_MSG(IsValidMapping(addr, size), "Attempted to access invalid address {:#x}", addr); - // Validate protection flags - constexpr static MemoryProt valid_flags = - MemoryProt::NoAccess | MemoryProt::CpuRead | MemoryProt::CpuWrite | MemoryProt::CpuExec | - MemoryProt::GpuRead | MemoryProt::GpuWrite | MemoryProt::GpuReadWrite; - - MemoryProt invalid_flags = prot & ~valid_flags; - if (invalid_flags != MemoryProt::NoAccess) { - LOG_ERROR(Kernel_Vmm, "Invalid protection flags"); - return ORBIS_KERNEL_ERROR_EINVAL; - } + // Appropriately restrict flags. + constexpr static MemoryProt flag_mask = + MemoryProt::CpuReadWrite | MemoryProt::CpuExec | MemoryProt::GpuReadWrite; + MemoryProt valid_flags = prot & flag_mask; // Protect all VMAs between addr and addr + size. s64 protected_bytes = 0; while (protected_bytes < size) { auto it = FindVMA(addr + protected_bytes); auto& vma_base = it->second; + if (vma_base.base > addr + protected_bytes) { + // Account for potential gaps in memory map. 
+ protected_bytes += vma_base.base - (addr + protected_bytes); + } auto result = ProtectBytes(addr + protected_bytes, vma_base, size - protected_bytes, prot); if (result < 0) { // ProtectBytes returned an error, return it @@ -904,13 +912,21 @@ s32 MemoryManager::VirtualQuery(VAddr addr, s32 flags, const auto& vma = it->second; info->start = vma.base; info->end = vma.base + vma.size; - info->offset = vma.type == VMAType::Flexible ? 0 : vma.phys_base; + info->offset = 0; info->protection = static_cast(vma.prot); info->is_flexible = vma.type == VMAType::Flexible ? 1 : 0; info->is_direct = vma.type == VMAType::Direct ? 1 : 0; info->is_stack = vma.type == VMAType::Stack ? 1 : 0; info->is_pooled = vma.type == VMAType::PoolReserved || vma.type == VMAType::Pooled ? 1 : 0; info->is_committed = vma.IsMapped() ? 1 : 0; + if (vma.type == VMAType::Direct || vma.type == VMAType::Pooled) { + // Offset is only assigned for direct and pooled mappings. + info->offset = vma.phys_base; + } + if (vma.type == VMAType::Reserved || vma.type == VMAType::PoolReserved) { + // Protection is hidden from reserved mappings. 
+ info->protection = 0; + } strncpy(info->name, vma.name.data(), ::Libraries::Kernel::ORBIS_KERNEL_MAXIMUM_NAME_LENGTH); From f1a8b7d85e4f8279f1ec59960c32dcfee0be3fb0 Mon Sep 17 00:00:00 2001 From: TheTurtle Date: Mon, 24 Nov 2025 03:26:34 +0200 Subject: [PATCH 06/25] vector_alu: Fix V_CMP_U64 (#3823) * vector_alu: Fix V_CMP_U64 * vector_alu: Also handle vcc in V_CMP_U64 --- .../frontend/translate/vector_alu.cpp | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 604efabbd..83633402c 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -1043,20 +1043,25 @@ void Translator::V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const } void Translator::V_CMP_U64(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst) { - const IR::U64 src0{GetSrc64(inst.src[0])}; - const IR::U64 src1{GetSrc64(inst.src[1])}; + ASSERT(inst.src[1].field == OperandField::ConstZero); + const IR::U1 src0 = [&] { + switch (inst.src[0].field) { + case OperandField::ScalarGPR: + return ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code)); + case OperandField::VccLo: + return ir.GetVcc(); + default: + UNREACHABLE_MSG("src0 = {}", u32(inst.src[0].field)); + } + }(); const IR::U1 result = [&] { switch (op) { case ConditionOp::EQ: - return ir.IEqual(src0, src1); + return ir.LogicalNot(src0); case ConditionOp::LG: // NE - return ir.INotEqual(src0, src1); + return src0; case ConditionOp::GT: - if (src1.IsImmediate() && src1.U64() == 0) { - ASSERT(inst.src[0].field == OperandField::ScalarGPR); - return ir.GroupAny(ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code))); - } - return ir.IGreaterThan(src0, src1, is_signed); + return ir.GroupAny(ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code))); default: UNREACHABLE_MSG("Unsupported 
V_CMP_U64 condition operation: {}", u32(op)); } From 8123c44ad172acb1e85a143e517dfd54cc9b8ba4 Mon Sep 17 00:00:00 2001 From: Missake Date: Mon, 24 Nov 2025 02:28:48 +0100 Subject: [PATCH 07/25] Make FSR off by default (#3801) * Make FSR and RCAS off by default * Update config.cpp --- src/common/config.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/config.cpp b/src/common/config.cpp index 4d3e1d877..b0f068142 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -177,7 +177,7 @@ static ConfigEntry isFullscreen(false); static ConfigEntry fullscreenMode("Windowed"); static ConfigEntry presentMode("Mailbox"); static ConfigEntry isHDRAllowed(false); -static ConfigEntry fsrEnabled(true); +static ConfigEntry fsrEnabled(false); static ConfigEntry rcasEnabled(true); static ConfigEntry rcasAttenuation(250); From 2577dfde7e5e26dd6a38b8093c774eea02c27eab Mon Sep 17 00:00:00 2001 From: kalaposfos13 <153381648+kalaposfos13@users.noreply.github.com> Date: Mon, 24 Nov 2025 02:30:09 +0100 Subject: [PATCH 08/25] Add assert on SFO file being empty (#3815) --- src/core/file_format/psf.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core/file_format/psf.cpp b/src/core/file_format/psf.cpp index 047828330..e647059f0 100644 --- a/src/core/file_format/psf.cpp +++ b/src/core/file_format/psf.cpp @@ -36,6 +36,7 @@ bool PSF::Open(const std::filesystem::path& filepath) { } const u64 psfSize = file.GetSize(); + ASSERT_MSG(psfSize != 0, "SFO file at {} is empty!", filepath.string()); std::vector psf(psfSize); file.Seek(0); file.Read(psf); From 14d71a155a20b946c3a0ed89382391bdaae9e0ed Mon Sep 17 00:00:00 2001 From: TheTurtle Date: Mon, 24 Nov 2025 23:25:22 +0200 Subject: [PATCH 09/25] video_core: Reimplement inline data as buffer fill (#3825) --- src/video_core/amdgpu/liverpool.cpp | 15 ++--- src/video_core/buffer_cache/buffer_cache.cpp | 60 +++---------------- src/video_core/buffer_cache/buffer_cache.h | 4 +- 
.../renderer_vulkan/vk_rasterizer.cpp | 4 +- .../renderer_vulkan/vk_rasterizer.h | 2 +- 5 files changed, 20 insertions(+), 65 deletions(-) diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 85bfeb1a1..3f307c51b 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -655,8 +655,8 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spansrc_sel == DmaDataSrc::Data && dma_data->dst_sel == DmaDataDst::Gds) { - rasterizer->InlineData(dma_data->dst_addr_lo, &dma_data->data, sizeof(u32), - true); + rasterizer->FillBuffer(dma_data->dst_addr_lo, dma_data->NumBytes(), + dma_data->data, true); } else if ((dma_data->src_sel == DmaDataSrc::Memory || dma_data->src_sel == DmaDataSrc::MemoryUsingL2) && dma_data->dst_sel == DmaDataDst::Gds) { @@ -665,8 +665,8 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spansrc_sel == DmaDataSrc::Data && (dma_data->dst_sel == DmaDataDst::Memory || dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) { - rasterizer->InlineData(dma_data->DstAddress(), &dma_data->data, - sizeof(u32), false); + rasterizer->FillBuffer(dma_data->DstAddress(), dma_data->NumBytes(), + dma_data->data, false); } else if (dma_data->src_sel == DmaDataSrc::Gds && (dma_data->dst_sel == DmaDataDst::Memory || dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) { @@ -898,7 +898,8 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, u32 vqid) { break; } if (dma_data->src_sel == DmaDataSrc::Data && dma_data->dst_sel == DmaDataDst::Gds) { - rasterizer->InlineData(dma_data->dst_addr_lo, &dma_data->data, sizeof(u32), true); + rasterizer->FillBuffer(dma_data->dst_addr_lo, dma_data->NumBytes(), dma_data->data, + true); } else if ((dma_data->src_sel == DmaDataSrc::Memory || dma_data->src_sel == DmaDataSrc::MemoryUsingL2) && dma_data->dst_sel == DmaDataDst::Gds) { @@ -907,8 +908,8 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, u32 vqid) { } else if 
(dma_data->src_sel == DmaDataSrc::Data && (dma_data->dst_sel == DmaDataDst::Memory || dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) { - rasterizer->InlineData(dma_data->DstAddress(), &dma_data->data, sizeof(u32), - false); + rasterizer->FillBuffer(dma_data->DstAddress(), dma_data->NumBytes(), + dma_data->data, false); } else if (dma_data->src_sel == DmaDataSrc::Gds && (dma_data->dst_sel == DmaDataDst::Memory || dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) { diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index ac3fac5b1..7347e99a2 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -261,14 +261,13 @@ void BufferCache::BindIndexBuffer(u32 index_offset) { cmdbuf.bindIndexBuffer(vk_buffer->Handle(), offset, index_type); } -void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) { +void BufferCache::FillBuffer(VAddr address, u32 num_bytes, u32 value, bool is_gds) { ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned"); if (!is_gds) { - if (!memory->TryWriteBacking(std::bit_cast(address), value, num_bytes)) { - std::memcpy(std::bit_cast(address), value, num_bytes); - return; - } - if (!IsRegionRegistered(address, num_bytes)) { + texture_cache.ClearMeta(address); + if (!IsRegionGpuModified(address, num_bytes)) { + u32* buffer = std::bit_cast(address); + std::fill(buffer, buffer + num_bytes / sizeof(u32), value); return; } } @@ -276,10 +275,10 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo if (is_gds) { return &gds_buffer; } - const BufferId buffer_id = FindBuffer(address, num_bytes); - return &slot_buffers[buffer_id]; + const auto [buffer, offset] = ObtainBuffer(address, num_bytes, true); + return buffer; }(); - InlineDataBuffer(*buffer, address, value, num_bytes); + buffer->Fill(buffer->Offset(address), num_bytes, value); } void BufferCache::CopyBuffer(VAddr dst, 
VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) { @@ -778,49 +777,6 @@ void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) { }); } -void BufferCache::InlineDataBuffer(Buffer& buffer, VAddr address, const void* value, - u32 num_bytes) { - scheduler.EndRendering(); - const auto cmdbuf = scheduler.CommandBuffer(); - const vk::BufferMemoryBarrier2 pre_barrier = { - .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .srcAccessMask = vk::AccessFlagBits2::eMemoryRead, - .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, - .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, - .buffer = buffer.Handle(), - .offset = buffer.Offset(address), - .size = num_bytes, - }; - const vk::BufferMemoryBarrier2 post_barrier = { - .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, - .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, - .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .dstAccessMask = vk::AccessFlagBits2::eMemoryRead, - .buffer = buffer.Handle(), - .offset = buffer.Offset(address), - .size = num_bytes, - }; - cmdbuf.pipelineBarrier2(vk::DependencyInfo{ - .dependencyFlags = vk::DependencyFlagBits::eByRegion, - .bufferMemoryBarrierCount = 1, - .pBufferMemoryBarriers = &pre_barrier, - }); - // vkCmdUpdateBuffer can only copy up to 65536 bytes at a time. 
- static constexpr u32 UpdateBufferMaxSize = 65536; - const auto dst_offset = buffer.Offset(address); - for (u32 offset = 0; offset < num_bytes; offset += UpdateBufferMaxSize) { - const auto* update_src = static_cast(value) + offset; - const auto update_dst = dst_offset + offset; - const auto update_size = std::min(num_bytes - offset, UpdateBufferMaxSize); - cmdbuf.updateBuffer(buffer.Handle(), update_dst, update_size, update_src); - } - cmdbuf.pipelineBarrier2(vk::DependencyInfo{ - .dependencyFlags = vk::DependencyFlagBits::eByRegion, - .bufferMemoryBarrierCount = 1, - .pBufferMemoryBarriers = &post_barrier, - }); -} - void BufferCache::WriteDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes) { vk::BufferCopy copy = { .srcOffset = 0, diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 6954f979e..73d70704e 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -118,7 +118,7 @@ public: void BindIndexBuffer(u32 index_offset); /// Writes a value to GPU buffer. (uses command buffer to temporarily store the data) - void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds); + void FillBuffer(VAddr address, u32 num_bytes, u32 value, bool is_gds); /// Performs buffer to buffer data copy on the GPU. 
void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds); @@ -193,8 +193,6 @@ private: bool SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size); - void InlineDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes); - void WriteDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes); void TouchBuffer(const Buffer& buffer); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 8d00ff2d0..214d6d697 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -976,8 +976,8 @@ void Rasterizer::DepthStencilCopy(bool is_depth, bool is_stencil) { ScopeMarkerEnd(); } -void Rasterizer::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) { - buffer_cache.InlineData(address, value, num_bytes, is_gds); +void Rasterizer::FillBuffer(VAddr address, u32 num_bytes, u32 value, bool is_gds) { + buffer_cache.FillBuffer(address, num_bytes, value, is_gds); } void Rasterizer::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 96a3c95e8..c73626f3f 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -55,7 +55,7 @@ public: void ScopedMarkerInsertColor(const std::string_view& str, const u32 color, bool from_guest = false); - void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds); + void FillBuffer(VAddr address, u32 num_bytes, u32 value, bool is_gds); void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds); u32 ReadDataFromGds(u32 gsd_offset); bool InvalidateMemory(VAddr addr, u64 size); From 6295c32e5cf1c1389c6ce2b36014bd5fba520c56 Mon Sep 17 00:00:00 2001 From: Stephen Miller 
<56742918+StevenMiller123@users.noreply.github.com> Date: Tue, 25 Nov 2025 01:51:06 -0600 Subject: [PATCH 10/25] Render.Recompiler: Implement V_FLOOR_F64 (#3828) * VectorFpRound64 decode table Also fixed definition for V_TRUNC_F64, though I doubt that would change anything important. * V_FLOOR_F64 implementation Used by Just Cause 4 * Oops Never forget your 64s --- src/shader_recompiler/frontend/format.cpp | 14 ++++++++++---- .../frontend/translate/translate.h | 1 + .../frontend/translate/vector_alu.cpp | 7 +++++++ 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/shader_recompiler/frontend/format.cpp b/src/shader_recompiler/frontend/format.cpp index 6c4427e5f..d26873396 100644 --- a/src/shader_recompiler/frontend/format.cpp +++ b/src/shader_recompiler/frontend/format.cpp @@ -1837,11 +1837,17 @@ constexpr std::array InstructionFormatVOP1 = {{ // 22 = V_CVT_F64_U32 {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, ScalarType::Float64}, // 23 = V_TRUNC_F64 - {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Float64, + {InstClass::VectorFpRound64, InstCategory::VectorALU, 1, 1, ScalarType::Float64, + ScalarType::Float64}, + // 24 = V_CEIL_F64 + {InstClass::VectorFpRound64, InstCategory::VectorALU, 1, 1, ScalarType::Float64, + ScalarType::Float64}, + // 25 = V_RNDNE_F64 + {InstClass::VectorFpRound64, InstCategory::VectorALU, 1, 1, ScalarType::Float64, + ScalarType::Float64}, + // 26 = V_FLOOR_F64 + {InstClass::VectorFpRound64, InstCategory::VectorALU, 1, 1, ScalarType::Float64, ScalarType::Float64}, - {}, - {}, - {}, {}, {}, {}, diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index ea81a8d09..f999a3e3e 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -201,6 +201,7 @@ public: void V_CVT_F32_F64(const GcnInst& inst); void V_CVT_F64_F32(const GcnInst& inst); void 
V_CVT_F32_UBYTE(u32 index, const GcnInst& inst); + void V_FLOOR_F64(const GcnInst& inst); void V_FRACT_F32(const GcnInst& inst); void V_TRUNC_F32(const GcnInst& inst); void V_CEIL_F32(const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 83633402c..94cefb958 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -142,6 +142,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { return V_CVT_F32_UBYTE(2, inst); case Opcode::V_CVT_F32_UBYTE3: return V_CVT_F32_UBYTE(3, inst); + case Opcode::V_FLOOR_F64: + return V_FLOOR_F64(inst); case Opcode::V_FRACT_F32: return V_FRACT_F32(inst); case Opcode::V_TRUNC_F32: @@ -806,6 +808,11 @@ void Translator::V_CVT_F32_UBYTE(u32 index, const GcnInst& inst) { SetDst(inst.dst[0], ir.ConvertUToF(32, 32, byte)); } +void Translator::V_FLOOR_F64(const GcnInst& inst) { + const IR::F64 src0{GetSrc64(inst.src[0])}; + SetDst64(inst.dst[0], ir.FPFloor(src0)); +} + void Translator::V_FRACT_F32(const GcnInst& inst) { const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.FPFract(src0)); From 13948527910dd445fed29380679d451a026368e0 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Mon, 24 Nov 2025 23:51:39 -0800 Subject: [PATCH 11/25] renderer_vulkan: Remove primitive restart disable support check. 
(#3827) --- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 9 +-------- src/video_core/renderer_vulkan/vk_instance.h | 5 ----- src/video_core/renderer_vulkan/vk_scheduler.cpp | 4 +--- 3 files changed, 2 insertions(+), 16 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 9e2ce4848..e2531456c 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -94,10 +94,6 @@ GraphicsPipeline::GraphicsPipeline( const auto topology = LiverpoolToVK::PrimitiveType(key.prim_type); const vk::PipelineInputAssemblyStateCreateInfo input_assembly = { .topology = topology, - // Avoid warning spam on all pipelines about unsupported restart disable, if not supported. - // However, must be false for list topologies to avoid validation errors. - .primitiveRestartEnable = - !instance.IsPrimitiveRestartDisableSupported() && !IsPrimitiveTopologyList(topology), }; const bool is_rect_list = key.prim_type == AmdGpu::PrimitiveType::RectList; @@ -156,12 +152,9 @@ GraphicsPipeline::GraphicsPipeline( vk::DynamicState::eStencilCompareMask, vk::DynamicState::eStencilWriteMask, vk::DynamicState::eStencilOp, vk::DynamicState::eCullMode, vk::DynamicState::eFrontFace, vk::DynamicState::eRasterizerDiscardEnable, - vk::DynamicState::eLineWidth, + vk::DynamicState::eLineWidth, vk::DynamicState::ePrimitiveRestartEnable, }; - if (instance.IsPrimitiveRestartDisableSupported()) { - dynamic_states.push_back(vk::DynamicState::ePrimitiveRestartEnable); - } if (instance.IsDepthBoundsSupported()) { dynamic_states.push_back(vk::DynamicState::eDepthBoundsTestEnable); dynamic_states.push_back(vk::DynamicState::eDepthBounds); diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 2a8bd3c82..bbefdc1b3 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ 
b/src/video_core/renderer_vulkan/vk_instance.h @@ -411,11 +411,6 @@ public: properties.limits.framebufferStencilSampleCounts; } - /// Returns whether disabling primitive restart is supported. - bool IsPrimitiveRestartDisableSupported() const { - return driver_id != vk::DriverId::eMoltenvk; - } - /// Returns true if logic ops are supported by the device. bool IsLogicOpSupported() const { return features.logicOp; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index da7467dfb..cc8f6956d 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -290,9 +290,7 @@ void DynamicState::Commit(const Instance& instance, const vk::CommandBuffer& cmd } if (dirty_state.primitive_restart_enable) { dirty_state.primitive_restart_enable = false; - if (instance.IsPrimitiveRestartDisableSupported()) { - cmdbuf.setPrimitiveRestartEnable(primitive_restart_enable); - } + cmdbuf.setPrimitiveRestartEnable(primitive_restart_enable); } if (dirty_state.rasterizer_discard_enable) { dirty_state.rasterizer_discard_enable = false; From f9ef57f74be3127d76875c2bbdb5d23a6297f986 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quang=20Ng=C3=B4?= Date: Fri, 28 Nov 2025 23:36:11 +0700 Subject: [PATCH 12/25] Fix metainfo (#3834) --- dist/net.shadps4.shadPS4.metainfo.xml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dist/net.shadps4.shadPS4.metainfo.xml b/dist/net.shadps4.shadPS4.metainfo.xml index 5798876f6..c85fcf003 100644 --- a/dist/net.shadps4.shadPS4.metainfo.xml +++ b/dist/net.shadps4.shadPS4.metainfo.xml @@ -18,19 +18,19 @@ https://cdn.jsdelivr.net/gh/shadps4-emu/shadps4@main/documents/Screenshots/1.png - Bloodborne + Bloodborne by From Software https://cdn.jsdelivr.net/gh/shadps4-emu/shadps4@main/documents/Screenshots/2.png - Hatsune Miku: Project DIVA Future Tone + Hatsune Miku Project DIVA Future Tone by SEGA 
https://cdn.jsdelivr.net/gh/shadps4-emu/shadps4@main/documents/Screenshots/3.png - Yakuza 0 + Yakuza 0 by SEGA https://cdn.jsdelivr.net/gh/shadps4-emu/shadps4@main/documents/Screenshots/4.png - Persona 4 Golden + DRIVECLUB™ by Evolution Studios From a9f8eaf77855a52be29e1b8c5ec2449e2ef35d8a Mon Sep 17 00:00:00 2001 From: psucien <168137814+psucien@users.noreply.github.com> Date: Sat, 29 Nov 2025 10:52:08 +0100 Subject: [PATCH 13/25] video_core: Initial implementation of pipeline cache (#3816) * Initial implementation * Fix for crash caused by stale stages data; cosmetics applied * Someone mentioned the assert * Async blob writer * Fix for memory leak * Remain stuff * Async changed to `packaged_task` --- .gitmodules | 3 + CMakeLists.txt | 12 +- externals/CMakeLists.txt | 3 + externals/miniz | 1 + src/common/config.cpp | 24 + src/common/config.h | 4 + src/common/path_util.cpp | 1 + src/common/path_util.h | 2 + src/common/serdes.h | 140 +++++ src/emulator.cpp | 2 + .../frontend/fetch_shader.cpp | 2 +- src/shader_recompiler/frontend/fetch_shader.h | 8 +- .../frontend/structured_control_flow.cpp | 8 +- .../frontend/translate/translate.cpp | 3 +- src/shader_recompiler/info.h | 117 +++-- .../passes/flatten_extended_userdata_pass.cpp | 18 +- .../ir/passes/hull_shader_transform.cpp | 6 +- src/shader_recompiler/ir/passes/ir_passes.h | 4 +- .../ir/passes/resource_tracking_pass.cpp | 3 +- src/shader_recompiler/ir/passes/srt.h | 9 + src/shader_recompiler/profile.h | 9 +- src/shader_recompiler/recompiler.cpp | 6 +- src/shader_recompiler/recompiler.h | 5 +- src/shader_recompiler/resource.h | 11 +- src/shader_recompiler/runtime_info.h | 3 +- src/shader_recompiler/specialization.h | 48 +- src/video_core/amdgpu/pixel_format.h | 8 +- src/video_core/cache_storage.cpp | 264 ++++++++++ src/video_core/cache_storage.h | 50 ++ .../renderer_vulkan/vk_compute_pipeline.cpp | 9 +- .../renderer_vulkan/vk_compute_pipeline.h | 16 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 75 +-- 
.../renderer_vulkan/vk_graphics_pipeline.h | 20 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 74 ++- .../renderer_vulkan/vk_pipeline_cache.h | 36 +- .../vk_pipeline_serialization.cpp | 480 ++++++++++++++++++ .../vk_pipeline_serialization.h | 21 + 37 files changed, 1339 insertions(+), 166 deletions(-) create mode 160000 externals/miniz create mode 100644 src/common/serdes.h create mode 100644 src/video_core/cache_storage.cpp create mode 100644 src/video_core/cache_storage.h create mode 100644 src/video_core/renderer_vulkan/vk_pipeline_serialization.cpp create mode 100644 src/video_core/renderer_vulkan/vk_pipeline_serialization.h diff --git a/.gitmodules b/.gitmodules index b8d1544e4..c5d05edd3 100644 --- a/.gitmodules +++ b/.gitmodules @@ -117,3 +117,6 @@ path = externals/sdl3_mixer url = https://github.com/libsdl-org/SDL_mixer shallow = true +[submodule "externals/miniz"] + path = externals/miniz + url = https://github.com/richgel999/miniz diff --git a/CMakeLists.txt b/CMakeLists.txt index 7c1ebca79..cf78e92bf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -512,7 +512,7 @@ set(PAD_LIB src/core/libraries/pad/pad.cpp src/core/libraries/pad/pad_errors.h ) -set(SYSTEM_GESTURE_LIB +set(SYSTEM_GESTURE_LIB src/core/libraries/system_gesture/system_gesture.cpp src/core/libraries/system_gesture/system_gesture.h ) @@ -693,7 +693,6 @@ set(COMMON src/common/logging/backend.cpp src/common/lru_cache.h src/common/error.cpp src/common/error.h - src/common/scope_exit.h src/common/fixed_value.h src/common/func_traits.h src/common/native_clock.cpp @@ -707,6 +706,8 @@ set(COMMON src/common/logging/backend.cpp src/common/rdtsc.h src/common/recursive_lock.cpp src/common/recursive_lock.h + src/common/scope_exit.h + src/common/serdes.h src/common/sha1.h src/common/shared_first_mutex.h src/common/signal_context.h @@ -986,6 +987,8 @@ set(VIDEO_CORE src/video_core/amdgpu/cb_db_extent.h src/video_core/renderer_vulkan/vk_pipeline_cache.h 
src/video_core/renderer_vulkan/vk_pipeline_common.cpp src/video_core/renderer_vulkan/vk_pipeline_common.h + src/video_core/renderer_vulkan/vk_pipeline_serialization.cpp + src/video_core/renderer_vulkan/vk_pipeline_serialization.h src/video_core/renderer_vulkan/vk_platform.cpp src/video_core/renderer_vulkan/vk_platform.h src/video_core/renderer_vulkan/vk_presenter.cpp @@ -1023,6 +1026,8 @@ set(VIDEO_CORE src/video_core/amdgpu/cb_db_extent.h src/video_core/texture_cache/tile_manager.cpp src/video_core/texture_cache/tile_manager.h src/video_core/texture_cache/types.h + src/video_core/cache_storage.cpp + src/video_core/cache_storage.h src/video_core/page_manager.cpp src/video_core/page_manager.h src/video_core/multi_level_page_table.h @@ -1077,7 +1082,8 @@ add_executable(shadps4 create_target_directory_groups(shadps4) target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient RenderDoc::API FFmpeg::ffmpeg Dear_ImGui gcn half::half ZLIB::ZLIB PNG::PNG) -target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::glslang SDL3::SDL3 SDL3_mixer::SDL3_mixer pugixml::pugixml stb::headers libusb::usb lfreist-hwinfo::hwinfo nlohmann_json::nlohmann_json) +target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::glslang SDL3::SDL3 SDL3_mixer::SDL3_mixer pugixml::pugixml) +target_link_libraries(shadps4 PRIVATE stb::headers libusb::usb lfreist-hwinfo::hwinfo nlohmann_json::nlohmann_json miniz) target_compile_definitions(shadps4 PRIVATE IMGUI_USER_CONFIG="imgui/imgui_config.h") target_compile_definitions(Dear_ImGui PRIVATE IMGUI_USER_CONFIG="${PROJECT_SOURCE_DIR}/src/imgui/imgui_config.h") diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index b6c7c746e..eb3723f2c 100644 --- a/externals/CMakeLists.txt +++ 
b/externals/CMakeLists.txt @@ -261,3 +261,6 @@ endif() #nlohmann json set(JSON_BuildTests OFF CACHE INTERNAL "") add_subdirectory(json) + +# miniz +add_subdirectory(miniz) diff --git a/externals/miniz b/externals/miniz new file mode 160000 index 000000000..174573d60 --- /dev/null +++ b/externals/miniz @@ -0,0 +1 @@ +Subproject commit 174573d60290f447c13a2b1b3405de2b96e27d6c diff --git a/src/common/config.cpp b/src/common/config.cpp index b0f068142..e79652b32 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -191,6 +191,8 @@ static ConfigEntry vkCrashDiagnostic(false); static ConfigEntry vkHostMarkers(false); static ConfigEntry vkGuestMarkers(false); static ConfigEntry rdocEnable(false); +static ConfigEntry pipelineCacheEnable(false); +static ConfigEntry pipelineCacheArchive(false); // Debug static ConfigEntry isDebugDump(false); @@ -452,6 +454,14 @@ bool isRdocEnabled() { return rdocEnable.get(); } +bool isPipelineCacheEnabled() { + return pipelineCacheEnable.get(); +} + +bool isPipelineCacheArchived() { + return pipelineCacheArchive.get(); +} + bool fpsColor() { return isFpsColor.get(); } @@ -603,6 +613,14 @@ void setRdocEnabled(bool enable, bool is_game_specific) { rdocEnable.set(enable, is_game_specific); } +void setPipelineCacheEnabled(bool enable, bool is_game_specific) { + pipelineCacheEnable.set(enable, is_game_specific); +} + +void setPipelineCacheArchived(bool enable, bool is_game_specific) { + pipelineCacheArchive.set(enable, is_game_specific); +} + void setVblankFreq(u32 value, bool is_game_specific) { vblankFrequency.set(value, is_game_specific); } @@ -939,6 +957,8 @@ void load(const std::filesystem::path& path, bool is_game_specific) { vkHostMarkers.setFromToml(vk, "hostMarkers", is_game_specific); vkGuestMarkers.setFromToml(vk, "guestMarkers", is_game_specific); rdocEnable.setFromToml(vk, "rdocEnable", is_game_specific); + pipelineCacheEnable.setFromToml(vk, "pipelineCacheEnable", is_game_specific); + 
pipelineCacheArchive.setFromToml(vk, "pipelineCacheArchive", is_game_specific); } string current_version = {}; @@ -1107,6 +1127,8 @@ void save(const std::filesystem::path& path, bool is_game_specific) { vkHostMarkers.setTomlValue(data, "Vulkan", "hostMarkers", is_game_specific); vkGuestMarkers.setTomlValue(data, "Vulkan", "guestMarkers", is_game_specific); rdocEnable.setTomlValue(data, "Vulkan", "rdocEnable", is_game_specific); + pipelineCacheEnable.setTomlValue(data, "Vulkan", "pipelineCacheEnable", is_game_specific); + pipelineCacheArchive.setTomlValue(data, "Vulkan", "pipelineCacheArchive", is_game_specific); isDebugDump.setTomlValue(data, "Debug", "DebugDump", is_game_specific); isShaderDebug.setTomlValue(data, "Debug", "CollectShader", is_game_specific); @@ -1237,6 +1259,8 @@ void setDefaultValues(bool is_game_specific) { vkHostMarkers.set(false, is_game_specific); vkGuestMarkers.set(false, is_game_specific); rdocEnable.set(false, is_game_specific); + pipelineCacheEnable.set(false, is_game_specific); + pipelineCacheArchive.set(false, is_game_specific); // GS - Debug isDebugDump.set(false, is_game_specific); diff --git a/src/common/config.h b/src/common/config.h index 5c9f89ae6..481ef6444 100644 --- a/src/common/config.h +++ b/src/common/config.h @@ -94,7 +94,11 @@ void setVkGuestMarkersEnabled(bool enable, bool is_game_specific = false); bool getEnableDiscordRPC(); void setEnableDiscordRPC(bool enable); bool isRdocEnabled(); +bool isPipelineCacheEnabled(); +bool isPipelineCacheArchived(); void setRdocEnabled(bool enable, bool is_game_specific = false); +void setPipelineCacheEnabled(bool enable, bool is_game_specific = false); +void setPipelineCacheArchived(bool enable, bool is_game_specific = false); std::string getLogType(); void setLogType(const std::string& type, bool is_game_specific = false); std::string getLogFilter(); diff --git a/src/common/path_util.cpp b/src/common/path_util.cpp index bd0aff040..b0cbb10cf 100644 --- a/src/common/path_util.cpp +++ 
b/src/common/path_util.cpp @@ -127,6 +127,7 @@ static auto UserPaths = [] { create_path(PathType::MetaDataDir, user_dir / METADATA_DIR); create_path(PathType::CustomTrophy, user_dir / CUSTOM_TROPHY); create_path(PathType::CustomConfigs, user_dir / CUSTOM_CONFIGS); + create_path(PathType::CacheDir, user_dir / CACHE_DIR); std::ofstream notice_file(user_dir / CUSTOM_TROPHY / "Notice.txt"); if (notice_file.is_open()) { diff --git a/src/common/path_util.h b/src/common/path_util.h index 0a0234eba..fd2c18baa 100644 --- a/src/common/path_util.h +++ b/src/common/path_util.h @@ -24,6 +24,7 @@ enum class PathType { MetaDataDir, // Where game metadata (e.g. trophies and menu backgrounds) is stored. CustomTrophy, // Where custom files for trophies are stored. CustomConfigs, // Where custom files for different games are stored. + CacheDir, // Where pipeline and shader cache is stored. }; constexpr auto PORTABLE_DIR = "user"; @@ -42,6 +43,7 @@ constexpr auto PATCHES_DIR = "patches"; constexpr auto METADATA_DIR = "game_data"; constexpr auto CUSTOM_TROPHY = "custom_trophy"; constexpr auto CUSTOM_CONFIGS = "custom_configs"; +constexpr auto CACHE_DIR = "cache"; // Filenames constexpr auto LOG_FILE = "shad_log.txt"; diff --git a/src/common/serdes.h b/src/common/serdes.h new file mode 100644 index 000000000..a36fed4d3 --- /dev/null +++ b/src/common/serdes.h @@ -0,0 +1,140 @@ +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/assert.h" +#include "common/types.h" + +#include + +namespace Serialization { + +template +concept Container = requires(T t) { + typename T::iterator; + { t.begin() } -> std::same_as; + { t.end() } -> std::same_as; + { t.size() } -> std::convertible_to; +}; + +struct Archive { + void Alloc(size_t size) { + container.resize(size); + } + + void Grow(size_t size) { + container.resize(container.size() + size); + } + + void Merge(const Archive& ar) { + 
container.insert(container.end(), ar.container.cbegin(), ar.container.cend()); + offset = container.size(); + } + + [[nodiscard]] size_t SizeBytes() const { + return container.size(); + } + + u8* CurrPtr() { + return container.data() + offset; + } + + void Advance(size_t size) { + ASSERT(offset + size <= container.size()); + offset += size; + } + + std::vector&& TakeOff() { + offset = 0; + return std::move(container); + } + + [[nodiscard]] bool IsEoS() const { + return offset >= container.size(); + } + + Archive() = default; + explicit Archive(std::vector&& v) : container{v} {} + +private: + u32 offset{}; + std::vector container{}; + + friend struct Writer; + friend struct Reader; +}; + +struct Writer { + template + void Write(const T* ptr, size_t size) { + if (ar.offset + size >= ar.container.size()) { + ar.Grow(size); + } + std::memcpy(ar.CurrPtr(), reinterpret_cast(ptr), size); + ar.Advance(size); + } + + template + requires(!Container) + void Write(const T& value) { + const auto size = sizeof(value); + Write(&value, size); + } + + void Write(const auto& v) { + Write(v.size()); + for (const auto& elem : v) { + Write(elem); + } + } + + void Write(const std::string& s) { + Write(s.size()); + Write(s.c_str(), s.size()); + } + + Writer() = delete; + explicit Writer(Archive& ar_) : ar{ar_} {} + + Archive& ar; +}; + +struct Reader { + template + void Read(T* ptr, size_t size) { + ASSERT(ar.offset + size <= ar.container.size()); + std::memcpy(reinterpret_cast(ptr), ar.CurrPtr(), size); + ar.Advance(size); + } + + template + requires(!Container) + void Read(T& value) { + const auto size = sizeof(value); + Read(&value, size); + } + + void Read(auto& v) { + size_t num_elements{}; + Read(num_elements); + for (int i = 0; i < num_elements; ++i) { + v.emplace_back(); + Read(v.back()); + } + } + + void Read(std::string& s) { + size_t length{}; + Read(length); + s.resize(length); + Read(s.data(), length); + } + + Reader() = delete; + explicit Reader(Archive& ar_) : ar{ar_} {} + 
+ Archive& ar; +}; + +} // namespace Serialization diff --git a/src/emulator.cpp b/src/emulator.cpp index fb187cfae..f0026068c 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -42,6 +42,7 @@ #include "core/linker.h" #include "core/memory.h" #include "emulator.h" +#include "video_core/cache_storage.h" #include "video_core/renderdoc.h" #ifdef _WIN32 @@ -387,6 +388,7 @@ void Emulator::Run(std::filesystem::path file, std::vector args, } UpdatePlayTime(id); + Storage::DataBase::Instance().Close(); std::quick_exit(0); } diff --git a/src/shader_recompiler/frontend/fetch_shader.cpp b/src/shader_recompiler/frontend/fetch_shader.cpp index 35bea1c1b..ba0635546 100644 --- a/src/shader_recompiler/frontend/fetch_shader.cpp +++ b/src/shader_recompiler/frontend/fetch_shader.cpp @@ -51,7 +51,7 @@ std::optional ParseFetchShader(const Shader::Info& info) { } const auto* code = GetFetchShaderCode(info, info.fetch_shader_sgpr_base); - FetchShaderData data{.code = code}; + FetchShaderData data{}; GcnCodeSlice code_slice(code, code + std::numeric_limits::max()); GcnDecodeContext decoder; diff --git a/src/shader_recompiler/frontend/fetch_shader.h b/src/shader_recompiler/frontend/fetch_shader.h index 442a9af2f..a57bbb0c6 100644 --- a/src/shader_recompiler/frontend/fetch_shader.h +++ b/src/shader_recompiler/frontend/fetch_shader.h @@ -8,6 +8,10 @@ #include "common/types.h" #include "shader_recompiler/info.h" +namespace Serialization { +struct Archive; +} + namespace Shader::Gcn { struct VertexAttribute { @@ -50,7 +54,6 @@ struct VertexAttribute { }; struct FetchShaderData { - const u32* code; u32 size = 0; std::vector attributes; s8 vertex_offset_sgpr = -1; ///< SGPR of vertex offset from VADDR @@ -60,6 +63,9 @@ struct FetchShaderData { return attributes == other.attributes && vertex_offset_sgpr == other.vertex_offset_sgpr && instance_offset_sgpr == other.instance_offset_sgpr; } + + void Serialize(Serialization::Archive& ar) const; + bool Deserialize(Serialization::Archive& buffer); 
}; const u32* GetFetchShaderCode(const Info& info, u32 sgpr_base); diff --git a/src/shader_recompiler/frontend/structured_control_flow.cpp b/src/shader_recompiler/frontend/structured_control_flow.cpp index 963b2c0d5..1dc186c64 100644 --- a/src/shader_recompiler/frontend/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/structured_control_flow.cpp @@ -596,9 +596,8 @@ public: IR::AbstractSyntaxList& syntax_list_, std::span inst_list_, Info& info_, const RuntimeInfo& runtime_info_, const Profile& profile_) : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, - syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_}, - runtime_info{runtime_info_}, profile{profile_}, - translator{info_, runtime_info_, profile_} { + syntax_list{syntax_list_}, inst_list{inst_list_}, runtime_info{runtime_info_}, + profile{profile_}, translator{info_, runtime_info_, profile_} { Visit(root_stmt, nullptr, nullptr); IR::Block* first_block = syntax_list.front().data.block; @@ -782,7 +781,7 @@ private: } } - IR::Block* MergeBlock(Statement& parent, Statement& stmt) { + IR::Block* MergeBlock(Statement& parent, Statement& stmt) const { Statement* merge_stmt{TryFindForwardBlock(stmt)}; if (!merge_stmt) { // Create a merge block we can visit later @@ -798,7 +797,6 @@ private: IR::AbstractSyntaxList& syntax_list; const Block dummy_flow_block{.is_dummy = true}; std::span inst_list; - Info& info; const RuntimeInfo& runtime_info; const Profile& profile; Translator translator; diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 57b50a3e1..3aa70e2ec 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -560,7 +560,8 @@ void Translator::EmitFetch(const GcnInst& inst) { } const auto filename = fmt::format("vs_{:#018x}.fetch.bin", info.pgm_hash); const auto file = IOFile{dump_dir / filename, 
FileAccessMode::Create}; - file.WriteRaw(fetch_data->code, fetch_data->size); + const auto* code = GetFetchShaderCode(info, code_sgpr_base); + file.WriteRaw(code, fetch_data->size); } for (const auto& attrib : fetch_data->attributes) { diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index a3be34390..8d89537cb 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -19,6 +19,10 @@ #include "shader_recompiler/resource.h" #include "shader_recompiler/runtime_info.h" +namespace Serialization { +struct Archive; +} + namespace Shader { enum class Qualifier : u8 { @@ -34,7 +38,49 @@ enum class Qualifier : u8 { /** * Contains general information generated by the shader recompiler for an input program. */ -struct Info { +struct InfoPersistent { + BufferResourceList buffers; + ImageResourceList images; + SamplerResourceList samplers; + FMaskResourceList fmasks; + + struct UserDataMask { + void Set(IR::ScalarReg reg) noexcept { + mask |= 1 << static_cast(reg); + } + + u32 Index(IR::ScalarReg reg) const noexcept { + const u32 reg_mask = (1 << static_cast(reg)) - 1; + return std::popcount(mask & reg_mask); + } + + u32 NumRegs() const noexcept { + return std::popcount(mask); + } + + u32 mask; + }; + UserDataMask ud_mask{}; + u32 fetch_shader_sgpr_base{}; + + u64 pgm_hash{}; + + s32 tess_consts_dword_offset = -1; + IR::ScalarReg tess_consts_ptr_base = IR::ScalarReg::Max; + Stage stage; + LogicalStage l_stage; + + u8 mrt_mask{}; + bool has_fetch_shader{}; + bool has_bitwise_xor{}; + bool uses_dma{}; + + InfoPersistent() = default; + InfoPersistent(Stage stage_, LogicalStage l_stage_, u64 pgm_hash_) + : stage{stage_}, l_stage{l_stage_}, pgm_hash{pgm_hash_} {} +}; + +struct Info : InfoPersistent { struct AttributeFlags { bool Get(IR::Attribute attrib, u32 comp = 0) const { return flags[Index(attrib)] & (1 << comp); @@ -58,56 +104,32 @@ struct Info { std::array flags; }; - AttributeFlags loads{}; - AttributeFlags stores{}; - struct 
UserDataMask { - void Set(IR::ScalarReg reg) noexcept { - mask |= 1 << static_cast(reg); - } - - u32 Index(IR::ScalarReg reg) const noexcept { - const u32 reg_mask = (1 << static_cast(reg)) - 1; - return std::popcount(mask & reg_mask); - } - - u32 NumRegs() const noexcept { - return std::popcount(mask); - } - - u32 mask; + enum class ReadConstType { + None = 0, + Immediate = 1 << 0, + Dynamic = 1 << 1, }; - UserDataMask ud_mask{}; - - CopyShaderData gs_copy_data; - u32 uses_patches{}; - - BufferResourceList buffers; - ImageResourceList images; - SamplerResourceList samplers; - FMaskResourceList fmasks; - - PersistentSrtInfo srt_info; - std::vector flattened_ud_buf; struct Interpolation { Qualifier primary; Qualifier auxiliary; }; - std::array fs_interpolation{}; - - IR::ScalarReg tess_consts_ptr_base = IR::ScalarReg::Max; - s32 tess_consts_dword_offset = -1; std::span user_data; - Stage stage; - LogicalStage l_stage; + std::vector flattened_ud_buf; + PersistentSrtInfo srt_info; + + AttributeFlags loads{}; + AttributeFlags stores{}; + + ReadConstType readconst_types{}; + CopyShaderData gs_copy_data; + u32 uses_patches{}; - u64 pgm_hash{}; VAddr pgm_base; bool has_storage_images{}; bool has_discard{}; - bool has_bitwise_xor{}; bool has_image_gather{}; bool has_image_query{}; bool uses_buffer_atomic_float_min_max{}; @@ -125,20 +147,12 @@ struct Info { bool stores_tess_level_outer{}; bool stores_tess_level_inner{}; bool translation_failed{}; - u8 mrt_mask{0u}; - bool has_fetch_shader{false}; - u32 fetch_shader_sgpr_base{0u}; - enum class ReadConstType { - None = 0, - Immediate = 1 << 0, - Dynamic = 1 << 1, - }; - ReadConstType readconst_types{}; - bool uses_dma{}; + std::array fs_interpolation{}; - explicit Info(Stage stage_, LogicalStage l_stage_, ShaderParams params) - : stage{stage_}, l_stage{l_stage_}, pgm_hash{params.hash}, pgm_base{params.Base()}, + Info() = default; + Info(Stage stage_, LogicalStage l_stage_, ShaderParams params) + : InfoPersistent(stage_, 
l_stage_, params.hash), pgm_base{params.Base()}, user_data{params.user_data} {} template @@ -192,6 +206,9 @@ struct Info { reinterpret_cast(tess_constants_addr), sizeof(tess_constants)); } + + void Serialize(Serialization::Archive& ar) const; + bool Deserialize(Serialization::Archive& ar); }; DECLARE_ENUM_FLAG_OPERATORS(Info::ReadConstType); diff --git a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp index 7626b9c9f..e1f9f2c5a 100644 --- a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp +++ b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp @@ -28,6 +28,17 @@ using namespace Xbyak::util; static Xbyak::CodeGenerator g_srt_codegen(32_MB); static const u8* g_srt_codegen_start = nullptr; +namespace Shader { + +PFN_SrtWalker RegisterWalkerCode(const u8* ptr, size_t size) { + const auto func_addr = (PFN_SrtWalker)g_srt_codegen.getCurr(); + g_srt_codegen.db(ptr, size); + g_srt_codegen.ready(); + return func_addr; +} + +} // namespace Shader + namespace { static void DumpSrtProgram(const Shader::Info& info, const u8* code, size_t codesize) { @@ -215,9 +226,12 @@ static void GenerateSrtProgram(Info& info, PassInfo& pass_info) { c.ret(); c.ready(); + info.srt_info.walker_func_size = + c.getCurr() - reinterpret_cast(info.srt_info.walker_func); + if (Config::dumpShaders()) { - size_t codesize = c.getCurr() - reinterpret_cast(info.srt_info.walker_func); - DumpSrtProgram(info, reinterpret_cast(info.srt_info.walker_func), codesize); + DumpSrtProgram(info, reinterpret_cast(info.srt_info.walker_func), + info.srt_info.walker_func_size); } info.srt_info.flattened_bufsize_dw = pass_info.dst_off_dw; diff --git a/src/shader_recompiler/ir/passes/hull_shader_transform.cpp b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp index 2f8e1d7b1..48b496727 100644 --- a/src/shader_recompiler/ir/passes/hull_shader_transform.cpp +++ 
b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp @@ -363,7 +363,7 @@ static IR::F32 ReadTessControlPointAttribute(IR::U32 addr, const u32 stride, IR: } // namespace -void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) { +void HullShaderTransform(IR::Program& program, const RuntimeInfo& runtime_info) { const Info& info = program.info; for (IR::Block* block : program.blocks) { @@ -561,8 +561,8 @@ void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) { } } -void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) { - Info& info = program.info; +void DomainShaderTransform(const IR::Program& program, const RuntimeInfo& runtime_info) { + const Info& info = program.info; for (IR::Block* block : program.blocks) { for (IR::Inst& inst : block->Instructions()) { diff --git a/src/shader_recompiler/ir/passes/ir_passes.h b/src/shader_recompiler/ir/passes/ir_passes.h index fdae9d3cf..5bf362284 100644 --- a/src/shader_recompiler/ir/passes/ir_passes.h +++ b/src/shader_recompiler/ir/passes/ir_passes.h @@ -24,8 +24,8 @@ void LowerBufferFormatToRaw(IR::Program& program); void LowerFp64ToFp32(IR::Program& program); void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info); void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info); -void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info); -void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info); +void HullShaderTransform(IR::Program& program, const RuntimeInfo& runtime_info); +void DomainShaderTransform(const IR::Program& program, const RuntimeInfo& runtime_info); void SharedMemoryBarrierPass(IR::Program& program, const RuntimeInfo& runtime_info, const Profile& profile); void SharedMemorySimplifyPass(IR::Program& program, const Profile& profile); diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp 
index b9b4e9726..53b161149 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -498,7 +498,8 @@ void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& // buffer_load_format_xyz v[8:10], v1, s[32:35], 0 ... // is used to define an inline buffer resource std::array raw; - raw[0] = info.pgm_base + (handle->Arg(0).U32() | u64(handle->Arg(1).U32()) << 32); + // Keep relative address, we'll do fixup of the address at buffer fetch later + raw[0] = (handle->Arg(0).U32() | u64(handle->Arg(1).U32()) << 32); raw[1] = handle->Arg(2).U32() | u64(handle->Arg(3).U32()) << 32; const auto buffer = std::bit_cast(raw); buffer_binding = descriptors.Add(BufferResource{ diff --git a/src/shader_recompiler/ir/passes/srt.h b/src/shader_recompiler/ir/passes/srt.h index 4dce38674..918b832e0 100644 --- a/src/shader_recompiler/ir/passes/srt.h +++ b/src/shader_recompiler/ir/passes/srt.h @@ -7,9 +7,14 @@ #include #include "common/types.h" +namespace Serialization { +struct Archive; +} + namespace Shader { using PFN_SrtWalker = void PS4_SYSV_ABI (*)(const u32* /*user_data*/, u32* /*flat_dst*/); +PFN_SrtWalker RegisterWalkerCode(const u8* ptr, size_t size); struct PersistentSrtInfo { // Special case when fetch shader uses step rates. 
@@ -20,7 +25,11 @@ struct PersistentSrtInfo { }; PFN_SrtWalker walker_func{}; + size_t walker_func_size{}; u32 flattened_bufsize_dw = 16; // NumUserDataRegs + + void Serialize(Serialization::Archive& ar) const; + bool Deserialize(Serialization::Archive& ar); }; } // namespace Shader diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index c51e00088..52e37bbf0 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -8,6 +8,10 @@ namespace Shader { struct Profile { + u64 max_ubo_size{}; + u32 max_viewport_width{}; + u32 max_viewport_height{}; + u32 max_shared_memory_size{}; u32 supported_spirv{0x00010000}; u32 subgroup_size{}; bool support_int8{}; @@ -37,10 +41,7 @@ struct Profile { bool needs_lds_barriers{}; bool needs_buffer_offsets{}; bool needs_unorm_fixup{}; - u64 max_ubo_size{}; - u32 max_viewport_width{}; - u32 max_viewport_height{}; - u32 max_shared_memory_size{}; + bool _pad0{}; }; } // namespace Shader diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index 547d4524f..4764ddbec 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -29,7 +29,7 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { return blocks; } -IR::Program TranslateProgram(std::span code, Pools& pools, Info& info, +IR::Program TranslateProgram(const std::span& code, Pools& pools, Info& info, RuntimeInfo& runtime_info, const Profile& profile) { // Ensure first instruction is expected. constexpr u32 token_mov_vcchi = 0xBEEB03FF; @@ -55,8 +55,8 @@ IR::Program TranslateProgram(std::span code, Pools& pools, Info& info Gcn::CFG cfg{gcn_block_pool, program.ins_list}; // Structurize control flow graph and create program. 
- program.syntax_list = Shader::Gcn::BuildASL(pools.inst_pool, pools.block_pool, cfg, - program.info, runtime_info, profile); + program.syntax_list = + Shader::Gcn::BuildASL(pools.inst_pool, pools.block_pool, cfg, info, runtime_info, profile); program.blocks = GenerateBlocks(program.syntax_list); program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front()); diff --git a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h index 8180c29b3..80c63447a 100644 --- a/src/shader_recompiler/recompiler.h +++ b/src/shader_recompiler/recompiler.h @@ -27,7 +27,8 @@ struct Pools { } }; -[[nodiscard]] IR::Program TranslateProgram(std::span code, Pools& pools, Info& info, - RuntimeInfo& runtime_info, const Profile& profile); +[[nodiscard]] IR::Program TranslateProgram(const std::span& code, Pools& pools, + Info& info, RuntimeInfo& runtime_info, + const Profile& profile); } // namespace Shader diff --git a/src/shader_recompiler/resource.h b/src/shader_recompiler/resource.h index 29545d0bb..5d9965105 100644 --- a/src/shader_recompiler/resource.h +++ b/src/shader_recompiler/resource.h @@ -53,8 +53,15 @@ struct BufferResource { } constexpr AmdGpu::Buffer GetSharp(const auto& info) const noexcept { - const auto buffer = - inline_cbuf ? 
inline_cbuf : info.template ReadUdSharp(sharp_idx); + AmdGpu::Buffer buffer{}; + if (inline_cbuf) { + buffer = inline_cbuf; + if (inline_cbuf.base_address > 1) { + buffer.base_address += info.pgm_base; // address fixup + } + } else { + buffer = info.template ReadUdSharp(sharp_idx); + } if (!buffer.Valid()) { LOG_DEBUG(Render, "Encountered invalid buffer sharp"); return AmdGpu::Buffer::Null(); diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 9624c465f..8620ab970 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -159,7 +159,8 @@ struct GeometryRuntimeInfo { return num_outputs == other.num_outputs && outputs == other.outputs && num_invocations && other.num_invocations && output_vertices == other.output_vertices && in_primitive == other.in_primitive && - std::ranges::equal(out_primitive, other.out_primitive); + std::ranges::equal(out_primitive, other.out_primitive) && + vs_copy_hash == other.vs_copy_hash; } }; diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index a7215e29e..4f6bb44bf 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -79,8 +79,8 @@ struct SamplerSpecialization { struct StageSpecialization { static constexpr size_t MaxStageResources = 128; - const Shader::Info* info; - RuntimeInfo runtime_info; + const Info* info{}; + RuntimeInfo runtime_info{}; std::bitset bitset{}; std::optional fetch_shader_data{}; boost::container::small_vector vs_attribs; @@ -90,6 +90,7 @@ struct StageSpecialization { boost::container::small_vector samplers; Backend::Bindings start{}; + StageSpecialization() = default; StageSpecialization(const Info& info_, RuntimeInfo runtime_info_, const Profile& profile_, Backend::Bindings start_) : info{&info_}, runtime_info{runtime_info_}, start{start_} { @@ -158,7 +159,7 @@ struct StageSpecialization { // Initialize runtime_info fields that rely on analysis in 
tessellation passes if (info->l_stage == LogicalStage::TessellationControl || info->l_stage == LogicalStage::TessellationEval) { - Shader::TessellationDataConstantBuffer tess_constants; + TessellationDataConstantBuffer tess_constants{}; info->ReadTessConstantBuffer(tess_constants); if (info->l_stage == LogicalStage::TessellationControl) { runtime_info.hs_info.InitFromTessConstants(tess_constants); @@ -192,21 +193,43 @@ struct StageSpecialization { } } + [[nodiscard]] bool Valid() const { + return info != nullptr; + } + bool operator==(const StageSpecialization& other) const { - if (start != other.start) { + if (!Valid()) { return false; } + + if (vs_attribs != other.vs_attribs) { + return false; + } + if (runtime_info != other.runtime_info) { return false; } + if (fetch_shader_data != other.fetch_shader_data) { return false; } - for (u32 i = 0; i < vs_attribs.size(); i++) { - if (vs_attribs[i] != other.vs_attribs[i]) { - return false; - } + + if (fmasks != other.fmasks) { + return false; } + + // For VS which only generates geometry and doesn't have any inputs, its start + // bindings still may change as they depend on previously processed FS. The check below + // handles this case and prevents generation of redundant permutations. This is also safe + // for other types of shaders with no bindings. 
+ if (bitset.none() && other.bitset.none()) { + return true; + } + + if (start != other.start) { + return false; + } + u32 binding{}; for (u32 i = 0; i < buffers.size(); i++) { if (other.bitset[binding++] && buffers[i] != other.buffers[i]) { @@ -218,11 +241,7 @@ struct StageSpecialization { return false; } } - for (u32 i = 0; i < fmasks.size(); i++) { - if (other.bitset[binding++] && fmasks[i] != other.fmasks[i]) { - return false; - } - } + for (u32 i = 0; i < samplers.size(); i++) { if (samplers[i] != other.samplers[i]) { return false; @@ -230,6 +249,9 @@ struct StageSpecialization { } return true; } + + void Serialize(Serialization::Archive& ar) const; + bool Deserialize(Serialization::Archive& ar); }; } // namespace Shader diff --git a/src/video_core/amdgpu/pixel_format.h b/src/video_core/amdgpu/pixel_format.h index 21c2eee2a..69e082edb 100644 --- a/src/video_core/amdgpu/pixel_format.h +++ b/src/video_core/amdgpu/pixel_format.h @@ -79,10 +79,10 @@ enum class NumberFormat : u32 { Ubscaled = 13, }; -enum class NumberClass { - Float, - Sint, - Uint, +enum class NumberClass : u8 { + Float = 0, + Sint = 1, + Uint = 2, }; enum class CompSwizzle : u8 { diff --git a/src/video_core/cache_storage.cpp b/src/video_core/cache_storage.cpp new file mode 100644 index 000000000..1c46a4cf5 --- /dev/null +++ b/src/video_core/cache_storage.cpp @@ -0,0 +1,264 @@ +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/config.h" +#include "common/elf_info.h" +#include "common/io_file.h" +#include "common/polyfill_thread.h" +#include "common/thread.h" + +#include "video_core/cache_storage.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_pipeline_cache.h" + +#include + +#include +#include +#include +#include +#include + +namespace { + +std::mutex submit_mutex{}; +u32 num_requests{}; +std::condition_variable_any request_cv{}; +std::queue> req_queue{}; +std::mutex 
m_request{}; + +mz_zip_archive zip_ar{}; +bool ar_is_read_only{true}; + +} // namespace + +namespace Storage { + +void ProcessIO(const std::stop_token& stoken) { + Common::SetCurrentThreadName("shadPS4:PipelineCacheIO"); + + while (!stoken.stop_requested()) { + { + std::unique_lock lk{submit_mutex}; + Common::CondvarWait(request_cv, lk, stoken, [&] { return num_requests; }); + } + + if (stoken.stop_requested()) { + break; + } + + while (num_requests) { + std::packaged_task request{}; + { + std::scoped_lock lock{m_request}; + if (req_queue.empty()) { + continue; + } + request = std::move(req_queue.front()); + req_queue.pop(); + } + + if (request.valid()) { + request(); + request.get_future().wait(); + } + + --num_requests; + } + } +} + +constexpr std::string GetBlobFileExtension(BlobType type) { + switch (type) { + case BlobType::ShaderMeta: { + return "meta"; + } + case BlobType::ShaderBinary: { + return "spv"; + } + case BlobType::PipelineKey: { + return "key"; + } + case BlobType::ShaderProfile: { + return "bin"; + } + default: + UNREACHABLE(); + } +} + +void DataBase::Open() { + if (opened) { + return; + } + + const auto& game_info = Common::ElfInfo::Instance(); + + using namespace Common::FS; + if (Config::isPipelineCacheArchived()) { + mz_zip_zero_struct(&zip_ar); + + cache_path = GetUserPath(PathType::CacheDir) / + std::filesystem::path{game_info.GameSerial()}.replace_extension(".zip"); + + if (!mz_zip_reader_init_file(&zip_ar, cache_path.string().c_str(), + MZ_ZIP_FLAG_READ_ALLOW_WRITING) || + !mz_zip_validate_archive(&zip_ar, 0)) { + LOG_INFO(Render, "Cache archive {} is not found or archive is corrupted", + cache_path.string().c_str()); + mz_zip_reader_end(&zip_ar); + mz_zip_writer_init_file(&zip_ar, cache_path.string().c_str(), 0); + } + } else { + cache_path = GetUserPath(PathType::CacheDir) / game_info.GameSerial(); + if (!std::filesystem::exists(cache_path)) { + std::filesystem::create_directories(cache_path); + } + } + + io_worker = 
std::jthread{ProcessIO}; + opened = true; +} + +void DataBase::Close() { + if (!IsOpened()) { + return; + } + + io_worker.request_stop(); + io_worker.join(); + + if (Config::isPipelineCacheArchived()) { + mz_zip_writer_finalize_archive(&zip_ar); + mz_zip_writer_end(&zip_ar); + } + + LOG_INFO(Render, "Cache dumped"); +} + +template +bool WriteVector(const BlobType type, std::filesystem::path&& path_, std::vector&& v) { + { + auto request = std::packaged_task{[=]() { + auto path{path_}; + path.replace_extension(GetBlobFileExtension(type)); + if (Config::isPipelineCacheArchived()) { + ASSERT_MSG(!ar_is_read_only, + "The archive is read-only. Did you forget to call `FinishPreload`?"); + if (!mz_zip_writer_add_mem(&zip_ar, path.string().c_str(), v.data(), + v.size() * sizeof(T), MZ_BEST_COMPRESSION)) { + LOG_ERROR(Render, "Failed to add {} to the archive", path.string().c_str()); + } + } else { + using namespace Common::FS; + const auto file = IOFile{path, FileAccessMode::Create}; + file.Write(v); + } + }}; + std::scoped_lock lock{m_request}; + req_queue.emplace(std::move(request)); + } + + std::scoped_lock lk{submit_mutex}; + ++num_requests; + request_cv.notify_one(); + return true; +} + +template +void LoadVector(BlobType type, std::filesystem::path& path, std::vector& v) { + using namespace Common::FS; + path.replace_extension(GetBlobFileExtension(type)); + if (Config::isPipelineCacheArchived()) { + int index{-1}; + index = mz_zip_reader_locate_file(&zip_ar, path.string().c_str(), nullptr, 0); + if (index < 0) { + LOG_WARNING(Render, "File {} is not found in the archive", path.string().c_str()); + return; + } + mz_zip_archive_file_stat stat{}; + mz_zip_reader_file_stat(&zip_ar, index, &stat); + v.resize(stat.m_uncomp_size / sizeof(T)); + mz_zip_reader_extract_to_mem(&zip_ar, index, v.data(), stat.m_uncomp_size, 0); + } else { + const auto file = IOFile{path, FileAccessMode::Read}; + v.resize(file.GetSize() / sizeof(T)); + file.Read(v); + } +} + +bool 
DataBase::Save(BlobType type, const std::string& name, std::vector&& data) { + if (!opened) { + return false; + } + + auto path = Config::isPipelineCacheArchived() ? std::filesystem::path{name} : cache_path / name; + return WriteVector(type, std::move(path), std::move(data)); +} + +bool DataBase::Save(BlobType type, const std::string& name, std::vector&& data) { + if (!opened) { + return false; + } + + auto path = Config::isPipelineCacheArchived() ? std::filesystem::path{name} : cache_path / name; + return WriteVector(type, std::move(path), std::move(data)); +} + +void DataBase::Load(BlobType type, const std::string& name, std::vector& data) { + if (!opened) { + return; + } + + auto path = Config::isPipelineCacheArchived() ? std::filesystem::path{name} : cache_path / name; + return LoadVector(type, path, data); +} + +void DataBase::Load(BlobType type, const std::string& name, std::vector& data) { + if (!opened) { + return; + } + + auto path = Config::isPipelineCacheArchived() ? std::filesystem::path{name} : cache_path / name; + return LoadVector(type, path, data); +} + +void DataBase::ForEachBlob(BlobType type, const std::function&& data)>& func) { + const auto& ext = GetBlobFileExtension(type); + if (Config::isPipelineCacheArchived()) { + const auto num_files = mz_zip_reader_get_num_files(&zip_ar); + for (int index = 0; index < num_files; ++index) { + std::array file_name{}; + file_name.fill(0); + mz_zip_reader_get_filename(&zip_ar, index, file_name.data(), file_name.size()); + if (std::string{file_name.data()}.ends_with(ext)) { + mz_zip_archive_file_stat stat{}; + mz_zip_reader_file_stat(&zip_ar, index, &stat); + std::vector data(stat.m_uncomp_size); + mz_zip_reader_extract_to_mem(&zip_ar, index, data.data(), data.size(), 0); + func(std::move(data)); + } + } + } else { + for (const auto& file_name : std::filesystem::directory_iterator{cache_path}) { + if (file_name.path().extension().string().ends_with(ext)) { + using namespace Common::FS; + const auto& file = 
IOFile{file_name, FileAccessMode::Read}; + if (file.IsOpen()) { + std::vector data(file.GetSize()); + file.Read(data); + func(std::move(data)); + } + } + } + } +} + +void DataBase::FinishPreload() { + if (Config::isPipelineCacheArchived()) { + mz_zip_writer_init_from_reader(&zip_ar, cache_path.string().c_str()); + ar_is_read_only = false; + } +} + +} // namespace Storage diff --git a/src/video_core/cache_storage.h b/src/video_core/cache_storage.h new file mode 100644 index 000000000..91f2136e9 --- /dev/null +++ b/src/video_core/cache_storage.h @@ -0,0 +1,50 @@ +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/path_util.h" +#include "common/singleton.h" +#include "common/types.h" + +#include +#include +#include + +namespace Storage { + +enum class BlobType : u32 { + ShaderMeta, + ShaderBinary, + PipelineKey, + ShaderProfile, +}; + +class DataBase { +public: + static DataBase& Instance() { + return *Common::Singleton::Instance(); + } + + void Open(); + void Close(); + [[nodiscard]] bool IsOpened() const { + return opened; + } + void FinishPreload(); + + bool Save(BlobType type, const std::string& name, std::vector&& data); + bool Save(BlobType type, const std::string& name, std::vector&& data); + + void Load(BlobType type, const std::string& name, std::vector& data); + void Load(BlobType type, const std::string& name, std::vector& data); + + void ForEachBlob(BlobType type, const std::function&& data)>& func); + +private: + std::jthread io_worker{}; + std::filesystem::path cache_path{}; + bool opened{}; +}; + +} // namespace Storage diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 2b93eb7f3..35eda86da 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -13,7 +13,8 @@ namespace Vulkan { 
ComputePipeline::ComputePipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap, const Shader::Profile& profile, vk::PipelineCache pipeline_cache, ComputePipelineKey compute_key_, - const Shader::Info& info_, vk::ShaderModule module) + const Shader::Info& info_, vk::ShaderModule module, + SerializationSupport& sdata, bool preloading /*=false*/) : Pipeline{instance, scheduler, desc_heap, profile, pipeline_cache, true}, compute_key{compute_key_} { auto& info = stages[int(Shader::LogicalStage::Compute)]; @@ -29,7 +30,11 @@ ComputePipeline::ComputePipeline(const Instance& instance, Scheduler& scheduler, u32 binding{}; boost::container::small_vector bindings; for (const auto& buffer : info->buffers) { - const auto sharp = buffer.GetSharp(*info); + // During deserialization, we don't have access to the UD to fetch sharp data. To address + // this properly we need to track sharps or a portion of them in `sdata`, but since we're + // interested only in the "is storage" flag (which is not even effective atm), we can take a + // shortcut here. + const auto sharp = preloading ? AmdGpu::Buffer{} : buffer.GetSharp(*info); bindings.push_back({ .binding = binding++, .descriptorType = buffer.IsStorage(sharp) ? 
vk::DescriptorType::eStorageBuffer diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 79059b509..1cac7204c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -11,6 +11,10 @@ class BufferCache; class TextureCache; } // namespace VideoCore +namespace Serialization { +struct Archive; +} + namespace Vulkan { class Instance; @@ -26,14 +30,24 @@ struct ComputePipelineKey { friend bool operator!=(const ComputePipelineKey& lhs, const ComputePipelineKey& rhs) { return !(lhs == rhs); } + + void Serialize(Serialization::Archive& ar) const; + bool Deserialize(Serialization::Archive& ar); }; class ComputePipeline : public Pipeline { public: + struct SerializationSupport { + u32 dummy{}; + + void Serialize(Serialization::Archive& ar) const; + bool Deserialize(Serialization::Archive& ar); + }; + ComputePipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap, const Shader::Profile& profile, vk::PipelineCache pipeline_cache, ComputePipelineKey compute_key, const Shader::Info& info, - vk::ShaderModule module); + vk::ShaderModule module, SerializationSupport& sdata, bool preloading); ~ComputePipeline(); private: diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index e2531456c..242c9b6f2 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -41,12 +41,12 @@ GraphicsPipeline::GraphicsPipeline( vk::PipelineCache pipeline_cache, std::span infos, std::span runtime_infos, std::optional fetch_shader_, - std::span modules) + std::span modules, SerializationSupport& sdata, bool preloading) : Pipeline{instance, scheduler, desc_heap, profile, pipeline_cache}, key{key_}, fetch_shader{std::move(fetch_shader_)} { const vk::Device device = instance.GetDevice(); 
std::ranges::copy(infos, stages.begin()); - BuildDescSetLayout(); + BuildDescSetLayout(preloading); const auto debug_str = GetDebugString(); const vk::PushConstantRange push_constants = { @@ -68,27 +68,26 @@ GraphicsPipeline::GraphicsPipeline( pipeline_layout = std::move(layout); SetObjectName(device, *pipeline_layout, "Graphics PipelineLayout {}", debug_str); - VertexInputs vertex_attributes; - VertexInputs vertex_bindings; - VertexInputs divisors; - VertexInputs guest_buffers; - if (!instance.IsVertexInputDynamicState()) { - const auto& vs_info = runtime_infos[u32(Shader::LogicalStage::Vertex)].vs_info; - GetVertexInputs(vertex_attributes, vertex_bindings, divisors, guest_buffers, - vs_info.step_rate_0, vs_info.step_rate_1); + if (!preloading) { + VertexInputs guest_buffers; + if (!instance.IsVertexInputDynamicState()) { + const auto& vs_info = runtime_infos[u32(Shader::LogicalStage::Vertex)].vs_info; + GetVertexInputs(sdata.vertex_attributes, sdata.vertex_bindings, sdata.divisors, + guest_buffers, vs_info.step_rate_0, vs_info.step_rate_1); + } } const vk::PipelineVertexInputDivisorStateCreateInfo divisor_state = { - .vertexBindingDivisorCount = static_cast(divisors.size()), - .pVertexBindingDivisors = divisors.data(), + .vertexBindingDivisorCount = static_cast(sdata.divisors.size()), + .pVertexBindingDivisors = sdata.divisors.data(), }; const vk::PipelineVertexInputStateCreateInfo vertex_input_info = { - .pNext = divisors.empty() ? nullptr : &divisor_state, - .vertexBindingDescriptionCount = static_cast(vertex_bindings.size()), - .pVertexBindingDescriptions = vertex_bindings.data(), - .vertexAttributeDescriptionCount = static_cast(vertex_attributes.size()), - .pVertexAttributeDescriptions = vertex_attributes.data(), + .pNext = sdata.divisors.empty() ? 
nullptr : &divisor_state, + .vertexBindingDescriptionCount = static_cast(sdata.vertex_bindings.size()), + .pVertexBindingDescriptions = sdata.vertex_bindings.data(), + .vertexAttributeDescriptionCount = static_cast(sdata.vertex_attributes.size()), + .pVertexAttributeDescriptions = sdata.vertex_attributes.data(), }; const auto topology = LiverpoolToVK::PrimitiveType(key.prim_type); @@ -98,7 +97,6 @@ GraphicsPipeline::GraphicsPipeline( const bool is_rect_list = key.prim_type == AmdGpu::PrimitiveType::RectList; const bool is_quad_list = key.prim_type == AmdGpu::PrimitiveType::QuadList; - const auto& fs_info = runtime_infos[u32(Shader::LogicalStage::Fragment)].fs_info; const vk::PipelineTessellationStateCreateInfo tessellation_state = { .patchControlPoints = is_rect_list ? 3U : (is_quad_list ? 4U : key.patch_control_points), }; @@ -128,12 +126,15 @@ GraphicsPipeline::GraphicsPipeline( raster_chain.unlink(); } - const vk::PipelineMultisampleStateCreateInfo multisampling = { - .rasterizationSamples = LiverpoolToVK::NumSamples( - key.num_samples, instance.GetColorSampleCounts() & instance.GetDepthSampleCounts()), - .sampleShadingEnable = - fs_info.addr_flags.persp_sample_ena || fs_info.addr_flags.linear_sample_ena, - }; + if (!preloading) { + const auto& fs_info = runtime_infos[u32(Shader::LogicalStage::Fragment)].fs_info; + sdata.multisampling = { + .rasterizationSamples = LiverpoolToVK::NumSamples( + key.num_samples, instance.GetColorSampleCounts() & instance.GetDepthSampleCounts()), + .sampleShadingEnable = + fs_info.addr_flags.persp_sample_ena || fs_info.addr_flags.linear_sample_ena, + }; + } const vk::PipelineViewportDepthClipControlCreateInfoEXT clip_control = { .negativeOneToOne = key.clip_space == AmdGpu::ClipSpace::MinusWToW, @@ -164,7 +165,7 @@ GraphicsPipeline::GraphicsPipeline( } if (instance.IsVertexInputDynamicState()) { dynamic_states.push_back(vk::DynamicState::eVertexInputEXT); - } else if (!vertex_bindings.empty()) { + } else if 
(!sdata.vertex_bindings.empty()) { dynamic_states.push_back(vk::DynamicState::eVertexInputBindingStride); } @@ -200,10 +201,13 @@ GraphicsPipeline::GraphicsPipeline( }); } else if (is_rect_list || is_quad_list) { const auto type = is_quad_list ? AuxShaderType::QuadListTCS : AuxShaderType::RectListTCS; - auto tcs = Shader::Backend::SPIRV::EmitAuxilaryTessShader(type, fs_info); + if (!preloading) { + const auto& fs_info = runtime_infos[u32(Shader::LogicalStage::Fragment)].fs_info; + sdata.tcs = Shader::Backend::SPIRV::EmitAuxilaryTessShader(type, fs_info); + } shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eTessellationControl, - .module = CompileSPV(tcs, instance.GetDevice()), + .module = CompileSPV(sdata.tcs, instance.GetDevice()), .pName = "main", }); } @@ -215,11 +219,14 @@ GraphicsPipeline::GraphicsPipeline( .pName = "main", }); } else if (is_rect_list || is_quad_list) { - auto tes = - Shader::Backend::SPIRV::EmitAuxilaryTessShader(AuxShaderType::PassthroughTES, fs_info); + if (!preloading) { + const auto& fs_info = runtime_infos[u32(Shader::LogicalStage::Fragment)].fs_info; + sdata.tes = Shader::Backend::SPIRV::EmitAuxilaryTessShader( + AuxShaderType::PassthroughTES, fs_info); + } shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eTessellationEvaluation, - .module = CompileSPV(tes, instance.GetDevice()), + .module = CompileSPV(sdata.tes, instance.GetDevice()), .pName = "main", }); } @@ -360,7 +367,7 @@ GraphicsPipeline::GraphicsPipeline( .pTessellationState = &tessellation_state, .pViewportState = &viewport_info, .pRasterizationState = &raster_chain.get(), - .pMultisampleState = &multisampling, + .pMultisampleState = &sdata.multisampling, .pColorBlendState = &color_blending, .pDynamicState = &dynamic_info, .layout = *pipeline_layout, @@ -428,7 +435,7 @@ template void GraphicsPipeline::GetVertexInputs( VertexInputs& divisors, VertexInputs& guest_buffers, u32 
step_rate_0, u32 step_rate_1) const; -void GraphicsPipeline::BuildDescSetLayout() { +void GraphicsPipeline::BuildDescSetLayout(bool preloading) { boost::container::small_vector bindings; u32 binding{}; @@ -438,7 +445,9 @@ void GraphicsPipeline::BuildDescSetLayout() { } const auto stage_bit = LogicalStageToStageBit[u32(stage->l_stage)]; for (const auto& buffer : stage->buffers) { - const auto sharp = buffer.GetSharp(*stage); + const auto sharp = + preloading ? AmdGpu::Buffer{} + : buffer.GetSharp(*stage); // See the comment in compute PL creation bindings.push_back({ .binding = binding++, .descriptorType = buffer.IsStorage(sharp) ? vk::DescriptorType::eStorageBuffer diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 8254605cb..0dea92864 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -63,17 +63,33 @@ struct GraphicsPipelineKey { bool operator==(const GraphicsPipelineKey& key) const noexcept { return std::memcmp(this, &key, sizeof(key)) == 0; } + + void Serialize(Serialization::Archive& ar) const; + bool Deserialize(Serialization::Archive& ar); }; class GraphicsPipeline : public Pipeline { public: + struct SerializationSupport { + VertexInputs vertex_attributes{}; + VertexInputs vertex_bindings{}; + VertexInputs divisors{}; + vk::PipelineMultisampleStateCreateInfo multisampling{}; + std::vector tcs{}; + std::vector tes{}; + + void Serialize(Serialization::Archive& ar) const; + bool Deserialize(Serialization::Archive& ar); + }; + GraphicsPipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap, const Shader::Profile& profile, const GraphicsPipelineKey& key, vk::PipelineCache pipeline_cache, std::span stages, std::span runtime_infos, std::optional fetch_shader, - std::span modules); + std::span modules, SerializationSupport& sdata, + bool preloading); ~GraphicsPipeline(); const 
std::optional& GetFetchShader() const noexcept { @@ -92,7 +108,7 @@ public: u32 step_rate_1) const; private: - void BuildDescSetLayout(); + void BuildDescSetLayout(bool preloading); private: GraphicsPipelineKey key; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 4706bff24..a0ea58817 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -13,9 +13,10 @@ #include "shader_recompiler/recompiler.h" #include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/liverpool.h" +#include "video_core/cache_storage.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" -#include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_pipeline_serialization.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" @@ -223,6 +224,13 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, desc_heap{instance, scheduler.GetMasterSemaphore(), DescriptorHeapSizes} { const auto& vk12_props = instance.GetVk12Properties(); profile = Shader::Profile{ + // When binding a UBO, we calculate its size considering the offset in the larger buffer + // cache underlying resource. In some cases, it may produce sizes exceeding the system + // maximum allowed UBO range, so we need to reduce the threshold to prevent issues. 
+ .max_ubo_size = instance.UniformMaxSize() - instance.UniformMinAlignment(), + .max_viewport_width = instance.GetMaxViewportWidth(), + .max_viewport_height = instance.GetMaxViewportHeight(), + .max_shared_memory_size = instance.MaxComputeSharedMemorySize(), .supported_spirv = SpirvVersion1_6, .subgroup_size = instance.SubgroupSize(), .support_int8 = instance.IsShaderInt8Supported(), @@ -258,14 +266,10 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, instance.GetDriverID() == vk::DriverId::eMoltenvk, .needs_buffer_offsets = instance.StorageMinAlignment() > 4, .needs_unorm_fixup = instance.GetDriverID() == vk::DriverId::eMoltenvk, - // When binding a UBO, we calculate its size considering the offset in the larger buffer - // cache underlying resource. In some cases, it may produce sizes exceeding the system - // maximum allowed UBO range, so we need to reduce the threshold to prevent issues. - .max_ubo_size = instance.UniformMaxSize() - instance.UniformMinAlignment(), - .max_viewport_width = instance.GetMaxViewportWidth(), - .max_viewport_height = instance.GetMaxViewportHeight(), - .max_shared_memory_size = instance.MaxComputeSharedMemorySize(), }; + + WarmUp(); + auto [cache_result, cache] = instance.GetDevice().createPipelineCacheUnique({}); ASSERT_MSG(cache_result == vk::Result::eSuccess, "Failed to create pipeline cache: {}", vk::to_string(cache_result)); @@ -283,9 +287,14 @@ const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() { const auto pipeline_hash = std::hash{}(graphics_key); LOG_INFO(Render_Vulkan, "Compiling graphics pipeline {:#x}", pipeline_hash); - it.value() = std::make_unique(instance, scheduler, desc_heap, profile, - graphics_key, *pipeline_cache, infos, - runtime_infos, fetch_shader, modules); + GraphicsPipeline::SerializationSupport sdata{}; + it.value() = std::make_unique( + instance, scheduler, desc_heap, profile, graphics_key, *pipeline_cache, infos, + runtime_infos, fetch_shader, modules, sdata, 
false); + + RegisterPipelineData(graphics_key, pipeline_hash, sdata); + ++num_new_pipelines; + if (Config::collectShadersForDebug()) { for (auto stage = 0; stage < MaxShaderStages; ++stage) { if (infos[stage]) { @@ -294,6 +303,7 @@ const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() { } } } + fetch_shader.reset(); } return it->second.get(); } @@ -307,9 +317,13 @@ const ComputePipeline* PipelineCache::GetComputePipeline() { const auto pipeline_hash = std::hash{}(compute_key); LOG_INFO(Render_Vulkan, "Compiling compute pipeline {:#x}", pipeline_hash); - it.value() = - std::make_unique(instance, scheduler, desc_heap, profile, - *pipeline_cache, compute_key, *infos[0], modules[0]); + ComputePipeline::SerializationSupport sdata{}; + it.value() = std::make_unique(instance, scheduler, desc_heap, profile, + *pipeline_cache, compute_key, *infos[0], + modules[0], sdata, false); + RegisterPipelineData(compute_key, sdata); + ++num_new_pipelines; + if (Config::collectShadersForDebug()) { auto& m = modules[0]; module_related_pipelines[m].emplace_back(compute_key); @@ -445,6 +459,7 @@ bool PipelineCache::RefreshGraphicsStages() { }; infos.fill(nullptr); + modules.fill(nullptr); bind_stage(Stage::Fragment, LogicalStage::Fragment); const auto* fs_info = infos[static_cast(LogicalStage::Fragment)]; @@ -515,7 +530,7 @@ bool PipelineCache::RefreshComputeKey() { } vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, Shader::RuntimeInfo& runtime_info, - std::span code, size_t perm_idx, + const std::span& code, size_t perm_idx, Shader::Backend::Bindings& binding) { LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash, perm_idx != 0 ? 
"(permutation)" : ""); @@ -536,6 +551,8 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, Shader::Runtim module = CompileSPV(spv, instance.GetDevice()); } + RegisterShaderBinary(std::move(spv), info.pgm_hash, perm_idx); + const auto name = GetShaderName(info.stage, info.pgm_hash, perm_idx); Vulkan::SetObjectName(instance.GetDevice(), module, name); if (Config::collectShadersForDebug()) { @@ -546,7 +563,7 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, Shader::Runtim } PipelineCache::Result PipelineCache::GetProgram(Stage stage, LogicalStage l_stage, - Shader::ShaderParams params, + const Shader::ShaderParams& params, Shader::Backend::Bindings& binding) { auto runtime_info = BuildRuntimeInfo(stage, l_stage); auto [it_pgm, new_program] = program_cache.try_emplace(params.hash); @@ -555,32 +572,42 @@ PipelineCache::Result PipelineCache::GetProgram(Stage stage, LogicalStage l_stag auto& program = it_pgm.value(); auto start = binding; const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding); - const auto spec = Shader::StageSpecialization(program->info, runtime_info, profile, start); + auto spec = Shader::StageSpecialization(program->info, runtime_info, profile, start); + const auto perm_hash = HashCombine(params.hash, 0); + + RegisterShaderMeta(program->info, spec.fetch_shader_data, spec, perm_hash, 0); program->AddPermut(module, std::move(spec)); - return std::make_tuple(&program->info, module, spec.fetch_shader_data, - HashCombine(params.hash, 0)); + return std::make_tuple(&program->info, module, program->modules[0].spec.fetch_shader_data, + perm_hash); } - it_pgm.value()->info.user_data = params.user_data; auto& program = it_pgm.value(); auto& info = program->info; + info.pgm_base = params.Base(); // Needs to be actualized for inline cbuffer address fixup + info.user_data = params.user_data; info.RefreshFlatBuf(); - const auto spec = Shader::StageSpecialization(info, runtime_info, profile, 
binding); + auto spec = Shader::StageSpecialization(info, runtime_info, profile, binding); + size_t perm_idx = program->modules.size(); + u64 perm_hash = HashCombine(params.hash, perm_idx); + vk::ShaderModule module{}; const auto it = std::ranges::find(program->modules, spec, &Program::Module::spec); if (it == program->modules.end()) { auto new_info = Shader::Info(stage, l_stage, params); module = CompileModule(new_info, runtime_info, params.code, perm_idx, binding); + + RegisterShaderMeta(info, spec.fetch_shader_data, spec, perm_hash, perm_idx); program->AddPermut(module, std::move(spec)); } else { info.AddBindings(binding); module = it->module; perm_idx = std::distance(program->modules.begin(), it); + perm_hash = HashCombine(params.hash, perm_idx); } - return std::make_tuple(&info, module, spec.fetch_shader_data, - HashCombine(params.hash, perm_idx)); + return std::make_tuple(&program->info, module, + program->modules[perm_idx].spec.fetch_shader_data, perm_hash); } std::optional PipelineCache::ReplaceShader(vk::ShaderModule module, @@ -654,5 +681,4 @@ std::optional> PipelineCache::GetShaderPatch(u64 hash, Shader:: file.Read(code); return code; } - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 706b99536..754397214 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -23,6 +23,10 @@ namespace AmdGpu { class Liverpool; } +namespace Serialization { +struct Archive; +} + namespace Shader { struct Info; } @@ -38,17 +42,25 @@ struct Program { vk::ShaderModule module; Shader::StageSpecialization spec; }; - using ModuleList = boost::container::small_vector; + static constexpr size_t MaxPermutations = 8; + using ModuleList = boost::container::small_vector; Shader::Info info; - ModuleList modules; + ModuleList modules{}; - explicit Program(Shader::Stage stage, Shader::LogicalStage l_stage, 
Shader::ShaderParams params) + Program() = default; + Program(Shader::Stage stage, Shader::LogicalStage l_stage, Shader::ShaderParams params) : info{stage, l_stage, params} {} - void AddPermut(vk::ShaderModule module, const Shader::StageSpecialization&& spec) { + void AddPermut(vk::ShaderModule module, Shader::StageSpecialization&& spec) { modules.emplace_back(module, std::move(spec)); } + + void InsertPermut(vk::ShaderModule module, Shader::StageSpecialization&& spec, + size_t perm_idx) { + modules.resize(std::max(modules.size(), perm_idx + 1)); // <-- beware of realloc + modules[perm_idx] = {module, std::move(spec)}; + } }; class PipelineCache { @@ -57,6 +69,13 @@ public: AmdGpu::Liverpool* liverpool); ~PipelineCache(); + void WarmUp(); + void Sync(); + + bool LoadComputePipeline(Serialization::Archive& ar); + bool LoadGraphicsPipeline(Serialization::Archive& ar); + bool LoadPipelineStage(Serialization::Archive& ar, size_t stage); + const GraphicsPipeline* GetGraphicsPipeline(); const ComputePipeline* GetComputePipeline(); @@ -64,7 +83,7 @@ public: using Result = std::tuple, u64>; Result GetProgram(Shader::Stage stage, Shader::LogicalStage l_stage, - Shader::ShaderParams params, Shader::Backend::Bindings& binding); + const Shader::ShaderParams& params, Shader::Backend::Bindings& binding); std::optional ReplaceShader(vk::ShaderModule module, std::span spv_code); @@ -86,10 +105,14 @@ private: std::optional> GetShaderPatch(u64 hash, Shader::Stage stage, size_t perm_idx, std::string_view ext); vk::ShaderModule CompileModule(Shader::Info& info, Shader::RuntimeInfo& runtime_info, - std::span code, size_t perm_idx, + const std::span& code, size_t perm_idx, Shader::Backend::Bindings& binding); const Shader::RuntimeInfo& BuildRuntimeInfo(Shader::Stage stage, Shader::LogicalStage l_stage); + [[nodiscard]] bool IsPipelineCacheDirty() const { + return num_new_pipelines > 0; + } + private: const Instance& instance; Scheduler& scheduler; @@ -108,6 +131,7 @@ private: 
std::optional fetch_shader{}; GraphicsPipelineKey graphics_key{}; ComputePipelineKey compute_key{}; + u32 num_new_pipelines{}; // new pipelines added to the cache since the game start // Only if Config::collectShadersForDebug() tsl::robin_map& fetch_shader_data, + const Shader::StageSpecialization& spec, size_t perm_hash, + size_t perm_idx) { + if (!Storage::DataBase::Instance().IsOpened()) { + return; + } + + Serialization::Archive ar; + Serialization::Writer meta{ar}; + + meta.Write(Serialization::ShaderMetaVersion); + meta.Write(Serialization::ShaderBinaryVersion); + + meta.Write(perm_hash); + meta.Write(perm_idx); + + spec.Serialize(ar); + info.Serialize(ar); + + Storage::DataBase::Instance().Save(Storage::BlobType::ShaderMeta, + fmt::format("{:#018x}", perm_hash), ar.TakeOff()); +} + +void RegisterShaderBinary(std::vector&& spv, u64 pgm_hash, size_t perm_idx) { + if (!Storage::DataBase::Instance().IsOpened()) { + return; + } + + Storage::DataBase::Instance().Save(Storage::BlobType::ShaderBinary, + fmt::format("{:#018x}_{}", pgm_hash, perm_idx), + std::move(spv)); +} + +bool LoadShaderMeta(Serialization::Archive& ar, Shader::Info& info, + std::optional& fetch_shader_data, + Shader::StageSpecialization& spec, size_t& perm_idx) { + Serialization::Reader meta{ar}; + + u32 meta_version{}; + meta.Read(meta_version); + if (meta_version != Serialization::ShaderMetaVersion) { + return false; + } + + u32 binary_version{}; + meta.Read(binary_version); + if (binary_version != Serialization::ShaderBinaryVersion) { + return false; + } + + u64 perm_hash_ar{}; + meta.Read(perm_hash_ar); + meta.Read(perm_idx); + + spec.Deserialize(ar); + info.Deserialize(ar); + + fetch_shader_data = spec.fetch_shader_data; + return true; +} + +void ComputePipelineKey::Serialize(Serialization::Archive& ar) const { + Serialization::Writer key{ar}; + key.Write(value); +} + +bool ComputePipelineKey::Deserialize(Serialization::Archive& ar) { + Serialization::Reader key{ar}; + key.Read(value); + 
return true; +} + +void ComputePipeline::SerializationSupport::Serialize(Serialization::Archive& ar) const { + // Nothing here yet + return; +} + +bool ComputePipeline::SerializationSupport::Deserialize(Serialization::Archive& ar) { + // Nothing here yet + return true; +} + +bool PipelineCache::LoadComputePipeline(Serialization::Archive& ar) { + compute_key.Deserialize(ar); + + ComputePipeline::SerializationSupport sdata{}; + sdata.Deserialize(ar); + + std::vector meta_blob; + Storage::DataBase::Instance().Load(Storage::BlobType::ShaderMeta, + fmt::format("{:#018x}", compute_key.value), meta_blob); + if (meta_blob.empty()) { + return false; + } + + Serialization::Archive meta_ar{std::move(meta_blob)}; + + if (!LoadPipelineStage(meta_ar, 0)) { + return false; + } + + const auto [it, is_new] = compute_pipelines.try_emplace(compute_key); + ASSERT(is_new); + + it.value() = + std::make_unique(instance, scheduler, desc_heap, profile, *pipeline_cache, + compute_key, *infos[0], modules[0], sdata, true); + + infos.fill(nullptr); + modules.fill(nullptr); + + return true; +} + +void GraphicsPipelineKey::Serialize(Serialization::Archive& ar) const { + Serialization::Writer key{ar}; + + key.Write(this, sizeof(*this)); +} + +bool GraphicsPipelineKey::Deserialize(Serialization::Archive& ar) { + Serialization::Reader key{ar}; + + key.Read(this, sizeof(*this)); + return true; +} + +void GraphicsPipeline::SerializationSupport::Serialize(Serialization::Archive& ar) const { + Serialization::Writer sdata{ar}; + + sdata.Write(&vertex_attributes, sizeof(vertex_attributes)); + sdata.Write(&vertex_bindings, sizeof(vertex_bindings)); + sdata.Write(&divisors, sizeof(divisors)); + sdata.Write(multisampling); + sdata.Write(tcs); + sdata.Write(tes); +} + +bool GraphicsPipeline::SerializationSupport::Deserialize(Serialization::Archive& ar) { + Serialization::Reader sdata{ar}; + + sdata.Read(&vertex_attributes, sizeof(vertex_attributes)); + sdata.Read(&vertex_bindings, sizeof(vertex_bindings)); + 
sdata.Read(&divisors, sizeof(divisors)); + sdata.Read(multisampling); + sdata.Read(tcs); + sdata.Read(tes); + return true; +} + +bool PipelineCache::LoadGraphicsPipeline(Serialization::Archive& ar) { + graphics_key.Deserialize(ar); + + GraphicsPipeline::SerializationSupport sdata{}; + sdata.Deserialize(ar); + + for (int stage_idx = 0; stage_idx < MaxShaderStages; ++stage_idx) { + const auto& hash = graphics_key.stage_hashes[stage_idx]; + if (!hash) { + continue; + } + + std::vector meta_blob; + Storage::DataBase::Instance().Load(Storage::BlobType::ShaderMeta, + fmt::format("{:#018x}", hash), meta_blob); + if (meta_blob.empty()) { + return false; + } + + Serialization::Archive meta_ar{std::move(meta_blob)}; + + if (!LoadPipelineStage(meta_ar, stage_idx)) { + return false; + } + } + + const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key); + ASSERT(is_new); + + it.value() = std::make_unique( + instance, scheduler, desc_heap, profile, graphics_key, *pipeline_cache, infos, + runtime_infos, fetch_shader, modules, sdata, true); + + infos.fill(nullptr); + modules.fill(nullptr); + fetch_shader.reset(); + + return true; +} + +bool PipelineCache::LoadPipelineStage(Serialization::Archive& ar, size_t stage) { + auto program = std::make_unique(); + Shader::StageSpecialization spec{}; + spec.info = &program->info; + size_t perm_idx{}; + if (!LoadShaderMeta(ar, program->info, fetch_shader, spec, perm_idx)) { + return false; + } + + std::vector spv{}; + Storage::DataBase::Instance().Load(Storage::BlobType::ShaderBinary, + fmt::format("{:#018x}_{}", program->info.pgm_hash, perm_idx), + spv); + if (spv.empty()) { + return false; + } + + // Permutation hash depends on shader variation index. 
To prevent collisions, we need insert it + // at the exact position rather than append + + vk::ShaderModule module{}; + + auto [it_pgm, new_program] = program_cache.try_emplace(program->info.pgm_hash); + if (new_program) { + module = CompileSPV(spv, instance.GetDevice()); + it_pgm.value() = std::move(program); + } else { + const auto& it = std::ranges::find(it_pgm.value()->modules, spec, &Program::Module::spec); + if (it != it_pgm.value()->modules.end()) { + // If the permutation is already preloaded, make sure it has the same permutation index + const auto idx = std::distance(it_pgm.value()->modules.begin(), it); + ASSERT_MSG(perm_idx == idx, "Permutation {} is already inserted at {}! ({}_{:x})", + perm_idx, idx, program->info.stage, program->info.pgm_hash); + module = it->module; + } else { + module = CompileSPV(spv, instance.GetDevice()); + } + } + it_pgm.value()->InsertPermut(module, std::move(spec), perm_idx); + + infos[stage] = &it_pgm.value()->info; + modules[stage] = module; + + return true; +} + +void PipelineCache::WarmUp() { + if (!Config::isPipelineCacheEnabled()) { + return; + } + + Storage::DataBase::Instance().Open(); + + // Check if cache is compatible + std::vector profile_data{}; + Storage::DataBase::Instance().Load(Storage::BlobType::ShaderProfile, "profile", profile_data); + if (profile_data.empty()) { + Storage::DataBase::Instance().FinishPreload(); + + profile_data.resize(sizeof(profile)); + std::memcpy(profile_data.data(), &profile, sizeof(profile)); + Storage::DataBase::Instance().Save(Storage::BlobType::ShaderProfile, "profile", + std::move(profile_data)); + return; + } + if (std::memcmp(profile_data.data(), &profile, sizeof(profile)) != 0) { + LOG_WARNING(Render, + "Pipeline cache isn't compatible with current system. 
Ignoring the cache"); + return; + } + + u32 num_pipelines{}; + u32 num_total_pipelines{}; + + Storage::DataBase::Instance().ForEachBlob( + Storage::BlobType::PipelineKey, [&](std::vector&& data) { + ++num_total_pipelines; + + Serialization::Archive ar{std::move(data)}; + Serialization::Reader pldata{ar}; + + u32 version{}; + pldata.Read(version); + if (version != Serialization::PipelineKeyVersion) { + return; + } + + u32 is_compute{}; + pldata.Read(is_compute); + + bool result{}; + if (is_compute) { + result = LoadComputePipeline(ar); + } else { + result = LoadGraphicsPipeline(ar); + } + + if (result) { + ++num_pipelines; + } + }); + + LOG_INFO(Render, "Preloaded {} pipelines", num_pipelines); + if (num_total_pipelines > num_pipelines) { + LOG_WARNING(Render, "{} stale pipelines were found. Consider re-generating the cache", + num_total_pipelines - num_pipelines); + } + + Storage::DataBase::Instance().FinishPreload(); +} + +void PipelineCache::Sync() { + Storage::DataBase::Instance().Close(); +} + +} // namespace Vulkan + +namespace Shader { + +void Info::Serialize(Serialization::Archive& ar) const { + Serialization::Writer info{ar}; + + info.Write(this, sizeof(InfoPersistent)); + info.Write(flattened_ud_buf); + srt_info.Serialize(ar); +} + +bool Info::Deserialize(Serialization::Archive& ar) { + Serialization::Reader info{ar}; + + info.Read(this, sizeof(Shader::InfoPersistent)); + info.Read(flattened_ud_buf); + + return srt_info.Deserialize(ar); +} + +void Gcn::FetchShaderData::Serialize(Serialization::Archive& ar) const { + Serialization::Writer fetch{ar}; + ar.Grow(6 + attributes.size() * sizeof(VertexAttribute)); + + fetch.Write(size); + fetch.Write(vertex_offset_sgpr); + fetch.Write(instance_offset_sgpr); + fetch.Write(attributes); +} + +bool Gcn::FetchShaderData::Deserialize(Serialization::Archive& ar) { + Serialization::Reader fetch{ar}; + + fetch.Read(size); + fetch.Read(vertex_offset_sgpr); + fetch.Read(instance_offset_sgpr); + fetch.Read(attributes); + + 
return true; +} + +void PersistentSrtInfo::Serialize(Serialization::Archive& ar) const { + Serialization::Writer srt{ar}; + + srt.Write(this, sizeof(*this)); + if (walker_func_size) { + srt.Write(reinterpret_cast(walker_func), walker_func_size); + } +} + +bool PersistentSrtInfo::Deserialize(Serialization::Archive& ar) { + Serialization::Reader srt{ar}; + + srt.Read(this, sizeof(*this)); + + if (walker_func_size) { + walker_func = RegisterWalkerCode(ar.CurrPtr(), walker_func_size); + ar.Advance(walker_func_size); + } + + return true; +} + +void StageSpecialization::Serialize(Serialization::Archive& ar) const { + Serialization::Writer spec{ar}; + + spec.Write(start); + spec.Write(runtime_info); + + spec.Write(bitset.to_string()); + + if (fetch_shader_data) { + spec.Write(sizeof(*fetch_shader_data)); + fetch_shader_data->Serialize(ar); + } else { + spec.Write(size_t{0}); + } + + spec.Write(vs_attribs); + spec.Write(buffers); + spec.Write(images); + spec.Write(fmasks); + spec.Write(samplers); +} + +bool StageSpecialization::Deserialize(Serialization::Archive& ar) { + Serialization::Reader spec{ar}; + + spec.Read(start); + spec.Read(runtime_info); + + std::string bits{}; + spec.Read(bits); + bitset = std::bitset(bits); + + u64 fetch_data_size{}; + spec.Read(fetch_data_size); + + if (fetch_data_size) { + Gcn::FetchShaderData fetch_data; + fetch_data.Deserialize(ar); + fetch_shader_data = fetch_data; + } + + spec.Read(vs_attribs); + spec.Read(buffers); + spec.Read(images); + spec.Read(fmasks); + spec.Read(samplers); + + return true; +} + +} // namespace Shader diff --git a/src/video_core/renderer_vulkan/vk_pipeline_serialization.h b/src/video_core/renderer_vulkan/vk_pipeline_serialization.h new file mode 100644 index 000000000..31ea4e357 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_pipeline_serialization.h @@ -0,0 +1,21 @@ +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include 
"shader_recompiler/frontend/fetch_shader.h" +#include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" + +namespace Vulkan { + +void RegisterPipelineData(const ComputePipelineKey& key, + ComputePipeline::SerializationSupport& sdata); +void RegisterPipelineData(const GraphicsPipelineKey& key, u64 hash, + GraphicsPipeline::SerializationSupport& sdata); +void RegisterShaderMeta(const Shader::Info& info, + const std::optional& fetch_shader_data, + const Shader::StageSpecialization& spec, size_t perm_hash, size_t perm_idx); +void RegisterShaderBinary(std::vector&& spv, u64 pgm_hash, size_t perm_idx); + +} // namespace Vulkan From 78e301c3db87cf517a4bde5542e397ff803859ce Mon Sep 17 00:00:00 2001 From: TheThunderTurner <64212185+thethunderturner@users.noreply.github.com> Date: Sat, 29 Nov 2025 22:47:15 +0100 Subject: [PATCH 14/25] libSceNpCommerce (#3839) * libSceNpCommerce * copyright notice --- CMakeLists.txt | 2 + src/common/logging/filter.cpp | 1 + src/common/logging/types.h | 1 + src/core/libraries/libs.cpp | 2 + src/core/libraries/np/np_commerce.cpp | 88 +++++++++++++++++++++++++++ src/core/libraries/np/np_commerce.h | 16 +++++ 6 files changed, 110 insertions(+) create mode 100644 src/core/libraries/np/np_commerce.cpp create mode 100644 src/core/libraries/np/np_commerce.h diff --git a/CMakeLists.txt b/CMakeLists.txt index cf78e92bf..04534ec26 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -573,6 +573,8 @@ set(VDEC_LIB src/core/libraries/videodec/videodec2_impl.cpp set(NP_LIBS src/core/libraries/np/np_error.h src/core/libraries/np/np_common.cpp src/core/libraries/np/np_common.h + src/core/libraries/np/np_commerce.cpp + src/core/libraries/np/np_commerce.h src/core/libraries/np/np_manager.cpp src/core/libraries/np/np_manager.h src/core/libraries/np/np_score.cpp diff --git a/src/common/logging/filter.cpp b/src/common/logging/filter.cpp index bf6844c7d..fd8386aff 100644 --- a/src/common/logging/filter.cpp +++ 
b/src/common/logging/filter.cpp @@ -104,6 +104,7 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) { SUB(Lib, Move) \ SUB(Lib, NpAuth) \ SUB(Lib, NpCommon) \ + SUB(Lib, NpCommerce) \ SUB(Lib, NpManager) \ SUB(Lib, NpScore) \ SUB(Lib, NpTrophy) \ diff --git a/src/common/logging/types.h b/src/common/logging/types.h index 035a959db..82db477ed 100644 --- a/src/common/logging/types.h +++ b/src/common/logging/types.h @@ -70,6 +70,7 @@ enum class Class : u8 { Lib_Http2, ///< The LibSceHttp2 implementation. Lib_SysModule, ///< The LibSceSysModule implementation Lib_NpCommon, ///< The LibSceNpCommon implementation + Lib_NpCommerce, ///< The LibSceNpCommerce implementation Lib_NpAuth, ///< The LibSceNpAuth implementation Lib_NpManager, ///< The LibSceNpManager implementation Lib_NpScore, ///< The LibSceNpScore implementation diff --git a/src/core/libraries/libs.cpp b/src/core/libraries/libs.cpp index eec9ee7c8..1f7ecb75e 100644 --- a/src/core/libraries/libs.cpp +++ b/src/core/libraries/libs.cpp @@ -32,6 +32,7 @@ #include "core/libraries/network/ssl.h" #include "core/libraries/network/ssl2.h" #include "core/libraries/np/np_auth.h" +#include "core/libraries/np/np_commerce.h" #include "core/libraries/np/np_common.h" #include "core/libraries/np/np_manager.h" #include "core/libraries/np/np_party.h" @@ -93,6 +94,7 @@ void InitHLELibs(Core::Loader::SymbolsResolver* sym) { Libraries::SysModule::RegisterLib(sym); Libraries::Posix::RegisterLib(sym); Libraries::AudioIn::RegisterLib(sym); + Libraries::Np::NpCommerce::RegisterLib(sym); Libraries::Np::NpCommon::RegisterLib(sym); Libraries::Np::NpManager::RegisterLib(sym); Libraries::Np::NpScore::RegisterLib(sym); diff --git a/src/core/libraries/np/np_commerce.cpp b/src/core/libraries/np/np_commerce.cpp new file mode 100644 index 000000000..1e8440ec0 --- /dev/null +++ b/src/core/libraries/np/np_commerce.cpp @@ -0,0 +1,88 @@ +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project +// 
SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/logging/log.h" +#include "core/libraries/error_codes.h" +#include "core/libraries/libs.h" + +namespace Libraries::Np::NpCommerce { +s32 PS4_SYSV_ABI sceNpCommerceDialogClose() { + LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceNpCommerceDialogGetResult(s32* result) { + LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); + return ORBIS_OK; +} + +s8 PS4_SYSV_ABI sceNpCommerceDialogGetStatus() { + LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceNpCommerceDialogInitialize() { + LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceNpCommerceDialogInitializeInternal() { + LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); + return ORBIS_OK; +} + +s16 PS4_SYSV_ABI sceNpCommerceDialogOpen(s64 check) { + LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceNpCommerceDialogTerminate() { + LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceNpCommerceDialogUpdateStatus() { + LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceNpCommerceHidePsStoreIcon() { + LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceNpCommerceSetPsStoreIconLayout(s32 layout) { + LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceNpCommerceShowPsStoreIcon(s16 icon) { + LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); + return ORBIS_OK; +} + +void RegisterLib(Core::Loader::SymbolsResolver* sym) { + LIB_FUNCTION("NU3ckGHMFXo", "libSceNpCommerce", 1, "libSceNpCommerce", + sceNpCommerceDialogClose); + LIB_FUNCTION("r42bWcQbtZY", "libSceNpCommerce", 1, "libSceNpCommerce", + sceNpCommerceDialogGetResult); + LIB_FUNCTION("CCbC+lqqvF0", "libSceNpCommerce", 1, "libSceNpCommerce", + sceNpCommerceDialogGetStatus); + 
LIB_FUNCTION("0aR2aWmQal4", "libSceNpCommerce", 1, "libSceNpCommerce", + sceNpCommerceDialogInitialize); + LIB_FUNCTION("9ZiLXAGG5rg", "libSceNpCommerce", 1, "libSceNpCommerce", + sceNpCommerceDialogInitializeInternal); + LIB_FUNCTION("DfSCDRA3EjY", "libSceNpCommerce", 1, "libSceNpCommerce", sceNpCommerceDialogOpen); + LIB_FUNCTION("m-I92Ab50W8", "libSceNpCommerce", 1, "libSceNpCommerce", + sceNpCommerceDialogTerminate); + LIB_FUNCTION("LR5cwFMMCVE", "libSceNpCommerce", 1, "libSceNpCommerce", + sceNpCommerceDialogUpdateStatus); + LIB_FUNCTION("dsqCVsNM0Zg", "libSceNpCommerce", 1, "libSceNpCommerce", + sceNpCommerceHidePsStoreIcon); + LIB_FUNCTION("uKTDW8hk-ts", "libSceNpCommerce", 1, "libSceNpCommerce", + sceNpCommerceSetPsStoreIconLayout); + LIB_FUNCTION("DHmwsa6S8Tc", "libSceNpCommerce", 1, "libSceNpCommerce", + sceNpCommerceShowPsStoreIcon); +}; + +} // namespace Libraries::Np::NpCommerce diff --git a/src/core/libraries/np/np_commerce.h b/src/core/libraries/np/np_commerce.h new file mode 100644 index 000000000..003e85a58 --- /dev/null +++ b/src/core/libraries/np/np_commerce.h @@ -0,0 +1,16 @@ +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/types.h" + +namespace Core::Loader { +class SymbolsResolver; +} + +namespace Libraries::Np::NpCommerce { + +void RegisterLib(Core::Loader::SymbolsResolver* sym); + +} // namespace Libraries::Np::NpCommerce \ No newline at end of file From 052f3260f391491c29caac6133c2eeb8c236437f Mon Sep 17 00:00:00 2001 From: Connor Garey Date: Sun, 30 Nov 2025 17:57:14 +0000 Subject: [PATCH 15/25] Sdl message box when no args provided (#3843) * Added a message box when no arguments are passed. 
* clang-fix * clang-fix episode 2 * Output message box error to stderr instead of stdout --- src/main.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/main.cpp b/src/main.cpp index 4d05dfe5a..f1e5ce932 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include "functional" #include "iostream" #include "string" @@ -182,6 +183,10 @@ int main(int argc, char* argv[]) { }}}; if (argc == 1) { + if (!SDL_ShowSimpleMessageBox( + SDL_MESSAGEBOX_INFORMATION, "shadPS4", + "This is a CLI application. Please use the QTLauncher for a GUI.", nullptr)) + std::cerr << "Could not display SDL message box! Error: " << SDL_GetError() << "\n"; int dummy = 0; // one does not simply pass 0 directly arg_map.at("-h")(dummy); return -1; From cf866ab294469874e805561a36a7ef2948c43081 Mon Sep 17 00:00:00 2001 From: kalaposfos13 <153381648+kalaposfos13@users.noreply.github.com> Date: Sun, 30 Nov 2025 21:40:58 +0100 Subject: [PATCH 16/25] Don't bother trying to restart the emulator if sceSystemServiceLoadExec is called with an invalid path (#3845) --- src/core/libraries/system/systemservice.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/core/libraries/system/systemservice.cpp b/src/core/libraries/system/systemservice.cpp index c02c4b3c3..ce5542fc8 100644 --- a/src/core/libraries/system/systemservice.cpp +++ b/src/core/libraries/system/systemservice.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include "common/config.h" #include "common/logging/log.h" #include "common/singleton.h" @@ -1874,6 +1875,10 @@ int PS4_SYSV_ABI sceSystemServiceLoadExec(const char* path, const char* argv[]) auto emu = Common::Singleton::Instance(); auto mnt = Common::Singleton::Instance(); auto hostPath = mnt->GetHostPath(std::string_view(path)); + if 
(hostPath.empty()) { + LOG_INFO(Lib_SystemService, "Restart called with invalid file '{}', exiting.", path); + std::quick_exit(0); + } std::vector args; if (argv != nullptr) { for (const char** ptr = argv; *ptr != nullptr; ptr++) { From a5f928084123c2b4862bcc9c9977ffb7898f3734 Mon Sep 17 00:00:00 2001 From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com> Date: Mon, 1 Dec 2025 02:21:19 -0600 Subject: [PATCH 17/25] Return CPU mode based on param.sfo attributes (#3846) Values are based on hardware observations. --- src/core/libraries/kernel/process.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/core/libraries/kernel/process.cpp b/src/core/libraries/kernel/process.cpp index 02da041c3..e88446e02 100644 --- a/src/core/libraries/kernel/process.cpp +++ b/src/core/libraries/kernel/process.cpp @@ -42,6 +42,16 @@ s32 PS4_SYSV_ABI sceKernelGetCompiledSdkVersion(s32* ver) { } s32 PS4_SYSV_ABI sceKernelGetCpumode() { + LOG_DEBUG(Lib_Kernel, "called"); + auto& attrs = Common::ElfInfo::Instance().GetPSFAttributes(); + u32 is_cpu6 = attrs.six_cpu_mode.Value(); + u32 is_cpu7 = attrs.seven_cpu_mode.Value(); + if (is_cpu6 == 1 && is_cpu7 == 1) { + return 2; + } + if (is_cpu7 == 1) { + return 5; + } return 0; } From c3f7a4301cecbe773f0415bf41d5ea9c9eca868e Mon Sep 17 00:00:00 2001 From: kalaposfos13 <153381648+kalaposfos13@users.noreply.github.com> Date: Tue, 2 Dec 2025 09:21:01 +0100 Subject: [PATCH 18/25] Add basic mouse-to-touchpad emulation (#3842) --- src/common/config.cpp | 1 + src/input/input_handler.cpp | 4 ++++ src/input/input_handler.h | 11 +++++++---- src/input/input_mouse.cpp | 18 +++++++++++++++++- src/input/input_mouse.h | 1 + src/sdl_window.cpp | 5 +++++ 6 files changed, 35 insertions(+), 5 deletions(-) diff --git a/src/common/config.cpp b/src/common/config.cpp index e79652b32..94d8b488c 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -1310,6 +1310,7 @@ hotkey_pause = f9 hotkey_reload_inputs = f8 
hotkey_toggle_mouse_to_joystick = f7 hotkey_toggle_mouse_to_gyro = f6 +hotkey_toggle_mouse_to_touchpad = delete hotkey_quit = lctrl, lshift, end )"; } diff --git a/src/input/input_handler.cpp b/src/input/input_handler.cpp index d38b45ddd..01c6d1fa4 100644 --- a/src/input/input_handler.cpp +++ b/src/input/input_handler.cpp @@ -106,6 +106,7 @@ auto output_array = std::array{ ControllerOutput(HOTKEY_RELOAD_INPUTS), ControllerOutput(HOTKEY_TOGGLE_MOUSE_TO_JOYSTICK), ControllerOutput(HOTKEY_TOGGLE_MOUSE_TO_GYRO), + ControllerOutput(HOTKEY_TOGGLE_MOUSE_TO_TOUCHPAD), ControllerOutput(HOTKEY_RENDERDOC), ControllerOutput(SDL_GAMEPAD_BUTTON_INVALID, SDL_GAMEPAD_AXIS_INVALID), @@ -579,6 +580,9 @@ void ControllerOutput::FinalizeUpdate() { case HOTKEY_TOGGLE_MOUSE_TO_GYRO: PushSDLEvent(SDL_EVENT_MOUSE_TO_GYRO); break; + case HOTKEY_TOGGLE_MOUSE_TO_TOUCHPAD: + PushSDLEvent(SDL_EVENT_MOUSE_TO_TOUCHPAD); + break; case HOTKEY_RENDERDOC: PushSDLEvent(SDL_EVENT_RDOC_CAPTURE); break; diff --git a/src/input/input_handler.h b/src/input/input_handler.h index 0d95d1c4a..eaadd164e 100644 --- a/src/input/input_handler.h +++ b/src/input/input_handler.h @@ -34,9 +34,10 @@ #define SDL_EVENT_RELOAD_INPUTS SDL_EVENT_USER + 5 #define SDL_EVENT_MOUSE_TO_JOYSTICK SDL_EVENT_USER + 6 #define SDL_EVENT_MOUSE_TO_GYRO SDL_EVENT_USER + 7 -#define SDL_EVENT_RDOC_CAPTURE SDL_EVENT_USER + 8 -#define SDL_EVENT_QUIT_DIALOG SDL_EVENT_USER + 9 -#define SDL_EVENT_MOUSE_WHEEL_OFF SDL_EVENT_USER + 10 +#define SDL_EVENT_MOUSE_TO_TOUCHPAD SDL_EVENT_USER + 8 +#define SDL_EVENT_RDOC_CAPTURE SDL_EVENT_USER + 9 +#define SDL_EVENT_QUIT_DIALOG SDL_EVENT_USER + 10 +#define SDL_EVENT_MOUSE_WHEEL_OFF SDL_EVENT_USER + 11 #define LEFTJOYSTICK_HALFMODE 0x00010000 #define RIGHTJOYSTICK_HALFMODE 0x00020000 @@ -52,7 +53,8 @@ #define HOTKEY_RELOAD_INPUTS 0xf0000005 #define HOTKEY_TOGGLE_MOUSE_TO_JOYSTICK 0xf0000006 #define HOTKEY_TOGGLE_MOUSE_TO_GYRO 0xf0000007 -#define HOTKEY_RENDERDOC 0xf0000008 +#define 
HOTKEY_TOGGLE_MOUSE_TO_TOUCHPAD 0xf0000008 +#define HOTKEY_RENDERDOC 0xf0000009 #define SDL_UNMAPPED UINT32_MAX - 1 @@ -141,6 +143,7 @@ const std::map string_to_cbutton_map = { {"hotkey_reload_inputs", HOTKEY_RELOAD_INPUTS}, {"hotkey_toggle_mouse_to_joystick", HOTKEY_TOGGLE_MOUSE_TO_JOYSTICK}, {"hotkey_toggle_mouse_to_gyro", HOTKEY_TOGGLE_MOUSE_TO_GYRO}, + {"hotkey_toggle_mouse_to_touchpad", HOTKEY_TOGGLE_MOUSE_TO_TOUCHPAD}, {"hotkey_renderdoc_capture", HOTKEY_RENDERDOC}, }; diff --git a/src/input/input_mouse.cpp b/src/input/input_mouse.cpp index 3c718dbd5..55489283c 100644 --- a/src/input/input_mouse.cpp +++ b/src/input/input_mouse.cpp @@ -8,8 +8,12 @@ #include "input/controller.h" #include "input_mouse.h" +#include +#include #include "SDL3/SDL.h" +extern Frontend::WindowSDL* g_window; + namespace Input { int mouse_joystick_binding = 0; @@ -80,7 +84,6 @@ void EmulateJoystick(GameController* controller, u32 interval) { constexpr float constant_down_accel[3] = {0.0f, 10.0f, 0.0f}; void EmulateGyro(GameController* controller, u32 interval) { - // LOG_INFO(Input, "todo gyro"); float d_x = 0, d_y = 0; SDL_GetRelativeMouseState(&d_x, &d_y); controller->Acceleration(1, constant_down_accel); @@ -92,6 +95,16 @@ void EmulateGyro(GameController* controller, u32 interval) { controller->Gyro(1, gyro_from_mouse); } +void EmulateTouchpad(GameController* controller, u32 interval) { + float x, y; + SDL_MouseButtonFlags mouse_buttons = SDL_GetMouseState(&x, &y); + controller->SetTouchpadState(0, (mouse_buttons & SDL_BUTTON_LMASK) != 0, + std::clamp(x / g_window->GetWidth(), 0.0f, 1.0f), + std::clamp(y / g_window->GetHeight(), 0.0f, 1.0f)); + controller->CheckButton(0, Libraries::Pad::OrbisPadButtonDataOffset::TouchPad, + (mouse_buttons & SDL_BUTTON_RMASK) != 0); +} + Uint32 MousePolling(void* param, Uint32 id, Uint32 interval) { auto* controller = (GameController*)param; switch (mouse_mode) { @@ -101,6 +114,9 @@ Uint32 MousePolling(void* param, Uint32 id, Uint32 interval) { case 
MouseMode::Gyro: EmulateGyro(controller, interval); break; + case MouseMode::Touchpad: + EmulateTouchpad(controller, interval); + break; default: break; diff --git a/src/input/input_mouse.h b/src/input/input_mouse.h index a56ef2d8f..995f836f2 100644 --- a/src/input/input_mouse.h +++ b/src/input/input_mouse.h @@ -12,6 +12,7 @@ enum MouseMode { Off = 0, Joystick, Gyro, + Touchpad, }; bool ToggleMouseModeTo(MouseMode m); diff --git a/src/sdl_window.cpp b/src/sdl_window.cpp index 449defdd1..476a56b52 100644 --- a/src/sdl_window.cpp +++ b/src/sdl_window.cpp @@ -457,6 +457,11 @@ void WindowSDL::WaitEvent() { SDL_SetWindowRelativeMouseMode(this->GetSDLWindow(), Input::ToggleMouseModeTo(Input::MouseMode::Gyro)); break; + case SDL_EVENT_MOUSE_TO_TOUCHPAD: + SDL_SetWindowRelativeMouseMode(this->GetSDLWindow(), + Input::ToggleMouseModeTo(Input::MouseMode::Touchpad)); + SDL_SetWindowRelativeMouseMode(this->GetSDLWindow(), false); + break; case SDL_EVENT_RDOC_CAPTURE: VideoCore::TriggerCapture(); break; From e5ea55e42588407e92a82dad47e84b83df5b4114 Mon Sep 17 00:00:00 2001 From: Pirky <92021796+Pirky10@users.noreply.github.com> Date: Tue, 2 Dec 2025 09:22:41 +0100 Subject: [PATCH 19/25] np: Add dialog state tracking for NpCommerce (#3841) --- src/core/libraries/np/np_commerce.cpp | 63 ++++++++++++++++++++++----- 1 file changed, 52 insertions(+), 11 deletions(-) diff --git a/src/core/libraries/np/np_commerce.cpp b/src/core/libraries/np/np_commerce.cpp index 1e8440ec0..99b03384a 100644 --- a/src/core/libraries/np/np_commerce.cpp +++ b/src/core/libraries/np/np_commerce.cpp @@ -4,46 +4,87 @@ #include "common/logging/log.h" #include "core/libraries/error_codes.h" #include "core/libraries/libs.h" +#include "core/libraries/system/commondialog.h" namespace Libraries::Np::NpCommerce { + +using CommonDialog::Error; +using CommonDialog::Result; +using CommonDialog::Status; + +static Status g_dialog_status = Status::NONE; +static Result g_dialog_result = Result::OK; + s32 PS4_SYSV_ABI 
sceNpCommerceDialogClose() { - LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); + LOG_INFO(Lib_NpCommerce, "called"); + if (g_dialog_status == Status::NONE) { + return static_cast(Error::NOT_INITIALIZED); + } + if (g_dialog_status != Status::FINISHED) { + return static_cast(Error::NOT_FINISHED); + } + g_dialog_status = Status::INITIALIZED; return ORBIS_OK; } s32 PS4_SYSV_ABI sceNpCommerceDialogGetResult(s32* result) { - LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); + LOG_INFO(Lib_NpCommerce, "called"); + if (result == nullptr) { + return static_cast(Error::ARG_NULL); + } + if (g_dialog_status != Status::FINISHED) { + return static_cast(Error::NOT_FINISHED); + } + *result = static_cast(g_dialog_result); return ORBIS_OK; } s8 PS4_SYSV_ABI sceNpCommerceDialogGetStatus() { - LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); - return ORBIS_OK; + LOG_DEBUG(Lib_NpCommerce, "called, status = {}", static_cast(g_dialog_status)); + return static_cast(g_dialog_status); } s32 PS4_SYSV_ABI sceNpCommerceDialogInitialize() { - LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); + LOG_INFO(Lib_NpCommerce, "called"); + if (g_dialog_status != Status::NONE) { + return static_cast(Error::ALREADY_INITIALIZED); + } + g_dialog_status = Status::INITIALIZED; return ORBIS_OK; } s32 PS4_SYSV_ABI sceNpCommerceDialogInitializeInternal() { - LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); - return ORBIS_OK; + LOG_INFO(Lib_NpCommerce, "called"); + return sceNpCommerceDialogInitialize(); } s16 PS4_SYSV_ABI sceNpCommerceDialogOpen(s64 check) { - LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); + LOG_INFO(Lib_NpCommerce, "called, check = {}", check); + if (g_dialog_status != Status::INITIALIZED) { + LOG_WARNING(Lib_NpCommerce, "Dialog not initialized"); + return ORBIS_OK; + } + + g_dialog_status = Status::FINISHED; + g_dialog_result = Result::USER_CANCELED; return ORBIS_OK; } s32 PS4_SYSV_ABI sceNpCommerceDialogTerminate() { - LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); + LOG_INFO(Lib_NpCommerce, "called"); 
+ if (g_dialog_status == Status::NONE) { + return static_cast(Error::NOT_INITIALIZED); + } + if (g_dialog_status == Status::RUNNING) { + return static_cast(Error::NOT_FINISHED); + } + g_dialog_status = Status::NONE; return ORBIS_OK; } s32 PS4_SYSV_ABI sceNpCommerceDialogUpdateStatus() { - LOG_ERROR(Lib_NpCommerce, "(STUBBED) called"); - return ORBIS_OK; + LOG_DEBUG(Lib_NpCommerce, "called, status = {}", static_cast(g_dialog_status)); + return static_cast(g_dialog_status); } s32 PS4_SYSV_ABI sceNpCommerceHidePsStoreIcon() { From dc6013cf0e19a66f489178da2235d2468fbb0186 Mon Sep 17 00:00:00 2001 From: kalaposfos13 <153381648+kalaposfos13@users.noreply.github.com> Date: Tue, 2 Dec 2025 09:41:06 +0100 Subject: [PATCH 20/25] Block normal mouse inputs in mouse-to-touchpad mode shadow sniped my PR. :( --- src/input/input_handler.cpp | 3 +++ src/input/input_mouse.cpp | 19 +++++++++++++++++++ src/input/input_mouse.h | 2 ++ 3 files changed, 24 insertions(+) diff --git a/src/input/input_handler.cpp b/src/input/input_handler.cpp index 01c6d1fa4..e74569737 100644 --- a/src/input/input_handler.cpp +++ b/src/input/input_handler.cpp @@ -777,6 +777,9 @@ void ActivateOutputsFromInputs() { it.ResetUpdate(); } + // Check for input blockers + ApplyMouseInputBlockers(); + // Iterate over all inputs, and update their respecive outputs accordingly for (auto& it : connections) { it.output->AddUpdate(it.ProcessBinding()); diff --git a/src/input/input_mouse.cpp b/src/input/input_mouse.cpp index 55489283c..cead87e53 100644 --- a/src/input/input_mouse.cpp +++ b/src/input/input_mouse.cpp @@ -6,6 +6,7 @@ #include "common/assert.h" #include "common/types.h" #include "input/controller.h" +#include "input/input_handler.h" #include "input_mouse.h" #include @@ -16,6 +17,8 @@ extern Frontend::WindowSDL* g_window; namespace Input { +extern std::list> pressed_keys; + int mouse_joystick_binding = 0; float mouse_deadzone_offset = 0.5, mouse_speed = 1, mouse_speed_offset = 0.1250; bool mouse_gyro_roll_mode 
= false; @@ -105,6 +108,22 @@ void EmulateTouchpad(GameController* controller, u32 interval) { (mouse_buttons & SDL_BUTTON_RMASK) != 0); } +void ApplyMouseInputBlockers() { + switch (mouse_mode) { + case MouseMode::Touchpad: + LOG_INFO(Input, "Blocking mouse inputs"); + for (auto& k : pressed_keys) { + if (k.first.input.sdl_id == SDL_BUTTON_LEFT || + k.first.input.sdl_id == SDL_BUTTON_RIGHT) { + k.second = true; + } + } + break; + default: + break; + } +} + Uint32 MousePolling(void* param, Uint32 id, Uint32 interval) { auto* controller = (GameController*)param; switch (mouse_mode) { diff --git a/src/input/input_mouse.h b/src/input/input_mouse.h index 995f836f2..da1d874ec 100644 --- a/src/input/input_mouse.h +++ b/src/input/input_mouse.h @@ -23,6 +23,8 @@ void SetMouseGyroRollMode(bool mode); void EmulateJoystick(GameController* controller, u32 interval); void EmulateGyro(GameController* controller, u32 interval); +void ApplyMouseInputBlockers(); + // Polls the mouse for changes Uint32 MousePolling(void* param, Uint32 id, Uint32 interval); From b135a056ba457c7d3ea1a4cfc15f5f340532f41d Mon Sep 17 00:00:00 2001 From: kalaposfos13 <153381648+kalaposfos13@users.noreply.github.com> Date: Tue, 2 Dec 2025 09:50:11 +0100 Subject: [PATCH 21/25] Remove debug logging --- src/input/input_mouse.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/input/input_mouse.cpp b/src/input/input_mouse.cpp index cead87e53..cbb07721b 100644 --- a/src/input/input_mouse.cpp +++ b/src/input/input_mouse.cpp @@ -111,7 +111,6 @@ void EmulateTouchpad(GameController* controller, u32 interval) { void ApplyMouseInputBlockers() { switch (mouse_mode) { case MouseMode::Touchpad: - LOG_INFO(Input, "Blocking mouse inputs"); for (auto& k : pressed_keys) { if (k.first.input.sdl_id == SDL_BUTTON_LEFT || k.first.input.sdl_id == SDL_BUTTON_RIGHT) { From 9db4642f666c1c46dd4f9f816472929cbe765bb7 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Tue, 2 Dec 2025 22:27:01 +0100 Subject: [PATCH 22/25] 
video_core: Scheduler priority pending operation queue (#3848) * Priority pending ops * Use priority operations on image download * clang-format * Simplify thread * I'm tired, it's too late :( --- .../renderer_vulkan/vk_scheduler.cpp | 29 +++++++++++++++ src/video_core/renderer_vulkan/vk_scheduler.h | 18 ++++++++++ .../texture_cache/texture_cache.cpp | 35 +++---------------- src/video_core/texture_cache/texture_cache.h | 10 ------ 4 files changed, 52 insertions(+), 40 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index cc8f6956d..fee0b408e 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -3,6 +3,7 @@ #include "common/assert.h" #include "common/debug.h" +#include "common/thread.h" #include "imgui/renderer/texture_manager.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -17,6 +18,8 @@ Scheduler::Scheduler(const Instance& instance) profiler_scope = reinterpret_cast(std::malloc(sizeof(tracy::VkCtxScope))); #endif AllocateWorkerCommandBuffers(); + priority_pending_ops_thread = + std::jthread(std::bind_front(&Scheduler::PriorityPendingOpsThread, this)); } Scheduler::~Scheduler() { @@ -167,6 +170,32 @@ void Scheduler::SubmitExecution(SubmitInfo& info) { PopPendingOperations(); } +void Scheduler::PriorityPendingOpsThread(std::stop_token stoken) { + Common::SetCurrentThreadName("shadPS4:GpuSchedPriorityPendingOpsRunner"); + + while (!stoken.stop_requested()) { + PendingOp op; + { + std::unique_lock lk(priority_pending_ops_mutex); + priority_pending_ops_cv.wait(lk, stoken, + [this] { return !priority_pending_ops.empty(); }); + if (stoken.stop_requested()) { + break; + } + + op = std::move(priority_pending_ops.front()); + priority_pending_ops.pop(); + } + + master_semaphore.Wait(op.gpu_tick); + if (stoken.stop_requested()) { + break; + } + + op.callback(); + } +} + void 
DynamicState::Commit(const Instance& instance, const vk::CommandBuffer& cmdbuf) { if (dirty_state.viewports) { dirty_state.viewports = false; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 506b84159..aff299e54 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -5,6 +5,7 @@ #include #include +#include #include #include "common/unique_function.h" @@ -401,10 +402,21 @@ public: } /// Defers an operation until the gpu has reached the current cpu tick. + /// Will be run when submitting or calling PopPendingOperations. void DeferOperation(Common::UniqueFunction&& func) { pending_ops.emplace(std::move(func), CurrentTick()); } + /// Defers an operation until the gpu has reached the current cpu tick. + /// Runs as soon as possible in another thread. + void DeferPriorityOperation(Common::UniqueFunction&& func) { + { + std::unique_lock lk(priority_pending_ops_mutex); + priority_pending_ops.emplace(std::move(func), CurrentTick()); + } + priority_pending_ops_cv.notify_one(); + } + static std::mutex submit_mutex; private: @@ -412,6 +424,8 @@ private: void SubmitExecution(SubmitInfo& info); + void PriorityPendingOpsThread(std::stop_token stoken); + private: const Instance& instance; MasterSemaphore master_semaphore; @@ -424,6 +438,10 @@ private: u64 gpu_tick; }; std::queue pending_ops; + std::queue priority_pending_ops; + std::mutex priority_pending_ops_mutex; + std::condition_variable_any priority_pending_ops_cv; + std::jthread priority_pending_ops_thread; RenderState render_state; bool is_rendering = false; tracy::VkCtxScope* profiler_scope{}; diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index c7604995a..17c7e67b3 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -52,9 +52,6 @@ TextureCache::TextureCache(const 
Vulkan::Instance& instance_, Vulkan::Scheduler& std::max(std::min(device_local_memory - min_vacancy_critical, min_spacing_critical), DEFAULT_CRITICAL_GC_MEMORY)); trigger_gc_memory = static_cast((device_local_memory - mem_threshold) / 2); - - downloaded_images_thread = - std::jthread([&](const std::stop_token& token) { DownloadedImagesThread(token); }); } TextureCache::~TextureCache() = default; @@ -125,33 +122,11 @@ void TextureCache::DownloadImageMemory(ImageId image_id) { cmdbuf.copyImageToBuffer(image.GetImage(), vk::ImageLayout::eTransferSrcOptimal, download_buffer.Handle(), image_download); - { - std::unique_lock lock(downloaded_images_mutex); - downloaded_images_queue.emplace(scheduler.CurrentTick(), image.info.guest_address, download, - download_size); - downloaded_images_cv.notify_one(); - } -} - -void TextureCache::DownloadedImagesThread(const std::stop_token& token) { - auto* memory = Core::Memory::Instance(); - while (!token.stop_requested()) { - DownloadedImage image; - { - std::unique_lock lock{downloaded_images_mutex}; - downloaded_images_cv.wait(lock, token, - [this] { return !downloaded_images_queue.empty(); }); - if (token.stop_requested()) { - break; - } - image = downloaded_images_queue.front(); - downloaded_images_queue.pop(); - } - - scheduler.GetMasterSemaphore()->Wait(image.tick); - memory->TryWriteBacking(std::bit_cast(image.device_addr), image.download, - image.download_size); - } + scheduler.DeferPriorityOperation( + [this, device_addr = image.info.guest_address, download, download_size] { + Core::Memory::Instance()->TryWriteBacking(std::bit_cast(device_addr), download, + download_size); + }); } void TextureCache::MarkAsMaybeDirty(ImageId image_id, Image& image) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 9d25069db..141ac938f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -314,16 +314,6 @@ private: 
Common::LeastRecentlyUsedCache lru_cache; PageTable page_table; std::mutex mutex; - struct DownloadedImage { - u64 tick; - VAddr device_addr; - void* download; - size_t download_size; - }; - std::queue downloaded_images_queue; - std::mutex downloaded_images_mutex; - std::condition_variable_any downloaded_images_cv; - std::jthread downloaded_images_thread; struct MetaDataInfo { enum class Type { CMask, From 98fd0689ac46250debd536dcd16b6cf11dfb159d Mon Sep 17 00:00:00 2001 From: kalaposfos13 <153381648+kalaposfos13@users.noreply.github.com> Date: Wed, 3 Dec 2025 14:05:19 +0100 Subject: [PATCH 23/25] Revert non-Linux parts of #3819 (#3852) * Revert non-Linux parts of #3819 * More OpenOrbis stuff that I couldn't be bothered to put in a new PR --- src/core/libraries/fiber/fiber.cpp | 6 +++--- src/core/libraries/kernel/threads/pthread.cpp | 4 ++++ src/core/linker.cpp | 4 ++-- src/core/tls.cpp | 18 ++++++++++++++++++ src/core/tls.h | 3 +++ 5 files changed, 30 insertions(+), 5 deletions(-) diff --git a/src/core/libraries/fiber/fiber.cpp b/src/core/libraries/fiber/fiber.cpp index 776792041..2ebfbd244 100644 --- a/src/core/libraries/fiber/fiber.cpp +++ b/src/core/libraries/fiber/fiber.cpp @@ -6,8 +6,8 @@ #include "common/elf_info.h" #include "common/logging/log.h" #include "core/libraries/fiber/fiber_error.h" -#include "core/libraries/kernel/threads/pthread.h" #include "core/libraries/libs.h" +#include "core/tls.h" namespace Libraries::Fiber { @@ -20,7 +20,7 @@ static constexpr u64 kFiberStackSizeCheck = 0xdeadbeefdeadbeef; static std::atomic context_size_check = false; OrbisFiberContext* GetFiberContext() { - return Libraries::Kernel::g_curthread->tcb->tcb_fiber; + return Core::GetTcbBase()->tcb_fiber; } extern "C" s32 PS4_SYSV_ABI _sceFiberSetJmp(OrbisFiberContext* ctx) asm("_sceFiberSetJmp"); @@ -269,7 +269,7 @@ s32 PS4_SYSV_ABI sceFiberRunImpl(OrbisFiber* fiber, void* addr_context, u64 size return ORBIS_FIBER_ERROR_INVALID; } - Core::Tcb* tcb = 
Libraries::Kernel::g_curthread->tcb; + Core::Tcb* tcb = Core::GetTcbBase(); if (tcb->tcb_fiber) { return ORBIS_FIBER_ERROR_PERMISSION; } diff --git a/src/core/libraries/kernel/threads/pthread.cpp b/src/core/libraries/kernel/threads/pthread.cpp index 8ab8b72c3..6c11eebc2 100644 --- a/src/core/libraries/kernel/threads/pthread.cpp +++ b/src/core/libraries/kernel/threads/pthread.cpp @@ -663,6 +663,10 @@ void RegisterThread(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("Z4QosVuAsA0", "libkernel", 1, "libkernel", posix_pthread_once); LIB_FUNCTION("EotR8a3ASf4", "libkernel", 1, "libkernel", posix_pthread_self); LIB_FUNCTION("OxhIB8LB-PQ", "libkernel", 1, "libkernel", posix_pthread_create); + LIB_FUNCTION("lZzFeSxPl08", "libkernel", 1, "libkernel", posix_pthread_setcancelstate); + LIB_FUNCTION("CBNtXOoef-E", "libkernel", 1, "libkernel", posix_sched_get_priority_max); + LIB_FUNCTION("m0iS6jNsXds", "libkernel", 1, "libkernel", posix_sched_get_priority_min); + LIB_FUNCTION("Xs9hdiD7sAA", "libkernel", 1, "libkernel", posix_pthread_setschedparam); LIB_FUNCTION("+U1R4WtXvoc", "libkernel", 1, "libkernel", posix_pthread_detach); LIB_FUNCTION("7Xl257M4VNI", "libkernel", 1, "libkernel", posix_pthread_equal); LIB_FUNCTION("h9CcP3J0oVM", "libkernel", 1, "libkernel", posix_pthread_join); diff --git a/src/core/linker.cpp b/src/core/linker.cpp index b7c9a2895..ac6b37769 100644 --- a/src/core/linker.cpp +++ b/src/core/linker.cpp @@ -368,7 +368,7 @@ bool Linker::Resolve(const std::string& name, Loader::SymbolType sym_type, Modul void* Linker::TlsGetAddr(u64 module_index, u64 offset) { std::scoped_lock lk{mutex}; - DtvEntry* dtv_table = Libraries::Kernel::g_curthread->tcb->tcb_dtv; + DtvEntry* dtv_table = GetTcbBase()->tcb_dtv; if (dtv_table[0].counter != dtv_generation_counter) { // Generation counter changed, a dynamic module was either loaded or unloaded. 
const u32 old_num_dtvs = dtv_table[1].counter; @@ -381,7 +381,7 @@ void* Linker::TlsGetAddr(u64 module_index, u64 offset) { delete[] dtv_table; // Update TCB pointer. - Libraries::Kernel::g_curthread->tcb->tcb_dtv = new_dtv_table; + GetTcbBase()->tcb_dtv = new_dtv_table; dtv_table = new_dtv_table; } diff --git a/src/core/tls.cpp b/src/core/tls.cpp index bcefd6f25..57ed20f38 100644 --- a/src/core/tls.cpp +++ b/src/core/tls.cpp @@ -46,6 +46,10 @@ void SetTcbBase(void* image_address) { ASSERT(result != 0); } +Tcb* GetTcbBase() { + return reinterpret_cast(TlsGetValue(GetTcbKey())); +} + #elif defined(__APPLE__) && defined(ARCH_X86_64) // Apple x86_64 @@ -145,6 +149,12 @@ void SetTcbBase(void* image_address) { "Failed to store thread LDT page pointer: {}", errno); } +Tcb* GetTcbBase() { + Tcb* tcb; + asm volatile("mov %%fs:0x0, %0" : "=r"(tcb)); + return tcb; +} + #elif defined(ARCH_X86_64) // Other POSIX x86_64 @@ -154,6 +164,10 @@ void SetTcbBase(void* image_address) { ASSERT_MSG(ret == 0, "Failed to set GS base: errno {}", errno); } +Tcb* GetTcbBase() { + return Libraries::Kernel::g_curthread->tcb; +} + #else // POSIX non-x86_64 @@ -176,6 +190,10 @@ void SetTcbBase(void* image_address) { ASSERT(pthread_setspecific(GetTcbKey(), image_address) == 0); } +Tcb* GetTcbBase() { + return static_cast(pthread_getspecific(GetTcbKey())); +} + #endif thread_local std::once_flag init_tls_flag; diff --git a/src/core/tls.h b/src/core/tls.h index 0ae512a04..83940be7a 100644 --- a/src/core/tls.h +++ b/src/core/tls.h @@ -39,6 +39,9 @@ u32 GetTcbKey(); /// Sets the data pointer to the TCB block. void SetTcbBase(void* image_address); +/// Retrieves Tcb structure for the calling thread. +Tcb* GetTcbBase(); + /// Makes sure TLS is initialized for the thread before entering guest. 
void EnsureThreadInitialized(); From 9e80cde60d6805232653bd803320489674ce12ad Mon Sep 17 00:00:00 2001 From: Odukoya Abdullahi Ademola Date: Thu, 4 Dec 2025 09:50:01 +0100 Subject: [PATCH 24/25] Implement http uri escape unescape (#3853) * Implement sceHttpUriEscape and sceHttpUriUnescape * Implement sceHttpUriEscape and sceHttpUriUnescape * edge case --------- Co-authored-by: Pirky10 --- src/core/libraries/network/http.cpp | 123 +++++++++++++++++++++++++++- src/core/libraries/network/http.h | 2 +- 2 files changed, 121 insertions(+), 4 deletions(-) diff --git a/src/core/libraries/network/http.cpp b/src/core/libraries/network/http.cpp index 1ae48dfed..0fb81c639 100644 --- a/src/core/libraries/network/http.cpp +++ b/src/core/libraries/network/http.cpp @@ -712,8 +712,61 @@ int PS4_SYSV_ABI sceHttpUriCopy() { return ORBIS_OK; } -int PS4_SYSV_ABI sceHttpUriEscape() { - LOG_ERROR(Lib_Http, "(STUBBED) called"); +int PS4_SYSV_ABI sceHttpUriEscape(char* out, u64* require, u64 prepare, const char* in) { + LOG_TRACE(Lib_Http, "called"); + + if (!in) { + LOG_ERROR(Lib_Http, "Invalid input string"); + return ORBIS_HTTP_ERROR_INVALID_VALUE; + } + + auto IsUnreserved = [](unsigned char c) -> bool { + return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || + c == '-' || c == '_' || c == '.' 
|| c == '~'; + }; + + u64 needed = 0; + const char* src = in; + while (*src) { + unsigned char c = static_cast(*src); + if (IsUnreserved(c)) { + needed++; + } else { + needed += 3; // %XX format + } + src++; + } + needed++; // null terminator + + if (require) { + *require = needed; + } + + if (!out) { + return ORBIS_OK; + } + + if (prepare < needed) { + LOG_ERROR(Lib_Http, "Buffer too small: need {} but only {} available", needed, prepare); + return ORBIS_HTTP_ERROR_OUT_OF_MEMORY; + } + + static const char hex_chars[] = "0123456789ABCDEF"; + src = in; + char* dst = out; + while (*src) { + unsigned char c = static_cast(*src); + if (IsUnreserved(c)) { + *dst++ = *src; + } else { + *dst++ = '%'; + *dst++ = hex_chars[(c >> 4) & 0x0F]; + *dst++ = hex_chars[c & 0x0F]; + } + src++; + } + *dst = '\0'; + return ORBIS_OK; } @@ -1077,7 +1130,71 @@ int PS4_SYSV_ABI sceHttpUriSweepPath(char* dst, const char* src, u64 srcSize) { } int PS4_SYSV_ABI sceHttpUriUnescape(char* out, u64* require, u64 prepare, const char* in) { - LOG_ERROR(Lib_Http, "(STUBBED) called"); + LOG_TRACE(Lib_Http, "called"); + + if (!in) { + LOG_ERROR(Lib_Http, "Invalid input string"); + return ORBIS_HTTP_ERROR_INVALID_VALUE; + } + + // Locale-independent hex digit check + auto IsHex = [](char c) -> bool { + return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'); + }; + + // Convert hex char to int value + auto HexToInt = [](char c) -> int { + if (c >= '0' && c <= '9') + return c - '0'; + if (c >= 'A' && c <= 'F') + return c - 'A' + 10; + if (c >= 'a' && c <= 'f') + return c - 'a' + 10; + return 0; + }; + + // Check for valid percent-encoded sequence (%XX) + auto IsValidPercentSequence = [&](const char* s) -> bool { + return s[0] == '%' && s[1] != '\0' && s[2] != '\0' && IsHex(s[1]) && IsHex(s[2]); + }; + + u64 needed = 0; + const char* src = in; + while (*src) { + if (IsValidPercentSequence(src)) { + src += 3; + } else { + src++; + } + needed++; + } + needed++; // null terminator 
+ + if (require) { + *require = needed; + } + + if (!out) { + return ORBIS_OK; + } + + if (prepare < needed) { + LOG_ERROR(Lib_Http, "Buffer too small: need {} but only {} available", needed, prepare); + return ORBIS_HTTP_ERROR_OUT_OF_MEMORY; + } + + src = in; + char* dst = out; + while (*src) { + if (IsValidPercentSequence(src)) { + *dst++ = static_cast((HexToInt(src[1]) << 4) | HexToInt(src[2])); + src += 3; + } else { + *dst++ = *src++; + } + } + *dst = '\0'; + return ORBIS_OK; } diff --git a/src/core/libraries/network/http.h b/src/core/libraries/network/http.h index 701bb0e05..2ad5e171f 100644 --- a/src/core/libraries/network/http.h +++ b/src/core/libraries/network/http.h @@ -148,7 +148,7 @@ int PS4_SYSV_ABI sceHttpUnsetEpoll(); int PS4_SYSV_ABI sceHttpUriBuild(char* out, u64* require, u64 prepare, const OrbisHttpUriElement* srcElement, u32 option); int PS4_SYSV_ABI sceHttpUriCopy(); -int PS4_SYSV_ABI sceHttpUriEscape(); +int PS4_SYSV_ABI sceHttpUriEscape(char* out, u64* require, u64 prepare, const char* in); int PS4_SYSV_ABI sceHttpUriMerge(char* mergedUrl, char* url, char* relativeUri, u64* require, u64 prepare, u32 option); int PS4_SYSV_ABI sceHttpUriParse(OrbisHttpUriElement* out, const char* srcUri, void* pool, From 5183cbe6867c241e75632afbfe6ea3438fcf1316 Mon Sep 17 00:00:00 2001 From: Odukoya Abdullahi Ademola Date: Thu, 4 Dec 2025 09:50:24 +0100 Subject: [PATCH 25/25] sceHttpUriSweepPath (#3854) --- src/core/libraries/network/http.cpp | 89 ++++++++++++++++++++++++++++- 1 file changed, 88 insertions(+), 1 deletion(-) diff --git a/src/core/libraries/network/http.cpp b/src/core/libraries/network/http.cpp index 0fb81c639..ebb10db68 100644 --- a/src/core/libraries/network/http.cpp +++ b/src/core/libraries/network/http.cpp @@ -1125,7 +1125,94 @@ int PS4_SYSV_ABI sceHttpUriParse(OrbisHttpUriElement* out, const char* srcUri, v } int PS4_SYSV_ABI sceHttpUriSweepPath(char* dst, const char* src, u64 srcSize) { - LOG_ERROR(Lib_Http, "(STUBBED) called"); + 
LOG_TRACE(Lib_Http, "called"); + + if (!dst || !src) { + LOG_ERROR(Lib_Http, "Invalid parameters"); + return ORBIS_HTTP_ERROR_INVALID_VALUE; + } + + if (srcSize == 0) { + dst[0] = '\0'; + return ORBIS_OK; + } + + u64 len = 0; + while (len < srcSize && src[len] != '\0') { + len++; + } + + for (u64 i = 0; i < len; i++) { + dst[i] = src[i]; + } + dst[len] = '\0'; + + char* read = dst; + char* write = dst; + + while (*read) { + if (read[0] == '.' && read[1] == '.' && read[2] == '/') { + read += 3; + continue; + } + + if (read[0] == '.' && read[1] == '/') { + read += 2; + continue; + } + + if (read[0] == '/' && read[1] == '.' && read[2] == '/') { + read += 2; + continue; + } + + if (read[0] == '/' && read[1] == '.' && read[2] == '\0') { + if (write == dst) { + *write++ = '/'; + } + break; + } + + bool is_dotdot_mid = (read[0] == '/' && read[1] == '.' && read[2] == '.' && read[3] == '/'); + bool is_dotdot_end = + (read[0] == '/' && read[1] == '.' && read[2] == '.' && read[3] == '\0'); + + if (is_dotdot_mid || is_dotdot_end) { + if (write > dst) { + if (*(write - 1) == '/') { + write--; + } + while (write > dst && *(write - 1) != '/') { + write--; + } + + if (is_dotdot_mid && write > dst) { + write--; + } + } + + if (is_dotdot_mid) { + read += 3; + } else { + break; + } + continue; + } + + if ((read[0] == '.' && read[1] == '\0') || + (read[0] == '.' && read[1] == '.' && read[2] == '\0')) { + break; + } + + if (read[0] == '/') { + *write++ = *read++; + } + while (*read && *read != '/') { + *write++ = *read++; + } + } + + *write = '\0'; return ORBIS_OK; }