From f7b4d2738be26b233510577bd4e972da298dc578 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 17 Jan 2026 17:43:06 +0100 Subject: [PATCH] VideoCommon: Don't create mipmap vector in TextureInfo The TextureInfo constructor creates a vector of MipLevels. This could be good for performance if MipLevels were accessed very often for each TextureInfo, but that's not the case. Dolphin creates thousands of TextureInfos per second whose mipmap levels are never accessed, because the lookup hits the texture cache, and in the uncommon case of a texture cache miss, the mipmap levels only get looped through once. To make the common case of texture cache hits as fast as possible, let's not create a vector in the TextureInfo constructor. This commit implements a custom iterator for MipLevels instead. In my testing on the Death Star level of Rogue Squadron 2, this speeds up TextureInfo::FromStage by 200%, giving an overall emulation speedup of a bit over 1%. Results on the Hoth level are even better, with TextureInfo::FromStage being close to 300% faster and overall emulation being over 4% faster. (Single core, no GPU texture decoding.) 
--- Source/Core/VideoCommon/TextureCacheBase.cpp | 34 +++--- Source/Core/VideoCommon/TextureInfo.cpp | 109 +++++++++++-------- Source/Core/VideoCommon/TextureInfo.h | 88 ++++++++++++--- 3 files changed, 152 insertions(+), 79 deletions(-) diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index 6ae8b4f96f7..f46c73e7bd8 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -1721,32 +1721,36 @@ RcTcacheEntry TextureCacheBase::CreateTextureEntry( dst_buffer += decoded_texture_size; } - for (u32 level = 1; level != texLevels; ++level) + for (const auto& mip_level : texture_info.GetMipMapLevels()) { - auto mip_level = texture_info.GetMipMapLevel(level - 1); - if (!mip_level) + if (!mip_level.IsDataValid()) + { + ERROR_LOG_FMT(VIDEO, "Trying to use an invalid mipmap address {:#010x}", + texture_info.GetRawAddress()); continue; + } if (!decode_on_gpu || - !DecodeTextureOnGPU(entry, level, mip_level->GetData(), mip_level->GetTextureSize(), - texture_info.GetTextureFormat(), mip_level->GetRawWidth(), - mip_level->GetRawHeight(), mip_level->GetExpandedWidth(), - mip_level->GetExpandedHeight(), + !DecodeTextureOnGPU(entry, mip_level.GetLevel(), mip_level.GetData(), + mip_level.GetTextureSize(), texture_info.GetTextureFormat(), + mip_level.GetRawWidth(), mip_level.GetRawHeight(), + mip_level.GetExpandedWidth(), mip_level.GetExpandedHeight(), creation_info.bytes_per_block * - (mip_level->GetExpandedWidth() / texture_info.GetBlockWidth()), + (mip_level.GetExpandedWidth() / texture_info.GetBlockWidth()), texture_info.GetTlutAddress(), texture_info.GetTlutFormat())) { // No need to call CheckTempSize here, as the whole buffer is preallocated at the beginning const u32 decoded_mip_size = - mip_level->GetExpandedWidth() * sizeof(u32) * mip_level->GetExpandedHeight(); - TexDecoder_Decode(dst_buffer, mip_level->GetData(), mip_level->GetExpandedWidth(), - 
mip_level->GetExpandedHeight(), texture_info.GetTextureFormat(), + mip_level.GetExpandedWidth() * sizeof(u32) * mip_level.GetExpandedHeight(); + TexDecoder_Decode(dst_buffer, mip_level.GetData(), mip_level.GetExpandedWidth(), + mip_level.GetExpandedHeight(), texture_info.GetTextureFormat(), texture_info.GetTlutAddress(), texture_info.GetTlutFormat()); - entry->texture->Load(level, mip_level->GetRawWidth(), mip_level->GetRawHeight(), - mip_level->GetExpandedWidth(), dst_buffer, decoded_mip_size); + entry->texture->Load(mip_level.GetLevel(), mip_level.GetRawWidth(), + mip_level.GetRawHeight(), mip_level.GetExpandedWidth(), dst_buffer, + decoded_mip_size); - arbitrary_mip_detector.AddLevel(mip_level->GetRawWidth(), mip_level->GetRawHeight(), - mip_level->GetExpandedWidth(), dst_buffer); + arbitrary_mip_detector.AddLevel(mip_level.GetRawWidth(), mip_level.GetRawHeight(), + mip_level.GetExpandedWidth(), dst_buffer); dst_buffer += decoded_mip_size; } diff --git a/Source/Core/VideoCommon/TextureInfo.cpp b/Source/Core/VideoCommon/TextureInfo.cpp index 27055f462d7..8c73626a357 100644 --- a/Source/Core/VideoCommon/TextureInfo.cpp +++ b/Source/Core/VideoCommon/TextureInfo.cpp @@ -61,9 +61,9 @@ TextureInfo::TextureInfo(u32 stage, std::span data, std::span tmem_odd, std::span tmem_even, std::optional mip_count) - : m_ptr(data.data()), m_tlut_ptr(tlut_data.data()), m_address(address), m_from_tmem(from_tmem), - m_tmem_odd(tmem_odd.data()), m_texture_format(texture_format), m_tlut_format(tlut_format), - m_raw_width(width), m_raw_height(height), m_stage(stage) + : m_data(data), m_tlut_data(tlut_data), m_address(address), m_from_tmem(from_tmem), + m_tmem_even(tmem_even), m_tmem_odd(tmem_odd), m_texture_format(texture_format), + m_tlut_format(tlut_format), m_raw_width(width), m_raw_height(height), m_stage(stage) { const bool is_palette_texture = IsColorIndexed(m_texture_format); if (is_palette_texture) @@ -103,23 +103,8 @@ TextureInfo::TextureInfo(u32 stage, std::span data, 
std::span(MathUtil::IntLog2(std::max(width, height)) + 1, raw_mip_count + 1) - 1; - - // load mips - std::span src_data = Common::SafeSubspan(data, GetTextureSize()); - tmem_even = Common::SafeSubspan(tmem_even, GetTextureSize()); - - for (u32 i = 0; i < limited_mip_count; i++) - { - MipLevel mip_level(i + 1, *this, m_from_tmem, &src_data, &tmem_even, &tmem_odd); - if (!mip_level.IsDataValid()) - { - ERROR_LOG_FMT(VIDEO, "Trying to use an invalid mipmap address {:#010x}", GetRawAddress()); - break; - } - m_mip_levels.push_back(std::move(mip_level)); - } } } @@ -133,7 +118,7 @@ TextureInfo::NameDetails TextureInfo::CalculateTextureName() const if (!IsDataValid()) return NameDetails{}; - const u8* tlut = m_tlut_ptr; + const u8* tlut = m_tlut_data.data(); size_t tlut_size = m_palette_size ? *m_palette_size : 0; // checking for min/max on paletted textures @@ -146,8 +131,8 @@ TextureInfo::NameDetails TextureInfo::CalculateTextureName() const case 16 * 2: for (size_t i = 0; i < m_texture_size; i++) { - const u32 low_nibble = m_ptr[i] & 0xf; - const u32 high_nibble = m_ptr[i] >> 4; + const u32 low_nibble = m_data[i] & 0xf; + const u32 high_nibble = m_data[i] >> 4; min = std::min({min, low_nibble, high_nibble}); max = std::max({max, low_nibble, high_nibble}); @@ -156,7 +141,7 @@ TextureInfo::NameDetails TextureInfo::CalculateTextureName() const case 256 * 2: for (size_t i = 0; i < m_texture_size; i++) { - const u32 texture_byte = m_ptr[i]; + const u32 texture_byte = m_data[i]; min = std::min(min, texture_byte); max = std::max(max, texture_byte); @@ -165,7 +150,7 @@ TextureInfo::NameDetails TextureInfo::CalculateTextureName() const case 16384 * 2: for (size_t i = 0; i < m_texture_size; i += sizeof(u16)) { - const u32 texture_halfword = Common::swap16(m_ptr[i]) & 0x3fff; + const u32 texture_halfword = Common::swap16(m_data[i]) & 0x3fff; min = std::min(min, texture_halfword); max = std::max(max, texture_halfword); @@ -180,7 +165,7 @@ TextureInfo::NameDetails 
TextureInfo::CalculateTextureName() const DEBUG_ASSERT(tlut_size <= m_palette_size.value_or(0)); - const u64 tex_hash = XXH64(m_ptr, m_texture_size, 0); + const u64 tex_hash = XXH64(m_data.data(), m_texture_size, 0); const u64 tlut_hash = tlut_size ? XXH64(tlut, tlut_size, 0) : 0; return {.base_name = fmt::format("{}{}x{}{}", format_prefix, m_raw_width, m_raw_height, @@ -197,12 +182,12 @@ bool TextureInfo::IsDataValid() const const u8* TextureInfo::GetData() const { - return m_ptr; + return m_data.data(); } const u8* TextureInfo::GetTlutAddress() const { - return m_tlut_ptr; + return m_tlut_data.data(); } u32 TextureInfo::GetRawAddress() const @@ -217,7 +202,7 @@ bool TextureInfo::IsFromTmem() const const u8* TextureInfo::GetTmemOddAddress() const { - return m_tmem_odd; + return m_tmem_odd.data(); } TextureFormat TextureInfo::GetTextureFormat() const @@ -277,25 +262,43 @@ u32 TextureInfo::GetStage() const bool TextureInfo::HasMipMaps() const { - return !m_mip_levels.empty(); + return m_limited_mip_count != 0; } u32 TextureInfo::GetLevelCount() const { - return static_cast(m_mip_levels.size()) + 1; + return m_limited_mip_count + 1; } -const TextureInfo::MipLevel* TextureInfo::GetMipMapLevel(u32 level) const +TextureInfo::MipLevels TextureInfo::GetMipMapLevels() const { - if (level < m_mip_levels.size()) - return &m_mip_levels[level]; + MipLevelIterator begin; + begin.m_parent = this; + begin.m_from_tmem = m_from_tmem; + begin.m_data = Common::SafeSubspan(m_data, GetTextureSize()); + begin.m_tmem_even = Common::SafeSubspan(m_tmem_even, GetTextureSize()); + begin.m_tmem_odd = m_tmem_odd; + begin.CreateMipLevel(); - return nullptr; + MipLevelIterator end; + end.m_level_index = m_limited_mip_count; + + return MipLevels(begin, end); } -TextureInfo::MipLevel::MipLevel(u32 level, const TextureInfo& parent, bool from_tmem, - std::span* src_data, std::span* tmem_even, - std::span* tmem_odd) +u32 TextureInfo::GetFullLevelSize() const +{ + u32 all_mips_size = 0; + for (const 
auto& mip_map : GetMipMapLevels()) + { + if (mip_map.IsDataValid()) + all_mips_size += mip_map.GetTextureSize(); + } + return m_texture_size + all_mips_size; +} + +TextureInfo::MipLevel::MipLevel(u32 level, const TextureInfo& parent, std::span* data) + : m_level(level) { m_raw_width = std::max(parent.GetRawWidth() >> level, 1u); m_raw_height = std::max(parent.GetRawHeight() >> level, 1u); @@ -305,23 +308,12 @@ TextureInfo::MipLevel::MipLevel(u32 level, const TextureInfo& parent, bool from_ m_texture_size = TexDecoder_GetTextureSizeInBytes(m_expanded_width, m_expanded_height, parent.GetTextureFormat()); - std::span* data = from_tmem ? ((level % 2) ? tmem_odd : tmem_even) : src_data; m_ptr = data->data(); m_data_valid = data->size() >= m_texture_size; *data = Common::SafeSubspan(*data, m_texture_size); } -u32 TextureInfo::GetFullLevelSize() const -{ - u32 all_mips_size = 0; - for (const auto& mip_map : m_mip_levels) - { - all_mips_size += mip_map.GetTextureSize(); - } - return m_texture_size + all_mips_size; -} - bool TextureInfo::MipLevel::IsDataValid() const { return m_data_valid; @@ -356,3 +348,24 @@ u32 TextureInfo::MipLevel::GetRawHeight() const { return m_raw_height; } + +u32 TextureInfo::MipLevel::GetLevel() const +{ + return m_level; +} + +TextureInfo::MipLevelIterator& TextureInfo::MipLevelIterator::operator++() +{ + ++m_level_index; + CreateMipLevel(); + return *this; +} + +void TextureInfo::MipLevelIterator::CreateMipLevel() +{ + const u32 level = m_level_index + 1; + std::span* data = m_from_tmem ? ((level % 2) ? 
&m_tmem_odd : &m_tmem_even) : &m_data; + + // The MipLevel constructor mutates the data argument so the next MipLevel gets the right data + m_mip_level = MipLevel(level, *m_parent, data); +} diff --git a/Source/Core/VideoCommon/TextureInfo.h b/Source/Core/VideoCommon/TextureInfo.h index 49c76810c56..b6a0d3497b4 100644 --- a/Source/Core/VideoCommon/TextureInfo.h +++ b/Source/Core/VideoCommon/TextureInfo.h @@ -7,7 +7,6 @@ #include #include #include -#include #include "Common/CommonTypes.h" @@ -61,11 +60,11 @@ public: u32 GetStage() const; - class MipLevel + class MipLevel final { public: - MipLevel(u32 level, const TextureInfo& parent, bool from_tmem, std::span* src_data, - std::span* tmem_even, std::span* tmem_odd); + MipLevel() = default; + MipLevel(u32 level, const TextureInfo& parent, std::span* data); bool IsDataValid() const; @@ -78,43 +77,100 @@ public: u32 GetRawWidth() const; u32 GetRawHeight() const; - private: - bool m_data_valid; + u32 GetLevel() const; - const u8* m_ptr; + private: + bool m_data_valid = false; + + const u8* m_ptr = nullptr; u32 m_texture_size = 0; - u32 m_expanded_width; - u32 m_raw_width; + u32 m_expanded_width = 0; + u32 m_raw_width = 0; - u32 m_expanded_height; - u32 m_raw_height; + u32 m_expanded_height = 0; + u32 m_raw_height = 0; + + u32 m_level = 0; + }; + + class MipLevelIterator final + { + friend TextureInfo; + + public: + using difference_type = u32; + using value_type = MipLevel; + + MipLevel& operator*() { return m_mip_level; } + + const MipLevel& operator*() const { return m_mip_level; } + + MipLevelIterator& operator++(); + + MipLevelIterator operator++(int) + { + auto tmp = *this; + ++*this; + return tmp; + } + + bool operator==(const MipLevelIterator& other) const + { + return m_level_index == other.m_level_index; + } + + private: + void CreateMipLevel(); + + MipLevel m_mip_level; + + const TextureInfo* m_parent = nullptr; + u32 m_level_index = 0; + bool m_from_tmem = false; + std::span m_data; + std::span m_tmem_even; + 
std::span m_tmem_odd; + }; + + class MipLevels final + { + public: + MipLevels(MipLevelIterator begin, MipLevelIterator end) : m_begin(begin), m_end(end) {} + + MipLevelIterator begin() const { return m_begin; } + MipLevelIterator end() const { return m_end; } + + private: + MipLevelIterator m_begin; + MipLevelIterator m_end; }; bool HasMipMaps() const; u32 GetLevelCount() const; - const MipLevel* GetMipMapLevel(u32 level) const; + MipLevels GetMipMapLevels() const; u32 GetFullLevelSize() const; static constexpr std::string_view format_prefix{"tex1_"}; private: - const u8* m_ptr; - const u8* m_tlut_ptr; + std::span m_data; + std::span m_tlut_data; u32 m_address; bool m_data_valid; bool m_from_tmem; - const u8* m_tmem_odd; + std::span m_tmem_even; + std::span m_tmem_odd; TextureFormat m_texture_format; TLUTFormat m_tlut_format; bool m_mipmaps_enabled = false; - std::vector m_mip_levels; + u32 m_limited_mip_count = 0; u32 m_texture_size = 0; std::optional m_palette_size;