From 94e45435c6d1d57af25319f5c5d0d481ebd75b2b Mon Sep 17 00:00:00 2001 From: turtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Thu, 13 Nov 2025 15:07:34 +0200 Subject: [PATCH] buffer_cache: Begin memory tracker rework --- CMakeLists.txt | 2 - src/common/bit_array.h | 406 ------------------ src/common/range_lock.h | 101 ----- src/video_core/buffer_cache/buffer_cache.cpp | 10 +- src/video_core/buffer_cache/buffer_cache.h | 3 - src/video_core/buffer_cache/memory_tracker.h | 173 +++----- .../buffer_cache/region_definitions.h | 68 ++- src/video_core/buffer_cache/region_manager.h | 352 ++++++++------- src/video_core/page_manager.cpp | 208 ++++----- src/video_core/page_manager.h | 14 +- 10 files changed, 444 insertions(+), 893 deletions(-) delete mode 100644 src/common/bit_array.h delete mode 100644 src/common/range_lock.h diff --git a/CMakeLists.txt b/CMakeLists.txt index d26581790..f30d96e1c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -670,7 +670,6 @@ set(COMMON src/common/logging/backend.cpp src/common/arch.h src/common/assert.cpp src/common/assert.h - src/common/bit_array.h src/common/bit_field.h src/common/bounded_threadsafe_queue.h src/common/concepts.h @@ -697,7 +696,6 @@ set(COMMON src/common/logging/backend.cpp src/common/path_util.h src/common/object_pool.h src/common/polyfill_thread.h - src/common/range_lock.h src/common/rdtsc.cpp src/common/rdtsc.h src/common/recursive_lock.cpp diff --git a/src/common/bit_array.h b/src/common/bit_array.h deleted file mode 100644 index 0ab464390..000000000 --- a/src/common/bit_array.h +++ /dev/null @@ -1,406 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include -#include -#include "common/types.h" - -#ifdef __AVX2__ -#define BIT_ARRAY_USE_AVX -#include -#endif - -namespace Common { - -template -class BitArray { - static_assert(N % 64 == 0, "BitArray size must be a multiple of 64 bits."); - - static constexpr size_t BITS_PER_WORD = 64; - static constexpr size_t WORD_COUNT = N / BITS_PER_WORD; - static constexpr size_t WORDS_PER_AVX = 4; - static constexpr size_t AVX_WORD_COUNT = WORD_COUNT / WORDS_PER_AVX; - -public: - using Range = std::pair; - - class Iterator { - public: - explicit Iterator(const BitArray& bit_array_, u64 start) : bit_array(bit_array_) { - range = bit_array.FirstRangeFrom(start); - } - - Iterator& operator++() { - range = bit_array.FirstRangeFrom(range.second); - return *this; - } - - bool operator==(const Iterator& other) const { - return range == other.range; - } - - bool operator!=(const Iterator& other) const { - return !(*this == other); - } - - const Range& operator*() const { - return range; - } - - const Range* operator->() const { - return ⦥ - } - - private: - const BitArray& bit_array; - Range range; - }; - - using const_iterator = Iterator; - using iterator_category = std::forward_iterator_tag; - using value_type = Range; - using difference_type = std::ptrdiff_t; - using pointer = const Range*; - using reference = const Range&; - - BitArray() = default; - BitArray(const BitArray& other) = default; - BitArray& operator=(const BitArray& other) = default; - BitArray(BitArray&& other) noexcept = default; - BitArray& operator=(BitArray&& other) noexcept = default; - ~BitArray() = default; - - BitArray(const BitArray& other, size_t start, size_t end) { - if (start >= end || end > N) { - return; - } - const size_t first_word = start / BITS_PER_WORD; - const size_t last_word = (end - 1) / BITS_PER_WORD; - const size_t start_bit = start % BITS_PER_WORD; - const size_t end_bit = (end - 1) % BITS_PER_WORD; - const u64 start_mask = ~((1ULL << start_bit) - 1); - const u64 end_mask = end_bit == BITS_PER_WORD - 1 ? ~0ULL : (1ULL << (end_bit + 1)) - 1; - if (first_word == last_word) { - data[first_word] = other.data[first_word] & (start_mask & end_mask); - } else { - data[first_word] = other.data[first_word] & start_mask; - size_t i = first_word + 1; -#ifdef BIT_ARRAY_USE_AVX - for (; i + WORDS_PER_AVX <= last_word; i += WORDS_PER_AVX) { - const __m256i current = - _mm256_loadu_si256(reinterpret_cast(&other.data[i])); - _mm256_storeu_si256(reinterpret_cast<__m256i*>(&data[i]), current); - } -#endif - for (; i < last_word; ++i) { - data[i] = other.data[i]; - } - data[last_word] = other.data[last_word] & end_mask; - } - } - - BitArray(const BitArray& other, const Range& range) - : BitArray(other, range.first, range.second) {} - - const_iterator begin() const { - return Iterator(*this, 0); - } - const_iterator end() const { - return Iterator(*this, N); - } - - inline constexpr void Set(size_t idx) { - data[idx / BITS_PER_WORD] |= (1ULL << (idx % BITS_PER_WORD)); - } - - inline constexpr void Unset(size_t idx) { - data[idx / BITS_PER_WORD] &= ~(1ULL << (idx % BITS_PER_WORD)); - } - - inline constexpr bool Get(size_t idx) const { - return (data[idx / BITS_PER_WORD] & (1ULL << (idx % BITS_PER_WORD))) != 0; - } - - inline void SetRange(size_t start, size_t end) { - if (start >= end || end > N) { - return; - } - const size_t first_word = start / BITS_PER_WORD; - const size_t last_word = (end - 1) / BITS_PER_WORD; - const size_t start_bit = start % BITS_PER_WORD; - const size_t end_bit = (end - 1) % BITS_PER_WORD; - const u64 start_mask = ~((1ULL << start_bit) - 1); - const u64 end_mask = end_bit == BITS_PER_WORD - 1 ? ~0ULL : (1ULL << (end_bit + 1)) - 1; - if (first_word == last_word) { - data[first_word] |= start_mask & end_mask; - } else { - data[first_word] |= start_mask; - size_t i = first_word + 1; -#ifdef BIT_ARRAY_USE_AVX - const __m256i value = _mm256_set1_epi64x(-1); - for (; i + WORDS_PER_AVX <= last_word; i += WORDS_PER_AVX) { - _mm256_storeu_si256(reinterpret_cast<__m256i*>(&data[i]), value); - } -#endif - for (; i < last_word; ++i) { - data[i] = ~0ULL; - } - data[last_word] |= end_mask; - } - } - - inline void UnsetRange(size_t start, size_t end) { - if (start >= end || end > N) { - return; - } - size_t first_word = start / BITS_PER_WORD; - const size_t last_word = (end - 1) / BITS_PER_WORD; - const size_t start_bit = start % BITS_PER_WORD; - const size_t end_bit = (end - 1) % BITS_PER_WORD; - const u64 start_mask = (1ULL << start_bit) - 1; - const u64 end_mask = end_bit == BITS_PER_WORD - 1 ? 0ULL : ~((1ULL << (end_bit + 1)) - 1); - if (first_word == last_word) { - data[first_word] &= start_mask | end_mask; - } else { - data[first_word] &= start_mask; - size_t i = first_word + 1; -#ifdef BIT_ARRAY_USE_AVX - const __m256i value = _mm256_setzero_si256(); - for (; i + WORDS_PER_AVX <= last_word; i += WORDS_PER_AVX) { - _mm256_storeu_si256(reinterpret_cast<__m256i*>(&data[i]), value); - } -#endif - for (; i < last_word; ++i) { - data[i] = 0ULL; - } - data[last_word] &= end_mask; - } - } - - inline constexpr void SetRange(const Range& range) { - SetRange(range.first, range.second); - } - - inline constexpr void UnsetRange(const Range& range) { - UnsetRange(range.first, range.second); - } - - inline constexpr void Clear() { - data.fill(0); - } - - inline constexpr void Fill() { - data.fill(~0ULL); - } - - inline constexpr bool None() const { - u64 result = 0; - for (const auto& word : data) { - result |= word; - } - return result == 0; - } - - inline constexpr bool Any() const { - return !None(); - } - - Range FirstRangeFrom(size_t start) const { - if (start >= N) { - return {N, N}; - } - const auto find_end_bit = [&](size_t word) { -#ifdef BIT_ARRAY_USE_AVX - const __m256i all_one = _mm256_set1_epi64x(-1); - for (; word + WORDS_PER_AVX <= WORD_COUNT; word += WORDS_PER_AVX) { - const __m256i current = - _mm256_loadu_si256(reinterpret_cast(&data[word])); - const __m256i cmp = _mm256_cmpeq_epi64(current, all_one); - if (_mm256_movemask_epi8(cmp) != 0xFFFFFFFF) { - break; - } - } -#endif - for (; word < WORD_COUNT; ++word) { - if (data[word] != ~0ULL) { - return (word * BITS_PER_WORD) + std::countr_one(data[word]); - } - } - return N; - }; - - const auto word_bits = [&](size_t index, u64 word) { - const int empty_bits = std::countr_zero(word); - const int ones_count = std::countr_one(word >> empty_bits); - const size_t start_bit = index * BITS_PER_WORD + empty_bits; - if (ones_count + empty_bits < BITS_PER_WORD) { - return Range{start_bit, start_bit + ones_count}; - } - return Range{start_bit, find_end_bit(index + 1)}; - }; - - const size_t start_word = start / BITS_PER_WORD; - const size_t start_bit = start % BITS_PER_WORD; - const u64 masked_first = data[start_word] & (~((1ULL << start_bit) - 1)); - if (masked_first) { - return word_bits(start_word, masked_first); - } - - size_t word = start_word + 1; -#ifdef BIT_ARRAY_USE_AVX - for (; word + WORDS_PER_AVX <= WORD_COUNT; word += WORDS_PER_AVX) { - const __m256i current = - _mm256_loadu_si256(reinterpret_cast(&data[word])); - if (!_mm256_testz_si256(current, current)) { - break; - } - } -#endif - for (; word < WORD_COUNT; ++word) { - if (data[word] != 0) { - return word_bits(word, data[word]); - } - } - return {N, N}; - } - - inline constexpr Range FirstRange() const { - return FirstRangeFrom(0); - } - - Range LastRangeFrom(size_t end) const { - if (end == 0) { - return {0, 0}; - } - if (end > N) { - end = N; - } - const auto find_start_bit = [&](size_t word) { -#ifdef BIT_ARRAY_USE_AVX - const __m256i all_zero = _mm256_setzero_si256(); - for (; word >= WORDS_PER_AVX; word -= WORDS_PER_AVX) { - const __m256i current = _mm256_loadu_si256( - reinterpret_cast(&data[word - WORDS_PER_AVX])); - const __m256i cmp = _mm256_cmpeq_epi64(current, all_zero); - if (_mm256_movemask_epi8(cmp) != 0xFFFFFFFF) { - break; - } - } -#endif - for (; word > 0; --word) { - if (data[word - 1] != ~0ULL) { - return word * BITS_PER_WORD - std::countl_one(data[word - 1]); - } - } - return size_t(0); - }; - const auto word_bits = [&](size_t index, u64 word) { - const int empty_bits = std::countl_zero(word); - const int ones_count = std::countl_one(word << empty_bits); - const size_t end_bit = index * BITS_PER_WORD - empty_bits; - if (empty_bits + ones_count < BITS_PER_WORD) { - return Range{end_bit - ones_count, end_bit}; - } - return Range{find_start_bit(index - 1), end_bit}; - }; - const size_t end_word = ((end - 1) / BITS_PER_WORD) + 1; - const size_t end_bit = (end - 1) % BITS_PER_WORD; - u64 masked_last = data[end_word - 1]; - if (end_bit < BITS_PER_WORD - 1) { - masked_last &= (1ULL << (end_bit + 1)) - 1; - } - if (masked_last) { - return word_bits(end_word, masked_last); - } - size_t word = end_word - 1; -#ifdef BIT_ARRAY_USE_AVX - for (; word >= WORDS_PER_AVX; word -= WORDS_PER_AVX) { - const __m256i current = - _mm256_loadu_si256(reinterpret_cast(&data[word - WORDS_PER_AVX])); - if (!_mm256_testz_si256(current, current)) { - break; - } - } -#endif - for (; word > 0; --word) { - if (data[word - 1] != 0) { - return word_bits(word, data[word - 1]); - } - } - return {0, 0}; - } - - inline constexpr Range LastRange() const { - return LastRangeFrom(N); - } - - inline constexpr size_t Size() const { - return N; - } - - inline constexpr BitArray& operator|=(const BitArray& other) { - for (size_t i = 0; i < WORD_COUNT; ++i) { - data[i] |= other.data[i]; - } - return *this; - } - - inline constexpr BitArray& operator&=(const BitArray& other) { - for (size_t i = 0; i < WORD_COUNT; ++i) { - data[i] &= other.data[i]; - } - return *this; - } - - inline constexpr BitArray& operator^=(const BitArray& other) { - for (size_t i = 0; i < WORD_COUNT; ++i) { - data[i] ^= other.data[i]; - } - return *this; - } - - inline constexpr BitArray operator|(const BitArray& other) const { - BitArray result = *this; - result |= other; - return result; - } - - inline constexpr BitArray operator&(const BitArray& other) const { - BitArray result = *this; - result &= other; - return result; - } - - inline constexpr BitArray operator^(const BitArray& other) const { - BitArray result = *this; - result ^= other; - return result; - } - - inline constexpr BitArray operator~() const { - BitArray result = *this; - for (size_t i = 0; i < WORD_COUNT; ++i) { - result.data[i] = ~result.data[i]; - } - return result; - } - - inline constexpr bool operator==(const BitArray& other) const { - u64 result = 0; - for (size_t i = 0; i < WORD_COUNT; ++i) { - result |= data[i] ^ other.data[i]; - } - return result == 0; - } - - inline constexpr bool operator!=(const BitArray& other) const { - return !(*this == other); - } - -private: - std::array data{}; -}; - -} // namespace Common diff --git a/src/common/range_lock.h b/src/common/range_lock.h deleted file mode 100644 index efe6eb549..000000000 --- a/src/common/range_lock.h +++ /dev/null @@ -1,101 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include -#include - -namespace Common { - -// From boost thread locking - -template -struct RangeLockGuard { - Iterator begin; - Iterator end; - - RangeLockGuard(Iterator begin_, Iterator end_) : begin(begin_), end(end_) { - LockRange(begin, end); - } - - void release() { - begin = end; - } - - ~RangeLockGuard() { - for (; begin != end; ++begin) { - begin->unlock(); - } - } -}; - -template -Iterator TryLockRange(Iterator begin, Iterator end) { - using LockType = typename std::iterator_traits::value_type; - - if (begin == end) { - return end; - } - - std::unique_lock guard(*begin, std::try_to_lock); - if (!guard.owns_lock()) { - return begin; - } - - Iterator failed = TryLockRange(++begin, end); - if (failed == end) { - guard.release(); - } - - return failed; -} - -template -void LockRange(Iterator begin, Iterator end) { - using LockType = typename std::iterator_traits::value_type; - - if (begin == end) { - return; - } - - bool start_with_begin = true; - Iterator second = begin; - ++second; - Iterator next = second; - - while (true) { - std::unique_lock begin_lock(*begin, std::defer_lock); - if (start_with_begin) { - begin_lock.lock(); - - const Iterator failed_lock = TryLockRange(next, end); - if (failed_lock == end) { - begin_lock.release(); - return; - } - - start_with_begin = false; - next = failed_lock; - } else { - RangeLockGuard guard(next, end); - - if (begin_lock.try_lock()) { - const Iterator failed_lock = TryLockRange(second, next); - if (failed_lock == next) { - begin_lock.release(); - guard.release(); - return; - } - - start_with_begin = false; - next = failed_lock; - } else { - start_with_begin = true; - next = second; - } - } - } -} - -} // namespace Common \ No newline at end of file diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 04c473f1b..acc9e1624 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -5,6 +5,7 @@ #include #include "common/alignment.h" #include "common/debug.h" +#include "common/div_ceil.h" #include "common/scope_exit.h" #include "common/types.h" #include "core/memory.h" @@ -212,10 +213,7 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si memory->TryWriteBacking(std::bit_cast(copy_device_addr), download + dst_offset, copy.size); } - memory_tracker->UnmarkRegionAsGpuModified(device_addr, size); - if (is_write) { - memory_tracker->MarkRegionAsCpuModified(device_addr, size); - } + memory_tracker->UnmarkRegionAsGpuModified(device_addr, size, is_write); }; if constexpr (async) { scheduler.DeferOperation(write_data); @@ -497,10 +495,6 @@ bool BufferCache::IsRegionRegistered(VAddr addr, size_t size) { return buffer_ranges.Intersects(addr, size); } -bool BufferCache::IsRegionCpuModified(VAddr addr, size_t size) { - return memory_tracker->IsRegionCpuModified(addr, size); -} - bool BufferCache::IsRegionGpuModified(VAddr addr, size_t size) { return memory_tracker->IsRegionGpuModified(addr, size); } diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index ccf77b4f5..c42807914 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -142,9 +142,6 @@ public: /// Return true when a region is registered on the cache [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size); - /// Return true when a CPU region is modified from the CPU - [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); - /// Return true when a CPU region is modified from the GPU [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); diff --git a/src/video_core/buffer_cache/memory_tracker.h b/src/video_core/buffer_cache/memory_tracker.h index ec0878c3b..a6542d260 100644 --- a/src/video_core/buffer_cache/memory_tracker.h +++ b/src/video_core/buffer_cache/memory_tracker.h @@ -5,9 +5,7 @@ #include #include -#include #include -#include "common/debug.h" #include "common/types.h" #include "video_core/buffer_cache/region_manager.h" @@ -15,148 +13,109 @@ namespace VideoCore { class MemoryTracker { public: - static constexpr size_t MAX_CPU_PAGE_BITS = 40; - static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - TRACKER_HIGHER_PAGE_BITS); - static constexpr size_t MANAGER_POOL_SIZE = 32; + static constexpr u64 MAX_CPU_PAGE_BITS = 40; + static constexpr u64 NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS); + static constexpr u64 MANAGER_POOL_SIZE = 32; public: explicit MemoryTracker(PageManager& tracker_) : tracker{&tracker_} {} ~MemoryTracker() = default; - /// Returns true if a region has been modified from the CPU - bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept { - return IteratePages( - query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) { - std::scoped_lock lk{manager->lock}; - return manager->template IsRegionModified(offset, size); - }); - } - /// Returns true if a region has been modified from the GPU - bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept { - return IteratePages( - query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) { - std::scoped_lock lk{manager->lock}; - return manager->template IsRegionModified(offset, size); - }); - } - - /// Mark region as CPU modified, notifying the device_tracker about this change - void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { - IteratePages(dirty_cpu_addr, query_size, - [](RegionManager* manager, u64 offset, size_t size) { - std::scoped_lock lk{manager->lock}; - manager->template ChangeRegionState( - manager->GetCpuAddr() + offset, size); - }); + bool IsRegionGpuModified(VAddr cpu_addr, u64 size) noexcept { + return IteratePages(cpu_addr, size, [](RegionManager* manager, u64 offset, u64 size) { + return manager->template IsRegionModified(offset, size); + }); } /// Unmark region as modified from the host GPU - void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept { - IteratePages(dirty_cpu_addr, query_size, - [](RegionManager* manager, u64 offset, size_t size) { - std::scoped_lock lk{manager->lock}; - manager->template ChangeRegionState( - manager->GetCpuAddr() + offset, size); - }); + void UnmarkRegionAsGpuModified(VAddr cpu_addr, u64 size, bool is_write) noexcept { + IteratePages(cpu_addr, size, [is_write](RegionManager* manager, u64 offset, u64 size) { + if (is_write) { + manager->template ChangeRegionState(offset, size); + manager->template ChangeRegionState(offset, size); + } else { + manager->template ChangeRegionState(offset, size); + } + }); } /// Removes all protection from a page and ensures GPU data has been flushed if requested void InvalidateRegion(VAddr cpu_addr, u64 size, auto&& on_flush) noexcept { - IteratePages( - cpu_addr, size, [&on_flush](RegionManager* manager, u64 offset, size_t size) { - const bool should_flush = [&] { - // Perform both the GPU modification check and CPU state change with the lock - // in case we are racing with GPU thread trying to mark the page as GPU - // modified. If we need to flush the flush function is going to perform CPU - // state change. - std::scoped_lock lk{manager->lock}; - if (Config::readbacks() && - manager->template IsRegionModified(offset, size)) { - return true; - } - manager->template ChangeRegionState( - manager->GetCpuAddr() + offset, size); - return false; - }(); - if (should_flush) { - on_flush(); - } - }); + IteratePages(cpu_addr, size, [&on_flush](RegionManager* manager, u64 offset, u64 size) { + const bool should_flush = [&] { + // TODO + /*std::scoped_lock lk{manager->lock}; + if (Config::readbacks() && + manager->template IsRegionModified(offset, size)) { + return true; + }*/ + manager->template ChangeRegionState(offset, size); + return false; + }(); + if (should_flush) { + on_flush(); + } + }); } /// Call 'func' for each CPU modified range and unmark those pages as CPU modified - void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, bool is_written, auto&& func, + void ForEachUploadRange(VAddr cpu_addr, u64 size, bool is_written, auto&& func, auto&& on_upload) { - IteratePages(query_cpu_range, query_size, - [&func, is_written](RegionManager* manager, u64 offset, size_t size) { - manager->lock.lock(); - manager->template ForEachModifiedRange( - manager->GetCpuAddr() + offset, size, func); - if (!is_written) { - manager->lock.unlock(); - } - }); + IteratePages( + cpu_addr, size, [&func, is_written](RegionManager* manager, u64 offset, u64 size) { + if (is_written) { + manager->template ForEachModifiedRange( + offset, size, func); + } else { + manager->template ForEachModifiedRange( + offset, size, func); + } + }); on_upload(); if (!is_written) { return; } - IteratePages(query_cpu_range, query_size, - [&func, is_written](RegionManager* manager, u64 offset, size_t size) { - manager->template ChangeRegionState( - manager->GetCpuAddr() + offset, size); - manager->lock.unlock(); - }); + IteratePages(cpu_addr, size, [&func](RegionManager* manager, u64 offset, u64 size) { + manager->template ChangeRegionState(offset, size); + }); } /// Call 'func' for each GPU modified range and unmark those pages as GPU modified template - void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, auto&& func) { - IteratePages(query_cpu_range, query_size, - [&func](RegionManager* manager, u64 offset, size_t size) { - std::scoped_lock lk{manager->lock}; - manager->template ForEachModifiedRange( - manager->GetCpuAddr() + offset, size, func); - }); + void ForEachDownloadRange(VAddr cpu_addr, u64 size, auto&& func) { + IteratePages(cpu_addr, size, [&func](RegionManager* manager, u64 offset, u64 size) { + manager->template ForEachModifiedRange(offset, size, + func); + }); } private: - /** - * @brief IteratePages Iterates L2 word manager page table. - * @param cpu_address Start byte cpu address - * @param size Size in bytes of the region of iterate. - * @param func Callback for each word manager. - * @return - */ - template - bool IteratePages(VAddr cpu_address, size_t size, Func&& func) { - RENDERER_TRACE; + template + bool IteratePages(VAddr cpu_address, u64 size, Func&& func) { using FuncReturn = typename std::invoke_result::type; static constexpr bool BOOL_BREAK = std::is_same_v; - std::size_t remaining_size{size}; - std::size_t page_index{cpu_address >> TRACKER_HIGHER_PAGE_BITS}; - u64 page_offset{cpu_address & TRACKER_HIGHER_PAGE_MASK}; + u64 remaining_size = size; + u64 page_index = cpu_address >> HIGHER_PAGE_BITS; + u64 page_offset = cpu_address & HIGHER_PAGE_MASK; while (remaining_size > 0) { - const std::size_t copy_amount{ - std::min(TRACKER_HIGHER_PAGE_SIZE - page_offset, remaining_size)}; - auto* manager{top_tier[page_index]}; - if (manager) { + const u64 copy_amount = std::min(HIGHER_PAGE_SIZE - page_offset, remaining_size); + if (auto* region = top_tier[page_index]; region) { if constexpr (BOOL_BREAK) { - if (func(manager, page_offset, copy_amount)) { + if (func(region, page_offset, copy_amount)) { return true; } } else { - func(manager, page_offset, copy_amount); + func(region, page_offset, copy_amount); } - } else if constexpr (create_region_on_fail) { - CreateRegion(page_index); - manager = top_tier[page_index]; + } else if (create_region_on_fail) { + region = CreateRegion(page_index); if constexpr (BOOL_BREAK) { - if (func(manager, page_offset, copy_amount)) { + if (func(region, page_offset, copy_amount)) { return true; } } else { - func(manager, page_offset, copy_amount); + func(region, page_offset, copy_amount); } } page_index++; @@ -166,8 +125,8 @@ private: return false; } - void CreateRegion(std::size_t page_index) { - const VAddr base_cpu_addr = page_index << TRACKER_HIGHER_PAGE_BITS; + RegionManager* CreateRegion(u64 page_index) { + const VAddr base_cpu_addr = page_index << HIGHER_PAGE_BITS; if (free_managers.empty()) { manager_pool.emplace_back(); auto& last_pool = manager_pool.back(); @@ -176,11 +135,11 @@ private: free_managers.push_back(&last_pool[i]); } } - // Each manager tracks a 4_MB virtual address space. auto* new_manager = free_managers.back(); new_manager->SetCpuAddress(base_cpu_addr); free_managers.pop_back(); top_tier[page_index] = new_manager; + return new_manager; } PageManager* tracker; diff --git a/src/video_core/buffer_cache/region_definitions.h b/src/video_core/buffer_cache/region_definitions.h index 260047d40..e6d0bf4c4 100644 --- a/src/video_core/buffer_cache/region_definitions.h +++ b/src/video_core/buffer_cache/region_definitions.h @@ -3,24 +3,70 @@ #pragma once -#include "common/bit_array.h" +#include #include "common/types.h" namespace VideoCore { -constexpr u64 TRACKER_PAGE_BITS = 12; // 4K pages -constexpr u64 TRACKER_BYTES_PER_PAGE = 1ULL << TRACKER_PAGE_BITS; +constexpr u64 PAGES_PER_WORD = 64; +constexpr u64 BYTES_PER_PAGE = 4_KB; +constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE; -constexpr u64 TRACKER_HIGHER_PAGE_BITS = 22; // each region is 4MB -constexpr u64 TRACKER_HIGHER_PAGE_SIZE = 1ULL << TRACKER_HIGHER_PAGE_BITS; -constexpr u64 TRACKER_HIGHER_PAGE_MASK = TRACKER_HIGHER_PAGE_SIZE - 1ULL; -constexpr u64 NUM_PAGES_PER_REGION = TRACKER_HIGHER_PAGE_SIZE / TRACKER_BYTES_PER_PAGE; +constexpr u64 HIGHER_PAGE_BITS = 24; +constexpr u64 HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS; +constexpr u64 HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL; +constexpr u64 NUM_REGION_WORDS = HIGHER_PAGE_SIZE / BYTES_PER_WORD; -enum class Type { - CPU, - GPU, +enum class Type : u8 { + CPU = 1 << 0, + GPU = 1 << 1, }; -using RegionBits = Common::BitArray; +enum class LockOp : u8 { + Lock = 1 << 0, + Unlock = 1 << 1, + Both = Lock | Unlock, +}; + +constexpr bool operator&(LockOp a, LockOp b) noexcept { + return static_cast(a) & static_cast(b); +} + +constexpr LockOp operator|(LockOp a, LockOp b) noexcept { + return static_cast(static_cast(a) | static_cast(b)); +} + +struct Bounds { + u64 start_word; + u64 start_page; + u64 end_word; + u64 end_page; +}; + +constexpr Bounds MIN_BOUNDS = { + .start_word = NUM_REGION_WORDS - 1, + .start_page = PAGES_PER_WORD - 1, + .end_word = 0, + .end_page = 0, +}; + +struct RegionBits { + using AtomicT = std::atomic; + + constexpr void Fill(u64 value) { + data.fill(value); + } + + constexpr bool GetPage(u64 page) const { + return data[page / PAGES_PER_WORD] & (1ULL << (page % PAGES_PER_WORD)); + } + + constexpr AtomicT& operator[](u64 index) { + return reinterpret_cast(data[index]); + } + +private: + alignas(64) std::array data; +}; } // namespace VideoCore diff --git a/src/video_core/buffer_cache/region_manager.h b/src/video_core/buffer_cache/region_manager.h index 608b16fb3..cbe3474ab 100644 --- a/src/video_core/buffer_cache/region_manager.h +++ b/src/video_core/buffer_cache/region_manager.h @@ -4,39 +4,24 @@ #pragma once #include "common/config.h" -#include "common/div_ceil.h" -#include "common/logging/log.h" - -#ifdef __linux__ -#include "common/adaptive_mutex.h" -#else -#include "common/spin_lock.h" -#endif -#include "common/debug.h" #include "common/types.h" #include "video_core/buffer_cache/region_definitions.h" #include "video_core/page_manager.h" namespace VideoCore { -#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP -using LockType = Common::AdaptiveMutex; -#else -using LockType = Common::SpinLock; -#endif - /** - * Allows tracking CPU and GPU modification of pages in a contigious 16MB virtual address region. + * Allows tracking CPU and GPU modification of pages in a contigious virtual address region. * Information is stored in bitsets for spacial locality and fast update of single pages. */ class RegionManager { public: explicit RegionManager(PageManager* tracker_, VAddr cpu_addr_) : tracker{tracker_}, cpu_addr{cpu_addr_} { - cpu.Fill(); - gpu.Clear(); - writeable.Fill(); - readable.Fill(); + cpu.Fill(~0ULL); + gpu.Fill(0ULL); + writeable.Fill(~0ULL); + readable.Fill(~0ULL); } explicit RegionManager() = default; @@ -44,12 +29,202 @@ public: cpu_addr = new_cpu_addr; } - VAddr GetCpuAddr() const { - return cpu_addr; + static constexpr Bounds GetBounds(VAddr address, u64 size) { + const u64 end_address = address + size + BYTES_PER_PAGE - 1ULL; + return Bounds{ + .start_word = address / BYTES_PER_WORD, + .start_page = (address % BYTES_PER_WORD) / BYTES_PER_PAGE, + .end_word = end_address / BYTES_PER_WORD, + .end_page = (end_address % BYTES_PER_WORD) / BYTES_PER_PAGE, + }; } - static constexpr size_t SanitizeAddress(size_t address) { - return static_cast(std::max(static_cast(address), 0LL)); + static constexpr std::pair GetMasks(u64 start_page, u64 end_page) { + const u64 start_mask = ~((1ULL << start_page) - 1); + const u64 end_mask = (1ULL << end_page) - 1; + return std::make_pair(start_mask, end_mask); + } + + static constexpr void IterateWords(Bounds bounds, auto&& func) { + const auto [start_word, start_page, end_word, end_page] = bounds; + const auto [start_mask, end_mask] = GetMasks(start_page, end_page); + if (start_word == end_word) [[likely]] { + func(start_word, start_mask & end_mask); + } else { + func(start_word, start_mask); + for (s64 i = start_word + 1; i < end_word; ++i) { + func(i, ~0ULL); + } + if (end_mask) { + func(end_word, end_mask); + } + } + } + + static constexpr void IteratePages(u64 word, auto&& func) { + u64 offset{}; + while (word != 0) { + const u64 empty_bits = std::countr_zero(word); + offset += empty_bits; + word >>= empty_bits; + const u64 set_bits = std::countr_one(word); + func(offset, set_bits); + word = set_bits < PAGES_PER_WORD ? (word >> set_bits) : 0; + offset += set_bits; + } + } + + template + void ChangeRegionState(u64 offset, u64 size) { + auto& state = GetRegionBits(); + RegionBits prot; + bool update_watchers{}; + auto bounds = GetBounds(offset, size); + auto watcher_bounds = MIN_BOUNDS; + IterateWords(bounds, [&](u64 index, u64 mask) { + if constexpr (lock_op & LockOp::Lock) { + LockWord(index, mask); + } + if constexpr (enable) { + state[index] |= mask; + } else { + state[index] &= ~mask; + } + update_watchers |= UpdateProtection(prot, watcher_bounds, index, mask); + }); + constexpr bool is_gpu = type == Type::GPU; + if (update_watchers && (Config::readbacks() || !is_gpu)) { + constexpr bool track = is_gpu ? enable : !enable; + tracker->UpdatePageWatchersForRegion(cpu_addr, watcher_bounds, prot); + } + if constexpr (lock_op & LockOp::Unlock) { + IterateWords(bounds, [&](u64 index, u64 mask) { UnlockWord(index, mask); }); + } + } + + template + void ForEachModifiedRange(u64 offset, s64 size, auto&& func) { + auto& state = GetRegionBits(); + RegionBits prot; + bool update_watchers{}; + u64 start_page{}; + u64 end_page{}; + auto bounds = GetBounds(offset, size); + auto watcher_bounds = MIN_BOUNDS; + IterateWords(bounds, [&](u64 index, u64 mask) { + if constexpr (lock_op & LockOp::Lock) { + LockWord(index, mask); + } + const u64 word = state[index] & mask; + const u64 base_page = index * PAGES_PER_WORD; + IteratePages(word, [&](u64 pages_offset, u64 pages_size) { + if (end_page) { + if (end_page == base_page + pages_offset) { + end_page += pages_size; + return; + } + func(cpu_addr + start_page * BYTES_PER_PAGE, + (end_page - start_page) * BYTES_PER_PAGE); + } + start_page = base_page + pages_offset; + end_page = start_page + pages_size; + }); + if constexpr (clear) { + state[index] &= ~mask; + update_watchers |= UpdateProtection(prot, watcher_bounds, index, mask); + } + }); + if (end_page) { + func(cpu_addr + start_page * BYTES_PER_PAGE, (end_page - start_page) * BYTES_PER_PAGE); + } + constexpr bool is_gpu = type == Type::GPU; + if (update_watchers) { + tracker->UpdatePageWatchersForRegion(cpu_addr, watcher_bounds, prot); + } + if constexpr (lock_op & LockOp::Unlock) { + IterateWords(bounds, [&](u64 index, u64 mask) { UnlockWord(index, mask); }); + } + } + + template + bool IsRegionModified(u64 offset, u64 size) noexcept { + auto& state = GetRegionBits(); + const auto [start_word, start_page, end_word, end_page] = GetBounds(offset, size); + const auto [start_mask, end_mask] = GetMasks(start_page, end_page); + if (start_word == end_word) [[likely]] { + return state[start_word] & (start_mask & end_mask); + } else { + if (state[start_word] & start_mask) { + return true; + } + for (s64 i = start_word + 1; i < end_word; ++i) { + if (state[i]) { + return true; + } + } + if (state[end_word] & end_mask) { + return true; + } + return false; + } + } + +private: + template + bool UpdateProtection(RegionBits& prot, Bounds& bounds, u64 index, u64 mask) { + if constexpr (type == Type::CPU) { + const u64 perm = writeable[index]; + if constexpr (clear) { + writeable[index] &= ~mask; + } else { + writeable[index] |= mask; + } + prot[index] = (cpu[index] ^ perm) & mask; + } else { + const u64 perm = readable[index]; + if constexpr (clear) { + readable[index] |= mask; + } else { + readable[index] &= ~mask; + } + prot[index] = (~gpu[index] ^ perm) & mask; + } + const u64 prot_word = prot[index]; + if (prot_word) { + if (index <= bounds.start_word) { + bounds.start_word = index; + bounds.start_page = std::countr_zero(prot_word); + } + if (index >= bounds.end_word) { + bounds.end_word = index; + bounds.end_page = PAGES_PER_WORD - std::countl_zero(prot_word) - 1; + } + return true; + } + return false; + } + + void LockWord(u64 index, u64 mask) { + auto& lock = locks[index]; + u64 current_lock = lock.load(); + u64 new_lock; + do { + while (current_lock & mask) { + lock.wait(current_lock); + current_lock = lock.load(); + } + new_lock = current_lock | mask; + } while (!lock.compare_exchange_weak(current_lock, new_lock)); + } + + void UnlockWord(u64 index, u64 mask) { + auto& lock = locks[index]; + u64 current_lock = lock.load(); + u64 new_lock; + do { + new_lock = current_lock & ~mask; + } while (!lock.compare_exchange_weak(current_lock, new_lock)); + lock.notify_all(); } template @@ -61,135 +236,14 @@ public: } } - template - const RegionBits& GetRegionBits() const noexcept { - if constexpr (type == Type::CPU) { - return cpu; - } else if constexpr (type == Type::GPU) { - return gpu; - } - } - - /** - * Change the state of a range of pages - * - * @param dirty_addr Base address to mark or unmark as modified - * @param size Size in bytes to mark or unmark as modified - */ - template - void ChangeRegionState(u64 dirty_addr, u64 size) noexcept(type == Type::GPU) { - RENDERER_TRACE; - const size_t offset = dirty_addr - cpu_addr; - const size_t start_page = SanitizeAddress(offset) / TRACKER_BYTES_PER_PAGE; - const size_t end_page = - Common::DivCeil(SanitizeAddress(offset + size), TRACKER_BYTES_PER_PAGE); - if (start_page >= NUM_PAGES_PER_REGION || end_page <= start_page) { - return; - } - - RegionBits& bits = GetRegionBits(); - if constexpr (enable) { - bits.SetRange(start_page, end_page); - } else { - bits.UnsetRange(start_page, end_page); - } - if constexpr (type == Type::CPU) { - UpdateProtection(); - } else if (Config::readbacks()) { - UpdateProtection(); - } - } - - /** - * Loop over each page in the given range, turn off those bits and notify the tracker if - * needed. Call the given function on each turned off range. - * - * @param query_cpu_range Base CPU address to loop over - * @param size Size in bytes of the CPU range to loop over - * @param func Function to call for each turned off region - */ - template - void ForEachModifiedRange(VAddr query_cpu_range, s64 size, auto&& func) { - RENDERER_TRACE; - const size_t offset = query_cpu_range - cpu_addr; - const size_t start_page = SanitizeAddress(offset) / TRACKER_BYTES_PER_PAGE; - const size_t end_page = - Common::DivCeil(SanitizeAddress(offset + size), TRACKER_BYTES_PER_PAGE); - if (start_page >= NUM_PAGES_PER_REGION || end_page <= start_page) { - return; - } - - RegionBits& bits = GetRegionBits(); - RegionBits mask(bits, start_page, end_page); - - if constexpr (clear) { - bits.UnsetRange(start_page, end_page); - if constexpr (type == Type::CPU) { - UpdateProtection(); - } else if (Config::readbacks()) { - UpdateProtection(); - } - } - - for (const auto& [start, end] : mask) { - func(cpu_addr + start * TRACKER_BYTES_PER_PAGE, (end - start) * TRACKER_BYTES_PER_PAGE); - } - } - - /** - * Returns true when a region has been modified - * - * @param offset Offset in bytes from the start of the buffer - * @param size Size in bytes of the region to query for modifications - */ - template - [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) noexcept { - RENDERER_TRACE; - const size_t start_page = SanitizeAddress(offset) / TRACKER_BYTES_PER_PAGE; - const size_t end_page = - Common::DivCeil(SanitizeAddress(offset + size), TRACKER_BYTES_PER_PAGE); - if (start_page >= NUM_PAGES_PER_REGION || end_page <= start_page) { - return false; - } - - const RegionBits& bits = GetRegionBits(); - RegionBits test(bits, start_page, end_page); - return test.Any(); - } - - LockType lock; - -private: - /** - * Notify tracker about changes in the CPU tracking state of a word in the buffer - * - * @param word_index Index to the word to notify to the tracker - * @param current_bits Current state of the word - * @param new_bits New state of the word - * - * @tparam track True when the tracker should start tracking the new pages - */ - template - void UpdateProtection() { - RENDERER_TRACE; - RegionBits mask = is_read ? (~gpu ^ readable) : (cpu ^ writeable); - if (mask.None()) { - return; - } - if constexpr (is_read) { - readable = ~gpu; - } else { - writeable = cpu; - } - tracker->UpdatePageWatchersForRegion(cpu_addr, mask); - } - - PageManager* tracker; - VAddr cpu_addr = 0; RegionBits cpu; RegionBits gpu; RegionBits writeable; RegionBits readable; + + PageManager* tracker; + std::array, NUM_REGION_WORDS> locks{}; + VAddr cpu_addr{}; }; } // namespace VideoCore diff --git a/src/video_core/page_manager.cpp b/src/video_core/page_manager.cpp index 2bf16afe0..7c0fc218f 100644 --- a/src/video_core/page_manager.cpp +++ b/src/video_core/page_manager.cpp @@ -1,11 +1,9 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include #include "common/assert.h" #include "common/debug.h" #include "common/div_ceil.h" -#include "common/range_lock.h" #include "common/signal_context.h" #include "core/memory.h" #include "core/signals.h" @@ -14,7 +12,6 @@ #ifndef _WIN64 #include -#include "common/adaptive_mutex.h" #ifdef ENABLE_USERFAULTFD #include #include @@ -25,26 +22,17 @@ #endif #else #include -#include "common/spin_lock.h" -#endif - -#ifdef __linux__ -#include "common/adaptive_mutex.h" -#else -#include "common/spin_lock.h" #endif namespace VideoCore { -constexpr size_t PAGE_SIZE = 4_KB; -constexpr size_t PAGE_BITS = 12; - struct PageManager::Impl { struct PageState { - u8 num_write_watchers : 7; - // At the moment only buffer cache can request read watchers. - // And buffers cannot overlap, thus only 1 can exist per page. + u8 num_write_watchers : 6; u8 num_read_watchers : 1; + u8 locked : 1; + + using LockT = std::atomic; Core::MemoryPermission WritePerm() const noexcept { return num_write_watchers == 0 ? Core::MemoryPermission::Write @@ -60,25 +48,56 @@ struct PageManager::Impl { return ReadPerm() | WritePerm(); } - template - u8 AddDelta() { + void Lock() { + auto* lock = reinterpret_cast(this); + PageState current_state = lock->load(); + PageState new_state; + do { + while (current_state.locked) { + lock->wait(current_state); + current_state = lock->load(); + } + new_state = current_state; + new_state.locked = 1; + } while (!lock->compare_exchange_weak(current_state, new_state)); + } + + void Unlock() { + auto* lock = reinterpret_cast(this); + PageState current_state = lock->load(); + PageState new_state; + do { + new_state = current_state; + new_state.locked = 0; + } while (!lock->compare_exchange_weak(current_state, new_state)); + lock->notify_all(); + } + + template + u8 GetPage() const { if constexpr (is_read) { - if constexpr (delta == 1) { + return num_read_watchers; + } else { + return num_write_watchers; + } + } + + template + u8 TouchPage() { + if constexpr (is_read) { + if constexpr (track) { + ASSERT_MSG(num_read_watchers == 0, "Too many watchers"); return ++num_read_watchers; - } else if (delta == -1) { + } else { ASSERT_MSG(num_read_watchers > 0, "Not enough watchers"); return --num_read_watchers; - } else { - return num_read_watchers; } } else { - if constexpr (delta == 1) { + if constexpr (track) { return ++num_write_watchers; - } else if (delta == -1) { + } else { ASSERT_MSG(num_write_watchers > 0, "Not enough watchers"); return --num_write_watchers; - } else { - return num_write_watchers; } } } @@ -86,8 +105,8 @@ struct PageManager::Impl { static constexpr size_t ADDRESS_BITS = 40; static constexpr size_t NUM_ADDRESS_PAGES = 1ULL << (40 - PAGE_BITS); - static constexpr size_t NUM_ADDRESS_LOCKS = NUM_ADDRESS_PAGES / PAGES_PER_LOCK; inline static Vulkan::Rasterizer* rasterizer; + #ifdef ENABLE_USERFAULTFD Impl(Vulkan::Rasterizer* rasterizer_) { rasterizer = rasterizer_; @@ -220,33 +239,24 @@ struct PageManager::Impl { template void UpdatePageWatchers(VAddr addr, u64 size) { - RENDERER_TRACE; - - size_t page = addr >> PAGE_BITS; + const u64 page_start = addr >> PAGE_BITS; const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE); - // Acquire locks for the range of pages - const auto lock_start = locks.begin() + (page / PAGES_PER_LOCK); - const auto lock_end = locks.begin() + Common::DivCeil(page_end, PAGES_PER_LOCK); - Common::RangeLockGuard lk(lock_start, lock_end); - - auto perms = cached_pages[page].Perms(); - u64 range_begin = 0; - u64 range_bytes = 0; - u64 potential_range_bytes = 0; + auto perms = cached_pages[page_start].Perms(); + u64 range_begin = page_start; + u64 range_pages = 0; + u64 potential_pages = 0; const auto release_pending = [&] { - if (range_bytes > 0) { - RENDERER_TRACE; - // Perform pending (un)protect action - Protect(range_begin << PAGE_BITS, range_bytes, perms); - range_bytes = 0; - potential_range_bytes = 0; + if (range_pages > 0) { + Protect(range_begin << PAGE_BITS, range_pages << PAGE_BITS, perms); + range_pages = 0; + potential_pages = 0; } }; // Iterate requested pages - const u64 aligned_addr = page << PAGE_BITS; + const u64 aligned_addr = page_start << PAGE_BITS; const u64 aligned_end = page_end << PAGE_BITS; if (!rasterizer->IsMapped(aligned_addr, aligned_end - aligned_addr)) { LOG_WARNING(Render, @@ -254,84 +264,84 @@ struct PageManager::Impl { aligned_addr, aligned_end); } - for (; page != page_end; ++page) { + for (u64 page = page_start; page != page_end; ++page) { + locks[page].lock(); + } + + for (u64 page = page_start; page != page_end; ++page) { PageState& state = cached_pages[page]; // Apply the change to the page state - const u8 new_count = state.AddDelta(); + const u8 new_count = state.TouchPage(); + const auto new_perms = state.Perms(); - if (auto new_perms = state.Perms(); new_perms != perms) [[unlikely]] { + if (new_perms != perms) [[unlikely]] { // If the protection changed add pending (un)protect action release_pending(); perms = new_perms; - } else if (range_bytes != 0) { - // If the protection did not change, extend the potential range - potential_range_bytes += PAGE_SIZE; + } else if (range_pages != 0) { + ++potential_pages; } // Only start a new range if the page must be (un)protected if ((new_count == 0 && !track) || (new_count == 1 && track)) { - if (range_bytes == 0) { + if (range_pages == 0) { // Start a new potential range range_begin = page; - potential_range_bytes = PAGE_SIZE; + potential_pages = 1; } // Extend current range up to potential range - range_bytes = potential_range_bytes; + range_pages = potential_pages; } } // Add pending (un)protect action release_pending(); + + for (u64 page = page_start; page != page_end; ++page) { + locks[page].unlock(); + } } template - void UpdatePageWatchersForRegion(VAddr base_addr, RegionBits& mask) { - RENDERER_TRACE; - auto start_range = mask.FirstRange(); - auto end_range = mask.LastRange(); + void UpdatePageWatchersForRegion(VAddr base_addr, const Bounds& bounds, RegionBits& mask) { + const u64 base_page = base_addr >> PAGE_BITS; + const u64 page_start = bounds.start_word * PAGES_PER_WORD + bounds.start_page; + const u64 page_end = bounds.end_word * PAGES_PER_WORD + bounds.end_page + 1; - if (start_range.second == end_range.second) { - // if all pages are contiguous, use the regular UpdatePageWatchers - const VAddr start_addr = base_addr + (start_range.first << PAGE_BITS); - const u64 size = (start_range.second - start_range.first) << PAGE_BITS; - return UpdatePageWatchers(start_addr, size); - } - - size_t base_page = (base_addr >> PAGE_BITS); - ASSERT(base_page % PAGES_PER_LOCK == 0); - std::scoped_lock lk(locks[base_page / PAGES_PER_LOCK]); - auto perms = cached_pages[base_page + start_range.first].Perms(); - u64 range_begin = 0; - u64 range_bytes = 0; - u64 potential_range_bytes = 0; + auto perms = cached_pages[base_page + page_start].Perms(); + u64 range_begin = base_page + page_start; + u64 range_pages = 0; + u64 potential_pages = 0; const auto release_pending = [&] { - if (range_bytes > 0) { - RENDERER_TRACE; - // Perform pending (un)protect action - Protect((range_begin << PAGE_BITS), range_bytes, perms); - range_bytes = 0; - potential_range_bytes = 0; + if (range_pages > 0) { + Protect(range_begin << PAGE_BITS, range_pages << PAGE_BITS, perms); + range_pages = 0; + potential_pages = 0; } }; - // Iterate pages - for (size_t page = start_range.first; page < end_range.second; ++page) { + for (u64 page = page_start; page != page_end; ++page) { + locks[base_page + page].lock(); + } + + for (u64 page = page_start; page != page_end; ++page) { PageState& state = cached_pages[base_page + page]; - const bool update = mask.Get(page); + const bool update = mask.GetPage(page); // Apply the change to the page state const u8 new_count = - update ? state.AddDelta() : state.AddDelta<0, is_read>(); + update ? state.TouchPage() : state.GetPage(); + const auto new_perms = state.Perms(); - if (auto new_perms = state.Perms(); new_perms != perms) [[unlikely]] { + if (new_perms != perms) [[unlikely]] { // If the protection changed add pending (un)protect action release_pending(); perms = new_perms; - } else if (range_bytes != 0) { + } else if (range_pages != 0) { // If the protection did not change, extend the potential range - potential_range_bytes += PAGE_SIZE; + ++potential_pages; } // If the page is not being updated, skip it @@ -341,27 +351,26 @@ struct PageManager::Impl { // If the page must be (un)protected if ((new_count == 0 && !track) || (new_count == 1 && track)) { - if (range_bytes == 0) { + if (range_pages == 0) { // Start a new potential range range_begin = base_page + page; - potential_range_bytes = PAGE_SIZE; + potential_pages = 1; } // Extend current rango up to potential range - range_bytes = potential_range_bytes; + range_pages = potential_pages; } } // Add pending (un)protect action release_pending(); + + for (u64 page = page_start; page != page_end; ++page) { + locks[base_page + page].unlock(); + } } std::array cached_pages{}; -#ifdef __linux__ - using LockType = Common::AdaptiveMutex; -#else - using LockType = Common::SpinLock; -#endif - std::array locks{}; + std::array locks; }; PageManager::PageManager(Vulkan::Rasterizer* rasterizer_) @@ -383,19 +392,24 @@ void PageManager::UpdatePageWatchers(VAddr addr, u64 size) const { } template -void PageManager::UpdatePageWatchersForRegion(VAddr base_addr, RegionBits& mask) const { - impl->UpdatePageWatchersForRegion(base_addr, mask); +void PageManager::UpdatePageWatchersForRegion(VAddr base_addr, const Bounds& bounds, + RegionBits& mask) const { + impl->UpdatePageWatchersForRegion(base_addr, bounds, mask); } template void PageManager::UpdatePageWatchers(VAddr addr, u64 size) const; template void PageManager::UpdatePageWatchers(VAddr addr, u64 size) const; template void PageManager::UpdatePageWatchersForRegion(VAddr base_addr, + const Bounds& bounds, RegionBits& mask) const; template void PageManager::UpdatePageWatchersForRegion(VAddr base_addr, + const Bounds& bounds, RegionBits& mask) const; template void PageManager::UpdatePageWatchersForRegion(VAddr base_addr, + const Bounds& bounds, RegionBits& mask) const; template void PageManager::UpdatePageWatchersForRegion(VAddr base_addr, + const Bounds& bounds, RegionBits& mask) const; } // namespace VideoCore diff --git a/src/video_core/page_manager.h b/src/video_core/page_manager.h index 4ca41cb43..2b63329fc 100644 --- a/src/video_core/page_manager.h +++ b/src/video_core/page_manager.h @@ -6,7 +6,7 @@ #include #include "common/alignment.h" #include "common/types.h" -#include "video_core/buffer_cache//region_definitions.h" +#include "video_core/buffer_cache/region_definitions.h" namespace Vulkan { class Rasterizer; @@ -16,12 +16,8 @@ namespace VideoCore { class PageManager { // Use the same page size as the tracker. - static constexpr size_t PAGE_BITS = TRACKER_PAGE_BITS; - static constexpr size_t PAGE_SIZE = TRACKER_BYTES_PER_PAGE; - - // Keep the lock granularity the same as region granularity. (since each regions has - // itself a lock) - static constexpr size_t PAGES_PER_LOCK = NUM_PAGES_PER_REGION; + static constexpr size_t PAGE_BITS = 12; + static constexpr size_t PAGE_SIZE = 1ULL << PAGE_BITS; public: explicit PageManager(Vulkan::Rasterizer* rasterizer); @@ -37,9 +33,9 @@ public: template void UpdatePageWatchers(VAddr addr, u64 size) const; - /// Updates watches in the pages touching the specified region using a mask. + /// Updates watches in the pages touching the inclusive bounds using a mask. template - void UpdatePageWatchersForRegion(VAddr base_addr, RegionBits& mask) const; + void UpdatePageWatchersForRegion(VAddr base_addr, const Bounds& bounds, RegionBits& mask) const; /// Returns page aligned address. static constexpr VAddr GetPageAddr(VAddr addr) {