Merge pull request #14037 from jordan-woyak/presentation-timing

Add "Rush Frame Presentation" and "Smooth Early Presentation" settings.
This commit is contained in:
JMC47 2025-11-22 04:49:03 -05:00 committed by GitHub
commit 3fd8d072bf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 200 additions and 39 deletions

View File

@ -47,6 +47,10 @@ const Info<bool> MAIN_DSP_HLE{{System::Main, "Core", "DSPHLE"}, true};
const Info<int> MAIN_MAX_FALLBACK{{System::Main, "Core", "MaxFallback"}, 100};
const Info<int> MAIN_TIMING_VARIANCE{{System::Main, "Core", "TimingVariance"}, 40};
const Info<bool> MAIN_CORRECT_TIME_DRIFT{{System::Main, "Core", "CorrectTimeDrift"}, false};
const Info<bool> MAIN_RUSH_FRAME_PRESENTATION{{System::Main, "Core", "RushFramePresentation"},
false};
const Info<bool> MAIN_SMOOTH_EARLY_PRESENTATION{{System::Main, "Core", "SmoothEarlyPresentation"},
false};
#if defined(ANDROID)
// Currently enabled by default on Android because the performance boost is really needed.
constexpr bool DEFAULT_CPU_THREAD = true;

View File

@ -65,6 +65,8 @@ extern const Info<bool> MAIN_DSP_HLE;
extern const Info<int> MAIN_MAX_FALLBACK;
extern const Info<int> MAIN_TIMING_VARIANCE;
extern const Info<bool> MAIN_CORRECT_TIME_DRIFT;
extern const Info<bool> MAIN_RUSH_FRAME_PRESENTATION;
extern const Info<bool> MAIN_SMOOTH_EARLY_PRESENTATION;
extern const Info<bool> MAIN_CPU_THREAD;
extern const Info<bool> MAIN_SYNC_ON_SKIP_IDLE;
extern const Info<std::string> MAIN_DEFAULT_ISO;

View File

@ -884,6 +884,10 @@ void Callback_FramePresented(const PresentInfo& present_info)
{
g_perf_metrics.CountFrame();
const auto presentation_offset =
present_info.actual_present_time - present_info.intended_present_time;
g_perf_metrics.SetLatestFramePresentationOffset(presentation_offset);
if (present_info.reason == PresentInfo::PresentReason::VideoInterfaceDuplicate)
return;

View File

@ -29,6 +29,7 @@
#include "VideoCommon/PerformanceMetrics.h"
#include "VideoCommon/VideoBackendBase.h"
#include "VideoCommon/VideoConfig.h"
#include "VideoCommon/VideoEvents.h"
namespace CoreTiming
{
@ -113,6 +114,11 @@ void CoreTimingManager::Init()
ResetThrottle(GetTicks());
}
});
m_throttled_after_presentation = false;
m_frame_hook = m_system.GetVideoEvents().after_present_event.Register([this](const PresentInfo&) {
m_throttled_after_presentation.store(false, std::memory_order_relaxed);
});
}
void CoreTimingManager::Shutdown()
@ -124,6 +130,7 @@ void CoreTimingManager::Shutdown()
ClearPendingEvents();
UnregisterAllEvents();
CPUThreadConfigCallback::RemoveConfigChangedCallback(m_registered_config_callback_id);
m_frame_hook.reset();
}
void CoreTimingManager::RefreshConfig()
@ -134,6 +141,11 @@ void CoreTimingManager::RefreshConfig()
1.0f);
m_config_oc_inv_factor = 1.0f / m_config_oc_factor;
m_config_sync_on_skip_idle = Config::Get(Config::MAIN_SYNC_ON_SKIP_IDLE);
m_config_rush_frame_presentation = Config::Get(Config::MAIN_RUSH_FRAME_PRESENTATION);
// We don't want to skip so much throttling that the audio buffer overfills.
m_max_throttle_skip_time =
std::chrono::milliseconds{Config::Get(Config::MAIN_AUDIO_BUFFER_SIZE)} / 2;
// A maximum fallback is used to prevent the system from sleeping for
// too long or going full speed in an attempt to catch up to timings.
@ -422,6 +434,21 @@ void CoreTimingManager::SleepUntil(TimePoint time_point)
void CoreTimingManager::Throttle(const s64 target_cycle)
{
const TimePoint time = Clock::now();
const bool already_throttled =
m_throttled_after_presentation.exchange(true, std::memory_order_relaxed);
// If RushFramePresentation is enabled, try to Throttle just once after each presentation.
// This lowers input latency by speeding through to presentation after grabbing input.
// Make sure we don't get too far ahead of proper timing though,
// otherwise the emulator unreasonably speeds through loading screens that don't have XFB copies,
// making audio sound terrible.
const bool skip_throttle = already_throttled && m_config_rush_frame_presentation &&
((GetTargetHostTime(target_cycle) - time) < m_max_throttle_skip_time);
if (skip_throttle)
return;
if (IsSpeedUnlimited())
{
ResetThrottle(target_cycle);
@ -441,8 +468,6 @@ void CoreTimingManager::Throttle(const s64 target_cycle)
TimePoint target_time = CalculateTargetHostTimeInternal(target_cycle);
const TimePoint time = Clock::now();
const TimePoint min_target = time - m_max_fallback;
// "Correct Time Drift" setting prevents timing relaxing.

View File

@ -207,6 +207,7 @@ private:
float m_config_oc_factor = 1.0f;
float m_config_oc_inv_factor = 1.0f;
bool m_config_sync_on_skip_idle = false;
bool m_config_rush_frame_presentation = false;
s64 m_throttle_reference_cycle = 0;
TimePoint m_throttle_reference_time = Clock::now();
@ -232,6 +233,11 @@ private:
Common::PrecisionTimer m_precision_gpu_timer;
Common::EventHook m_core_state_changed_hook;
Common::EventHook m_frame_hook;
// Used to optionally minimize throttling for improving input latency.
std::atomic_bool m_throttled_after_presentation = false;
DT m_max_throttle_skip_time{};
};
} // namespace CoreTiming

View File

@ -886,10 +886,10 @@ void VideoInterfaceManager::EndField(FieldType field, u64 ticks)
// Note: OutputField above doesn't present when using GPU-on-Thread or Early/Immediate XFB,
// giving "VBlank" measurements here poor pacing without a Throttle call.
// If the user actually wants the data, we'll Throttle to make the numbers nice.
const bool is_vblank_data_wanted = g_ActiveConfig.bShowVPS || g_ActiveConfig.bShowVTimes ||
g_ActiveConfig.bLogRenderTimeToFile ||
g_ActiveConfig.bShowGraphs;
// We'll throttle so long as Immediate XFB isn't enabled.
// That setting intends to minimize input latency and throttling would be counterproductive.
// The Rush Frame Presentation setting is handled by Throttle itself.
const bool is_vblank_data_wanted = !g_ActiveConfig.bImmediateXFB;
if (is_vblank_data_wanted)
m_system.GetCoreTiming().Throttle(ticks);

View File

@ -114,6 +114,30 @@ void AdvancedPane::CreateLayout()
"<br><br><dolphin_emphasis>If unsure, leave this unchecked.</dolphin_emphasis>"));
timing_group_layout->addWidget(correct_time_drift);
auto* const rush_frame_presentation =
// i18n: "Rush" is a verb
new ConfigBool{tr("Rush Frame Presentation"), Config::MAIN_RUSH_FRAME_PRESENTATION};
rush_frame_presentation->SetDescription(
tr("Limits throttling between input and frame output,"
" speeding through emulation to reach presentation,"
" displaying sooner, and thus reducing input latency."
"<br><br>This will generally make frame pacing worse."
"<br>This setting can work either with or without Immediately Present XFB."
"<br>An Audio Buffer Size of at least 80 ms is recommended to ensure full effect."
"<br><br><dolphin_emphasis>If unsure, leave this unchecked.</dolphin_emphasis>"));
timing_group_layout->addWidget(rush_frame_presentation);
auto* const smooth_early_presentation =
// i18n: "Smooth" is a verb
new ConfigBool{tr("Smooth Early Presentation"), Config::MAIN_SMOOTH_EARLY_PRESENTATION};
smooth_early_presentation->SetDescription(
tr("Adaptively adjusts the timing of early frame presentation."
"<br><br>This can improve frame pacing with Immediately Present XFB"
" and/or Rush Frame Presentation,"
" while still maintaining most of the input latency benefits."
"<br><br><dolphin_emphasis>If unsure, leave this unchecked.</dolphin_emphasis>"));
timing_group_layout->addWidget(smooth_early_presentation);
// Make all labels the same width, so that the sliders are aligned.
const QFontMetrics font_metrics{font()};
const int label_width = font_metrics.boundingRect(QStringLiteral(" 500% (000.00 VPS)")).width();

View File

@ -13,7 +13,6 @@
#include "Common/EnumMap.h"
#include "Common/Logging/Log.h"
#include "Core/CoreTiming.h"
#include "Core/DolphinAnalytics.h"
#include "Core/FifoPlayer/FifoPlayer.h"
#include "Core/FifoPlayer/FifoRecorder.h"
@ -359,14 +358,8 @@ static void BPWritten(PixelShaderManager& pixel_shader_manager, XFStateManager&
if (g_ActiveConfig.bImmediateXFB)
{
// TODO: GetTicks is not sane from the GPU thread.
// This value is currently used for frame dumping and the custom shader "time_ms" value.
// Frame dumping has more calls that aren't sane from the GPU thread.
// i.e. Frame dumping is not sane in "Dual Core" mode in general.
const u64 ticks = system.GetCoreTiming().GetTicks();
// below div two to convert from bytes to pixels - it expects width, not stride
g_presenter->ImmediateSwap(destAddr, destStride / 2, destStride, height, ticks);
g_presenter->ImmediateSwap(destAddr, destStride / 2, destStride, height);
}
else
{

View File

@ -65,6 +65,7 @@ namespace
{
AVRational GetTimeBaseForCurrentRefreshRate(s64 max_denominator)
{
// TODO: GetTargetRefreshRate* are not safe from GPU thread.
auto& vi = Core::System::GetInstance().GetVideoInterface();
int num;
int den;
@ -368,6 +369,7 @@ void FFMpegFrameDump::AddFrame(const FrameData& frame)
// Calculate presentation timestamp from ticks since start.
const s64 pts = av_rescale_q(
frame.state.ticks - m_context->start_ticks,
// TODO: GetTicksPerSecond is not safe from GPU thread.
AVRational{1, int(Core::System::GetInstance().GetSystemTimers().GetTicksPerSecond())},
m_context->codec->time_base);

View File

@ -23,6 +23,8 @@ void PerformanceMetrics::Reset()
m_speed = 0;
m_max_speed = 0;
m_frame_presentation_offset = DT{};
}
void PerformanceMetrics::CountFrame()
@ -98,6 +100,11 @@ double PerformanceMetrics::GetMaxSpeed() const
return m_max_speed.load(std::memory_order_relaxed);
}
// Publishes the most recent presentation offset so that DrawImGuiStats can display it.
// The value is written with a relaxed atomic store: only the latest value matters and
// no other data is published alongside it.
void PerformanceMetrics::SetLatestFramePresentationOffset(DT offset)
{
  constexpr auto order = std::memory_order_relaxed;
  m_frame_presentation_offset.store(offset, order);
}
void PerformanceMetrics::DrawImGuiStats(const float backbuffer_scale)
{
m_vps_counter.UpdateStats();
@ -293,6 +300,10 @@ void PerformanceMetrics::DrawImGuiStats(const float backbuffer_scale)
DT_ms(m_fps_counter.GetDtAvg()).count());
ImGui::TextColored(ImVec4(r, g, b, 1.0f), " ±:%6.2lfms",
DT_ms(m_fps_counter.GetDtStd()).count());
const auto offset =
DT_ms(m_frame_presentation_offset.load(std::memory_order_relaxed)).count();
ImGui::TextColored(ImVec4(r, g, b, 1.0f), "ofs:%5.1lfms", offset);
}
}
ImGui::End();

View File

@ -43,6 +43,9 @@ public:
double GetSpeed() const;
double GetMaxSpeed() const;
// Call from any thread.
void SetLatestFramePresentationOffset(DT offset);
// ImGui Functions
void DrawImGuiStats(const float backbuffer_scale);
@ -55,6 +58,8 @@ private:
std::atomic<double> m_speed{};
std::atomic<double> m_max_speed{};
std::atomic<DT> m_frame_presentation_offset{};
struct PerfSample
{
TimePoint clock_time;

View File

@ -5,6 +5,7 @@
#include "Common/ChunkFile.h"
#include "Core/Config/GraphicsSettings.h"
#include "Core/Config/MainSettings.h"
#include "Core/CoreTiming.h"
#include "Core/HW/VideoInterface.h"
#include "Core/Host.h"
@ -162,9 +163,12 @@ void Presenter::ViSwap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height,
{
bool is_duplicate = FetchXFB(xfb_addr, fb_width, fb_stride, fb_height, ticks);
PresentInfo present_info;
present_info.emulated_timestamp = ticks;
present_info.present_count = m_present_count++;
PresentInfo present_info{
.present_count = m_present_count++,
.emulated_timestamp = ticks,
.intended_present_time = presentation_time,
};
if (is_duplicate)
{
present_info.frame_count = m_frame_count - 1; // Previous frame
@ -201,33 +205,43 @@ void Presenter::ViSwap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height,
if (!is_duplicate || !g_ActiveConfig.bSkipPresentingDuplicateXFBs)
{
Present(presentation_time);
Present(&present_info);
ProcessFrameDumping(ticks);
video_events.after_present_event.Trigger(present_info);
}
}
void Presenter::ImmediateSwap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, u64 ticks)
void Presenter::ImmediateSwap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height)
{
const u64 ticks = m_next_swap_estimated_ticks;
FetchXFB(xfb_addr, fb_width, fb_stride, fb_height, ticks);
PresentInfo present_info;
present_info.emulated_timestamp = ticks; // TODO: This should be the time of the next VI field
present_info.frame_count = m_frame_count++;
present_info.reason = PresentInfo::PresentReason::Immediate;
present_info.present_count = m_present_count++;
PresentInfo present_info{
.frame_count = m_frame_count++,
.present_count = m_present_count++,
.reason = PresentInfo::PresentReason::Immediate,
.emulated_timestamp = ticks,
.intended_present_time = m_next_swap_estimated_time,
};
auto& video_events = GetVideoEvents();
video_events.before_present_event.Trigger(present_info);
Present();
Present(&present_info);
ProcessFrameDumping(ticks);
video_events.after_present_event.Trigger(present_info);
}
// Records the estimate of when the next swap is expected to happen, both as a host
// time point and as an emulated tick count. ImmediateSwap has no timing information
// of its own and uses these values for its timestamp and intended presentation time.
void Presenter::SetNextSwapEstimatedTime(u64 ticks, TimePoint host_time)
{
  m_next_swap_estimated_time = host_time;
  m_next_swap_estimated_ticks = ticks;
}
void Presenter::ProcessFrameDumping(u64 ticks) const
{
if (g_frame_dumper->IsFrameDumping() && m_xfb_entry)
@ -819,7 +833,7 @@ void Presenter::RenderXFBToScreen(const MathUtil::Rectangle<int>& target_rc,
}
}
void Presenter::Present(std::optional<TimePoint> presentation_time)
void Presenter::Present(PresentInfo* present_info)
{
m_present_count++;
@ -873,8 +887,16 @@ void Presenter::Present(std::optional<TimePoint> presentation_time)
{
std::lock_guard<std::mutex> guard(m_swap_mutex);
if (presentation_time.has_value())
Core::System::GetInstance().GetCoreTiming().SleepUntil(*presentation_time);
if (present_info != nullptr)
{
const auto present_time = GetUpdatedPresentationTime(present_info->intended_present_time);
Core::System::GetInstance().GetCoreTiming().SleepUntil(present_time);
// Perhaps in the future a more accurate time can be acquired from the various backends.
present_info->actual_present_time = Clock::now();
present_info->present_time_accuracy = PresentInfo::PresentTimeAccuracy::PresentInProgress;
}
g_gfx->PresentBackbuffer();
}
@ -892,6 +914,34 @@ void Presenter::Present(std::optional<TimePoint> presentation_time)
g_gfx->EndUtilityDrawing();
}
// Decides when the frame being presented should actually be shown, and updates the
// running offset used by the SmoothEarlyPresentation setting.
//
// intended_presentation_time: the host time at which the frame is nominally due.
// Returns the host time the caller should wait for before presenting.
TimePoint Presenter::GetUpdatedPresentationTime(TimePoint intended_presentation_time)
{
  const auto current_time = Clock::now();

  // Never positive: how far ahead of schedule this frame arrived, clamped to zero
  // when the frame is on time or late.
  const auto early_by = std::min(current_time - intended_presentation_time, DT{});

  if (Config::Get(Config::MAIN_SMOOTH_EARLY_PRESENTATION))
  {
    // Drift toward earlier presentation slowly, but snap back toward later presentation
    // quickly. That keeps pacing reasonably even when a game delivers frames with regular
    // sporadic bumps. Tuned against Brawl's poor pacing with "Immediate XFB";
    // Super Mario Galaxy 1 + 2 still perform poorly here in SingleCore mode.
    const bool moving_earlier = early_by < m_presentation_time_offset;
    const auto delta = early_by - m_presentation_time_offset;
    m_presentation_time_offset += delta / (moving_earlier ? 100 : 2);
    return intended_presentation_time + m_presentation_time_offset;
  }

  // Smoothing is disabled: just track the raw arrival offset.
  m_presentation_time_offset = early_by;

  // With ImmediateXFB or RushFramePresentation the goal is low input latency,
  // so present right away instead of waiting for the nominal time.
  if (g_ActiveConfig.bImmediateXFB || Config::Get(Config::MAIN_RUSH_FRAME_PRESENTATION))
  {
    return current_time;
  }

  return intended_presentation_time;
}
void Presenter::SetKeyMap(const DolphinKeyMap& key_map)
{
if (m_onscreen_ui)
@ -931,8 +981,10 @@ void Presenter::DoState(PointerWrap& p)
// This technically counts as the end of the frame
GetVideoEvents().after_frame_event.Trigger(Core::System::GetInstance());
ImmediateSwap(m_last_xfb_addr, m_last_xfb_width, m_last_xfb_stride, m_last_xfb_height,
m_last_xfb_ticks);
m_next_swap_estimated_ticks = m_last_xfb_ticks;
m_next_swap_estimated_time = Clock::now();
ImmediateSwap(m_last_xfb_addr, m_last_xfb_width, m_last_xfb_stride, m_last_xfb_height);
}
}

View File

@ -37,9 +37,11 @@ public:
void ViSwap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, u64 ticks,
TimePoint presentation_time);
void ImmediateSwap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, u64 ticks);
void ImmediateSwap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height);
void Present(std::optional<TimePoint> presentation_time = std::nullopt);
void SetNextSwapEstimatedTime(u64 ticks, TimePoint host_time);
void Present(PresentInfo* present_info = nullptr);
void ClearLastXfbId() { m_last_xfb_id = std::numeric_limits<u64>::max(); }
bool Initialize();
@ -167,6 +169,18 @@ private:
u32 m_last_xfb_height = MAX_XFB_HEIGHT;
Common::EventHook m_config_changed;
// Updates state for the SmoothEarlyPresentation setting if enabled.
// Returns the desired presentation time regardless.
TimePoint GetUpdatedPresentationTime(TimePoint intended_presentation_time);
// Used by the SmoothEarlyPresentation setting.
DT m_presentation_time_offset{};
// Calculated from the previous swap time and current refresh rate.
// Can be used for presentation of ImmediateXFB swaps which don't have timing information.
u64 m_next_swap_estimated_ticks = 0;
TimePoint m_next_swap_estimated_time{Clock::now()};
};
} // namespace VideoCommon

View File

@ -21,6 +21,8 @@
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/DolphinAnalytics.h"
#include "Core/HW/SystemTimers.h"
#include "Core/HW/VideoInterface.h"
#include "Core/System.h"
// TODO: ugly
@ -93,16 +95,35 @@ std::string VideoBackendBase::BadShaderFilename(const char* shader_stage, int co
void VideoBackendBase::Video_OutputXFB(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height,
u64 ticks)
{
if (m_initialized && g_presenter && !g_ActiveConfig.bImmediateXFB)
if (!m_initialized || !g_presenter)
return;
auto& system = Core::System::GetInstance();
auto& core_timing = system.GetCoreTiming();
if (!g_ActiveConfig.bImmediateXFB)
{
auto& system = Core::System::GetInstance();
system.GetFifo().SyncGPU(Fifo::SyncGPUReason::Swap);
const TimePoint presentation_time = system.GetCoreTiming().GetTargetHostTime(ticks);
const TimePoint presentation_time = core_timing.GetTargetHostTime(ticks);
AsyncRequests::GetInstance()->PushEvent([=] {
g_presenter->ViSwap(xfb_addr, fb_width, fb_stride, fb_height, ticks, presentation_time);
});
}
// Inform the Presenter of the next estimated swap time.
auto& vi = system.GetVideoInterface();
const s64 refresh_rate_den = vi.GetTargetRefreshRateDenominator();
const s64 refresh_rate_num = vi.GetTargetRefreshRateNumerator();
const auto next_swap_estimated_ticks =
ticks + (system.GetSystemTimers().GetTicksPerSecond() * refresh_rate_den / refresh_rate_num);
const auto next_swap_estimated_time = core_timing.GetTargetHostTime(next_swap_estimated_ticks);
AsyncRequests::GetInstance()->PushEvent([=] {
g_presenter->SetNextSwapEstimatedTime(next_swap_estimated_ticks, next_swap_estimated_time);
});
}
u32 VideoBackendBase::Video_GetQueryResult(PerfQueryType type)

View File

@ -34,14 +34,12 @@ struct PresentInfo
PresentReason reason = PresentReason::Immediate;
// The exact emulated time at which real hardware would have presented this frame
// FIXME: Immediate should predict the timestamp of this present
u64 emulated_timestamp = 0;
// TODO:
// u64 intended_present_time = 0;
TimePoint intended_present_time{};
// AfterPresent only: The actual time the frame was presented
u64 actual_present_time = 0;
TimePoint actual_present_time{};
enum class PresentTimeAccuracy
{