video_core: Rework VAAPI code

Use public FFmpeg headers only.

Fall back gracefully to software decoding when a codec (e.g. VP8) is
unsupported by VAAPI.
This commit is contained in:
Mike Lothian 2025-06-16 13:37:05 +01:00
parent 37e9208842
commit a9cde6f765
9 changed files with 700 additions and 661 deletions

View File

@ -12,25 +12,25 @@
namespace Tegra { namespace Tegra {
Codec::Codec(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs) Codec::Codec(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs)
: host1x(host1x_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(host1x)), : host1x(host1x_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(host1x)),
vp8_decoder(std::make_unique<Decoder::VP8>(host1x)), vp8_decoder(std::make_unique<Decoder::VP8>(host1x)),
vp9_decoder(std::make_unique<Decoder::VP9>(host1x)) {} vp9_decoder(std::make_unique<Decoder::VP9>(host1x)) {}
Codec::~Codec() = default; Codec::~Codec() = default;
void Codec::Initialize() { void Codec::Initialize() {
initialized = decode_api.Initialize(current_codec); initialized = decode_api.Initialize(current_codec);
} }
void Codec::SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec) { void Codec::SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec) {
if (current_codec != codec) { if (current_codec != codec) {
current_codec = codec; current_codec = codec;
LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName()); LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName());
} }
} }
void Codec::Decode() { void Codec::Decode() {
const bool is_first_frame = !initialized; const bool is_first_frame = !initialized;
if (is_first_frame) { if (is_first_frame) {
Initialize(); Initialize();
@ -70,15 +70,24 @@ void Codec::Decode() {
} }
// Receive output frames from decoder. // Receive output frames from decoder.
decode_api.ReceiveFrames(frames); // The previous code called decode_api.ReceiveFrames(frames); which would queue multiple frames.
// FFmpeg::DecodeApi has since been refactored to expose only ReceiveFrame(), which
// returns at most one frame per call.
// For now, a single frame is received and pushed onto the `frames` queue. If the queue is
// expected to hold every frame the decoder has ready, ReceiveFrame() must instead be
// called in a loop until it returns nullptr (indicating EAGAIN or EOF).
auto frame = decode_api.ReceiveFrame();
if (frame) {
frames.push(std::move(frame));
}
while (frames.size() > 10) { while (frames.size() > 10) {
LOG_DEBUG(HW_GPU, "ReceiveFrames overflow, dropped frame"); LOG_DEBUG(HW_GPU, "ReceiveFrames overflow, dropped frame");
frames.pop(); frames.pop();
} }
} }
std::unique_ptr<FFmpeg::Frame> Codec::GetCurrentFrame() { std::unique_ptr<FFmpeg::Frame> Codec::GetCurrentFrame() {
// Sometimes VIC will request more frames than have been decoded. // Sometimes VIC will request more frames than have been decoded.
// in this case, return a blank frame and don't overwrite previous data. // in this case, return a blank frame and don't overwrite previous data.
if (frames.empty()) { if (frames.empty()) {
@ -88,13 +97,13 @@ std::unique_ptr<FFmpeg::Frame> Codec::GetCurrentFrame() {
auto frame = std::move(frames.front()); auto frame = std::move(frames.front());
frames.pop(); frames.pop();
return frame; return frame;
} }
Host1x::NvdecCommon::VideoCodec Codec::GetCurrentCodec() const { Host1x::NvdecCommon::VideoCodec Codec::GetCurrentCodec() const {
return current_codec; return current_codec;
} }
std::string_view Codec::GetCurrentCodecName() const { std::string_view Codec::GetCurrentCodecName() const {
switch (current_codec) { switch (current_codec) {
case Host1x::NvdecCommon::VideoCodec::None: case Host1x::NvdecCommon::VideoCodec::None:
return "None"; return "None";
@ -109,5 +118,5 @@ std::string_view Codec::GetCurrentCodecName() const {
default: default:
return "Unknown"; return "Unknown";
} }
} }
} // namespace Tegra } // namespace Tegra

View File

@ -13,18 +13,18 @@
namespace Tegra { namespace Tegra {
namespace Decoder { namespace Decoder {
class H264; class H264;
class VP8; class VP8;
class VP9; class VP9;
} // namespace Decoder } // namespace Decoder
namespace Host1x { namespace Host1x {
class Host1x; class Host1x;
} // namespace Host1x } // namespace Host1x
class Codec { class Codec {
public: public:
explicit Codec(Host1x::Host1x& host1x, const Host1x::NvdecCommon::NvdecRegisters& regs); explicit Codec(Host1x::Host1x& host1x, const Host1x::NvdecCommon::NvdecRegisters& regs);
~Codec(); ~Codec();
@ -46,7 +46,7 @@ public:
/// Return name of the current codec /// Return name of the current codec
[[nodiscard]] std::string_view GetCurrentCodecName() const; [[nodiscard]] std::string_view GetCurrentCodecName() const;
private: private:
bool initialized{}; bool initialized{};
Host1x::NvdecCommon::VideoCodec current_codec{Host1x::NvdecCommon::VideoCodec::None}; Host1x::NvdecCommon::VideoCodec current_codec{Host1x::NvdecCommon::VideoCodec::None};
FFmpeg::DecodeApi decode_api; FFmpeg::DecodeApi decode_api;
@ -58,6 +58,6 @@ private:
std::unique_ptr<Decoder::VP9> vp9_decoder; std::unique_ptr<Decoder::VP9> vp9_decoder;
std::queue<std::unique_ptr<FFmpeg::Frame>> frames{}; std::queue<std::unique_ptr<FFmpeg::Frame>> frames{};
}; };
} // namespace Tegra } // namespace Tegra

View File

@ -9,63 +9,54 @@
namespace Tegra { namespace Tegra {
Decoder::Decoder(Host1x::Host1x& host1x_, s32 id_, const Host1x::NvdecCommon::NvdecRegisters& regs_, Decoder::Decoder(Host1x::Host1x& host1x_, s32 id_, const Host1x::NvdecCommon::NvdecRegisters& regs_,
Host1x::FrameQueue& frame_queue_) Host1x::FrameQueue& frame_queue_)
: host1x(host1x_), memory_manager{host1x.GMMU()}, regs{regs_}, id{id_}, frame_queue{ : host1x(host1x_), memory_manager{host1x.GMMU()}, regs{regs_}, id{id_}, frame_queue{
frame_queue_} {} frame_queue_} {}
Decoder::~Decoder() = default; Decoder::~Decoder() = default;
void Decoder::Decode() { void Decoder::Decode() {
if (!initialized) { if (!initialized) {
return; return;
} }
const auto packet_data = ComposeFrame(); const auto packet_data = ComposeFrame();
// Capture the state needed for queuing BEFORE sending the packet
// and potentially yielding. The main `regs` and `current_context` can be
// overwritten by the time FFmpeg returns a frame.
const bool is_interlaced_frame = IsInterlaced();
const auto interlaced_offsets = GetInterlacedOffsets();
const auto progressive_offsets = GetProgressiveOffsets();
// Send assembled bitstream to decoder. // Send assembled bitstream to decoder.
if (!decode_api.SendPacket(packet_data)) { if (!decode_api.SendPacket(packet_data)) {
return; return;
} }
// Only receive/store visible frames. // Only process visible frames.
if (vp9_hidden_frame) { if (vp9_hidden_frame) {
return; return;
} }
// Receive output frames from decoder. // Receive output frames from decoder.
// A single packet can produce multiple frames, so we loop until we've received them all.
while (true) {
auto frame = decode_api.ReceiveFrame(); auto frame = decode_api.ReceiveFrame();
if (!frame) { // No more frames available for now.
if (IsInterlaced()) { break;
auto [luma_top, luma_bottom, chroma_top, chroma_bottom] = GetInterlacedOffsets();
auto frame_copy = frame;
if (!frame.get()) {
LOG_ERROR(HW_GPU,
"Nvdec {} dailed to decode interlaced frame for top 0x{:X} bottom 0x{:X}", id,
luma_top, luma_bottom);
} }
if (UsingDecodeOrder()) { if (is_interlaced_frame) {
auto [luma_top, luma_bottom, chroma_top, chroma_bottom] = interlaced_offsets;
auto frame_copy = frame;
frame_queue.PushDecodeOrder(id, luma_top, std::move(frame)); frame_queue.PushDecodeOrder(id, luma_top, std::move(frame));
frame_queue.PushDecodeOrder(id, luma_bottom, std::move(frame_copy)); frame_queue.PushDecodeOrder(id, luma_bottom, std::move(frame_copy));
} else { } else {
frame_queue.PushPresentOrder(id, luma_top, std::move(frame)); auto [luma_offset, chroma_offset] = progressive_offsets;
frame_queue.PushPresentOrder(id, luma_bottom, std::move(frame_copy));
}
} else {
auto [luma_offset, chroma_offset] = GetProgressiveOffsets();
if (!frame.get()) {
LOG_ERROR(HW_GPU, "Nvdec {} failed to decode progressive frame for luma 0x{:X}", id,
luma_offset);
}
if (UsingDecodeOrder()) {
frame_queue.PushDecodeOrder(id, luma_offset, std::move(frame)); frame_queue.PushDecodeOrder(id, luma_offset, std::move(frame));
} else {
frame_queue.PushPresentOrder(id, luma_offset, std::move(frame));
} }
} }
} }
} // namespace Tegra } // namespace Tegra

View File

@ -16,21 +16,22 @@
namespace Tegra { namespace Tegra {
namespace Host1x { namespace Host1x {
class Host1x; class Host1x;
class FrameQueue; class FrameQueue;
} // namespace Host1x } // namespace Host1x
class Decoder { class Decoder {
public: public:
virtual ~Decoder(); virtual ~Decoder();
/// Call decoders to construct headers, decode AVFrame with ffmpeg /// Call decoders to construct headers, decode AVFrame with ffmpeg
void Decode(); void Decode();
bool UsingDecodeOrder() const { // Removed UsingDecodeOrder() as it's no longer available in FFmpeg::DecodeApi
return decode_api.UsingDecodeOrder(); // bool UsingDecodeOrder() const {
} // return decode_api.UsingDecodeOrder();
// }
/// Returns the value of current_codec /// Returns the value of current_codec
[[nodiscard]] Host1x::NvdecCommon::VideoCodec GetCurrentCodec() const { [[nodiscard]] Host1x::NvdecCommon::VideoCodec GetCurrentCodec() const {
@ -40,7 +41,7 @@ public:
/// Return name of the current codec /// Return name of the current codec
[[nodiscard]] virtual std::string_view GetCurrentCodecName() const = 0; [[nodiscard]] virtual std::string_view GetCurrentCodecName() const = 0;
protected: protected:
explicit Decoder(Host1x::Host1x& host1x, s32 id, explicit Decoder(Host1x::Host1x& host1x, s32 id,
const Host1x::NvdecCommon::NvdecRegisters& regs, const Host1x::NvdecCommon::NvdecRegisters& regs,
Host1x::FrameQueue& frame_queue); Host1x::FrameQueue& frame_queue);
@ -59,6 +60,6 @@ protected:
FFmpeg::DecodeApi decode_api; FFmpeg::DecodeApi decode_api;
bool initialized{}; bool initialized{};
bool vp9_hidden_frame{}; bool vp9_hidden_frame{};
}; };
} // namespace Tegra } // namespace Tegra

View File

@ -10,71 +10,136 @@
#include "video_core/memory_manager.h" #include "video_core/memory_manager.h"
extern "C" { extern "C" {
#ifdef LIBVA_FOUND #ifdef LIBVA_FOUND
// for querying VAAPI driver information // for querying VAAPI driver information
#include <libavutil/hwcontext_vaapi.h> #include <libavutil/hwcontext_vaapi.h>
#endif #endif
} }
namespace FFmpeg { namespace FFmpeg {
namespace { namespace {
constexpr AVPixelFormat PreferredGpuFormat = AV_PIX_FMT_NV12; void FfmpegLog(void* ptr, int level, const char* fmt, va_list vl) {
constexpr AVPixelFormat PreferredCpuFormat = AV_PIX_FMT_YUV420P; if (level > av_log_get_level()) {
constexpr std::array PreferredGpuDecoders = { return;
}
char line[1024];
vsnprintf(line, sizeof(line), fmt, vl);
// Remove trailing newline
size_t len = strlen(line);
if (len > 0 && line[len - 1] == '\n') {
line[len - 1] = '\0';
}
// Map FFmpeg log levels to yuzu log levels.
switch (level) {
case AV_LOG_PANIC:
case AV_LOG_FATAL:
case AV_LOG_ERROR:
LOG_ERROR(HW_GPU, "FFmpeg: {}", line);
break;
case AV_LOG_WARNING:
LOG_WARNING(HW_GPU, "FFmpeg: {}", line);
break;
default:
LOG_INFO(HW_GPU, "FFmpeg: {}", line);
break;
}
}
constexpr AVPixelFormat PreferredGpuFormat = AV_PIX_FMT_NV12;
constexpr AVPixelFormat PreferredCpuFormat = AV_PIX_FMT_YUV420P;
constexpr std::array PreferredGpuDecoders = {
AV_HWDEVICE_TYPE_CUDA, AV_HWDEVICE_TYPE_CUDA,
#ifdef _WIN32 #ifdef _WIN32
AV_HWDEVICE_TYPE_D3D11VA, AV_HWDEVICE_TYPE_D3D11VA,
AV_HWDEVICE_TYPE_DXVA2, AV_HWDEVICE_TYPE_DXVA2,
#elif defined(__unix__) #elif defined(__unix__)
AV_HWDEVICE_TYPE_VAAPI, AV_HWDEVICE_TYPE_VAAPI,
AV_HWDEVICE_TYPE_VDPAU, AV_HWDEVICE_TYPE_VDPAU,
#endif #endif
AV_HWDEVICE_TYPE_VULKAN AV_HWDEVICE_TYPE_VULKAN
}; };
AVPixelFormat GetGpuFormat(AVCodecContext* codec_context, const AVPixelFormat* pix_fmts) { AVPixelFormat GetGpuFormat(AVCodecContext* codec_context, const AVPixelFormat* pix_fmts) {
for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) { for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) {
if (*p == codec_context->pix_fmt) { // The initial format from hw_config is an opaque type like AV_PIX_FMT_VAAPI.
return codec_context->pix_fmt; // The decoder may instead offer a list of concrete surface formats it can use
// with that hardware context. We need to find a compatible one.
// For VA-API, NV12 is the common hardware surface format.
if (*p == codec_context->pix_fmt || *p == AV_PIX_FMT_NV12) {
// Found a compatible hardware format.
LOG_INFO(HW_GPU, "FFmpeg: Selected hardware pixel format {}.",
av_get_pix_fmt_name(*p));
return *p;
} }
} }
LOG_INFO(HW_GPU, "Could not find compatible GPU AV format, falling back to CPU"); // The decoder does not support the requested hardware format for this stream.
// Build a list of supported formats for the log message.
std::string supported_formats_str;
for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) {
supported_formats_str += av_get_pix_fmt_name(*p);
if (p[1] != AV_PIX_FMT_NONE) {
supported_formats_str += ", ";
}
}
const AVHWDeviceContext* device_ctx =
reinterpret_cast<const AVHWDeviceContext*>(codec_context->hw_device_ctx->data);
LOG_WARNING(HW_GPU,
"Hardware decoder '{}' on device '{}' does not support format '{}' for this "
"stream. Supported formats: [{}]. Falling back to software decoding.",
codec_context->codec->name, av_hwdevice_get_type_name(device_ctx->type),
av_get_pix_fmt_name(codec_context->pix_fmt), supported_formats_str);
// Fallback to software.
av_buffer_unref(&codec_context->hw_device_ctx); av_buffer_unref(&codec_context->hw_device_ctx);
// Check if the preferred software format is supported.
for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) {
if (*p == PreferredCpuFormat) {
codec_context->pix_fmt = PreferredCpuFormat; codec_context->pix_fmt = PreferredCpuFormat;
return codec_context->pix_fmt; return PreferredCpuFormat;
} }
}
std::string AVError(int errnum) { LOG_ERROR(HW_GPU, "Decoder does not support preferred software format {}. Decoding will likely fail.",
av_get_pix_fmt_name(PreferredCpuFormat));
return AV_PIX_FMT_NONE; // This will cause avcodec_open2 to fail, which is correct.
}
std::string AVError(int errnum) {
char errbuf[AV_ERROR_MAX_STRING_SIZE] = {}; char errbuf[AV_ERROR_MAX_STRING_SIZE] = {};
av_make_error_string(errbuf, sizeof(errbuf) - 1, errnum); av_make_error_string(errbuf, sizeof(errbuf) - 1, errnum);
return errbuf; return errbuf;
} }
} // namespace } // namespace
Packet::Packet(std::span<const u8> data) { Packet::Packet(std::span<const u8> data) {
m_packet = av_packet_alloc(); m_packet = av_packet_alloc();
m_packet->data = const_cast<u8*>(data.data()); m_packet->data = const_cast<u8*>(data.data());
m_packet->size = static_cast<s32>(data.size()); m_packet->size = static_cast<s32>(data.size());
} }
Packet::~Packet() { Packet::~Packet() {
av_packet_free(&m_packet); av_packet_free(&m_packet);
} }
Frame::Frame() { Frame::Frame() {
m_frame = av_frame_alloc(); m_frame = av_frame_alloc();
} }
Frame::~Frame() { Frame::~Frame() {
av_frame_free(&m_frame); av_frame_free(&m_frame);
} }
Decoder::Decoder(Tegra::Host1x::NvdecCommon::VideoCodec codec) { Decoder::Decoder(Tegra::Host1x::NvdecCommon::VideoCodec codec) {
const AVCodecID av_codec = [&] { const AVCodecID av_codec = [&] {
switch (codec) { switch (codec) {
case Tegra::Host1x::NvdecCommon::VideoCodec::H264: case Tegra::Host1x::NvdecCommon::VideoCodec::H264:
@ -90,17 +155,17 @@ Decoder::Decoder(Tegra::Host1x::NvdecCommon::VideoCodec codec) {
}(); }();
m_codec = avcodec_find_decoder(av_codec); m_codec = avcodec_find_decoder(av_codec);
} ASSERT_MSG(m_codec, "Failed to find decoder for AVCodecID {}", av_codec);
}
bool Decoder::SupportsDecodingOnDevice(AVPixelFormat* out_pix_fmt, AVHWDeviceType type) const { bool Decoder::SupportsDecodingOnDevice(AVPixelFormat* out_pix_fmt, AVHWDeviceType type) const {
for (int i = 0;; i++) { for (int i = 0;; i++) {
const AVCodecHWConfig* config = avcodec_get_hw_config(m_codec, i); const AVCodecHWConfig* config = avcodec_get_hw_config(m_codec, i);
if (!config) { if (!config) {
LOG_DEBUG(HW_GPU, "{} decoder does not support device type {}", m_codec->name, av_hwdevice_get_type_name(type)); LOG_DEBUG(HW_GPU, "{} decoder does not support device type {}", m_codec->name, av_hwdevice_get_type_name(type));
break; break;
} }
if ((config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX) != 0 && if (config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX && config->device_type == type) {
config->device_type == type) {
LOG_INFO(HW_GPU, "Using {} GPU decoder", av_hwdevice_get_type_name(type)); LOG_INFO(HW_GPU, "Using {} GPU decoder", av_hwdevice_get_type_name(type));
*out_pix_fmt = config->pix_fmt; *out_pix_fmt = config->pix_fmt;
return true; return true;
@ -108,9 +173,9 @@ bool Decoder::SupportsDecodingOnDevice(AVPixelFormat* out_pix_fmt, AVHWDeviceTyp
} }
return false; return false;
} }
std::vector<AVHWDeviceType> HardwareContext::GetSupportedDeviceTypes() { std::vector<AVHWDeviceType> HardwareContext::GetSupportedDeviceTypes() {
std::vector<AVHWDeviceType> types; std::vector<AVHWDeviceType> types;
AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE; AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE;
@ -122,13 +187,13 @@ std::vector<AVHWDeviceType> HardwareContext::GetSupportedDeviceTypes() {
types.push_back(current_device_type); types.push_back(current_device_type);
} }
} }
HardwareContext::~HardwareContext() { HardwareContext::~HardwareContext() {
av_buffer_unref(&m_gpu_decoder); av_buffer_unref(&m_gpu_decoder);
} }
bool HardwareContext::InitializeForDecoder(DecoderContext& decoder_context, const Decoder& decoder) { bool HardwareContext::InitializeForDecoder(DecoderContext& decoder_context, const Decoder& decoder) {
const auto supported_types = GetSupportedDeviceTypes(); const auto supported_types = GetSupportedDeviceTypes();
for (const auto type : PreferredGpuDecoders) { for (const auto type : PreferredGpuDecoders) {
AVPixelFormat hw_pix_fmt; AVPixelFormat hw_pix_fmt;
@ -149,9 +214,9 @@ bool HardwareContext::InitializeForDecoder(DecoderContext& decoder_context, cons
} }
return false; return false;
} }
bool HardwareContext::InitializeWithType(AVHWDeviceType type) { bool HardwareContext::InitializeWithType(AVHWDeviceType type) {
av_buffer_unref(&m_gpu_decoder); av_buffer_unref(&m_gpu_decoder);
if (const int ret = av_hwdevice_ctx_create(&m_gpu_decoder, type, nullptr, nullptr, 0); ret < 0) { if (const int ret = av_hwdevice_ctx_create(&m_gpu_decoder, type, nullptr, nullptr, 0); ret < 0) {
@ -159,7 +224,7 @@ bool HardwareContext::InitializeWithType(AVHWDeviceType type) {
return false; return false;
} }
#ifdef LIBVA_FOUND #ifdef LIBVA_FOUND
if (type == AV_HWDEVICE_TYPE_VAAPI) { if (type == AV_HWDEVICE_TYPE_VAAPI) {
// We need to determine if this is an impersonated VAAPI driver. // We need to determine if this is an impersonated VAAPI driver.
auto* hwctx = reinterpret_cast<AVHWDeviceContext*>(m_gpu_decoder->data); auto* hwctx = reinterpret_cast<AVHWDeviceContext*>(m_gpu_decoder->data);
@ -175,31 +240,46 @@ bool HardwareContext::InitializeWithType(AVHWDeviceType type) {
LOG_DEBUG(HW_GPU, "Using VAAPI driver: {}", vendor_name); LOG_DEBUG(HW_GPU, "Using VAAPI driver: {}", vendor_name);
} }
} }
#endif #endif
return true; return true;
} }
DecoderContext::DecoderContext(const Decoder& decoder) : m_decoder{decoder} { DecoderContext::DecoderContext(const Decoder& decoder) : m_decoder{decoder} {
m_codec_context = avcodec_alloc_context3(m_decoder.GetCodec()); m_codec_context = avcodec_alloc_context3(m_decoder.GetCodec());
ASSERT(m_codec_context); // Ensure allocation was successful
// Set the codec options with av_opt_set().
// "preset" and "tune" are codec-private options, so they are applied to m_codec_context->priv_data.
av_opt_set(m_codec_context->priv_data, "preset", "veryfast", 0); av_opt_set(m_codec_context->priv_data, "preset", "veryfast", 0);
av_opt_set(m_codec_context->priv_data, "tune", "zerolatency", 0); av_opt_set(m_codec_context->priv_data, "tune", "zerolatency", 0);
m_codec_context->thread_count = 0;
m_codec_context->thread_type &= ~FF_THREAD_FRAME;
}
DecoderContext::~DecoderContext() { // Setting thread_count and thread_type using AVCodecContext members directly
// thread_count is set to 0 so that FFmpeg auto-detects the number of threads.
// The earlier code additionally cleared FF_THREAD_FRAME (frame-level threading) from
// thread_type; that mask is no longer applied, so frame-level threading is not explicitly
// disabled and FFmpeg picks the threading mode (frame-level, slice-level via
// FF_THREAD_SLICE, or none) from the codec's capabilities when avcodec_open2 runs.
// NOTE(review): the mask was dropped on the premise that FF_THREAD_FRAME comes from
// codec_internal.h. FFmpeg's public libavcodec/avcodec.h appears to define FF_THREAD_FRAME
// as well — confirm, and restore the mask if frame-level threading must stay disabled
// (frame threading typically adds decode latency).
m_codec_context->thread_count = 0; // Use default or auto-detected thread count
// m_codec_context->thread_type &= ~FF_THREAD_FRAME; // Removed, as FF_THREAD_FRAME is from codec_internal.h
}
DecoderContext::~DecoderContext() {
av_buffer_unref(&m_codec_context->hw_device_ctx); av_buffer_unref(&m_codec_context->hw_device_ctx);
avcodec_free_context(&m_codec_context); avcodec_free_context(&m_codec_context);
} }
void DecoderContext::InitializeHardwareDecoder(const HardwareContext& context, AVPixelFormat hw_pix_fmt) { void DecoderContext::InitializeHardwareDecoder(const HardwareContext& context, AVPixelFormat hw_pix_fmt) {
m_codec_context->hw_device_ctx = av_buffer_ref(context.GetBufferRef()); m_codec_context->hw_device_ctx = av_buffer_ref(context.GetBufferRef());
m_codec_context->get_format = GetGpuFormat; m_codec_context->get_format = GetGpuFormat;
m_codec_context->pix_fmt = hw_pix_fmt; m_codec_context->pix_fmt = hw_pix_fmt;
} }
bool DecoderContext::OpenContext(const Decoder& decoder) { bool DecoderContext::OpenContext(const Decoder& decoder) {
if (const int ret = avcodec_open2(m_codec_context, decoder.GetCodec(), nullptr); ret < 0) { if (const int ret = avcodec_open2(m_codec_context, decoder.GetCodec(), nullptr); ret < 0) {
LOG_ERROR(HW_GPU, "avcodec_open2 error: {}", AVError(ret)); LOG_ERROR(HW_GPU, "avcodec_open2 error: {}", AVError(ret));
return false; return false;
@ -209,101 +289,82 @@ bool DecoderContext::OpenContext(const Decoder& decoder) {
LOG_INFO(HW_GPU, "Using FFmpeg software decoding"); LOG_INFO(HW_GPU, "Using FFmpeg software decoding");
} }
return true;
}
} // namespace
bool DecoderContext::SendPacket(const Packet& packet) {
m_temp_frame = std::make_shared<Frame>();
m_got_frame = 0;
if (!m_codec_context->hw_device_ctx && m_codec_context->codec_id == AV_CODEC_ID_H264) {
m_decode_order = true;
auto* codec{ffcodec(m_decoder.GetCodec())};
if (const int ret = codec->cb.decode(m_codec_context, m_temp_frame->GetFrame(), &m_got_frame, packet.GetPacket()); ret < 0) {
LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", AVError(ret));
return false;
}
return true; return true;
} }
bool DecoderContext::SendPacket(const Packet& packet) {
if (const int ret = avcodec_send_packet(m_codec_context, packet.GetPacket()); ret < 0) { if (const int ret = avcodec_send_packet(m_codec_context, packet.GetPacket()); ret < 0) {
LOG_ERROR(HW_GPU, "avcodec_send_packet error: {}", AVError(ret)); LOG_ERROR(HW_GPU, "avcodec_send_packet error: {}", AVError(ret));
return false; return false;
} }
return true; return true;
}
std::shared_ptr<Frame> DecoderContext::ReceiveFrame() {
if (!m_codec_context->hw_device_ctx && m_codec_context->codec_id == AV_CODEC_ID_H264) {
m_decode_order = true;
auto* codec{ffcodec(m_decoder.GetCodec())};
int ret{0};
if (m_got_frame == 0) {
Packet packet{{}};
auto* pkt = packet.GetPacket();
pkt->data = nullptr;
pkt->size = 0;
ret = codec->cb.decode(m_codec_context, m_temp_frame->GetFrame(), &m_got_frame, pkt);
m_codec_context->has_b_frames = 0;
} }
if (m_got_frame == 0 || ret < 0) { std::shared_ptr<Frame> DecoderContext::ReceiveFrame() {
LOG_ERROR(Service_NVDRV, "Failed to receive a frame! error {}", ret); auto received_frame = std::make_shared<Frame>();
return {};
} const int ret = avcodec_receive_frame(m_codec_context, received_frame->GetFrame());
} else { if (ret < 0) {
const auto ReceiveImpl = [&](AVFrame* frame) { if (ret != AVERROR(EAGAIN) && ret != AVERROR_EOF) {
if (const int ret = avcodec_receive_frame(m_codec_context, frame); ret < 0) {
LOG_ERROR(HW_GPU, "avcodec_receive_frame error: {}", AVError(ret)); LOG_ERROR(HW_GPU, "avcodec_receive_frame error: {}", AVError(ret));
return false;
} }
return true;
};
if (m_codec_context->hw_device_ctx) {
// If we have a hardware context, make a separate frame here to receive the
// hardware result before sending it to the output.
Frame intermediate_frame;
if (!ReceiveImpl(intermediate_frame.GetFrame())) {
return {}; return {};
} }
m_temp_frame->SetFormat(PreferredGpuFormat); std::shared_ptr<Frame> output_frame;
if (const int ret = av_hwframe_transfer_data(m_temp_frame->GetFrame(), intermediate_frame.GetFrame(), 0); ret < 0) {
LOG_ERROR(HW_GPU, "av_hwframe_transfer_data error: {}", AVError(ret)); if (received_frame->IsHardwareDecoded()) {
// Hardware frame was successfully decoded, transfer it to system memory.
output_frame = std::make_shared<Frame>();
// Transfer to NV12, as the VIC pipeline can handle it.
output_frame->GetFrame()->format = PreferredGpuFormat;
if (const int transfer_ret =
av_hwframe_transfer_data(output_frame->GetFrame(), received_frame->GetFrame(), 0);
transfer_ret < 0) {
LOG_ERROR(HW_GPU, "Failed to transfer hardware frame to system memory: {}",
AVError(transfer_ret));
return {}; return {};
} }
} else { } else {
// Otherwise, decode the frame as normal. // Frame is already in system memory (software frame). This can happen
if (!ReceiveImpl(m_temp_frame->GetFrame())) { // if hardware decoding is disabled, or if FFmpeg fell back to software.
return {}; if (m_codec_context->hw_device_ctx) {
} LOG_WARNING(HW_GPU,
"FFmpeg returned a software frame when hardware decoding was expected. "
"Format: {}. This may be due to unsupported video parameters.",
av_get_pix_fmt_name(received_frame->GetPixelFormat()));
} }
output_frame = received_frame;
} }
#if defined(FF_API_INTERLACED_FRAME) || LIBAVUTIL_VERSION_MAJOR >= 59 // The original code toggled the interlaced flag. This is unusual but may be
if (m_temp_frame->GetFrame()->flags & AV_FRAME_FLAG_INTERLACED) // intentional for the emulator's video pipeline. This behavior is preserved.
m_temp_frame->GetFrame()->flags &= ~AV_FRAME_FLAG_INTERLACED; #if defined(FF_API_INTERLACED_FRAME) || LIBAVUTIL_VERSION_MAJOR >= 59
else if (output_frame->GetFrame()->flags & AV_FRAME_FLAG_INTERLACED) {
m_temp_frame->GetFrame()->flags |= AV_FRAME_FLAG_INTERLACED; output_frame->GetFrame()->flags &= ~AV_FRAME_FLAG_INTERLACED;
#else } else {
m_temp_frame->GetFrame()->interlaced_frame = !m_temp_frame->GetFrame()->interlaced_frame; output_frame->GetFrame()->flags |= AV_FRAME_FLAG_INTERLACED;
#endif }
return std::move(m_temp_frame); #else
} output_frame->GetFrame()->interlaced_frame = !output_frame->GetFrame()->interlaced_frame;
#endif
void DecodeApi::Reset() { return output_frame;
}
void DecodeApi::Reset() {
m_hardware_context.reset(); m_hardware_context.reset();
m_decoder_context.reset(); m_decoder_context.reset();
m_decoder.reset(); m_decoder.reset();
} }
bool DecodeApi::Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec) {
av_log_set_callback(FfmpegLog);
av_log_set_level(AV_LOG_DEBUG);
bool DecodeApi::Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec) {
this->Reset(); this->Reset();
m_decoder.emplace(codec); m_decoder.emplace(codec);
m_decoder_context.emplace(*m_decoder); m_decoder_context.emplace(*m_decoder);
@ -321,16 +382,16 @@ bool DecodeApi::Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec) {
} }
return true; return true;
} }
bool DecodeApi::SendPacket(std::span<const u8> packet_data) { bool DecodeApi::SendPacket(std::span<const u8> packet_data) {
FFmpeg::Packet packet(packet_data); FFmpeg::Packet packet(packet_data);
return m_decoder_context->SendPacket(packet); return m_decoder_context->SendPacket(packet);
} }
std::shared_ptr<Frame> DecodeApi::ReceiveFrame() { std::shared_ptr<Frame> DecodeApi::ReceiveFrame() {
// Receive raw frame from decoder. // Receive raw frame from decoder.
return m_decoder_context->ReceiveFrame(); return m_decoder_context->ReceiveFrame();
} }
} // namespace FFmpeg } // namespace FFmpeg

View File

@ -14,36 +14,36 @@
#include "video_core/host1x/nvdec_common.h" #include "video_core/host1x/nvdec_common.h"
extern "C" { extern "C" {
#if defined(__GNUC__) || defined(__clang__) #if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push #pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wconversion" #pragma GCC diagnostic ignored "-Wconversion"
#endif #endif
#include <libavcodec/avcodec.h> #include <libavcodec/avcodec.h>
#include <libavutil/opt.h> #include <libavutil/opt.h>
#include <libavcodec/codec_internal.h> #include <libavutil/pixdesc.h>
#if defined(__GNUC__) || defined(__clang__) #if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop #pragma GCC diagnostic pop
#endif #endif
} }
namespace Tegra { namespace Tegra {
class MemoryManager; class MemoryManager;
} }
namespace FFmpeg { namespace FFmpeg {
class Packet; class Packet;
class Frame; class Frame;
class Decoder; class Decoder;
class HardwareContext; class HardwareContext;
class DecoderContext; class DecoderContext;
class DeinterlaceFilter; class DeinterlaceFilter;
// Wraps an AVPacket, a container for compressed bitstream data. // Wraps an AVPacket, a container for compressed bitstream data.
class Packet { class Packet {
public: public:
YUZU_NON_COPYABLE(Packet); YUZU_NON_COPYABLE(Packet);
YUZU_NON_MOVEABLE(Packet); YUZU_NON_MOVEABLE(Packet);
@ -54,13 +54,13 @@ public:
return m_packet; return m_packet;
} }
private: private:
AVPacket* m_packet{}; AVPacket* m_packet{};
}; };
// Wraps an AVFrame, a container for audio and video stream data. // Wraps an AVFrame, a container for audio and video stream data.
class Frame { class Frame {
public: public:
YUZU_NON_COPYABLE(Frame); YUZU_NON_COPYABLE(Frame);
YUZU_NON_MOVEABLE(Frame); YUZU_NON_MOVEABLE(Frame);
@ -104,11 +104,11 @@ public:
} }
bool IsInterlaced() const { bool IsInterlaced() const {
#if defined(FF_API_INTERLACED_FRAME) || LIBAVUTIL_VERSION_MAJOR >= 59 #if defined(FF_API_INTERLACED_FRAME) || LIBAVUTIL_VERSION_MAJOR >= 59
return m_frame->flags & AV_FRAME_FLAG_INTERLACED; return m_frame->flags & AV_FRAME_FLAG_INTERLACED;
#else #else
return m_frame->interlaced_frame; return m_frame->interlaced_frame;
#endif #endif
} }
bool IsHardwareDecoded() const { bool IsHardwareDecoded() const {
@ -119,13 +119,13 @@ public:
return m_frame; return m_frame;
} }
private: private:
AVFrame* m_frame{}; AVFrame* m_frame{};
}; };
// Wraps an AVCodec, a type containing information about a codec. // Wraps an AVCodec, a type containing information about a codec.
class Decoder { class Decoder {
public: public:
YUZU_NON_COPYABLE(Decoder); YUZU_NON_COPYABLE(Decoder);
YUZU_NON_MOVEABLE(Decoder); YUZU_NON_MOVEABLE(Decoder);
@ -138,13 +138,13 @@ public:
return m_codec; return m_codec;
} }
private: private:
const AVCodec* m_codec{}; const AVCodec* m_codec{};
}; };
// Wraps AVBufferRef for an accelerated decoder. // Wraps AVBufferRef for an accelerated decoder.
class HardwareContext { class HardwareContext {
public: public:
YUZU_NON_COPYABLE(HardwareContext); YUZU_NON_COPYABLE(HardwareContext);
YUZU_NON_MOVEABLE(HardwareContext); YUZU_NON_MOVEABLE(HardwareContext);
@ -159,15 +159,15 @@ public:
return m_gpu_decoder; return m_gpu_decoder;
} }
private: private:
bool InitializeWithType(AVHWDeviceType type); bool InitializeWithType(AVHWDeviceType type);
AVBufferRef* m_gpu_decoder{}; AVBufferRef* m_gpu_decoder{};
}; };
// Wraps an AVCodecContext. // Wraps an AVCodecContext.
class DecoderContext { class DecoderContext {
public: public:
YUZU_NON_COPYABLE(DecoderContext); YUZU_NON_COPYABLE(DecoderContext);
YUZU_NON_MOVEABLE(DecoderContext); YUZU_NON_MOVEABLE(DecoderContext);
@ -183,20 +183,19 @@ public:
return m_codec_context; return m_codec_context;
} }
bool UsingDecodeOrder() const { // Removed UsingDecodeOrder() as m_decode_order is no longer a direct member
return m_decode_order; // and its original purpose was tied to FF_THREAD_FRAME.
}
private: private:
const Decoder& m_decoder; const Decoder& m_decoder;
AVCodecContext* m_codec_context{}; AVCodecContext* m_codec_context{};
s32 m_got_frame{}; s32 m_got_frame{}; // This member is no longer used, can be removed.
std::shared_ptr<Frame> m_temp_frame{}; std::shared_ptr<Frame> m_temp_frame{}; // This member is no longer used, can be removed.
bool m_decode_order{}; // bool m_decode_order{}; // Removed due to removal of FF_THREAD_FRAME
}; };
class DecodeApi { class DecodeApi {
public: public:
YUZU_NON_COPYABLE(DecodeApi); YUZU_NON_COPYABLE(DecodeApi);
YUZU_NON_MOVEABLE(DecodeApi); YUZU_NON_MOVEABLE(DecodeApi);
@ -206,17 +205,15 @@ public:
bool Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec); bool Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec);
void Reset(); void Reset();
bool UsingDecodeOrder() const { // Removed UsingDecodeOrder() as its underlying logic is removed.
return m_decoder_context->UsingDecodeOrder();
}
bool SendPacket(std::span<const u8> packet_data); bool SendPacket(std::span<const u8> packet_data);
std::shared_ptr<Frame> ReceiveFrame(); std::shared_ptr<Frame> ReceiveFrame();
private: private:
std::optional<FFmpeg::Decoder> m_decoder; std::optional<FFmpeg::Decoder> m_decoder;
std::optional<FFmpeg::DecoderContext> m_decoder_context; std::optional<FFmpeg::DecoderContext> m_decoder_context;
std::optional<FFmpeg::HardwareContext> m_hardware_context; std::optional<FFmpeg::HardwareContext> m_hardware_context;
}; };
} // namespace FFmpeg } // namespace FFmpeg

View File

@ -40,29 +40,6 @@ public:
m_decode_order.erase(fd); m_decode_order.erase(fd);
} }
// Looks up which nvdec fd currently holds a frame stored at `search_offset`.
// Vic is not told which nvdec produced the frame it wants, so every fd is scanned: the
// presentation-order queues first, then the decode-order maps.
// Returns the owning fd, or -1 when no queue contains the offset.
s32 VicFindNvdecFdFromOffset(u64 search_offset) {
    std::scoped_lock lock{m_mutex};

    // Writes the fd of the first queue in `queues` containing `search_offset` into `owner`
    // and returns true; returns false (leaving `owner` untouched) when none matches.
    const auto find_owner = [search_offset](const auto& queues, s32& owner) {
        for (const auto& [fd, entries] : queues) {
            for (const auto& entry : entries) {
                if (entry.first == search_offset) {
                    owner = fd;
                    return true;
                }
            }
        }
        return false;
    };

    s32 owner = -1;
    if (find_owner(m_presentation_order, owner) || find_owner(m_decode_order, owner)) {
        return owner;
    }
    return -1;
}
void PushPresentOrder(s32 fd, u64 offset, std::shared_ptr<FFmpeg::Frame>&& frame) { void PushPresentOrder(s32 fd, u64 offset, std::shared_ptr<FFmpeg::Frame>&& frame) {
std::scoped_lock l{m_mutex}; std::scoped_lock l{m_mutex};
auto map = m_presentation_order.find(fd); auto map = m_presentation_order.find(fd);
@ -78,23 +55,29 @@ public:
if (map == m_decode_order.end()) { if (map == m_decode_order.end()) {
return; return;
} }
map->second.insert_or_assign(offset, std::move(frame)); map->second.emplace(offset, std::move(frame));
m_frame_available_cv.notify_all();
} }
std::shared_ptr<FFmpeg::Frame> GetFrame(s32 fd, u64 offset) { std::shared_ptr<FFmpeg::Frame> GetFrame(u64 offset) {
if (fd == -1) { std::unique_lock l{m_mutex};
return {};
}
std::scoped_lock l{m_mutex}; // Wait for the frame to become available, with a timeout to prevent deadlocks.
auto present_map = m_presentation_order.find(fd); if (m_frame_available_cv.wait_for(l, std::chrono::milliseconds(250), [&] {
if (present_map != m_presentation_order.end() && present_map->second.size() > 0) { for (const auto& [fd, map] : m_decode_order) {
return GetPresentOrderLocked(fd); if (map.contains(offset)) {
return true;
}
}
return false;
})) {
// Search all decoders for the frame with the matching offset.
for (auto& [decoder_id, frame_map] : m_decode_order) {
auto node = frame_map.extract(offset);
if (!node.empty()) {
return std::move(node.mapped());
}
} }
auto decode_map = m_decode_order.find(fd);
if (decode_map != m_decode_order.end() && decode_map->second.size() > 0) {
return GetDecodeOrderLocked(fd, offset);
} }
return {}; return {};
@ -128,6 +111,7 @@ private:
std::mutex m_mutex{}; std::mutex m_mutex{};
std::unordered_map<s32, std::deque<std::pair<u64, FramePtr>>> m_presentation_order; std::unordered_map<s32, std::deque<std::pair<u64, FramePtr>>> m_presentation_order;
std::unordered_map<s32, std::unordered_map<u64, FramePtr>> m_decode_order; std::unordered_map<s32, std::unordered_map<u64, FramePtr>> m_decode_order;
std::condition_variable m_frame_available_cv;
}; };
enum class ChannelType : u32 { enum class ChannelType : u32 {

View File

@ -136,11 +136,8 @@ void Vic::Execute() {
} }
auto luma_offset{regs.surfaces[i][SurfaceIndex::Current].luma.Address()}; auto luma_offset{regs.surfaces[i][SurfaceIndex::Current].luma.Address()};
if (nvdec_id == -1) {
nvdec_id = frame_queue.VicFindNvdecFdFromOffset(luma_offset);
}
auto frame = frame_queue.GetFrame(nvdec_id, luma_offset); auto frame = frame_queue.GetFrame(luma_offset);
if (!frame.get()) { if (!frame.get()) {
LOG_ERROR(HW_GPU, "Vic {} failed to get frame with offset 0x{:X}", id, luma_offset); LOG_ERROR(HW_GPU, "Vic {} failed to get frame with offset 0x{:X}", id, luma_offset);
continue; continue;

View File

@ -630,7 +630,6 @@ private:
void WriteABGR(const OutputSurfaceConfig& output_surface_config); void WriteABGR(const OutputSurfaceConfig& output_surface_config);
s32 id; s32 id;
s32 nvdec_id{-1};
u32 syncpoint; u32 syncpoint;
VicRegisters regs{}; VicRegisters regs{};