rsx: Refactor texture cache lookup core to be easier to read

kd-11 authored on 2025-11-06 10:42:55 +03:00, committed by kd-11
parent 8126a199f5
commit 1007aaee4c


@ -1869,314 +1869,319 @@ namespace rsx
{
return{ cached_texture->get_view(remap), cached_texture->get_context(), cached_texture->get_format_class(), scale, cached_texture->get_image_type() };
}
return {};
}

// Fast lookup for cyclic reference
if (m_rtts.address_is_bound(attr.address)) [[unlikely]]
{
	if (auto texptr = m_rtts.get_surface_at(attr.address);
		helpers::check_framebuffer_resource(texptr, attr, extended_dimension))
	{
		const bool force_convert = !render_target_format_is_compatible(texptr, attr.gcm_format);
		auto result = helpers::process_framebuffer_resource_fast<sampled_image_descriptor>(
			cmd, texptr, attr, scale, extended_dimension, remap, true, force_convert);

		if (!options.skip_texture_barriers && result.is_cyclic_reference)
		{
			// A texture barrier is only necessary when the rendertarget is going to be bound as a shader input.
			// If a temporary copy is to be made, this should not be invoked
			insert_texture_barrier(cmd, texptr);
		}

		return result;
	}
}
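
// No directly usable render target is bound at this address. Candidates are
// gathered from two pools: render target data tracked by the surface cache
// (overlapping_fbos) and CPU-uploaded sections in local storage (overlapping_locals).
// fast_fbo_check only succeeds when the most recent overlapping surface starts
// exactly at the lookup address and is not clipped, so it can be sampled directly.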

rsx::simple_array<typename SurfaceStoreType::surface_overlap_info> overlapping_fbos;
rsx::simple_array<section_storage_type*> overlapping_locals;

auto fast_fbo_check = [&]() -> sampled_image_descriptor
{
	const auto& last = overlapping_fbos.back();
	if (last.src_area.x == 0 && last.src_area.y == 0 && !last.is_clipped)
	{
		const bool force_convert = !render_target_format_is_compatible(last.surface, attr.gcm_format);
		return helpers::process_framebuffer_resource_fast<sampled_image_descriptor>(
			cmd, last.surface, attr, scale, extended_dimension, remap, false, force_convert);
	}

	return {};
};

// Check surface cache early if the option is enabled
if (options.prefer_surface_cache)
{
	const u16 block_h = (attr.depth * attr.slice_h);
	overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, attr.address, attr.width, block_h, attr.pitch, attr.bpp, rsx::surface_access::shader_read);

	if (!overlapping_fbos.empty())
	{
		if (auto result = fast_fbo_check(); result.validate())
		{
			return result;
		}

		if (options.skip_texture_merge)
		{
			overlapping_fbos.clear();
		}
	}
}

// Check shader_read storage. In a given scene, reads from local memory far outnumber reads from the surface cache
const u32 lookup_mask = rsx::texture_upload_context::shader_read | rsx::texture_upload_context::blit_engine_dst | rsx::texture_upload_context::blit_engine_src;
overlapping_locals = find_texture_from_range<true>(memory_range, attr.height > 1 ? attr.pitch : 0, lookup_mask & options.lookup_mask);

// Search for exact match if possible
for (auto& cached_texture : overlapping_locals)
{
	if (cached_texture->matches(attr.address, attr.gcm_format, attr.width, attr.height, attr.depth, 0))
	{
#ifdef TEXTURE_CACHE_DEBUG
		if (!memory_range.inside(cached_texture->get_confirmed_range()))
		{
			// TODO. This is easily possible for blit_dst textures if the blit is incomplete in Y
			// The possibility that a texture will be split into parts on the CPU like this is very rare
			continue;
		}
#endif
		if (attr.swizzled != cached_texture->is_swizzled())
		{
			// We can have the correct data in cached_texture but it needs decoding before it can be sampled.
			// Usually a sign of a game bug where the developer forgot to mark the texture correctly the first time we see it.
			// TODO: This section should execute under an exclusive lock, but we're not actually modifying any object references, only flags
			rsx_log.warning("A texture was found in cache for address 0x%x, but swizzle flag does not match", attr.address);
			cached_texture->unprotect();
			cached_texture->set_dirty(true);
			break;
		}

		return{ cached_texture->get_view(remap), cached_texture->get_context(), cached_texture->get_format_class(), scale, cached_texture->get_image_type() };
	}
}

if (!overlapping_locals.empty())
{
	// Remove everything that is not a transfer target
	overlapping_locals.erase_if([](const auto& e)
	{
		return e->is_dirty() || (e->get_context() != rsx::texture_upload_context::blit_engine_dst);
	});
}

if (!options.prefer_surface_cache)
{
	// Now check for surface cache hits
	const u16 block_h = (attr.depth * attr.slice_h);
	overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, attr.address, attr.width, block_h, attr.pitch, attr.bpp, rsx::surface_access::shader_read);
}

if (!overlapping_fbos.empty() || !overlapping_locals.empty())
{
	int _pool = -1;
	if (overlapping_locals.empty()) [[likely]]
	{
		_pool = 0;
	}
	else if (overlapping_fbos.empty())
	{
		_pool = 1;
	}
	else
	{
		_pool = (overlapping_locals.back()->last_write_tag < overlapping_fbos.back().surface->last_use_tag) ? 0 : 1;
	}

	if (_pool == 0)
	{
		// Surface cache data is newer, check if this thing fits our search parameters
		if (!options.prefer_surface_cache)
		{
			if (auto result = fast_fbo_check(); result.validate())
			{
				return result;
			}
		}
	}
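
	// Local (blit target) data is newer. For 1D/2D lookups, the most recent
	// transfer target may already contain the full requested region and can be
	// sampled in place instead of going through the merge path.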
	else if (extended_dimension <= rsx::texture_dimension_extended::texture_dimension_2d)
	{
		const auto last = overlapping_locals.back();
		const auto normalized_width = u16(last->get_width() * get_format_block_size_in_bytes(last->get_gcm_format())) / attr.bpp;

		if (last->get_section_base() == attr.address &&
			normalized_width >= attr.width && last->get_height() >= attr.height)
		{
			u32 gcm_format = attr.gcm_format;
			const bool gcm_format_is_depth = helpers::is_gcm_depth_format(attr.gcm_format);

			if (!gcm_format_is_depth && last->is_depth_texture())
			{
				// While the copy routines can perform a typeless cast, prefer to not cross the aspect barrier if possible
				gcm_format = helpers::get_compatible_depth_format(attr.gcm_format);
			}

			auto new_attr = attr;
			new_attr.gcm_format = gcm_format;

			if (last->get_gcm_format() == attr.gcm_format && attr.edge_clamped)
			{
				// Clipped view
				auto viewed_image = last->get_raw_texture();
				sampled_image_descriptor result = { viewed_image->get_view(remap), last->get_context(),
					viewed_image->format_class(), scale, extended_dimension, false, viewed_image->samples() };
				helpers::calculate_sample_clip_parameters(result, position2i(0, 0), size2i(attr.width, attr.height), size2i(normalized_width, last->get_height()));
				return result;
			}

			return { last->get_raw_texture(), deferred_request_command::copy_image_static, new_attr, {},
				last->get_context(), classify_format(gcm_format), scale, extended_dimension, remap };
		}
	}
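
	// No single section satisfies the request; compose a descriptor that merges
	// the overlapping sections, with the selected pool as the dominant source.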
	auto result = helpers::merge_cache_resources<sampled_image_descriptor>(
		cmd, overlapping_fbos, overlapping_locals, attr, scale, extended_dimension, remap, _pool);

	const bool is_simple_subresource_copy =
		(result.external_subresource_desc.op == deferred_request_command::copy_image_static) ||
		(result.external_subresource_desc.op == deferred_request_command::copy_image_dynamic) ||
		(result.external_subresource_desc.op == deferred_request_command::blit_image_static);
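
	// With edge clamping in effect, a simple copy/blit from a compatible render
	// target can be turned into a clipped view of the source (see
	// convert_image_copy_to_clip_descriptor below), avoiding the intermediate copy.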
	if (attr.edge_clamped &&
		!g_cfg.video.strict_rendering_mode &&
		is_simple_subresource_copy &&
		render_target_format_is_compatible(result.external_subresource_desc.src0(), attr.gcm_format))
	{
		if (result.external_subresource_desc.op != deferred_request_command::blit_image_static) [[ likely ]]
		{
			helpers::convert_image_copy_to_clip_descriptor(
				result,
				position2i(result.external_subresource_desc.x, result.external_subresource_desc.y),
				size2i(result.external_subresource_desc.width, result.external_subresource_desc.height),
				size2i(result.external_subresource_desc.external_handle->width(), result.external_subresource_desc.external_handle->height()),
				remap, false);
		}
		else
		{
			helpers::convert_image_blit_to_clip_descriptor(
				result,
				remap,
				false);
		}

		if (!!result.ref_address && m_rtts.address_is_bound(result.ref_address))
		{
			result.is_cyclic_reference = true;

			auto texptr = ensure(m_rtts.get_surface_at(result.ref_address));
			insert_texture_barrier(cmd, texptr);
		}

		return result;
	}
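
	// Callers that set skip_texture_merge only accept cheap single-source
	// results; anything that would require a gather fails the lookup instead.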
	if (options.skip_texture_merge)
	{
		if (is_simple_subresource_copy)
		{
			return result;
		}

		return {};
	}

	const auto section_count = result.external_subresource_desc.sections_to_copy.size();
	if (section_count == 0)
	{
		// Fail
		return {};
	}
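
	// Validate coverage. Unless the no-WCB/WDB surface cache hack below applies,
	// the merged atlas must cover (almost) the entire requested area to be usable.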
	bool result_is_valid;
	if (_pool == 0 && !g_cfg.video.write_color_buffers && !g_cfg.video.write_depth_buffer)
	{
		// HACK: Avoid WCB requirement for some games with wrongly declared sampler dimensions.
		// TODO: Some games may render a small region (e.g 1024x256x2) and sample a huge texture (e.g 1024x1024).
		// Seen in APF2k8 - this causes missing bits to be reuploaded from CPU which can cause WCB requirement.
		// Properly fix this by introducing partial data upload into the surface cache in such cases and making RCB/RDB
		// enabled by default. Blit engine already handles this correctly.
		result_is_valid = true;
	}
	else
	{
		result_is_valid = result.atlas_covers_target_area(section_count == 1 ? 99 : 90);
	}

	if (!result_is_valid)
	{
		return {};
	}

	// Check for possible duplicates
	usz max_overdraw_ratio = u32{ umax };
	usz max_safe_sections = u32{ umax };

	switch (result.external_subresource_desc.op)
	{
	case deferred_request_command::atlas_gather:
		max_overdraw_ratio = 150;
		max_safe_sections = 8 + 2 * attr.mipmaps;
		break;
	case deferred_request_command::cubemap_gather:
		max_overdraw_ratio = 150;
		max_safe_sections = 6 * 2 * attr.mipmaps;
		break;
	case deferred_request_command::_3d_gather:
		// 3D gather can have very many input sections, try to keep section count low
		max_overdraw_ratio = 125;
		max_safe_sections = (attr.depth * attr.mipmaps * 110) / 100;
		break;
	default:
		break;
	}
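
	// Gathers that exceed the per-operation section budget may be caused by
	// duplicate entries in the surface cache; verify against actual byte
	// coverage before warning and running de-duplication.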
	if (overlapping_fbos.size() > max_safe_sections)
	{
		// Are we really over-budget?
		u32 coverage_size = 0;
		for (const auto& section : overlapping_fbos)
		{
			const auto area = section.surface->get_native_pitch() * section.surface->template get_surface_height<rsx::surface_metrics::bytes>();
			coverage_size += area;
		}

		if (const auto coverage_ratio = (coverage_size * 100ull) / memory_range.length();
			coverage_ratio > max_overdraw_ratio)
		{
			rsx_log.warning("[Performance warning] Texture gather routine encountered too many objects! Operation=%d, Mipmaps=%d, Depth=%d, Sections=%zu, Ratio=%llu%",
				static_cast<int>(result.external_subresource_desc.op), attr.mipmaps, attr.depth, overlapping_fbos.size(), coverage_ratio);
			m_rtts.check_for_duplicates(overlapping_fbos);
		}
	}

	// Optionally disallow caching if resource is being written to as it is being read from
	for (const auto& section : overlapping_fbos)
	{
		if (m_rtts.address_is_bound(section.base_address))
		{
			if (result.external_subresource_desc.op == deferred_request_command::copy_image_static)
			{
				result.external_subresource_desc.op = deferred_request_command::copy_image_dynamic;
			}
			else
			{
				result.external_subresource_desc.do_not_cache = true;
			}

			break;
		}
	}

	return result;
}

return {};