mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-12-16 04:09:07 +00:00
rsx: Refactor texture cache lookup core to be easier to read
This commit is contained in:
parent
8126a199f5
commit
1007aaee4c
@ -1869,314 +1869,319 @@ namespace rsx
|
||||
{
|
||||
return{ cached_texture->get_view(remap), cached_texture->get_context(), cached_texture->get_format_class(), scale, cached_texture->get_image_type() };
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
else
|
||||
|
||||
// Fast lookup for cyclic reference
|
||||
if (m_rtts.address_is_bound(attr.address)) [[unlikely]]
|
||||
{
|
||||
// Fast lookup for cyclic reference
|
||||
if (m_rtts.address_is_bound(attr.address)) [[unlikely]]
|
||||
if (auto texptr = m_rtts.get_surface_at(attr.address);
|
||||
helpers::check_framebuffer_resource(texptr, attr, extended_dimension))
|
||||
{
|
||||
if (auto texptr = m_rtts.get_surface_at(attr.address);
|
||||
helpers::check_framebuffer_resource(texptr, attr, extended_dimension))
|
||||
const bool force_convert = !render_target_format_is_compatible(texptr, attr.gcm_format);
|
||||
|
||||
auto result = helpers::process_framebuffer_resource_fast<sampled_image_descriptor>(
|
||||
cmd, texptr, attr, scale, extended_dimension, remap, true, force_convert);
|
||||
|
||||
if (!options.skip_texture_barriers && result.is_cyclic_reference)
|
||||
{
|
||||
const bool force_convert = !render_target_format_is_compatible(texptr, attr.gcm_format);
|
||||
|
||||
auto result = helpers::process_framebuffer_resource_fast<sampled_image_descriptor>(
|
||||
cmd, texptr, attr, scale, extended_dimension, remap, true, force_convert);
|
||||
|
||||
if (!options.skip_texture_barriers && result.is_cyclic_reference)
|
||||
{
|
||||
// A texture barrier is only necessary when the rendertarget is going to be bound as a shader input.
|
||||
// If a temporary copy is to be made, this should not be invoked
|
||||
insert_texture_barrier(cmd, texptr);
|
||||
}
|
||||
|
||||
return result;
|
||||
// A texture barrier is only necessary when the rendertarget is going to be bound as a shader input.
|
||||
// If a temporary copy is to be made, this should not be invoked
|
||||
insert_texture_barrier(cmd, texptr);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
rsx::simple_array<typename SurfaceStoreType::surface_overlap_info> overlapping_fbos;
|
||||
rsx::simple_array<section_storage_type*> overlapping_locals;
|
||||
|
||||
auto fast_fbo_check = [&]() -> sampled_image_descriptor
|
||||
{
|
||||
const auto& last = overlapping_fbos.back();
|
||||
if (last.src_area.x == 0 && last.src_area.y == 0 && !last.is_clipped)
|
||||
{
|
||||
const bool force_convert = !render_target_format_is_compatible(last.surface, attr.gcm_format);
|
||||
|
||||
return helpers::process_framebuffer_resource_fast<sampled_image_descriptor>(
|
||||
cmd, last.surface, attr, scale, extended_dimension, remap, false, force_convert);
|
||||
}
|
||||
|
||||
rsx::simple_array<typename SurfaceStoreType::surface_overlap_info> overlapping_fbos;
|
||||
rsx::simple_array<section_storage_type*> overlapping_locals;
|
||||
return {};
|
||||
};
|
||||
|
||||
auto fast_fbo_check = [&]() -> sampled_image_descriptor
|
||||
// Check surface cache early if the option is enabled
|
||||
if (options.prefer_surface_cache)
|
||||
{
|
||||
const u16 block_h = (attr.depth * attr.slice_h);
|
||||
overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, attr.address, attr.width, block_h, attr.pitch, attr.bpp, rsx::surface_access::shader_read);
|
||||
|
||||
if (!overlapping_fbos.empty())
|
||||
{
|
||||
const auto& last = overlapping_fbos.back();
|
||||
if (last.src_area.x == 0 && last.src_area.y == 0 && !last.is_clipped)
|
||||
if (auto result = fast_fbo_check(); result.validate())
|
||||
{
|
||||
const bool force_convert = !render_target_format_is_compatible(last.surface, attr.gcm_format);
|
||||
|
||||
return helpers::process_framebuffer_resource_fast<sampled_image_descriptor>(
|
||||
cmd, last.surface, attr, scale, extended_dimension, remap, false, force_convert);
|
||||
}
|
||||
|
||||
return {};
|
||||
};
|
||||
|
||||
// Check surface cache early if the option is enabled
|
||||
if (options.prefer_surface_cache)
|
||||
{
|
||||
const u16 block_h = (attr.depth * attr.slice_h);
|
||||
overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, attr.address, attr.width, block_h, attr.pitch, attr.bpp, rsx::surface_access::shader_read);
|
||||
|
||||
if (!overlapping_fbos.empty())
|
||||
{
|
||||
if (auto result = fast_fbo_check(); result.validate())
|
||||
{
|
||||
return result;
|
||||
}
|
||||
|
||||
if (options.skip_texture_merge)
|
||||
{
|
||||
overlapping_fbos.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check shader_read storage. In a given scene, reads from local memory far outnumber reads from the surface cache
|
||||
const u32 lookup_mask = rsx::texture_upload_context::shader_read | rsx::texture_upload_context::blit_engine_dst | rsx::texture_upload_context::blit_engine_src;
|
||||
overlapping_locals = find_texture_from_range<true>(memory_range, attr.height > 1 ? attr.pitch : 0, lookup_mask & options.lookup_mask);
|
||||
|
||||
// Search for exact match if possible
|
||||
for (auto& cached_texture : overlapping_locals)
|
||||
{
|
||||
if (cached_texture->matches(attr.address, attr.gcm_format, attr.width, attr.height, attr.depth, 0))
|
||||
{
|
||||
#ifdef TEXTURE_CACHE_DEBUG
|
||||
if (!memory_range.inside(cached_texture->get_confirmed_range()))
|
||||
{
|
||||
// TODO. This is easily possible for blit_dst textures if the blit is incomplete in Y
|
||||
// The possibility that a texture will be split into parts on the CPU like this is very rare
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
if (attr.swizzled != cached_texture->is_swizzled())
|
||||
{
|
||||
// We can have the correct data in cached_texture but it needs decoding before it can be sampled.
|
||||
// Usually a sign of a game bug where the developer forgot to mark the texture correctly the first time we see it.
|
||||
// TODO: This section should execute under an exclusive lock, but we're not actually modifying any object references, only flags
|
||||
rsx_log.warning("A texture was found in cache for address 0x%x, but swizzle flag does not match", attr.address);
|
||||
cached_texture->unprotect();
|
||||
cached_texture->set_dirty(true);
|
||||
break;
|
||||
}
|
||||
|
||||
return{ cached_texture->get_view(remap), cached_texture->get_context(), cached_texture->get_format_class(), scale, cached_texture->get_image_type() };
|
||||
}
|
||||
}
|
||||
|
||||
if (!overlapping_locals.empty())
|
||||
{
|
||||
// Remove everything that is not a transfer target
|
||||
overlapping_locals.erase_if([](const auto& e)
|
||||
{
|
||||
return e->is_dirty() || (e->get_context() != rsx::texture_upload_context::blit_engine_dst);
|
||||
});
|
||||
}
|
||||
|
||||
if (!options.prefer_surface_cache)
|
||||
{
|
||||
// Now check for surface cache hits
|
||||
const u16 block_h = (attr.depth * attr.slice_h);
|
||||
overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, attr.address, attr.width, block_h, attr.pitch, attr.bpp, rsx::surface_access::shader_read);
|
||||
}
|
||||
|
||||
if (!overlapping_fbos.empty() || !overlapping_locals.empty())
|
||||
{
|
||||
int _pool = -1;
|
||||
if (overlapping_locals.empty()) [[likely]]
|
||||
{
|
||||
_pool = 0;
|
||||
}
|
||||
else if (overlapping_fbos.empty())
|
||||
{
|
||||
_pool = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
_pool = (overlapping_locals.back()->last_write_tag < overlapping_fbos.back().surface->last_use_tag) ? 0 : 1;
|
||||
}
|
||||
|
||||
if (_pool == 0)
|
||||
{
|
||||
// Surface cache data is newer, check if this thing fits our search parameters
|
||||
if (!options.prefer_surface_cache)
|
||||
{
|
||||
if (auto result = fast_fbo_check(); result.validate())
|
||||
{
|
||||
return result;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (extended_dimension <= rsx::texture_dimension_extended::texture_dimension_2d)
|
||||
{
|
||||
const auto last = overlapping_locals.back();
|
||||
const auto normalized_width = u16(last->get_width() * get_format_block_size_in_bytes(last->get_gcm_format())) / attr.bpp;
|
||||
|
||||
if (last->get_section_base() == attr.address &&
|
||||
normalized_width >= attr.width && last->get_height() >= attr.height)
|
||||
{
|
||||
u32 gcm_format = attr.gcm_format;
|
||||
const bool gcm_format_is_depth = helpers::is_gcm_depth_format(attr.gcm_format);
|
||||
|
||||
if (!gcm_format_is_depth && last->is_depth_texture())
|
||||
{
|
||||
// While the copy routines can perform a typeless cast, prefer to not cross the aspect barrier if possible
|
||||
gcm_format = helpers::get_compatible_depth_format(attr.gcm_format);
|
||||
}
|
||||
|
||||
auto new_attr = attr;
|
||||
new_attr.gcm_format = gcm_format;
|
||||
|
||||
if (last->get_gcm_format() == attr.gcm_format && attr.edge_clamped)
|
||||
{
|
||||
// Clipped view
|
||||
auto viewed_image = last->get_raw_texture();
|
||||
sampled_image_descriptor result = { viewed_image->get_view(remap), last->get_context(),
|
||||
viewed_image->format_class(), scale, extended_dimension, false, viewed_image->samples() };
|
||||
|
||||
helpers::calculate_sample_clip_parameters(result, position2i(0, 0), size2i(attr.width, attr.height), size2i(normalized_width, last->get_height()));
|
||||
return result;
|
||||
}
|
||||
|
||||
return { last->get_raw_texture(), deferred_request_command::copy_image_static, new_attr, {},
|
||||
last->get_context(), classify_format(gcm_format), scale, extended_dimension, remap };
|
||||
}
|
||||
}
|
||||
|
||||
auto result = helpers::merge_cache_resources<sampled_image_descriptor>(
|
||||
cmd, overlapping_fbos, overlapping_locals, attr, scale, extended_dimension, remap, _pool);
|
||||
|
||||
const bool is_simple_subresource_copy =
|
||||
(result.external_subresource_desc.op == deferred_request_command::copy_image_static) ||
|
||||
(result.external_subresource_desc.op == deferred_request_command::copy_image_dynamic) ||
|
||||
(result.external_subresource_desc.op == deferred_request_command::blit_image_static);
|
||||
|
||||
if (attr.edge_clamped &&
|
||||
!g_cfg.video.strict_rendering_mode &&
|
||||
is_simple_subresource_copy &&
|
||||
render_target_format_is_compatible(result.external_subresource_desc.src0(), attr.gcm_format))
|
||||
{
|
||||
if (result.external_subresource_desc.op != deferred_request_command::blit_image_static) [[ likely ]]
|
||||
{
|
||||
helpers::convert_image_copy_to_clip_descriptor(
|
||||
result,
|
||||
position2i(result.external_subresource_desc.x, result.external_subresource_desc.y),
|
||||
size2i(result.external_subresource_desc.width, result.external_subresource_desc.height),
|
||||
size2i(result.external_subresource_desc.external_handle->width(), result.external_subresource_desc.external_handle->height()),
|
||||
remap, false);
|
||||
}
|
||||
else
|
||||
{
|
||||
helpers::convert_image_blit_to_clip_descriptor(
|
||||
result,
|
||||
remap,
|
||||
false);
|
||||
}
|
||||
|
||||
if (!!result.ref_address && m_rtts.address_is_bound(result.ref_address))
|
||||
{
|
||||
result.is_cyclic_reference = true;
|
||||
|
||||
auto texptr = ensure(m_rtts.get_surface_at(result.ref_address));
|
||||
insert_texture_barrier(cmd, texptr);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
if (options.skip_texture_merge)
|
||||
{
|
||||
if (is_simple_subresource_copy)
|
||||
{
|
||||
return result;
|
||||
}
|
||||
overlapping_fbos.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {};
|
||||
// Check shader_read storage. In a given scene, reads from local memory far outnumber reads from the surface cache
|
||||
const u32 lookup_mask = rsx::texture_upload_context::shader_read | rsx::texture_upload_context::blit_engine_dst | rsx::texture_upload_context::blit_engine_src;
|
||||
overlapping_locals = find_texture_from_range<true>(memory_range, attr.height > 1 ? attr.pitch : 0, lookup_mask & options.lookup_mask);
|
||||
|
||||
// Search for exact match if possible
|
||||
for (auto& cached_texture : overlapping_locals)
|
||||
{
|
||||
if (cached_texture->matches(attr.address, attr.gcm_format, attr.width, attr.height, attr.depth, 0))
|
||||
{
|
||||
#ifdef TEXTURE_CACHE_DEBUG
|
||||
if (!memory_range.inside(cached_texture->get_confirmed_range()))
|
||||
{
|
||||
// TODO. This is easily possible for blit_dst textures if the blit is incomplete in Y
|
||||
// The possibility that a texture will be split into parts on the CPU like this is very rare
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
if (attr.swizzled != cached_texture->is_swizzled())
|
||||
{
|
||||
// We can have the correct data in cached_texture but it needs decoding before it can be sampled.
|
||||
// Usually a sign of a game bug where the developer forgot to mark the texture correctly the first time we see it.
|
||||
// TODO: This section should execute under an exclusive lock, but we're not actually modifying any object references, only flags
|
||||
rsx_log.warning("A texture was found in cache for address 0x%x, but swizzle flag does not match", attr.address);
|
||||
cached_texture->unprotect();
|
||||
cached_texture->set_dirty(true);
|
||||
break;
|
||||
}
|
||||
|
||||
if (const auto section_count = result.external_subresource_desc.sections_to_copy.size();
|
||||
section_count > 0)
|
||||
return{ cached_texture->get_view(remap), cached_texture->get_context(), cached_texture->get_format_class(), scale, cached_texture->get_image_type() };
|
||||
}
|
||||
}
|
||||
|
||||
if (!overlapping_locals.empty())
|
||||
{
|
||||
// Remove everything that is not a transfer target
|
||||
overlapping_locals.erase_if([](const auto& e)
|
||||
{
|
||||
return e->is_dirty() || (e->get_context() != rsx::texture_upload_context::blit_engine_dst);
|
||||
});
|
||||
}
|
||||
|
||||
if (!options.prefer_surface_cache)
|
||||
{
|
||||
// Now check for surface cache hits
|
||||
const u16 block_h = (attr.depth * attr.slice_h);
|
||||
overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, attr.address, attr.width, block_h, attr.pitch, attr.bpp, rsx::surface_access::shader_read);
|
||||
}
|
||||
|
||||
if (!overlapping_fbos.empty() || !overlapping_locals.empty())
|
||||
{
|
||||
int _pool = -1;
|
||||
if (overlapping_locals.empty()) [[likely]]
|
||||
{
|
||||
_pool = 0;
|
||||
}
|
||||
else if (overlapping_fbos.empty())
|
||||
{
|
||||
_pool = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
_pool = (overlapping_locals.back()->last_write_tag < overlapping_fbos.back().surface->last_use_tag) ? 0 : 1;
|
||||
}
|
||||
|
||||
if (_pool == 0)
|
||||
{
|
||||
// Surface cache data is newer, check if this thing fits our search parameters
|
||||
if (!options.prefer_surface_cache)
|
||||
{
|
||||
bool result_is_valid;
|
||||
if (_pool == 0 && !g_cfg.video.write_color_buffers && !g_cfg.video.write_depth_buffer)
|
||||
if (auto result = fast_fbo_check(); result.validate())
|
||||
{
|
||||
// HACK: Avoid WCB requirement for some games with wrongly declared sampler dimensions.
|
||||
// TODO: Some games may render a small region (e.g 1024x256x2) and sample a huge texture (e.g 1024x1024).
|
||||
// Seen in APF2k8 - this causes missing bits to be reuploaded from CPU which can cause WCB requirement.
|
||||
// Properly fix this by introducing partial data upload into the surface cache in such cases and making RCB/RDB
|
||||
// enabled by default. Blit engine already handles this correctly.
|
||||
result_is_valid = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
result_is_valid = result.atlas_covers_target_area(section_count == 1 ? 99 : 90);
|
||||
}
|
||||
|
||||
if (result_is_valid)
|
||||
{
|
||||
// Check for possible duplicates
|
||||
usz max_overdraw_ratio = u32{ umax };
|
||||
usz max_safe_sections = u32{ umax };
|
||||
|
||||
switch (result.external_subresource_desc.op)
|
||||
{
|
||||
case deferred_request_command::atlas_gather:
|
||||
max_overdraw_ratio = 150;
|
||||
max_safe_sections = 8 + 2 * attr.mipmaps;
|
||||
break;
|
||||
case deferred_request_command::cubemap_gather:
|
||||
max_overdraw_ratio = 150;
|
||||
max_safe_sections = 6 * 2 * attr.mipmaps;
|
||||
break;
|
||||
case deferred_request_command::_3d_gather:
|
||||
// 3D gather can have very many input sections, try to keep section count low
|
||||
max_overdraw_ratio = 125;
|
||||
max_safe_sections = (attr.depth * attr.mipmaps * 110) / 100;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (overlapping_fbos.size() > max_safe_sections)
|
||||
{
|
||||
// Are we really over-budget?
|
||||
u32 coverage_size = 0;
|
||||
for (const auto& section : overlapping_fbos)
|
||||
{
|
||||
const auto area = section.surface->get_native_pitch() * section.surface->template get_surface_height<rsx::surface_metrics::bytes>();
|
||||
coverage_size += area;
|
||||
}
|
||||
|
||||
if (const auto coverage_ratio = (coverage_size * 100ull) / memory_range.length();
|
||||
coverage_ratio > max_overdraw_ratio)
|
||||
{
|
||||
rsx_log.warning("[Performance warning] Texture gather routine encountered too many objects! Operation=%d, Mipmaps=%d, Depth=%d, Sections=%zu, Ratio=%llu%",
|
||||
static_cast<int>(result.external_subresource_desc.op), attr.mipmaps, attr.depth, overlapping_fbos.size(), coverage_ratio);
|
||||
m_rtts.check_for_duplicates(overlapping_fbos);
|
||||
}
|
||||
}
|
||||
|
||||
// Optionally disallow caching if resource is being written to as it is being read from
|
||||
for (const auto& section : overlapping_fbos)
|
||||
{
|
||||
if (m_rtts.address_is_bound(section.base_address))
|
||||
{
|
||||
if (result.external_subresource_desc.op == deferred_request_command::copy_image_static)
|
||||
{
|
||||
result.external_subresource_desc.op = deferred_request_command::copy_image_dynamic;
|
||||
}
|
||||
else
|
||||
{
|
||||
result.external_subresource_desc.do_not_cache = true;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (extended_dimension <= rsx::texture_dimension_extended::texture_dimension_2d)
|
||||
{
|
||||
const auto last = overlapping_locals.back();
|
||||
const auto normalized_width = u16(last->get_width() * get_format_block_size_in_bytes(last->get_gcm_format())) / attr.bpp;
|
||||
|
||||
if (last->get_section_base() == attr.address &&
|
||||
normalized_width >= attr.width && last->get_height() >= attr.height)
|
||||
{
|
||||
u32 gcm_format = attr.gcm_format;
|
||||
const bool gcm_format_is_depth = helpers::is_gcm_depth_format(attr.gcm_format);
|
||||
|
||||
if (!gcm_format_is_depth && last->is_depth_texture())
|
||||
{
|
||||
// While the copy routines can perform a typeless cast, prefer to not cross the aspect barrier if possible
|
||||
gcm_format = helpers::get_compatible_depth_format(attr.gcm_format);
|
||||
}
|
||||
|
||||
auto new_attr = attr;
|
||||
new_attr.gcm_format = gcm_format;
|
||||
|
||||
if (last->get_gcm_format() == attr.gcm_format && attr.edge_clamped)
|
||||
{
|
||||
// Clipped view
|
||||
auto viewed_image = last->get_raw_texture();
|
||||
sampled_image_descriptor result = { viewed_image->get_view(remap), last->get_context(),
|
||||
viewed_image->format_class(), scale, extended_dimension, false, viewed_image->samples() };
|
||||
|
||||
helpers::calculate_sample_clip_parameters(result, position2i(0, 0), size2i(attr.width, attr.height), size2i(normalized_width, last->get_height()));
|
||||
return result;
|
||||
}
|
||||
|
||||
return { last->get_raw_texture(), deferred_request_command::copy_image_static, new_attr, {},
|
||||
last->get_context(), classify_format(gcm_format), scale, extended_dimension, remap };
|
||||
}
|
||||
}
|
||||
|
||||
auto result = helpers::merge_cache_resources<sampled_image_descriptor>(
|
||||
cmd, overlapping_fbos, overlapping_locals, attr, scale, extended_dimension, remap, _pool);
|
||||
|
||||
const bool is_simple_subresource_copy =
|
||||
(result.external_subresource_desc.op == deferred_request_command::copy_image_static) ||
|
||||
(result.external_subresource_desc.op == deferred_request_command::copy_image_dynamic) ||
|
||||
(result.external_subresource_desc.op == deferred_request_command::blit_image_static);
|
||||
|
||||
if (attr.edge_clamped &&
|
||||
!g_cfg.video.strict_rendering_mode &&
|
||||
is_simple_subresource_copy &&
|
||||
render_target_format_is_compatible(result.external_subresource_desc.src0(), attr.gcm_format))
|
||||
{
|
||||
if (result.external_subresource_desc.op != deferred_request_command::blit_image_static) [[ likely ]]
|
||||
{
|
||||
helpers::convert_image_copy_to_clip_descriptor(
|
||||
result,
|
||||
position2i(result.external_subresource_desc.x, result.external_subresource_desc.y),
|
||||
size2i(result.external_subresource_desc.width, result.external_subresource_desc.height),
|
||||
size2i(result.external_subresource_desc.external_handle->width(), result.external_subresource_desc.external_handle->height()),
|
||||
remap, false);
|
||||
}
|
||||
else
|
||||
{
|
||||
helpers::convert_image_blit_to_clip_descriptor(
|
||||
result,
|
||||
remap,
|
||||
false);
|
||||
}
|
||||
|
||||
if (!!result.ref_address && m_rtts.address_is_bound(result.ref_address))
|
||||
{
|
||||
result.is_cyclic_reference = true;
|
||||
|
||||
auto texptr = ensure(m_rtts.get_surface_at(result.ref_address));
|
||||
insert_texture_barrier(cmd, texptr);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
if (options.skip_texture_merge)
|
||||
{
|
||||
if (is_simple_subresource_copy)
|
||||
{
|
||||
return result;
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
const auto section_count = result.external_subresource_desc.sections_to_copy.size();
|
||||
if (section_count == 0)
|
||||
{
|
||||
// Fail
|
||||
return {};
|
||||
}
|
||||
|
||||
bool result_is_valid;
|
||||
if (_pool == 0 && !g_cfg.video.write_color_buffers && !g_cfg.video.write_depth_buffer)
|
||||
{
|
||||
// HACK: Avoid WCB requirement for some games with wrongly declared sampler dimensions.
|
||||
// TODO: Some games may render a small region (e.g 1024x256x2) and sample a huge texture (e.g 1024x1024).
|
||||
// Seen in APF2k8 - this causes missing bits to be reuploaded from CPU which can cause WCB requirement.
|
||||
// Properly fix this by introducing partial data upload into the surface cache in such cases and making RCB/RDB
|
||||
// enabled by default. Blit engine already handles this correctly.
|
||||
result_is_valid = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
result_is_valid = result.atlas_covers_target_area(section_count == 1 ? 99 : 90);
|
||||
}
|
||||
|
||||
if (!result_is_valid)
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
// Check for possible duplicates
|
||||
usz max_overdraw_ratio = u32{ umax };
|
||||
usz max_safe_sections = u32{ umax };
|
||||
|
||||
switch (result.external_subresource_desc.op)
|
||||
{
|
||||
case deferred_request_command::atlas_gather:
|
||||
max_overdraw_ratio = 150;
|
||||
max_safe_sections = 8 + 2 * attr.mipmaps;
|
||||
break;
|
||||
case deferred_request_command::cubemap_gather:
|
||||
max_overdraw_ratio = 150;
|
||||
max_safe_sections = 6 * 2 * attr.mipmaps;
|
||||
break;
|
||||
case deferred_request_command::_3d_gather:
|
||||
// 3D gather can have very many input sections, try to keep section count low
|
||||
max_overdraw_ratio = 125;
|
||||
max_safe_sections = (attr.depth * attr.mipmaps * 110) / 100;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (overlapping_fbos.size() > max_safe_sections)
|
||||
{
|
||||
// Are we really over-budget?
|
||||
u32 coverage_size = 0;
|
||||
for (const auto& section : overlapping_fbos)
|
||||
{
|
||||
const auto area = section.surface->get_native_pitch() * section.surface->template get_surface_height<rsx::surface_metrics::bytes>();
|
||||
coverage_size += area;
|
||||
}
|
||||
|
||||
if (const auto coverage_ratio = (coverage_size * 100ull) / memory_range.length();
|
||||
coverage_ratio > max_overdraw_ratio)
|
||||
{
|
||||
rsx_log.warning("[Performance warning] Texture gather routine encountered too many objects! Operation=%d, Mipmaps=%d, Depth=%d, Sections=%zu, Ratio=%llu%",
|
||||
static_cast<int>(result.external_subresource_desc.op), attr.mipmaps, attr.depth, overlapping_fbos.size(), coverage_ratio);
|
||||
m_rtts.check_for_duplicates(overlapping_fbos);
|
||||
}
|
||||
}
|
||||
|
||||
// Optionally disallow caching if resource is being written to as it is being read from
|
||||
for (const auto& section : overlapping_fbos)
|
||||
{
|
||||
if (m_rtts.address_is_bound(section.base_address))
|
||||
{
|
||||
if (result.external_subresource_desc.op == deferred_request_command::copy_image_static)
|
||||
{
|
||||
result.external_subresource_desc.op = deferred_request_command::copy_image_dynamic;
|
||||
}
|
||||
else
|
||||
{
|
||||
result.external_subresource_desc.do_not_cache = true;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
return {};
|
||||
|
||||
Loading…
Reference in New Issue
Block a user