mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-12-16 04:09:07 +00:00
gl: Implement hardware deswizzle for small texel formats
This commit is contained in:
parent
ff72f944ba
commit
cffc13696d
@ -263,8 +263,6 @@ namespace gl
|
||||
|
||||
cs_deswizzle_3d()
|
||||
{
|
||||
ensure((sizeof(_BlockType) & 3) == 0); // "Unsupported block type"
|
||||
|
||||
initialize();
|
||||
|
||||
m_src =
|
||||
@ -294,8 +292,10 @@ namespace gl
|
||||
{ "%loc", std::to_string(GL_COMPUTE_BUFFER_SLOT(0))},
|
||||
{ "%push_block", fmt::format("binding=%d, std140", GL_COMPUTE_BUFFER_SLOT(2)) },
|
||||
{ "%ws", std::to_string(optimal_group_size) },
|
||||
{ "%_wordcount", std::to_string(sizeof(_BlockType) / 4) },
|
||||
{ "%f", transform }
|
||||
{ "%_wordcount", std::to_string(std::max<u32>(sizeof(_BlockType) / 4u, 1u)) },
|
||||
{ "%f", transform },
|
||||
{ "%_8bit", sizeof(_BlockType) == 1 ? "1" : "0" },
|
||||
{ "%_16bit", sizeof(_BlockType) == 2 ? "1" : "0" },
|
||||
};
|
||||
|
||||
m_src = fmt::replace_all(m_src, syntax_replace);
|
||||
@ -339,7 +339,8 @@ namespace gl
|
||||
set_parameters(cmd);
|
||||
|
||||
const u32 num_bytes_per_invocation = (sizeof(_BlockType) * optimal_group_size);
|
||||
const u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation);
|
||||
const u32 texels_per_dword = std::max<u32>(4u / sizeof(_BlockType), 1u); // For block sizes less than 4 bytes wide
|
||||
const u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation) / texels_per_dword;
|
||||
compute_task::run(cmd, linear_invocations);
|
||||
}
|
||||
};
|
||||
|
||||
@ -36,6 +36,16 @@ namespace gl
|
||||
{
|
||||
switch (block_size)
|
||||
{
|
||||
case 1:
|
||||
gl::get_compute_task<gl::cs_deswizzle_3d<u8, WordType, SwapBytes>>()->run(
|
||||
cmd, dst, dst_offset, src, src_offset,
|
||||
data_length, width, height, depth, 1);
|
||||
break;
|
||||
case 2:
|
||||
gl::get_compute_task<gl::cs_deswizzle_3d<u16, WordType, SwapBytes>>()->run(
|
||||
cmd, dst, dst_offset, src, src_offset,
|
||||
data_length, width, height, depth, 1);
|
||||
break;
|
||||
case 4:
|
||||
gl::get_compute_task<gl::cs_deswizzle_3d<u32, WordType, SwapBytes>>()->run(
|
||||
cmd, dst, dst_offset, src, src_offset,
|
||||
@ -748,39 +758,54 @@ namespace gl
|
||||
g_upload_transfer_buffer.copy_to(&g_deswizzle_scratch_buffer.get(), upload_scratch_mem.second, deswizzle_data_offset, static_cast<u32>(image_linear_size));
|
||||
|
||||
// 2.2 Apply compute transform to deswizzle input and dump it in compute_scratch_mem
|
||||
ensure(op.element_size == 2 || op.element_size == 4);
|
||||
const auto block_size = op.element_size * op.block_length;
|
||||
|
||||
if (op.require_swap)
|
||||
{
|
||||
mem_layout.swap_bytes = false;
|
||||
|
||||
if (op.element_size == 4) [[ likely ]]
|
||||
switch (op.element_size)
|
||||
{
|
||||
do_deswizzle_transformation<u32, true>(cmd, block_size,
|
||||
case 1:
|
||||
do_deswizzle_transformation<u8, true>(cmd, block_size,
|
||||
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
|
||||
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
case 2:
|
||||
do_deswizzle_transformation<u16, true>(cmd, block_size,
|
||||
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
|
||||
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||
break;
|
||||
case 4:
|
||||
do_deswizzle_transformation<u32, true>(cmd, block_size,
|
||||
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
|
||||
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||
break;
|
||||
default:
|
||||
fmt::throw_exception("Unimplemented element size deswizzle");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (op.element_size == 4) [[ likely ]]
|
||||
switch (op.element_size)
|
||||
{
|
||||
do_deswizzle_transformation<u32, false>(cmd, block_size,
|
||||
case 1:
|
||||
do_deswizzle_transformation<u8, false>(cmd, block_size,
|
||||
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
|
||||
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
case 2:
|
||||
do_deswizzle_transformation<u16, false>(cmd, block_size,
|
||||
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
|
||||
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||
break;
|
||||
case 4:
|
||||
do_deswizzle_transformation<u32, false>(cmd, block_size,
|
||||
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
|
||||
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||
break;
|
||||
default:
|
||||
fmt::throw_exception("Unimplemented element size deswizzle");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -476,21 +476,8 @@ namespace vk
|
||||
params.logd = rsx::ceil_log2(depth);
|
||||
|
||||
const u32 num_bytes_per_invocation = (sizeof(_BlockType) * optimal_group_size);
|
||||
u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation);
|
||||
|
||||
// Check if we need to do subaddressing and adjust invocation count accordingly
|
||||
switch (sizeof(_BlockType))
|
||||
{
|
||||
case 1:
|
||||
linear_invocations /= 4;
|
||||
break;
|
||||
case 2:
|
||||
linear_invocations /= 2;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
const u32 texels_per_dword = std::max<u32>(4u / sizeof(_BlockType), 1u); // For block sizes less than 4 bytes wide
|
||||
const u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation) / texels_per_dword;
|
||||
compute_task::run(cmd, linear_invocations);
|
||||
}
|
||||
};
|
||||
|
||||
Loading…
Reference in New Issue
Block a user