vk: Move draw-time constants to the vertex layout stream and make it an SSBO

This commit is contained in:
kd-11 2025-07-20 17:27:47 +03:00 committed by kd-11
parent a819b9fc2a
commit ecc0fe4678
6 changed files with 61 additions and 54 deletions

View File

@ -19,7 +19,16 @@ struct vertex_context_t
float point_size;
float z_near;
float z_far;
// float reserved[3];
float reserved[3];
};
struct vertex_layout_t
{
uint vertex_base_index;
uint vertex_index_offset;
uint draw_id;
uint reserved;
uvec2 attrib_data[16];
};
)"

View File

@ -156,7 +156,7 @@ attribute_desc fetch_desc(const in int location)
#ifdef VULKAN
// Fetch parameters streamed separately from draw parameters
uvec2 attrib = texelFetch(vertex_layout_stream, location + int(layout_ptr_offset)).xy;
uvec2 attrib = vertex_layouts[vs_attrib_layout_offset].attrib_data[location];
#else
// Data is packed into a ubo
const int block = (location >> 1);
@ -178,6 +178,11 @@ attribute_desc fetch_desc(const in int location)
return result;
}
#ifdef VULKAN
#define vertex_index_offset vertex_layouts[vs_attrib_layout_offset].vertex_index_offset
#define vertex_base_index vertex_layouts[vs_attrib_layout_offset].vertex_base_index
#endif
vec4 read_location(const in int location)
{
int vertex_id;

View File

@ -820,30 +820,17 @@ void VKGSRender::emit_geometry(u32 sub_index)
update_descriptors = true;
// Allocate stream layout memory for this batch
m_vertex_layout_stream_info.range = rsx::method_registers.current_draw_clause.pass_count() * 128;
m_vertex_layout_stream_info.offset = m_vertex_layout_ring_info.alloc<256>(m_vertex_layout_stream_info.range);
if (vk::test_status_interrupt(vk::heap_changed))
{
if (m_vertex_layout_storage &&
m_vertex_layout_storage->info.buffer != m_vertex_layout_ring_info.heap->value)
{
vk::get_resource_manager()->dispose(m_vertex_layout_storage);
}
vk::clear_status_interrupt(vk::heap_changed);
}
const u64 alloc_size = rsx::method_registers.current_draw_clause.pass_count() * 144;
m_vertex_layout_dynamic_offset = m_vertex_layout_ring_info.alloc<16>(alloc_size);
}
// Update vertex fetch parameters
update_vertex_env(sub_index, upload_info);
ensure(m_vertex_layout_storage);
if (update_descriptors)
{
m_program->bind_uniform(persistent_buffer, vk::glsl::binding_set_index_vertex, m_vs_binding_table->vertex_buffers_location);
m_program->bind_uniform(volatile_buffer, vk::glsl::binding_set_index_vertex, m_vs_binding_table->vertex_buffers_location + 1);
m_program->bind_uniform(m_vertex_layout_storage->value, vk::glsl::binding_set_index_vertex, m_vs_binding_table->vertex_buffers_location + 2);
}
bool reload_state = (!m_current_draw.subdraw_id++);

View File

@ -513,7 +513,7 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar)
m_fragment_env_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment env buffer");
m_vertex_env_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "vertex env buffer");
m_fragment_texture_params_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment texture params buffer");
m_vertex_layout_ring_info.create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "vertex layout buffer", 0x10000, VK_TRUE);
m_vertex_layout_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "vertex layout buffer", 0x10000, VK_TRUE);
m_fragment_constants_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment constants buffer");
m_transform_constants_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M * 0x100000, "transform constants buffer");
m_index_buffer_ring_info.create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, "index buffer");
@ -556,6 +556,7 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar)
m_fragment_env_buffer_info = { m_fragment_env_ring_info.heap->value, 0, 16 };
m_fragment_texture_params_buffer_info = { m_fragment_texture_params_ring_info.heap->value, 0, 16 };
m_raster_env_buffer_info = { m_raster_env_ring_info.heap->value, 0, 128 };
m_vertex_layout_stream_info = { m_vertex_layout_ring_info.heap->value, 0, VK_WHOLE_SIZE };
const auto& limits = m_device->gpu().get_limits();
m_texbuffer_view_size = std::min(limits.maxTexelBufferElements, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000u);
@ -812,7 +813,6 @@ VKGSRender::~VKGSRender()
m_persistent_attribute_storage.reset();
m_volatile_attribute_storage.reset();
m_vertex_layout_storage.reset();
// Upscaler (references some global resources)
m_upscaler.reset();
@ -2095,6 +2095,7 @@ void VKGSRender::load_program_env()
}
m_program->bind_uniform(m_vertex_env_buffer_info, vk::glsl::binding_set_index_vertex, m_vs_binding_table->context_buffer_location);
m_program->bind_uniform(m_vertex_layout_stream_info, vk::glsl::binding_set_index_vertex, m_vs_binding_table->vertex_buffers_location + 2);
m_program->bind_uniform(m_fragment_env_buffer_info, vk::glsl::binding_set_index_fragment, m_fs_binding_table->context_buffer_location);
m_program->bind_uniform(m_fragment_texture_params_buffer_info, vk::glsl::binding_set_index_fragment, m_fs_binding_table->tex_param_location);
m_program->bind_uniform(m_raster_env_buffer_info, vk::glsl::binding_set_index_fragment, m_fs_binding_table->polygon_stipple_params_location);
@ -2189,43 +2190,31 @@ void VKGSRender::upload_transform_constants(const rsx::io_buffer& buffer)
void VKGSRender::update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_info)
{
// Per-draw push-constant payload uploaded with vkCmdPushConstants.
// Holds three 32-bit indices (12 bytes in total); the field order mirrors the
// push_constants_block declared in the generated vertex shader header, so the
// two declarations must be kept in sync.
struct rsx_prog_push_constants_block_t
{
// Transform constants dynamic offset, expressed in 16-byte units.
u32 xform_constants_offset;
// Vertex environment dynamic offset, expressed in 128-byte units.
u32 vs_context_offset;
// Index of this draw's entry in the vertex layout buffer: the layout
// dynamic offset in 144-byte units plus the sub-draw id.
u32 vs_attrib_layout_offset;
};
struct rsx_prog_vertex_layout_entry_t
{
u32 vertex_base_index;
u32 vertex_index_offset;
u32 draw_id;
u32 layout_ptr_offset;
u32 xform_constants_offset;
u32 vs_context_offset;
u32 reserved;
s32 attrib_data[1];
};
// Actual allocation must have been done previously
u32 base_offset;
const u32 offset32 = static_cast<u32>(m_vertex_layout_stream_info.offset);
const u32 range32 = static_cast<u32>(m_vertex_layout_stream_info.range);
if (!m_vertex_layout_storage || !m_vertex_layout_storage->in_range(offset32, range32, base_offset))
{
ensure(m_texbuffer_view_size >= m_vertex_layout_stream_info.range);
vk::get_resource_manager()->dispose(m_vertex_layout_storage);
const usz alloc_addr = m_vertex_layout_stream_info.offset;
const usz view_size = (alloc_addr + m_texbuffer_view_size) > m_vertex_layout_ring_info.size() ? m_vertex_layout_ring_info.size() - alloc_addr : m_texbuffer_view_size;
m_vertex_layout_storage = std::make_unique<vk::buffer_view>(*m_device, m_vertex_layout_ring_info.heap->value, VK_FORMAT_R32G32_UINT, alloc_addr, view_size);
base_offset = 0;
}
const u32 vertex_layout_offset = (id * 16) + (base_offset / 8);
const u32 constant_id_offset = static_cast<u32>(m_xform_constants_dynamic_offset) / 16u;
const u32 vertex_context_offset = static_cast<u32>(m_vertex_env_dynamic_offset) / 128u;
const u32 vertex_layout_offset = static_cast<u32>(m_vertex_layout_dynamic_offset) / 144u;
// Pack
rsx_prog_push_constants_block_t push_constants;
push_constants.vertex_base_index = vertex_info.vertex_index_base;
push_constants.vertex_index_offset = vertex_info.vertex_index_offset;
push_constants.draw_id = id;
push_constants.layout_ptr_offset = vertex_layout_offset;
push_constants.xform_constants_offset = constant_id_offset;
push_constants.vs_context_offset = vertex_context_offset;
push_constants.vs_attrib_layout_offset = vertex_layout_offset + id;
vkCmdPushConstants(
*m_current_command_buffer,
@ -2235,15 +2224,20 @@ void VKGSRender::update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_
sizeof(push_constants),
&push_constants);
const usz data_offset = (id * 128) + m_vertex_layout_stream_info.offset;
auto dst = m_vertex_layout_ring_info.map(data_offset, 128);
// Now actually fill in the data
auto buf = m_vertex_layout_ring_info.map(m_vertex_layout_dynamic_offset + (144u * id), 144);
auto dst = reinterpret_cast<rsx_prog_vertex_layout_entry_t*>(buf);
dst->vertex_base_index = vertex_info.vertex_index_base;
dst->vertex_index_offset = vertex_info.vertex_index_offset;
dst->draw_id = id;
dst->reserved = 0;
m_draw_processor.fill_vertex_layout_state(
m_vertex_layout,
current_vp_metadata,
vertex_info.first_vertex,
vertex_info.allocated_vertex_count,
static_cast<s32*>(dst),
dst->attrib_data,
vertex_info.persistent_window_offset,
vertex_info.volatile_window_offset);

View File

@ -79,7 +79,6 @@ private:
std::unique_ptr<vk::buffer_view> m_persistent_attribute_storage;
std::unique_ptr<vk::buffer_view> m_volatile_attribute_storage;
std::unique_ptr<vk::buffer_view> m_vertex_layout_storage;
VkDependencyInfoKHR m_async_compute_dependency_info {};
VkMemoryBarrier2KHR m_async_compute_memory_barrier {};
@ -153,6 +152,7 @@ private:
rsx::simple_array<u8> m_multidraw_parameters_buffer;
u64 m_xform_constants_dynamic_offset = 0; // We manage transform_constants dynamic offset manually to alleviate performance penalty of doing a hot-patch of constants.
u64 m_vertex_env_dynamic_offset = 0;
u64 m_vertex_layout_dynamic_offset = 0;
std::array<vk::frame_context_t, VK_MAX_ASYNC_FRAMES> frame_context_storage;
//Temp frame context to use if the real frame queue is overburdened. Only used for storage

View File

@ -118,21 +118,34 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS)
}
OS <<
"layout(push_constant) uniform VertexLayoutBuffer\n"
"layout(std430, set=0, binding=" << vk_prog->binding_table.vertex_buffers_location + 2 << ") readonly buffer VertexLayoutBuffer\n"
"{\n"
" vertex_layout_t vertex_layouts[];\n"
"};\n\n";
const vk::glsl::program_input layouts_input
{
.domain = glsl::glsl_vertex_program,
.type = vk::glsl::input_type_storage_buffer,
.set = vk::glsl::binding_set_index_vertex,
.location = vk_prog->binding_table.vertex_buffers_location + 2,
.name = "VertexLayoutBuffer"
};
inputs.push_back(layouts_input);
OS <<
"layout(push_constant) uniform push_constants_block\n"
"{\n"
" uint vertex_base_index;\n"
" uint vertex_index_offset;\n"
" uint draw_id;\n"
" uint layout_ptr_offset;\n"
" uint xform_constants_offset;\n"
" uint vs_context_offset;\n"
" uint vs_attrib_layout_offset;\n"
"};\n\n";
const vk::glsl::program_input push_constants
{
.domain = glsl::glsl_vertex_program,
.type = vk::glsl::input_type_push_constant,
.bound_data = vk::glsl::push_constant_ref{ .offset = 0, .size = 24 },
.bound_data = vk::glsl::push_constant_ref{ .offset = 0, .size = 12 },
.set = vk::glsl::binding_set_index_vertex,
.location = umax,
.name = "push_constants_block"
@ -145,8 +158,7 @@ void VKVertexDecompilerThread::insertInputs(std::stringstream& OS, const std::ve
static const char* input_streams[] =
{
"persistent_input_stream", // Data stream with persistent vertex data (cacheable)
"volatile_input_stream", // Data stream with per-draw data (registers and immediate draw data)
"vertex_layout_stream" // Data stream defining vertex data layout"
"volatile_input_stream" // Data stream with per-draw data (registers and immediate draw data)
};
u32 location = vk_prog->binding_table.vertex_buffers_location;