Remove global memory barrier and only sync images that would actually race

This commit is contained in:
goeiecool9999 2026-01-06 00:09:33 +01:00
parent 8c2c1e965f
commit 8432fc0293
3 changed files with 57 additions and 23 deletions

View File

@ -78,6 +78,8 @@ protected:
LatteTextureView* CreateView(Latte::E_DIM dim, Latte::E_GX2SURFFMT format, sint32 firstMip, sint32 mipCount, sint32 firstSlice, sint32 sliceCount) override;
public:
uint64 m_vkFlushIndex{}; // used to track read-write dependencies within the same renderpass
uint64 m_vkFlushIndex_read{};
uint64 m_vkFlushIndex_write{};

View File

@ -412,7 +412,7 @@ private:
}
// invalidation / flushing
uint64 currentFlushIndex{0};
uint64 currentFlushIndex{1};
bool requestFlush{ false }; // flush after every draw operation. The renderpass dependencies don't handle dependencies across multiple drawcalls inside a single renderpass
// draw sequence
@ -549,7 +549,7 @@ private:
void draw_handleSpecialState5();
// draw synchronization helper
void sync_performFlushBarrier();
void sync_performFlushBarrier(CachedFBOVk* fboVk);
bool sync_isInputTexturesSyncRequired();
void sync_RenderPassLoadTextures(CachedFBOVk* fboVk);
void sync_RenderPassStoreTextures(CachedFBOVk* fboVk);

View File

@ -1035,30 +1035,62 @@ VkDescriptorSetInfo* VulkanRenderer::draw_getOrCreateDescriptorSet(PipelineInfo*
return dsInfo;
}
void VulkanRenderer::sync_performFlushBarrier()
void VulkanRenderer::sync_performFlushBarrier(CachedFBOVk* fboVk)
{
VkMemoryBarrier memoryBarrier{};
memoryBarrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
memoryBarrier.srcAccessMask = 0;
memoryBarrier.dstAccessMask = 0;
size_t barrierCount = 0;
VkImageMemoryBarrier imageMemBarriers[8 + 2 + LATTE_NUM_MAX_TEX_UNITS]{};
auto addImgMemBarrierForTexView = [&](LatteTextureViewVk* view) {
VkImageSubresourceRange range = {
view->GetBaseImage()->GetImageAspect(),
(uint32_t)view->firstMip,
(uint32_t)view->numMip,
(uint32_t)view->firstSlice,
(uint32_t)view->numSlice};
auto baseTex = (LatteTextureVk*)view->baseTexture;
const auto idx = barrierCount++;
imageMemBarriers[idx].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
imageMemBarriers[idx].image = baseTex->GetImageObj()->m_image;
imageMemBarriers[idx].subresourceRange = range;
imageMemBarriers[idx].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
imageMemBarriers[idx].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
imageMemBarriers[idx].oldLayout = VK_IMAGE_LAYOUT_GENERAL;
imageMemBarriers[idx].newLayout = VK_IMAGE_LAYOUT_GENERAL;
imageMemBarriers[idx].srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
imageMemBarriers[idx].srcAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
imageMemBarriers[idx].dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
imageMemBarriers[idx].dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
imageMemBarriers[idx].dstAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
view->GetBaseImage()->m_vkFlushIndex = m_state.currentFlushIndex;
};
for (auto& i : fboVk->colorBuffer)
{
if (!i.texture)
continue;
addImgMemBarrierForTexView(static_cast<LatteTextureViewVk*>(i.texture));
}
if (auto i = fboVk->depthBuffer.texture)
{
addImgMemBarrierForTexView(static_cast<LatteTextureViewVk*>(i));
}
if (m_state.activeVertexDS)
m_state.activeVertexDS->ForEachView(addImgMemBarrierForTexView);
if (m_state.activeGeometryDS)
m_state.activeGeometryDS->ForEachView(addImgMemBarrierForTexView);
if (m_state.activePixelDS)
m_state.activePixelDS->ForEachView(addImgMemBarrierForTexView);
VkPipelineStageFlags stages = 0;
// src & dst
stages |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
stages |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
stages |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
// src
memoryBarrier.srcAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
memoryBarrier.srcAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
// dst
memoryBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
memoryBarrier.dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
memoryBarrier.dstAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
vkCmdPipelineBarrier(m_state.currentCommandBuffer, stages, stages, 0, 1, &memoryBarrier, 0, nullptr, 0, nullptr);
vkCmdPipelineBarrier(m_state.currentCommandBuffer, stages, stages, 0, 0, nullptr, 0, nullptr, barrierCount, imageMemBarriers);
performanceMonitor.vk.numDrawBarriersPerFrame.increment();
@ -1074,7 +1106,7 @@ bool VulkanRenderer::sync_isInputTexturesSyncRequired()
for (auto& tex : info->list_fboCandidates)
{
tex->m_vkFlushIndex_read = m_state.currentFlushIndex;
if (tex->m_vkFlushIndex_write == m_state.currentFlushIndex)
if (tex->m_vkFlushIndex < tex->m_vkFlushIndex_write)
required = true;
}
};
@ -1090,10 +1122,10 @@ void VulkanRenderer::sync_RenderPassLoadTextures(CachedFBOVk* fboVk)
auto checkImageSyncHazard = [&](LatteTextureVk* texVk, bool isWrite = false) {
//RAW / WAW
if (texVk->m_vkFlushIndex_write == m_state.currentFlushIndex)
if (texVk->m_vkFlushIndex < texVk->m_vkFlushIndex_write)
flushRequired = true;
//WAR
if (isWrite && texVk->m_vkFlushIndex_read == m_state.currentFlushIndex)
if (isWrite && texVk->m_vkFlushIndex < texVk->m_vkFlushIndex_read)
flushRequired = true;
};
@ -1112,7 +1144,7 @@ void VulkanRenderer::sync_RenderPassLoadTextures(CachedFBOVk* fboVk)
m_state.activePixelDS->ForEachView(checkViewSync);
if (flushRequired)
sync_performFlushBarrier();
sync_performFlushBarrier(fboVk);
for (auto& tex : fboVk->GetTextures())
{
@ -1225,7 +1257,7 @@ void VulkanRenderer::draw_setRenderPass()
sync_RenderPassLoadTextures(fboVk);
if (sync_isInputTexturesSyncRequired())
sync_performFlushBarrier();
sync_performFlushBarrier(fboVk);
if (m_featureControl.deviceExtensions.dynamic_rendering)
{